/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>


/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however, at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code; besides, things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 *
 * Notes for the off-by-1 workaround:
 * ----------------------------------
 *
 *    The SCSI READ_CAPACITY command returns the LBA of the last
 *    logical block, but on the x86 platform sd once treated this
 *    number as the disk's capacity. Since LBAs are zero-based,
 *    the last block was lost on x86.
 *
 *    That bug has now been removed. So that the present sd driver
 *    still works with disks that were labeled/partitioned by the
 *    previous sd, the following workarounds are added:
 *
 *    1) Locate the backup EFI label: if sd cannot find the backup
 *       label on the last block, it also searches the next-to-last
 *       block (as sketched below);
 *    2) Calculate geometry: see sd_convert_geometry(). If growing
 *       the capacity by 1 causes the disk's capacity to cross one
 *       of the limits in the CHS_values table, the geometry info
 *       changes. This raises an issue: when the primary VTOC label
 *       is destroyed, the format(1M) command can restore it from the
 *       backup VTOC labels, and format locates those backups using
 *       the geometry reported by sd. Changing the geometry would
 *       therefore prevent format from finding the backup VTOC
 *       labels. To eliminate this side effect, sd uses
 *       (capacity - 1) to calculate the geometry;
 *    3) 1TB disks: the VTOC uses a 32-bit signed int, so sd does not
 *       support a VTOC on a disk with more than DK_MAX_BLOCKS LBAs.
 *       However, a disk of exactly 1TB was treated as (1TB - 512B)
 *       in the past and could carry a VTOC. To cope with this, an
 *       exactly-1TB disk that has a Solaris fdisk partition is still
 *       allowed to work with sd.
 */
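/*
 * A minimal sketch (illustrative, not driver code) of the backup EFI
 * label search described in note (1) above; read_gpt_at() is a
 * hypothetical helper standing in for the driver's actual label I/O:
 *
 *	diskaddr_t lba = capacity - 1;
 *	if (read_gpt_at(un, lba) != 0) {
 *		(void) read_gpt_at(un, lba - 1);
 *	}
 *
 * The second read covers disks labeled by the old off-by-1 sd, whose
 * backup label sits one block before the true last block.
 */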
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * The driver name is unfortunately prefixed to some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
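/*
 * For example (illustrative; the mask values here are arbitrary), the
 * masks can be patched on a live system with mdb(1):
 *
 *	# mdb -kw
 *	> sd_component_mask/W 0xffffffff
 *	> sd_level_mask/W 0x7
 *
 * See sddef.h for the individual component and level bits.
 */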

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;


/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
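/*
 * A minimal sketch (hypothetical, not the driver's actual code) of how
 * a cached probe result is looked up: each parallel SCSI HBA gets one
 * entry, keyed by its dev_info node, with one cached result per target:
 *
 *	struct sd_scsi_probe_cache *cp;
 *
 *	mutex_enter(&sd_scsi_probe_cache_mutex);
 *	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
 *		if (cp->pdip == pdip)
 *			break;
 *	}
 *	mutex_exit(&sd_scsi_probe_cache_mutex);
 *
 * On a hit, cp->cache[target] then holds the saved scsi_probe() result.
 */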


/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
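/*
 * Example (illustrative): SD_TOUPPER('x') evaluates to 'X', while
 * SD_TOUPPER('7') yields '7' unchanged. Unlike toupper(3C), the macro
 * evaluates its argument more than once, so arguments with side effects
 * such as SD_TOUPPER(*p++) must be avoided.
 */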

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or perhaps
 * other adjustments, as defined by the flags at a future time).  device_id
 * is a string consisting of the concatenated vid (vendor), pid
 * (product/model) and revision strings as defined in the scsi_inquiry
 * structure.  Offsets of the parts of the string are as defined by the
 * sizes in the scsi_inquiry structure.  Device type is searched as far as
 * the device_id string is defined.  Flags defines which values are to be
 * set in the driver from the properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM" (see the comparison sketch following the table).
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device.
 *	 MAM3182FC, MAM3364FC and MAM3738FC do not appear to have ever
 *	 been made with an FC connection; the entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
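/*
 * A minimal sketch (hypothetical, not the driver's sd_blank_cmp()) of
 * the blank-collapsing comparison described above the table, where any
 * run of consecutive blanks compares equal to a single blank:
 *
 *	while (*id != '\0' && *inq != '\0') {
 *		if (*id == ' ' && *inq == ' ') {
 *			while (*id == ' ')
 *				id++;
 *			while (*inq == ' ')
 *				inq++;
 *		} else if (*id++ != *inq++) {
 *			return (0);
 *		}
 *	}
 *	return (*id == *inq);
 */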


/*
 * Return codes of sd_uselabel().
 */
#define	SD_LABEL_IS_VALID		0
#define	SD_LABEL_IS_INVALID		1

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)
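/*
 * Illustrative usage (hypothetical): behavior that only applies to one
 * interconnect type can be guarded with the macros above, e.g.:
 *
 *	if (SD_IS_PARALLEL_SCSI(un)) {
 *		... parallel-SCSI-only handling, such as probe caching ...
 *	}
 */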

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
#define	WD_NODE			7	/* the whole disk minor */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
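/*
 * Example (illustrative; SD_STATE_SUSPENDED is one of the unit states
 * defined in sddef.h): New_state() records the prior state, which
 * Restore_state() then swaps back:
 *
 *	New_state(un, SD_STATE_SUSPENDED);
 *	...
 *	Restore_state(un);
 */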

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
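/*
 * A minimal sketch (with hypothetical field names) of how packet setup
 * can pick the smallest CDB group that covers a request; for example, a
 * read at LBA 0x200000 exceeds the Group 0 limit of 0x1FFFFF above, so
 * the Group 1 (10-byte CDB) entry would be selected:
 *
 *	for (i = 0; i < sizeof (sd_cdbtab) / sizeof (sd_cdbtab[0]); i++) {
 *		if (lba <= sd_cdbtab[i].sc_maxlba &&
 *		    blockcount <= sd_cdbtab[i].sc_maxlen)
 *			break;
 *	}
 */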

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */


#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties
#define	sd_validate_geometry		ssd_validate_geometry

#if defined(_SUNOS_VTOC_16)
#define	sd_convert_geometry		ssd_convert_geometry
#endif

#define	sd_resync_geom_caches		ssd_resync_geom_caches
#define	sd_read_fdisk			ssd_read_fdisk
#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_swap_efi_gpt			ssd_swap_efi_gpt
#define	sd_swap_efi_gpe			ssd_swap_efi_gpe
#define	sd_validate_efi			ssd_validate_efi
#define	sd_use_efi			ssd_use_efi
#define	sd_uselabel			ssd_uselabel
#define	sd_build_default_label		ssd_build_default_label
#define	sd_has_max_chs_vals		ssd_has_max_chs_vals
#define	sd_inq_fill			ssd_inq_fill
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid_block		ssd_get_devid_block
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_minor_nodes		ssd_create_minor_nodes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_uscsi_ioctl			ssd_uscsi_ioctl
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_dkio_get_geometry		ssd_dkio_get_geometry
#define	sd_dkio_set_geometry		ssd_dkio_set_geometry
#define	sd_dkio_get_partition		ssd_dkio_get_partition
#define	sd_dkio_set_partition		ssd_dkio_set_partition
#define	sd_dkio_partition		ssd_dkio_partition
#define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
#define	sd_dkio_get_efi			ssd_dkio_get_efi
#define	sd_build_user_vtoc		ssd_build_user_vtoc
#define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
#define	sd_dkio_set_efi			ssd_dkio_set_efi
#define	sd_build_label_vtoc		ssd_build_label_vtoc
#define	sd_write_label			ssd_write_label
#define	sd_clear_vtoc			ssd_clear_vtoc
#define	sd_clear_efi			ssd_clear_efi
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_dkio_get_mboot		ssd_dkio_get_mboot
#define	sd_dkio_set_mboot		ssd_dkio_set_mboot
#define	sd_setup_default_geometry	ssd_setup_default_geometry
#define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);
static int  sd_validate_geometry(struct sd_lun *un, int path_flag);

#if defined(_SUNOS_VTOC_16)
static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
#endif

static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
	int path_flag);
static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
	int path_flag);
static void sd_get_physical_geometry(struct sd_lun *un,
	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
	int lbasize);
static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
static void sd_swap_efi_gpt(efi_gpt_t *);
static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
static int sd_validate_efi(efi_gpt_t *);
static int sd_use_efi(struct sd_lun *, int);
static void sd_build_default_label(struct sd_lun *un);

#if defined(_FIRMWARE_NEEDS_FDISK)
static int  sd_has_max_chs_vals(struct ipart *fdp);
#endif
static void sd_inq_fill(char *p, int l, char *s);


static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static daddr_t  sd_get_devid_block(struct sd_lun *un);
static int  sd_get_devid(struct sd_lun *un);
static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1
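/*
 * Example (illustrative): given the sd_cache_control() prototype below,
 * enabling the write cache while leaving the read cache setting
 * untouched would be requested as:
 *
 *	(void) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 */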

static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static dev_t sd_make_device(dev_info_t *devi);

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(struct sd_lun *un);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
#if defined(__i386) || defined(__amd64)
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
#endif /* defined(__i386) || defined(__amd64) */

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
	int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen);
static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1470 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1471 	uchar_t *bufaddr, uint_t buflen, char feature);
1472 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1473 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1474 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1475 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1476 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1477 	size_t buflen, daddr_t start_block, int path_flag);
1478 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1479 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1480 	path_flag)
1481 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1482 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1483 	path_flag)
1484 
1485 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1486 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1487 	uint16_t param_ptr, int path_flag);
1488 
1489 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1490 static void sd_free_rqs(struct sd_lun *un);
1491 
1492 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1493 	uchar_t *data, int len, int fmt);
1494 static void sd_panic_for_res_conflict(struct sd_lun *un);
1495 
1496 /*
1497  * Disk Ioctl Function Prototypes
1498  */
1499 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1500 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1501 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1502 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1503 	int geom_validated);
1504 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1505 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1506 	int geom_validated);
1507 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1508 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1509 	int geom_validated);
1510 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1511 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1512 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1513 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1514 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1515 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1516 static int sd_write_label(dev_t dev);
1517 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1518 static void sd_clear_vtoc(struct sd_lun *un);
1519 static void sd_clear_efi(struct sd_lun *un);
1520 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1521 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1522 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1523 static void sd_setup_default_geometry(struct sd_lun *un);
1524 #if defined(__i386) || defined(__amd64)
1525 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1526 #endif
1527 
1528 /*
1529  * Multi-host Ioctl Prototypes
1530  */
1531 static int sd_check_mhd(dev_t dev, int interval);
1532 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1533 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1534 static char *sd_sname(uchar_t status);
1535 static void sd_mhd_resvd_recover(void *arg);
1536 static void sd_resv_reclaim_thread();
1537 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1538 static int sd_reserve_release(dev_t dev, int cmd);
1539 static void sd_rmv_resv_reclaim_req(dev_t dev);
1540 static void sd_mhd_reset_notify_cb(caddr_t arg);
1541 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1542 	mhioc_inkeys_t *usrp, int flag);
1543 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1544 	mhioc_inresvs_t *usrp, int flag);
1545 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1546 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1547 static int sd_mhdioc_release(dev_t dev);
1548 static int sd_mhdioc_register_devid(dev_t dev);
1549 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1550 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1551 
1552 /*
1553  * SCSI removable prototypes
1554  */
1555 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1556 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1557 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1558 static int sr_pause_resume(dev_t dev, int mode);
1559 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1560 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1561 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1562 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1563 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1564 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1565 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1566 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1567 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1568 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1569 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1570 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1571 static int sr_eject(dev_t dev);
1572 static void sr_ejected(register struct sd_lun *un);
1573 static int sr_check_wp(dev_t dev);
1574 static int sd_check_media(dev_t dev, enum dkio_state state);
1575 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1576 static void sd_delayed_cv_broadcast(void *arg);
1577 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1578 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1579 
1580 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1581 
1582 /*
1583  * Function prototypes for the non-512 byte sector support (DVDRAM, MO, etc.).
1584  */
1585 static void sd_check_for_writable_cd(struct sd_lun *un);
1586 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1587 static void sd_wm_cache_destructor(void *wm, void *un);
1588 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1589 	daddr_t endb, ushort_t typ);
1590 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1591 	daddr_t endb);
1592 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1593 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1594 static void sd_read_modify_write_task(void * arg);
1595 static int
1596 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1597 	struct buf **bpp);
1598 
1599 
1600 /*
1601  * Function prototypes for failfast support.
1602  */
1603 static void sd_failfast_flushq(struct sd_lun *un);
1604 static int sd_failfast_flushq_callback(struct buf *bp);
1605 
1606 /*
1607  * Function prototypes to check for LSI devices
1608  */
1609 static void sd_is_lsi(struct sd_lun *un);
1610 
1611 /*
1612  * Function prototypes for x86 support
1613  */
1614 #if defined(__i386) || defined(__amd64)
1615 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1616 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1617 #endif
1618 
1619 /*
1620  * Constants for failfast support:
1621  *
1622  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1623  * failfast processing being performed.
1624  *
1625  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1626  * failfast processing on all bufs with B_FAILFAST set.
1627  */
1628 
1629 #define	SD_FAILFAST_INACTIVE		0
1630 #define	SD_FAILFAST_ACTIVE		1
1631 
1632 /*
1633  * Bitmask to control behavior of buf(9S) flushes when a transition to
1634  * the failfast state occurs. Optional bits include:
1635  *
1636  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1637  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1638  * be flushed.
1639  *
1640  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1641  * driver, in addition to the regular wait queue. This includes the xbuf
1642  * queues. When clear, only the driver's wait queue will be flushed.
1643  */
1644 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1645 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1646 
1647 /*
1648  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1649  * to flush all queues within the driver.
1650  */
1651 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1652 
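/*
 * Illustrative sketch (not part of the driver): a flush routine such as
 * sd_failfast_flushq() would be expected to consult these bits roughly
 * as follows.  The queue-walking details are elided and hypothetical;
 * only sd_failfast_flushctl, the two flag values, and the B_FAILFAST
 * buf(9S) flag come from this file.
 *
 *	(for each struct buf *bp on the wait queue) {
 *		if ((bp->b_flags & B_FAILFAST) ||
 *		    (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS)) {
 *			bioerror(bp, EIO);
 *			biodone(bp);
 *		}
 *	}
 *	(and, if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES),
 *	    walk the xbuf queues in the same way)
 */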
1653 
1654 /*
1655  * SD Testing Fault Injection
1656  */
1657 #ifdef SD_FAULT_INJECTION
1658 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1659 static void sd_faultinjection(struct scsi_pkt *pktp);
1660 static void sd_injection_log(char *buf, struct sd_lun *un);
1661 #endif
1662 
1663 /*
1664  * Device driver ops vector
1665  */
1666 static struct cb_ops sd_cb_ops = {
1667 	sdopen,			/* open */
1668 	sdclose,		/* close */
1669 	sdstrategy,		/* strategy */
1670 	nodev,			/* print */
1671 	sddump,			/* dump */
1672 	sdread,			/* read */
1673 	sdwrite,		/* write */
1674 	sdioctl,		/* ioctl */
1675 	nodev,			/* devmap */
1676 	nodev,			/* mmap */
1677 	nodev,			/* segmap */
1678 	nochpoll,		/* poll */
1679 	sd_prop_op,		/* cb_prop_op */
1680 	0,			/* streamtab  */
1681 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1682 	CB_REV,			/* cb_rev */
1683 	sdaread, 		/* async I/O read entry point */
1684 	sdawrite		/* async I/O write entry point */
1685 };
1686 
1687 static struct dev_ops sd_ops = {
1688 	DEVO_REV,		/* devo_rev, */
1689 	0,			/* refcnt  */
1690 	sdinfo,			/* info */
1691 	nulldev,		/* identify */
1692 	sdprobe,		/* probe */
1693 	sdattach,		/* attach */
1694 	sddetach,		/* detach */
1695 	nodev,			/* reset */
1696 	&sd_cb_ops,		/* driver operations */
1697 	NULL,			/* bus operations */
1698 	sdpower			/* power */
1699 };
1700 
1701 
1702 /*
1703  * This is the loadable module wrapper.
1704  */
1705 #include <sys/modctl.h>
1706 
1707 static struct modldrv modldrv = {
1708 	&mod_driverops,		/* Type of module. This one is a driver */
1709 	SD_MODULE_NAME,		/* Module name. */
1710 	&sd_ops			/* driver ops */
1711 };
1712 
1713 
1714 static struct modlinkage modlinkage = {
1715 	MODREV_1,
1716 	&modldrv,
1717 	NULL
1718 };
1719 
1720 
1721 static struct scsi_asq_key_strings sd_additional_codes[] = {
1722 	0x81, 0, "Logical Unit is Reserved",
1723 	0x85, 0, "Audio Address Not Valid",
1724 	0xb6, 0, "Media Load Mechanism Failed",
1725 	0xB9, 0, "Audio Play Operation Aborted",
1726 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1727 	0x53, 2, "Medium removal prevented",
1728 	0x6f, 0, "Authentication failed during key exchange",
1729 	0x6f, 1, "Key not present",
1730 	0x6f, 2, "Key not established",
1731 	0x6f, 3, "Read without proper authentication",
1732 	0x6f, 4, "Mismatched region to this logical unit",
1733 	0x6f, 5, "Region reset count error",
1734 	0xffff, 0x0, NULL
1735 };
1736 
1737 
1738 /*
1739  * Struct for passing printing information for sense data messages
1740  */
1741 struct sd_sense_info {
1742 	int	ssi_severity;
1743 	int	ssi_pfa_flag;
1744 };
1745 
1746 /*
1747  * Table of function pointers for iostart-side routines. Separate "chains"
1748  * of layered function calls are formed by placing the function pointers
1749  * sequentially in the desired order. Functions are called according to an
1750  * incrementing table index ordering. The last function in each chain must
1751  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1752  * in the sd_iodone_chain[] array.
1753  *
1754  * Note: It may seem more natural to organize both the iostart and iodone
1755  * functions together, into an array of structures (or some similar
1756  * organization) with a common index, rather than two separate arrays which
1757  * must be maintained in synchronization. The purpose of this division is
1758  * to achieve improved performance: individual arrays allow for more
1759  * effective cache line utilization on certain platforms.
1760  */
1761 
1762 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1763 
1764 
1765 static sd_chain_t sd_iostart_chain[] = {
1766 
1767 	/* Chain for buf IO for disk drive targets (PM enabled) */
1768 	sd_mapblockaddr_iostart,	/* Index: 0 */
1769 	sd_pm_iostart,			/* Index: 1 */
1770 	sd_core_iostart,		/* Index: 2 */
1771 
1772 	/* Chain for buf IO for disk drive targets (PM disabled) */
1773 	sd_mapblockaddr_iostart,	/* Index: 3 */
1774 	sd_core_iostart,		/* Index: 4 */
1775 
1776 	/* Chain for buf IO for removable-media targets (PM enabled) */
1777 	sd_mapblockaddr_iostart,	/* Index: 5 */
1778 	sd_mapblocksize_iostart,	/* Index: 6 */
1779 	sd_pm_iostart,			/* Index: 7 */
1780 	sd_core_iostart,		/* Index: 8 */
1781 
1782 	/* Chain for buf IO for removable-media targets (PM disabled) */
1783 	sd_mapblockaddr_iostart,	/* Index: 9 */
1784 	sd_mapblocksize_iostart,	/* Index: 10 */
1785 	sd_core_iostart,		/* Index: 11 */
1786 
1787 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1788 	sd_mapblockaddr_iostart,	/* Index: 12 */
1789 	sd_checksum_iostart,		/* Index: 13 */
1790 	sd_pm_iostart,			/* Index: 14 */
1791 	sd_core_iostart,		/* Index: 15 */
1792 
1793 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1794 	sd_mapblockaddr_iostart,	/* Index: 16 */
1795 	sd_checksum_iostart,		/* Index: 17 */
1796 	sd_core_iostart,		/* Index: 18 */
1797 
1798 	/* Chain for USCSI commands (all targets) */
1799 	sd_pm_iostart,			/* Index: 19 */
1800 	sd_core_iostart,		/* Index: 20 */
1801 
1802 	/* Chain for checksumming USCSI commands (all targets) */
1803 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1804 	sd_pm_iostart,			/* Index: 22 */
1805 	sd_core_iostart,		/* Index: 23 */
1806 
1807 	/* Chain for "direct" USCSI commands (all targets) */
1808 	sd_core_iostart,		/* Index: 24 */
1809 
1810 	/* Chain for "direct priority" USCSI commands (all targets) */
1811 	sd_core_iostart,		/* Index: 25 */
1812 };
1813 
1814 /*
1815  * Macros to locate the first function of each iostart chain in the
1816  * sd_iostart_chain[] array. These are located by the index in the array.
1817  */
1818 #define	SD_CHAIN_DISK_IOSTART			0
1819 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1820 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1821 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1822 #define	SD_CHAIN_CHKSUM_IOSTART			12
1823 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1824 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1825 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1826 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1827 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1828 
1829 
1830 /*
1831  * Table of function pointers for the iodone-side routines for the driver-
1832  * internal layering mechanism.  The calling sequence for iodone routines
1833  * uses a decrementing table index, so the last routine called in a chain
1834  * must be at the lowest array index location for that chain.  The last
1835  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1836  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1837  * of the functions in an iodone side chain must correspond to the ordering
1838  * of the iostart routines for that chain.  Note that there is no iodone
1839  * side routine that corresponds to sd_core_iostart(), so there is no
1840  * entry in the table for this.
1841  */
1842 
1843 static sd_chain_t sd_iodone_chain[] = {
1844 
1845 	/* Chain for buf IO for disk drive targets (PM enabled) */
1846 	sd_buf_iodone,			/* Index: 0 */
1847 	sd_mapblockaddr_iodone,		/* Index: 1 */
1848 	sd_pm_iodone,			/* Index: 2 */
1849 
1850 	/* Chain for buf IO for disk drive targets (PM disabled) */
1851 	sd_buf_iodone,			/* Index: 3 */
1852 	sd_mapblockaddr_iodone,		/* Index: 4 */
1853 
1854 	/* Chain for buf IO for removable-media targets (PM enabled) */
1855 	sd_buf_iodone,			/* Index: 5 */
1856 	sd_mapblockaddr_iodone,		/* Index: 6 */
1857 	sd_mapblocksize_iodone,		/* Index: 7 */
1858 	sd_pm_iodone,			/* Index: 8 */
1859 
1860 	/* Chain for buf IO for removable-media targets (PM disabled) */
1861 	sd_buf_iodone,			/* Index: 9 */
1862 	sd_mapblockaddr_iodone,		/* Index: 10 */
1863 	sd_mapblocksize_iodone,		/* Index: 11 */
1864 
1865 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1866 	sd_buf_iodone,			/* Index: 12 */
1867 	sd_mapblockaddr_iodone,		/* Index: 13 */
1868 	sd_checksum_iodone,		/* Index: 14 */
1869 	sd_pm_iodone,			/* Index: 15 */
1870 
1871 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1872 	sd_buf_iodone,			/* Index: 16 */
1873 	sd_mapblockaddr_iodone,		/* Index: 17 */
1874 	sd_checksum_iodone,		/* Index: 18 */
1875 
1876 	/* Chain for USCSI commands (non-checksum targets) */
1877 	sd_uscsi_iodone,		/* Index: 19 */
1878 	sd_pm_iodone,			/* Index: 20 */
1879 
1880 	/* Chain for USCSI commands (checksum targets) */
1881 	sd_uscsi_iodone,		/* Index: 21 */
1882 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1883 	sd_pm_iodone,			/* Index: 23 */
1884 
1885 	/* Chain for "direct" USCSI commands (all targets) */
1886 	sd_uscsi_iodone,		/* Index: 24 */
1887 
1888 	/* Chain for "direct priority" USCSI commands (all targets) */
1889 	sd_uscsi_iodone,		/* Index: 25 */
1890 };
1891 
1892 
1893 /*
1894  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1895  * each iodone-side chain. These are located by the array index, but as the
1896  * iodone side functions are called in a decrementing-index order, the
1897  * highest index number in each chain must be specified (as these correspond
1898  * to the first function in the iodone chain that will be called by the core
1899  * at IO completion time).
1900  */
1901 
1902 #define	SD_CHAIN_DISK_IODONE			2
1903 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1904 #define	SD_CHAIN_RMMEDIA_IODONE			8
1905 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1906 #define	SD_CHAIN_CHKSUM_IODONE			15
1907 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1908 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1909 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1910 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1911 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1912 
1913 
1914 
1915 
1916 /*
1917  * Array to map a layering chain index to the appropriate initpkt routine.
1918  * The redundant entries are present so that the index used for accessing
1919  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1920  * with this table as well.
1921  */
1922 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1923 
1924 static sd_initpkt_t	sd_initpkt_map[] = {
1925 
1926 	/* Chain for buf IO for disk drive targets (PM enabled) */
1927 	sd_initpkt_for_buf,		/* Index: 0 */
1928 	sd_initpkt_for_buf,		/* Index: 1 */
1929 	sd_initpkt_for_buf,		/* Index: 2 */
1930 
1931 	/* Chain for buf IO for disk drive targets (PM disabled) */
1932 	sd_initpkt_for_buf,		/* Index: 3 */
1933 	sd_initpkt_for_buf,		/* Index: 4 */
1934 
1935 	/* Chain for buf IO for removable-media targets (PM enabled) */
1936 	sd_initpkt_for_buf,		/* Index: 5 */
1937 	sd_initpkt_for_buf,		/* Index: 6 */
1938 	sd_initpkt_for_buf,		/* Index: 7 */
1939 	sd_initpkt_for_buf,		/* Index: 8 */
1940 
1941 	/* Chain for buf IO for removable-media targets (PM disabled) */
1942 	sd_initpkt_for_buf,		/* Index: 9 */
1943 	sd_initpkt_for_buf,		/* Index: 10 */
1944 	sd_initpkt_for_buf,		/* Index: 11 */
1945 
1946 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1947 	sd_initpkt_for_buf,		/* Index: 12 */
1948 	sd_initpkt_for_buf,		/* Index: 13 */
1949 	sd_initpkt_for_buf,		/* Index: 14 */
1950 	sd_initpkt_for_buf,		/* Index: 15 */
1951 
1952 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1953 	sd_initpkt_for_buf,		/* Index: 16 */
1954 	sd_initpkt_for_buf,		/* Index: 17 */
1955 	sd_initpkt_for_buf,		/* Index: 18 */
1956 
1957 	/* Chain for USCSI commands (non-checksum targets) */
1958 	sd_initpkt_for_uscsi,		/* Index: 19 */
1959 	sd_initpkt_for_uscsi,		/* Index: 20 */
1960 
1961 	/* Chain for USCSI commands (checksum targets) */
1962 	sd_initpkt_for_uscsi,		/* Index: 21 */
1963 	sd_initpkt_for_uscsi,		/* Index: 22 */
1964 	sd_initpkt_for_uscsi,		/* Index: 23 */
1965 
1966 	/* Chain for "direct" USCSI commands (all targets) */
1967 	sd_initpkt_for_uscsi,		/* Index: 24 */
1968 
1969 	/* Chain for "direct priority" USCSI commands (all targets) */
1970 	sd_initpkt_for_uscsi,		/* Index: 25 */
1971 
1972 };
1973 
1974 
1975 /*
1976  * Array to map a layering chain index to the appropriate destroypkt routine.
1977  * The redundant entries are present so that the index used for accessing
1978  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1979  * with this table as well.
1980  */
1981 typedef void (*sd_destroypkt_t)(struct buf *);
1982 
1983 static sd_destroypkt_t	sd_destroypkt_map[] = {
1984 
1985 	/* Chain for buf IO for disk drive targets (PM enabled) */
1986 	sd_destroypkt_for_buf,		/* Index: 0 */
1987 	sd_destroypkt_for_buf,		/* Index: 1 */
1988 	sd_destroypkt_for_buf,		/* Index: 2 */
1989 
1990 	/* Chain for buf IO for disk drive targets (PM disabled) */
1991 	sd_destroypkt_for_buf,		/* Index: 3 */
1992 	sd_destroypkt_for_buf,		/* Index: 4 */
1993 
1994 	/* Chain for buf IO for removable-media targets (PM enabled) */
1995 	sd_destroypkt_for_buf,		/* Index: 5 */
1996 	sd_destroypkt_for_buf,		/* Index: 6 */
1997 	sd_destroypkt_for_buf,		/* Index: 7 */
1998 	sd_destroypkt_for_buf,		/* Index: 8 */
1999 
2000 	/* Chain for buf IO for removable-media targets (PM disabled) */
2001 	sd_destroypkt_for_buf,		/* Index: 9 */
2002 	sd_destroypkt_for_buf,		/* Index: 10 */
2003 	sd_destroypkt_for_buf,		/* Index: 11 */
2004 
2005 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2006 	sd_destroypkt_for_buf,		/* Index: 12 */
2007 	sd_destroypkt_for_buf,		/* Index: 13 */
2008 	sd_destroypkt_for_buf,		/* Index: 14 */
2009 	sd_destroypkt_for_buf,		/* Index: 15 */
2010 
2011 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2012 	sd_destroypkt_for_buf,		/* Index: 16 */
2013 	sd_destroypkt_for_buf,		/* Index: 17 */
2014 	sd_destroypkt_for_buf,		/* Index: 18 */
2015 
2016 	/* Chain for USCSI commands (non-checksum targets) */
2017 	sd_destroypkt_for_uscsi,	/* Index: 19 */
2018 	sd_destroypkt_for_uscsi,	/* Index: 20 */
2019 
2020 	/* Chain for USCSI commands (checksum targets) */
2021 	sd_destroypkt_for_uscsi,	/* Index: 21 */
2022 	sd_destroypkt_for_uscsi,	/* Index: 22 */
2023 	sd_destroypkt_for_uscsi,	/* Index: 23 */
2024 
2025 	/* Chain for "direct" USCSI commands (all targets) */
2026 	sd_destroypkt_for_uscsi,	/* Index: 24 */
2027 
2028 	/* Chain for "direct priority" USCSI commands (all targets) */
2029 	sd_destroypkt_for_uscsi,	/* Index: 25 */
2030 
2031 };
2032 
2033 
2034 
2035 /*
2036  * Array to map a layering chain index to the appropriate chain "type".
2037  * The chain type indicates a specific property/usage of the chain.
2038  * The redundant entries are present so that the index used for accessing
2039  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2040  * with this table as well.
2041  */
2042 
2043 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2044 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2045 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2046 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2047 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2048 						/* (for error recovery) */
2049 
2050 static int sd_chain_type_map[] = {
2051 
2052 	/* Chain for buf IO for disk drive targets (PM enabled) */
2053 	SD_CHAIN_BUFIO,			/* Index: 0 */
2054 	SD_CHAIN_BUFIO,			/* Index: 1 */
2055 	SD_CHAIN_BUFIO,			/* Index: 2 */
2056 
2057 	/* Chain for buf IO for disk drive targets (PM disabled) */
2058 	SD_CHAIN_BUFIO,			/* Index: 3 */
2059 	SD_CHAIN_BUFIO,			/* Index: 4 */
2060 
2061 	/* Chain for buf IO for removable-media targets (PM enabled) */
2062 	SD_CHAIN_BUFIO,			/* Index: 5 */
2063 	SD_CHAIN_BUFIO,			/* Index: 6 */
2064 	SD_CHAIN_BUFIO,			/* Index: 7 */
2065 	SD_CHAIN_BUFIO,			/* Index: 8 */
2066 
2067 	/* Chain for buf IO for removable-media targets (PM disabled) */
2068 	SD_CHAIN_BUFIO,			/* Index: 9 */
2069 	SD_CHAIN_BUFIO,			/* Index: 10 */
2070 	SD_CHAIN_BUFIO,			/* Index: 11 */
2071 
2072 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2073 	SD_CHAIN_BUFIO,			/* Index: 12 */
2074 	SD_CHAIN_BUFIO,			/* Index: 13 */
2075 	SD_CHAIN_BUFIO,			/* Index: 14 */
2076 	SD_CHAIN_BUFIO,			/* Index: 15 */
2077 
2078 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2079 	SD_CHAIN_BUFIO,			/* Index: 16 */
2080 	SD_CHAIN_BUFIO,			/* Index: 17 */
2081 	SD_CHAIN_BUFIO,			/* Index: 18 */
2082 
2083 	/* Chain for USCSI commands (non-checksum targets) */
2084 	SD_CHAIN_USCSI,			/* Index: 19 */
2085 	SD_CHAIN_USCSI,			/* Index: 20 */
2086 
2087 	/* Chain for USCSI commands (checksum targets) */
2088 	SD_CHAIN_USCSI,			/* Index: 21 */
2089 	SD_CHAIN_USCSI,			/* Index: 22 */
2090 	SD_CHAIN_USCSI,			/* Index: 23 */
2091 
2092 	/* Chain for "direct" USCSI commands (all targets) */
2093 	SD_CHAIN_DIRECT,		/* Index: 24 */
2094 
2095 	/* Chain for "direct priority" USCSI commands (all targets) */
2096 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2097 };
2098 
2099 
2100 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2101 #define	SD_IS_BUFIO(xp)			\
2102 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2103 
2104 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2105 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2106 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2107 
2108 
2109 
2110 /*
2111  * Struct, array, and macros to map a specific chain to the appropriate
2112  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2113  *
2114  * The sd_chain_index_map[] array is used at attach time to set the various
2115  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2116  * chain to be used with the instance. This allows different instances to use
2117  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2118  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2119  * values at sd_xbuf init time, this allows (1) layering chains to be changed
2120  * dynamically and without the use of locking; and (2) a layer to update the
2121  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2122  * to allow for deferred processing of an IO within the same chain from a
2123  * different execution context.
2124  */
2125 
2126 struct sd_chain_index {
2127 	int	sci_iostart_index;
2128 	int	sci_iodone_index;
2129 };
2130 
2131 static struct sd_chain_index	sd_chain_index_map[] = {
2132 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2133 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2134 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2135 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2136 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2137 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2138 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2139 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2140 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2141 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2142 };
2143 
2144 
2145 /*
2146  * The following are indexes into the sd_chain_index_map[] array.
2147  */
2148 
2149 /* un->un_buf_chain_type must be set to one of these */
2150 #define	SD_CHAIN_INFO_DISK		0
2151 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2152 #define	SD_CHAIN_INFO_RMMEDIA		2
2153 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2154 #define	SD_CHAIN_INFO_CHKSUM		4
2155 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2156 
2157 /* un->un_uscsi_chain_type must be set to one of these */
2158 #define	SD_CHAIN_INFO_USCSI_CMD		6
2159 /* USCSI with PM disabled is the same as DIRECT */
2160 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2161 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2162 
2163 /* un->un_direct_chain_type must be set to one of these */
2164 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2165 
2166 /* un->un_priority_chain_type must be set to one of these */
2167 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2168 
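/*
 * Illustrative sketch (not part of the driver): an attach-time binding
 * of an instance to a chain pair would look roughly as follows; the
 * local variable names here are hypothetical, while un_buf_chain_type,
 * SD_CHAIN_INFO_RMMEDIA, and sd_chain_index_map[] come from this file.
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
 *	...
 *	int start_idx =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iostart_index;
 *	int done_idx =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iodone_index;
 *
 * These two indexes are what get stored in an xbuf's xb_chain_iostart
 * and xb_chain_iodone members when an IO is initialized.
 */
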
2169 /* size for devid inquiries */
2170 #define	MAX_INQUIRY_SIZE		0xF0
2171 
2172 /*
2173  * Macros used by functions to pass a given buf(9S) struct along to the
2174  * next function in the layering chain for further processing.
2175  *
2176  * In the following macros, passing more than three arguments to the called
2177  * routines causes the optimizer for the SPARC compiler to stop doing tail
2178  * call elimination, which results in significant performance degradation.
2179  */
2180 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2181 	((*(sd_iostart_chain[index]))(index, un, bp))
2182 
2183 #define	SD_BEGIN_IODONE(index, un, bp)	\
2184 	((*(sd_iodone_chain[index]))(index, un, bp))
2185 
2186 #define	SD_NEXT_IOSTART(index, un, bp)				\
2187 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2188 
2189 #define	SD_NEXT_IODONE(index, un, bp)				\
2190 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2191 
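/*
 * Illustrative sketch (not part of the driver): a middle layer in an
 * iostart chain performs its transformation on the buf and then hands
 * it to the next layer with SD_NEXT_IOSTART(); the matching iodone
 * layer undoes its work and passes the buf back up with
 * SD_NEXT_IODONE().  The function name below is hypothetical.
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(transform bp as needed for this layer)
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 * An IO is launched into a chain with SD_BEGIN_IOSTART() using one of
 * the SD_CHAIN_*_IOSTART indexes defined above, and completion
 * processing is started with SD_BEGIN_IODONE() using the corresponding
 * SD_CHAIN_*_IODONE index.
 */
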
2192 /*
2193  *    Function: _init
2194  *
2195  * Description: This is the driver _init(9E) entry point.
2196  *
2197  * Return Code: Returns the value from mod_install(9F) or
2198  *		ddi_soft_state_init(9F) as appropriate.
2199  *
2200  *     Context: Called when driver module loaded.
2201  */
2202 
2203 int
2204 _init(void)
2205 {
2206 	int	err;
2207 
2208 	/* establish driver name from module name */
2209 	sd_label = mod_modname(&modlinkage);
2210 
2211 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2212 		SD_MAXUNIT);
2213 
2214 	if (err != 0) {
2215 		return (err);
2216 	}
2217 
2218 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2219 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2220 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2221 
2222 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2223 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2224 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2225 
2226 	/*
2227 	 * it's ok to init here even for fibre devices
2228 	 */
2229 	sd_scsi_probe_cache_init();
2230 
2231 	/*
2232 	 * Creating taskq before mod_install ensures that all callers (threads)
2233 	 * that enter the module after a successful mod_install encounter
2234 	 * a valid taskq.
2235 	 */
2236 	sd_taskq_create();
2237 
2238 	err = mod_install(&modlinkage);
2239 	if (err != 0) {
2240 		/* delete taskq if install fails */
2241 		sd_taskq_delete();
2242 
2243 		mutex_destroy(&sd_detach_mutex);
2244 		mutex_destroy(&sd_log_mutex);
2245 		mutex_destroy(&sd_label_mutex);
2246 
2247 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2248 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2249 		cv_destroy(&sd_tr.srq_inprocess_cv);
2250 
2251 		sd_scsi_probe_cache_fini();
2252 
2253 		ddi_soft_state_fini(&sd_state);
2254 		return (err);
2255 	}
2256 
2257 	return (err);
2258 }
2259 
2260 
2261 /*
2262  *    Function: _fini
2263  *
2264  * Description: This is the driver _fini(9E) entry point.
2265  *
2266  * Return Code: Returns the value from mod_remove(9F)
2267  *
2268  *     Context: Called when driver module is unloaded.
2269  */
2270 
2271 int
2272 _fini(void)
2273 {
2274 	int err;
2275 
2276 	if ((err = mod_remove(&modlinkage)) != 0) {
2277 		return (err);
2278 	}
2279 
2280 	sd_taskq_delete();
2281 
2282 	mutex_destroy(&sd_detach_mutex);
2283 	mutex_destroy(&sd_log_mutex);
2284 	mutex_destroy(&sd_label_mutex);
2285 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2286 
2287 	sd_scsi_probe_cache_fini();
2288 
2289 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2290 	cv_destroy(&sd_tr.srq_inprocess_cv);
2291 
2292 	ddi_soft_state_fini(&sd_state);
2293 
2294 	return (err);
2295 }
2296 
2297 
2298 /*
2299  *    Function: _info
2300  *
2301  * Description: This is the driver _info(9E) entry point.
2302  *
2303  *   Arguments: modinfop - pointer to the driver modinfo structure
2304  *
2305  * Return Code: Returns the value from mod_info(9F).
2306  *
2307  *     Context: Kernel thread context
2308  */
2309 
2310 int
2311 _info(struct modinfo *modinfop)
2312 {
2313 	return (mod_info(&modlinkage, modinfop));
2314 }
2315 
2316 
2317 /*
2318  * The following routines implement the driver message logging facility.
2319  * They provide component- and level-based debug output filtering.
2320  * Output may also be restricted to messages for a single instance by
2321  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2322  * to NULL, then messages for all instances are printed.
2323  *
2324  * These routines have been cloned from each other due to the language
2325  * constraints of macros and variable argument list processing.
2326  */
2327 
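/*
 * Illustrative usage (hedged): callers normally invoke the SD_ERROR,
 * SD_INFO, and SD_TRACE macros rather than calling these routines
 * directly, e.g.
 *
 *	SD_TRACE(SD_LOG_COMMON, un, "sd_example: entry\n");
 *	SD_ERROR(SD_LOG_COMMON, un,
 *	    "sd_example: command failed, rval=%d\n", rval);
 *
 * A message is emitted only when the caller's component bit is set in
 * sd_component_mask and the corresponding level bit (SD_LOGMASK_ERROR,
 * SD_LOGMASK_INFO, or SD_LOGMASK_TRACE) is set in sd_level_mask; the
 * sd_example and rval names above are hypothetical.
 */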
2328 
2329 /*
2330  *    Function: sd_log_err
2331  *
2332  * Description: This routine is called by the SD_ERROR macro for debug
2333  *		logging of error conditions.
2334  *
2335  *   Arguments: comp - driver component being logged
2336  *		dev  - pointer to driver info structure
2337  *		fmt  - error string and format to be logged
2338  */
2339 
2340 static void
2341 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2342 {
2343 	va_list		ap;
2344 	dev_info_t	*dev;
2345 
2346 	ASSERT(un != NULL);
2347 	dev = SD_DEVINFO(un);
2348 	ASSERT(dev != NULL);
2349 
2350 	/*
2351 	 * Filter messages based on the global component and level masks.
2352 	 * Also print if un matches the value of sd_debug_un, or if
2353 	 * sd_debug_un is set to NULL.
2354 	 */
2355 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2356 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2357 		mutex_enter(&sd_log_mutex);
2358 		va_start(ap, fmt);
2359 		(void) vsprintf(sd_log_buf, fmt, ap);
2360 		va_end(ap);
2361 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2362 		mutex_exit(&sd_log_mutex);
2363 	}
2364 #ifdef SD_FAULT_INJECTION
2365 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2366 	if (un->sd_injection_mask & comp) {
2367 		mutex_enter(&sd_log_mutex);
2368 		va_start(ap, fmt);
2369 		(void) vsprintf(sd_log_buf, fmt, ap);
2370 		va_end(ap);
2371 		sd_injection_log(sd_log_buf, un);
2372 		mutex_exit(&sd_log_mutex);
2373 	}
2374 #endif
2375 }
2376 
2377 
2378 /*
2379  *    Function: sd_log_info
2380  *
2381  * Description: This routine is called by the SD_INFO macro for debug
2382  *		logging of general purpose informational conditions.
2383  *
2384  *   Arguments: comp - driver component being logged
2385  *		dev  - pointer to driver info structure
2386  *		fmt  - info string and format to be logged
2387  */
2388 
2389 static void
2390 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2391 {
2392 	va_list		ap;
2393 	dev_info_t	*dev;
2394 
2395 	ASSERT(un != NULL);
2396 	dev = SD_DEVINFO(un);
2397 	ASSERT(dev != NULL);
2398 
2399 	/*
2400 	 * Filter messages based on the global component and level masks.
2401 	 * Also print if un matches the value of sd_debug_un, or if
2402 	 * sd_debug_un is set to NULL.
2403 	 */
2404 	if ((sd_component_mask & component) &&
2405 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2406 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2407 		mutex_enter(&sd_log_mutex);
2408 		va_start(ap, fmt);
2409 		(void) vsprintf(sd_log_buf, fmt, ap);
2410 		va_end(ap);
2411 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2412 		mutex_exit(&sd_log_mutex);
2413 	}
2414 #ifdef SD_FAULT_INJECTION
2415 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2416 	if (un->sd_injection_mask & component) {
2417 		mutex_enter(&sd_log_mutex);
2418 		va_start(ap, fmt);
2419 		(void) vsprintf(sd_log_buf, fmt, ap);
2420 		va_end(ap);
2421 		sd_injection_log(sd_log_buf, un);
2422 		mutex_exit(&sd_log_mutex);
2423 	}
2424 #endif
2425 }
2426 
2427 
2428 /*
2429  *    Function: sd_log_trace
2430  *
2431  * Description: This routine is called by the SD_TRACE macro for debug
2432  *		logging of trace conditions (i.e. function entry/exit).
2433  *
2434  *   Arguments: comp - driver component being logged
2435  *		dev  - pointer to driver info structure
2436  *		fmt  - trace string and format to be logged
2437  */
2438 
2439 static void
2440 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2441 {
2442 	va_list		ap;
2443 	dev_info_t	*dev;
2444 
2445 	ASSERT(un != NULL);
2446 	dev = SD_DEVINFO(un);
2447 	ASSERT(dev != NULL);
2448 
2449 	/*
2450 	 * Filter messages based on the global component and level masks.
2451 	 * Also print if un matches the value of sd_debug_un, or if
2452 	 * sd_debug_un is set to NULL.
2453 	 */
2454 	if ((sd_component_mask & component) &&
2455 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2456 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2457 		mutex_enter(&sd_log_mutex);
2458 		va_start(ap, fmt);
2459 		(void) vsprintf(sd_log_buf, fmt, ap);
2460 		va_end(ap);
2461 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2462 		mutex_exit(&sd_log_mutex);
2463 	}
2464 #ifdef SD_FAULT_INJECTION
2465 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2466 	if (un->sd_injection_mask & component) {
2467 		mutex_enter(&sd_log_mutex);
2468 		va_start(ap, fmt);
2469 		(void) vsprintf(sd_log_buf, fmt, ap);
2470 		va_end(ap);
2471 		sd_injection_log(sd_log_buf, un);
2472 		mutex_exit(&sd_log_mutex);
2473 	}
2474 #endif
2475 }
2476 
2477 
2478 /*
2479  *    Function: sdprobe
2480  *
2481  * Description: This is the driver probe(9e) entry point function.
2482  *
2483  *   Arguments: devi - opaque device info handle
2484  *
2485  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2486  *              DDI_PROBE_FAILURE: If the probe failed.
2487  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2488  *				   but may be present in the future.
2489  */
2490 
2491 static int
2492 sdprobe(dev_info_t *devi)
2493 {
2494 	struct scsi_device	*devp;
2495 	int			rval;
2496 	int			instance;
2497 
2498 	/*
2499 	 * if it wasn't for pln, sdprobe could actually be nulldev
2500 	 * in the "__fibre" case.
2501 	 */
2502 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2503 		return (DDI_PROBE_DONTCARE);
2504 	}
2505 
2506 	devp = ddi_get_driver_private(devi);
2507 
2508 	if (devp == NULL) {
2509 		/* Oops... nexus driver is mis-configured... */
2510 		return (DDI_PROBE_FAILURE);
2511 	}
2512 
2513 	instance = ddi_get_instance(devi);
2514 
2515 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2516 		return (DDI_PROBE_PARTIAL);
2517 	}
2518 
2519 	/*
2520 	 * Call the SCSA utility probe routine to see if we actually
2521 	 * have a target at this SCSI nexus.
2522 	 */
2523 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2524 	case SCSIPROBE_EXISTS:
2525 		switch (devp->sd_inq->inq_dtype) {
2526 		case DTYPE_DIRECT:
2527 			rval = DDI_PROBE_SUCCESS;
2528 			break;
2529 		case DTYPE_RODIRECT:
2530 			/* CDs etc. Can be removable media */
2531 			rval = DDI_PROBE_SUCCESS;
2532 			break;
2533 		case DTYPE_OPTICAL:
2534 			/*
2535 			 * Rewritable optical drive (e.g. HP115AA).
2536 			 * Can also be removable media.
2537 			 */
2538 
2539 			/*
2540 			 * Do not attempt to bind to DTYPE_OPTICAL if
2541 			 * pre-Solaris 9 SPARC sd behavior is required.
2542 			 *
2543 			 * If first time through and sd_dtype_optical_bind
2544 			 * has not been set in /etc/system, check properties.
2545 			 */
2546 
2547 			if (sd_dtype_optical_bind < 0) {
2548 				sd_dtype_optical_bind = ddi_prop_get_int(
2549 				    DDI_DEV_T_ANY, devi, 0,
2550 				    "optical-device-bind", 1);
2551 			}
2552 
2553 			if (sd_dtype_optical_bind == 0) {
2554 				rval = DDI_PROBE_FAILURE;
2555 			} else {
2556 				rval = DDI_PROBE_SUCCESS;
2557 			}
2558 			break;
2559 
2560 		case DTYPE_NOTPRESENT:
2561 		default:
2562 			rval = DDI_PROBE_FAILURE;
2563 			break;
2564 		}
2565 		break;
2566 	default:
2567 		rval = DDI_PROBE_PARTIAL;
2568 		break;
2569 	}
2570 
2571 	/*
2572 	 * This routine checks for resource allocation prior to freeing,
2573 	 * so it will take care of the "smart probing" case where a
2574 	 * scsi_probe() may or may not have been issued and will *not*
2575 	 * free previously-freed resources.
2576 	 */
2577 	scsi_unprobe(devp);
2578 	return (rval);
2579 }
2580 
2581 
2582 /*
2583  *    Function: sdinfo
2584  *
2585  * Description: This is the driver getinfo(9e) entry point function.
2586  * 		Given the device number, return the devinfo pointer from
2587  *		the scsi_device structure or the instance number
2588  *		associated with the dev_t.
2589  *
2590  *   Arguments: dip     - pointer to device info structure
2591  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2592  *			  DDI_INFO_DEVT2INSTANCE)
2593  *		arg     - driver dev_t
2594  *		resultp - user buffer for request response
2595  *
2596  * Return Code: DDI_SUCCESS
2597  *              DDI_FAILURE
2598  */
2599 /* ARGSUSED */
2600 static int
2601 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2602 {
2603 	struct sd_lun	*un;
2604 	dev_t		dev;
2605 	int		instance;
2606 	int		error;
2607 
2608 	switch (infocmd) {
2609 	case DDI_INFO_DEVT2DEVINFO:
2610 		dev = (dev_t)arg;
2611 		instance = SDUNIT(dev);
2612 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2613 			return (DDI_FAILURE);
2614 		}
2615 		*result = (void *) SD_DEVINFO(un);
2616 		error = DDI_SUCCESS;
2617 		break;
2618 	case DDI_INFO_DEVT2INSTANCE:
2619 		dev = (dev_t)arg;
2620 		instance = SDUNIT(dev);
2621 		*result = (void *)(uintptr_t)instance;
2622 		error = DDI_SUCCESS;
2623 		break;
2624 	default:
2625 		error = DDI_FAILURE;
2626 	}
2627 	return (error);
2628 }
2629 
2630 /*
2631  *    Function: sd_prop_op
2632  *
2633  * Description: This is the driver prop_op(9e) entry point function.
2634  *		Return the number of blocks for the partition in question
2635  *		or forward the request to the property facilities.
2636  *
2637  *   Arguments: dev       - device number
2638  *		dip       - pointer to device info structure
2639  *		prop_op   - property operator
2640  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2641  *		name      - pointer to property name
2642  *		valuep    - pointer or address of the user buffer
2643  *		lengthp   - property length
2644  *
2645  * Return Code: DDI_PROP_SUCCESS
2646  *              DDI_PROP_NOT_FOUND
2647  *              DDI_PROP_UNDEFINED
2648  *              DDI_PROP_NO_MEMORY
2649  *              DDI_PROP_BUF_TOO_SMALL
2650  */
2651 
2652 static int
2653 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2654 	char *name, caddr_t valuep, int *lengthp)
2655 {
2656 	int		instance = ddi_get_instance(dip);
2657 	struct sd_lun	*un;
2658 	uint64_t	nblocks64;
2659 
2660 	/*
2661 	 * Our dynamic properties are all device specific and size oriented.
2662 	 * Requests issued under conditions where size is valid are passed
2663 	 * to ddi_prop_op_nblocks with the size information, otherwise the
2664 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2665 	 */
2666 	un = ddi_get_soft_state(sd_state, instance);
2667 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2668 	    (un->un_f_geometry_is_valid == FALSE)) {
2669 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2670 		    name, valuep, lengthp));
2671 	} else {
2672 		/* get nblocks value */
2673 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2674 		mutex_enter(SD_MUTEX(un));
2675 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2676 		mutex_exit(SD_MUTEX(un));
2677 
2678 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2679 		    name, valuep, lengthp, nblocks64));
2680 	}
2681 }
2682 
2683 /*
2684  * The following functions are for smart probing:
2685  * sd_scsi_probe_cache_init()
2686  * sd_scsi_probe_cache_fini()
2687  * sd_scsi_clear_probe_cache()
2688  * sd_scsi_probe_with_cache()
2689  */
2690 
2691 /*
2692  *    Function: sd_scsi_probe_cache_init
2693  *
2694  * Description: Initializes the probe response cache mutex and head pointer.
2695  *
2696  *     Context: Kernel thread context
2697  */
2698 
2699 static void
2700 sd_scsi_probe_cache_init(void)
2701 {
2702 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2703 	sd_scsi_probe_cache_head = NULL;
2704 }
2705 
2706 
2707 /*
2708  *    Function: sd_scsi_probe_cache_fini
2709  *
2710  * Description: Frees all resources associated with the probe response cache.
2711  *
2712  *     Context: Kernel thread context
2713  */
2714 
2715 static void
2716 sd_scsi_probe_cache_fini(void)
2717 {
2718 	struct sd_scsi_probe_cache *cp;
2719 	struct sd_scsi_probe_cache *ncp;
2720 
2721 	/* Clean up our smart probing linked list */
2722 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2723 		ncp = cp->next;
2724 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2725 	}
2726 	sd_scsi_probe_cache_head = NULL;
2727 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2728 }
2729 
2730 
2731 /*
2732  *    Function: sd_scsi_clear_probe_cache
2733  *
2734  * Description: This routine clears the probe response cache. This is
2735  *		done when open() returns ENXIO so that when deferred
2736  *		attach is attempted (possibly after a device has been
2737  *		turned on) we will retry the probe. Since we don't know
2738  *		which target we failed to open, we just clear the
2739  *		entire cache.
2740  *
2741  *     Context: Kernel thread context
2742  */
2743 
2744 static void
2745 sd_scsi_clear_probe_cache(void)
2746 {
2747 	struct sd_scsi_probe_cache	*cp;
2748 	int				i;
2749 
2750 	mutex_enter(&sd_scsi_probe_cache_mutex);
2751 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2752 		/*
2753 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2754 		 * force probing to be performed the next time
2755 		 * sd_scsi_probe_with_cache is called.
2756 		 */
2757 		for (i = 0; i < NTARGETS_WIDE; i++) {
2758 			cp->cache[i] = SCSIPROBE_EXISTS;
2759 		}
2760 	}
2761 	mutex_exit(&sd_scsi_probe_cache_mutex);
2762 }
2763 
2764 
2765 /*
2766  *    Function: sd_scsi_probe_with_cache
2767  *
2768  * Description: This routine implements support for a scsi device probe
2769  *		with cache. The driver maintains a cache of the target
2770  *		responses to scsi probes. If we get no response from a
2771  *		target during a probe inquiry, we remember that, and we
2772  *		avoid additional calls to scsi_probe on non-zero LUNs
2773  *		on the same target until the cache is cleared. By doing
2774  *		so we avoid the 1/4 sec selection timeout for nonzero
2775  *		LUNs. lun0 of a target is always probed.
2776  *
2777  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2778  *              waitfunc - indicates what the allocator routines should
2779  *			   do when resources are not available. This value
2780  *			   is passed on to scsi_probe() when that routine
2781  *			   is called.
2782  *
2783  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2784  *		otherwise the value returned by scsi_probe(9F).
2785  *
2786  *     Context: Kernel thread context
2787  */
2788 
2789 static int
2790 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2791 {
2792 	struct sd_scsi_probe_cache	*cp;
2793 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2794 	int		lun, tgt;
2795 
2796 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2797 	    SCSI_ADDR_PROP_LUN, 0);
2798 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2799 	    SCSI_ADDR_PROP_TARGET, -1);
2800 
2801 	/* Make sure caching enabled and target in range */
2802 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2803 		/* do it the old way (no cache) */
2804 		return (scsi_probe(devp, waitfn));
2805 	}
2806 
2807 	mutex_enter(&sd_scsi_probe_cache_mutex);
2808 
2809 	/* Find the cache for this scsi bus instance */
2810 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2811 		if (cp->pdip == pdip) {
2812 			break;
2813 		}
2814 	}
2815 
2816 	/* If we can't find a cache for this pdip, create one */
2817 	if (cp == NULL) {
2818 		int i;
2819 
2820 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2821 		    KM_SLEEP);
2822 		cp->pdip = pdip;
2823 		cp->next = sd_scsi_probe_cache_head;
2824 		sd_scsi_probe_cache_head = cp;
2825 		for (i = 0; i < NTARGETS_WIDE; i++) {
2826 			cp->cache[i] = SCSIPROBE_EXISTS;
2827 		}
2828 	}
2829 
2830 	mutex_exit(&sd_scsi_probe_cache_mutex);
2831 
2832 	/* Recompute the cache for this target if LUN zero */
2833 	if (lun == 0) {
2834 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2835 	}
2836 
2837 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2838 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2839 		return (SCSIPROBE_NORESP);
2840 	}
2841 
2842 	/* Do the actual probe; save & return the result */
2843 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2844 }
2845 
2846 
2847 /*
2848  *    Function: sd_spin_up_unit
2849  *
2850  * Description: Issues the following commands to spin-up the device:
2851  *		START STOP UNIT, and INQUIRY.
2852  *
2853  *   Arguments: un - driver soft state (unit) structure
2854  *
2855  * Return Code: 0 - success
2856  *		EIO - failure
2857  *		EACCES - reservation conflict
2858  *
2859  *     Context: Kernel thread context
2860  */
2861 
2862 static int
2863 sd_spin_up_unit(struct sd_lun *un)
2864 {
2865 	size_t	resid		= 0;
2866 	int	has_conflict	= FALSE;
2867 	uchar_t *bufaddr;
2868 
2869 	ASSERT(un != NULL);
2870 
2871 	/*
2872 	 * Send a throwaway START UNIT command.
2873 	 *
2874 	 * If we fail on this, we don't care presently what precisely
2875 	 * is wrong.  EMC's arrays will also fail this with a check
2876 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2877 	 * we don't want to fail the attach because it may become
2878 	 * "active" later.
2879 	 */
2880 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2881 	    == EACCES)
2882 		has_conflict = TRUE;
2883 
2884 	/*
2885 	 * Send another INQUIRY command to the target. This is necessary for
2886 	 * non-removable media direct access devices because their INQUIRY data
2887 	 * may not be fully qualified until they are spun up (perhaps via the
2888 	 * START command above).  (Note: This seems to be needed for some
2889 	 * legacy devices only.) The INQUIRY command should succeed even if a
2890 	 * Reservation Conflict is present.
2891 	 */
2892 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2893 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2894 		kmem_free(bufaddr, SUN_INQSIZE);
2895 		return (EIO);
2896 	}
2897 
2898 	/*
2899 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2900 	 * Note that this routine does not return a failure here even if the
2901 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2902 	 */
2903 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2904 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2905 	}
2906 
2907 	kmem_free(bufaddr, SUN_INQSIZE);
2908 
2909 	/* If we hit a reservation conflict above, tell the caller. */
2910 	if (has_conflict == TRUE) {
2911 		return (EACCES);
2912 	}
2913 
2914 	return (0);
2915 }
2916 
2917 #ifdef _LP64
2918 /*
2919  *    Function: sd_enable_descr_sense
2920  *
2921  * Description: This routine attempts to select descriptor sense format
2922  *		using the Control mode page.  Devices that support 64 bit
2923  *		LBAs (for >2TB luns) should also implement descriptor
2924  *		sense data so we will call this function whenever we see
2925  *		a lun larger than 2TB.  If for some reason the device
2926  *		supports 64 bit LBAs but doesn't support descriptor sense
2927  *		presumably the mode select will fail.  Everything will
2928  *		continue to work normally except that we will not get
2929  *		complete sense data for commands that fail with an LBA
2930  *		larger than 32 bits.
2931  *
2932  *   Arguments: un - driver soft state (unit) structure
2933  *
2934  *     Context: Kernel thread context only
2935  */
2936 
2937 static void
2938 sd_enable_descr_sense(struct sd_lun *un)
2939 {
2940 	uchar_t			*header;
2941 	struct mode_control_scsi3 *ctrl_bufp;
2942 	size_t			buflen;
2943 	size_t			bd_len;
2944 
2945 	/*
2946 	 * Read MODE SENSE page 0xA, Control Mode Page
2947 	 */
2948 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2949 	    sizeof (struct mode_control_scsi3);
2950 	header = kmem_zalloc(buflen, KM_SLEEP);
2951 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
2952 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
2953 		SD_ERROR(SD_LOG_COMMON, un,
2954 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2955 		goto eds_exit;
2956 	}
2957 
2958 	/*
2959 	 * Determine size of Block Descriptors in order to locate
2960 	 * the mode page data. ATAPI devices return 0, SCSI devices
2961 	 * should return MODE_BLK_DESC_LENGTH.
2962 	 */
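	/*
	 * For reference, the mode parameter data returned above is laid
	 * out as [mode header][block descriptor(s), bd_len bytes][mode
	 * page], so the Control mode page starts bd_len bytes past the
	 * header.
	 */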
2963 	bd_len  = ((struct mode_header *)header)->bdesc_length;
2964 
2965 	ctrl_bufp = (struct mode_control_scsi3 *)
2966 	    (header + MODE_HEADER_LENGTH + bd_len);
2967 
2968 	/*
2969 	 * Clear PS bit for MODE SELECT
2970 	 */
2971 	ctrl_bufp->mode_page.ps = 0;
2972 
2973 	/*
2974 	 * Set D_SENSE to enable descriptor sense format.
2975 	 */
2976 	ctrl_bufp->d_sense = 1;
2977 
2978 	/*
2979 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2980 	 */
2981 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
2982 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
2983 		SD_INFO(SD_LOG_COMMON, un,
2984 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2985 		goto eds_exit;
2986 	}
2987 
2988 eds_exit:
2989 	kmem_free(header, buflen);
2990 }
2991 
2992 /*
2993  *    Function: sd_reenable_dsense_task
2994  *
2995  * Description: Re-enable descriptor sense after device or bus reset
2996  *
2997  *     Context: Executes in a taskq() thread context
2998  */
2999 static void
3000 sd_reenable_dsense_task(void *arg)
3001 {
3002 	struct	sd_lun	*un = arg;
3003 
3004 	ASSERT(un != NULL);
3005 	sd_enable_descr_sense(un);
3006 }
3007 #endif /* _LP64 */
3008 
3009 /*
3010  *    Function: sd_set_mmc_caps
3011  *
3012  * Description: This routine determines if the device is MMC compliant and if
3013  *		the device supports CDDA via a mode sense of the CD/DVD
3014  *		capabilities mode page. It also checks whether the device
3015  *		is a writable DVD-RAM device.
3016  *
3017  *   Arguments: un - driver soft state (unit) structure
3018  *
3019  *     Context: Kernel thread context only
3020  */
3021 
3022 static void
3023 sd_set_mmc_caps(struct sd_lun *un)
3024 {
3025 	struct mode_header_grp2		*sense_mhp;
3026 	uchar_t				*sense_page;
3027 	caddr_t				buf;
3028 	int				bd_len;
3029 	int				status;
3030 	struct uscsi_cmd		com;
3031 	int				rtn;
3032 	uchar_t				*out_data_rw, *out_data_hd;
3033 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3034 
3035 	ASSERT(un != NULL);
3036 
3037 	/*
3038 	 * The flags set in this function are: MMC compliant, DVD-RAM
3039 	 * writable device, and CDDA support. Initialize them to FALSE;
3040 	 * if a capability is detected, the flag will be set to TRUE.
3041 	 */
3042 	un->un_f_mmc_cap = FALSE;
3043 	un->un_f_dvdram_writable_device = FALSE;
3044 	un->un_f_cfg_cdda = FALSE;
3045 
3046 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3047 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3048 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3049 
3050 	if (status != 0) {
3051 		/* command failed; just return */
3052 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3053 		return;
3054 	}
3055 	/*
3056 	 * If the mode sense request for the CDROM CAPABILITIES
3057 	 * page (0x2A) succeeds the device is assumed to be MMC.
3058 	 */
3059 	un->un_f_mmc_cap = TRUE;
3060 
3061 	/* Get to the page data */
3062 	sense_mhp = (struct mode_header_grp2 *)buf;
3063 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3064 	    sense_mhp->bdesc_length_lo;
3065 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3066 		/*
3067 		 * We did not get back the expected block descriptor
3068 		 * length so we cannot determine if the device supports
3069 		 * CDDA. However, we still indicate the device is MMC
3070 		 * according to the successful response to the page
3071 		 * 0x2A mode sense request.
3072 		 */
3073 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3074 		    "sd_set_mmc_caps: Mode Sense returned "
3075 		    "invalid block descriptor length\n");
3076 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3077 		return;
3078 	}
3079 
3080 	/* See if read CDDA is supported */
3081 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3082 	    bd_len);
3083 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3084 
3085 	/* See if writing DVD RAM is supported. */
3086 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3087 	if (un->un_f_dvdram_writable_device == TRUE) {
3088 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3089 		return;
3090 	}
3091 
3092 	/*
3093 	 * If the device presents DVD or CD capabilities in the mode
3094 	 * page, we can return here since an RRD will not have
3095 	 * these capabilities.
3096 	 */
3097 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3098 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3099 		return;
3100 	}
3101 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3102 
3103 	/*
3104 	 * If un->un_f_dvdram_writable_device is still FALSE,
3105 	 * check for a Removable Rigid Disk (RRD).  An RRD
3106 	 * device is identified by the features RANDOM_WRITABLE and
3107 	 * HARDWARE_DEFECT_MANAGEMENT.
3108 	 */
3109 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3110 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3111 
3112 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3113 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3114 	    RANDOM_WRITABLE);
3115 	if (rtn != 0) {
3116 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3117 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3118 		return;
3119 	}
3120 
3121 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3122 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3123 
3124 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3125 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3126 	    HARDWARE_DEFECT_MANAGEMENT);
3127 	if (rtn == 0) {
3128 		/*
3129 		 * We have good information, check for random writable
3130 		 * and hardware defect features.
3131 		 */
3132 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3133 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3134 			un->un_f_dvdram_writable_device = TRUE;
3135 		}
3136 	}
3137 
3138 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3139 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3140 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3141 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3142 }
3143 
3144 /*
3145  *    Function: sd_check_for_writable_cd
3146  *
3147  * Description: This routine determines if the media in the device is
3148  *		writable or not. It uses the get configuration command (0x46)
3149  *		writable or not. It uses the GET CONFIGURATION command (0x46)
3150  *		to determine if the media is writable.
3151  *   Arguments: un - driver soft state (unit) structure
3152  *
3153  *     Context: Never called at interrupt context.
3154  */
3155 
3156 static void
3157 sd_check_for_writable_cd(struct sd_lun *un)
3158 {
3159 	struct uscsi_cmd		com;
3160 	uchar_t				*out_data;
3161 	uchar_t				*rqbuf;
3162 	int				rtn;
3163 	uchar_t				*out_data_rw, *out_data_hd;
3164 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3165 	struct mode_header_grp2		*sense_mhp;
3166 	uchar_t				*sense_page;
3167 	caddr_t				buf;
3168 	int				bd_len;
3169 	int				status;
3170 
3171 	ASSERT(un != NULL);
3172 	ASSERT(mutex_owned(SD_MUTEX(un)));
3173 
3174 	/*
3175 	 * Initialize writable media to FALSE; set it to TRUE only if
3176 	 * the configuration info tells us the media is writable.
3177 	 */
3178 	un->un_f_mmc_writable_media = FALSE;
3179 	mutex_exit(SD_MUTEX(un));
3180 
3181 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3182 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3183 
3184 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3185 	    out_data, SD_PROFILE_HEADER_LEN);
3186 
3187 	mutex_enter(SD_MUTEX(un));
3188 	if (rtn == 0) {
3189 		/*
3190 		 * Good data; check for DVD-RAM (current profile 0x0012).
3191 		 */
3192 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3193 			un->un_f_mmc_writable_media = TRUE;
3194 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3195 			kmem_free(rqbuf, SENSE_LENGTH);
3196 			return;
3197 		}
3198 	}
3199 
3200 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3201 	kmem_free(rqbuf, SENSE_LENGTH);
3202 
3203 	/*
3204 	 * Determine if this is a RRD type device.
3205 	 */
3206 	mutex_exit(SD_MUTEX(un));
3207 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3208 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3209 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3210 	mutex_enter(SD_MUTEX(un));
3211 	if (status != 0) {
3212 		/* command failed; just return */
3213 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3214 		return;
3215 	}
3216 
3217 	/* Get to the page data */
3218 	sense_mhp = (struct mode_header_grp2 *)buf;
3219 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3220 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3221 		/*
3222 		 * We did not get back the expected block descriptor length so
3223 		 * we cannot check the mode page.
3224 		 */
3225 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3226 		    "sd_check_for_writable_cd: Mode Sense returned "
3227 		    "invalid block descriptor length\n");
3228 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3229 		return;
3230 	}
3231 
3232 	/*
3233 	 * If the device presents DVD or CD capabilities in the mode
3234 	 * page, we can return here since an RRD device will not have
3235 	 * these capabilities.
3236 	 */
3237 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3238 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3239 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3240 		return;
3241 	}
3242 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3243 
3244 	/*
3245 	 * If un->un_f_mmc_writable_media is still FALSE,
3246 	 * check for RRD-type media.  An RRD device is identified
3247 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3248 	 */
3249 	mutex_exit(SD_MUTEX(un));
3250 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3251 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3252 
3253 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3254 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3255 	    RANDOM_WRITABLE);
3256 	if (rtn != 0) {
3257 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3258 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3259 		mutex_enter(SD_MUTEX(un));
3260 		return;
3261 	}
3262 
3263 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3264 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3265 
3266 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3267 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3268 	    HARDWARE_DEFECT_MANAGEMENT);
3269 	mutex_enter(SD_MUTEX(un));
3270 	if (rtn == 0) {
3271 		/*
3272 		 * We have good data; check that the random writable and
3273 		 * hardware defect management features are reported current.
3274 		 */
3275 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3276 		    (out_data_rw[10] & 0x1) &&
3277 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3278 		    (out_data_hd[10] & 0x1)) {
3279 			un->un_f_mmc_writable_media = TRUE;
3280 		}
3281 	}
3282 
3283 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3284 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3285 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3286 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3287 }
3288 
3289 /*
3290  *    Function: sd_read_unit_properties
3291  *
3292  * Description: The following implements a property lookup mechanism.
3293  *		Properties for particular disks (keyed on vendor, model
3294  *		and rev numbers) are sought in the sd.conf file via
3295  *		sd_process_sdconf_file(), and if not found there, are
3296  *		looked for in a list hardcoded in this driver via
3297  *		sd_process_sdconf_table(). Once located, the properties
3298  *		are used to update the driver unit structure.
3299  *
3300  *   Arguments: un - driver soft state (unit) structure
3301  */
3302 
3303 static void
3304 sd_read_unit_properties(struct sd_lun *un)
3305 {
3306 	/*
3307 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3308 	 * the "sd-config-list" property (from the sd.conf file) or if
3309 	 * there was not a match for the inquiry vid/pid. If this event
3310 	 * occurs the static driver configuration table is searched for
3311 	 * a match.
3312 	 */
3313 	ASSERT(un != NULL);
3314 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3315 		sd_process_sdconf_table(un);
3316 	}
3317 
3318 	/* check for LSI device */
3319 	sd_is_lsi(un);
3322 }
3323 
3324 
3325 /*
3326  *    Function: sd_process_sdconf_file
3327  *
3328  * Description: Use ddi_getlongprop to obtain the properties from the
3329  *		driver's config file (i.e., sd.conf) and update the driver
3330  *		soft state structure accordingly.
3331  *
3332  *   Arguments: un - driver soft state (unit) structure
3333  *
3334  * Return Code: SD_SUCCESS - The properties were successfully set according
3335  *			     to the driver configuration file.
3336  *		SD_FAILURE - The driver config list was not obtained or
3337  *			     there was no vid/pid match. This indicates that
3338  *			     the static config table should be used.
3339  *
3340  * The config file has a property, "sd-config-list", which consists of
3341  * one or more duplets as follows:
3342  *
3343  *  sd-config-list=
3344  *	<duplet>,
3345  *	[<duplet>,]
3346  *	[<duplet>];
3347  *
3348  * The structure of each duplet is as follows:
3349  *
3350  *  <duplet>:= <vid+pid>,<data-property-name-list>
3351  *
3352  * The first entry of the duplet is the device ID string (the concatenated
3353  * vid & pid; not to be confused with a device_id).  This is defined in
3354  * the same way as in the sd_disk_table.
3355  *
3356  * The second part of the duplet is a string that identifies a
3357  * data-property-name-list. The data-property-name-list is defined as
3358  * follows:
3359  *
3360  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3361  *
3362  * The syntax of <data-property-name> depends on the <version> field.
3363  *
3364  * If version = SD_CONF_VERSION_1 we have the following syntax:
3365  *
3366  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3367  *
3368  * where the prop0 value will be used to set prop0 if bit0 set in the
3369  * flags, prop1 if bit1 set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3370  *
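 * For illustration only (the vid/pid and the property name below are
 * made up), a version 1 duplet might look like:
 *
 *	sd-config-list= "ACME    SUPERDISK", "acme-disk-data";
 *	acme-disk-data= 1,0x1,10;
 *
 * where version = 1 and flags = 0x1 (bit0, assumed here to be
 * SD_CONF_BSET_THROTTLE), so prop0 (10) sets the throttle for
 * matching devices.
 *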
3371  */
3372 
3373 static int
3374 sd_process_sdconf_file(struct sd_lun *un)
3375 {
3376 	char	*config_list = NULL;
3377 	int	config_list_len;
3378 	int	len;
3379 	int	dupletlen = 0;
3380 	char	*vidptr;
3381 	int	vidlen;
3382 	char	*dnlist_ptr;
3383 	char	*dataname_ptr;
3384 	int	dnlist_len;
3385 	int	dataname_len;
3386 	int	*data_list;
3387 	int	data_list_len;
3388 	int	rval = SD_FAILURE;
3389 	int	i;
3390 
3391 	ASSERT(un != NULL);
3392 
3393 	/* Obtain the configuration list associated with the .conf file */
3394 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3395 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3396 	    != DDI_PROP_SUCCESS) {
3397 		return (SD_FAILURE);
3398 	}
3399 
3400 	/*
3401 	 * Compare vids in each duplet to the inquiry vid - if a match is
3402 	 * made, get the data value and update the soft state structure
3403 	 * accordingly.
3404 	 *
3405 	 * Note: This algorithm is complex and difficult to maintain. It should
3406 	 * be replaced with a more robust implementation.
3407 	 */
3408 	for (len = config_list_len, vidptr = config_list; len > 0;
3409 	    vidptr += dupletlen, len -= dupletlen) {
3410 		/*
3411 		 * Note: The assumption here is that each vid entry and its
3412 		 * associated duplet data are consecutive NUL-terminated strings.
3413 		 */
3414 		vidlen = dupletlen = (int)strlen(vidptr);
3415 		if ((vidlen == 0) ||
3416 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3417 			dupletlen++;
3418 			continue;
3419 		}
3420 
3421 		/*
3422 		 * dnlist contains 1 or more blank separated
3423 		 * data-property-name entries
3424 		 */
3425 		dnlist_ptr = vidptr + vidlen + 1;
3426 		dnlist_len = (int)strlen(dnlist_ptr);
3427 		dupletlen += dnlist_len + 2;
3428 
3429 		/*
3430 		 * Set a pointer for the first data-property-name
3431 		 * entry in the list
3432 		 */
3433 		dataname_ptr = dnlist_ptr;
3434 		dataname_len = 0;
3435 
3436 		/*
3437 		 * Loop through all data-property-name entries in the
3438 		 * data-property-name-list setting the properties for each.
3439 		 */
3440 		while (dataname_len < dnlist_len) {
3441 			int version;
3442 
3443 			/*
3444 			 * Determine the length of the current
3445 			 * data-property-name entry by indexing until a
3446 			 * blank or NULL is encountered. When the space is
3447 			 * encountered reset it to a NULL for compliance
3448 			 * with ddi_getlongprop().
3449 			 */
3450 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3451 			    (dataname_ptr[i] != '\0')); i++) {
3452 				;
3453 			}
3454 
3455 			dataname_len += i;
3456 			/* If not NUL-terminated, make it so */
3457 			if (dataname_ptr[i] == ' ') {
3458 				dataname_ptr[i] = '\0';
3459 			}
3460 			dataname_len++;
3461 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3462 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3463 			    vidptr, dataname_ptr);
3464 
3465 			/* Get the data list */
3466 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3467 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3468 			    != DDI_PROP_SUCCESS) {
3469 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3470 				    "sd_process_sdconf_file: data property (%s)"
3471 				    " has no value\n", dataname_ptr);
3472 				dataname_ptr = dnlist_ptr + dataname_len;
3473 				continue;
3474 			}
3475 
3476 			version = data_list[0];
3477 
3478 			if (version == SD_CONF_VERSION_1) {
3479 				sd_tunables values;
3480 
3481 				/* Set the properties */
3482 				if (sd_chk_vers1_data(un, data_list[1],
3483 				    &data_list[2], data_list_len, dataname_ptr)
3484 				    == SD_SUCCESS) {
3485 					sd_get_tunables_from_conf(un,
3486 					    data_list[1], &data_list[2],
3487 					    &values);
3488 					sd_set_vers1_properties(un,
3489 					    data_list[1], &values);
3490 					rval = SD_SUCCESS;
3491 				} else {
3492 					rval = SD_FAILURE;
3493 				}
3494 			} else {
3495 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3496 				    "data property %s version 0x%x is invalid.",
3497 				    dataname_ptr, version);
3498 				rval = SD_FAILURE;
3499 			}
3500 			kmem_free(data_list, data_list_len);
3501 			dataname_ptr = dnlist_ptr + dataname_len;
3502 		}
3503 	}
3504 
3505 	/* free up the memory allocated by ddi_getlongprop */
3506 	if (config_list) {
3507 		kmem_free(config_list, config_list_len);
3508 	}
3509 
3510 	return (rval);
3511 }
3512 
3513 /*
3514  *    Function: sd_get_tunables_from_conf()
3515  *
3516  *    This function reads the data list from the sd.conf file and
3517  *    places each numeric value in the appropriate sd_tunables member.
3518  *    Since the order of the data list members varies across platforms,
3519  *    this function reads them from the data list in a platform-specific
3520  *    order and places them into sd_tunables members that are consistent
3521  *    across all platforms.
3524  */
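/*
 * A small illustration of the positional mapping (values hypothetical):
 * if only bit0 and bit2 are set in flags, then data_list[0] holds the
 * bit0 property and data_list[2] holds the bit2 property; data_list[1]
 * is present but skipped because bit1 is clear.
 */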
3525 static void
3526 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3527     sd_tunables *values)
3528 {
3529 	int i;
3530 	int mask;
3531 
3532 	bzero(values, sizeof (sd_tunables));
3533 
3534 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3535 
3536 		mask = 1 << i;
3537 		if (mask > flags) {
3538 			break;
3539 		}
3540 
3541 		switch (mask & flags) {
3542 		case 0:	/* This mask bit not set in flags */
3543 			continue;
3544 		case SD_CONF_BSET_THROTTLE:
3545 			values->sdt_throttle = data_list[i];
3546 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3547 			    "sd_get_tunables_from_conf: throttle = %d\n",
3548 			    values->sdt_throttle);
3549 			break;
3550 		case SD_CONF_BSET_CTYPE:
3551 			values->sdt_ctype = data_list[i];
3552 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3553 			    "sd_get_tunables_from_conf: ctype = %d\n",
3554 			    values->sdt_ctype);
3555 			break;
3556 		case SD_CONF_BSET_NRR_COUNT:
3557 			values->sdt_not_rdy_retries = data_list[i];
3558 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3559 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3560 			    values->sdt_not_rdy_retries);
3561 			break;
3562 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3563 			values->sdt_busy_retries = data_list[i];
3564 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3565 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3566 			    values->sdt_busy_retries);
3567 			break;
3568 		case SD_CONF_BSET_RST_RETRIES:
3569 			values->sdt_reset_retries = data_list[i];
3570 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3571 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3572 			    values->sdt_reset_retries);
3573 			break;
3574 		case SD_CONF_BSET_RSV_REL_TIME:
3575 			values->sdt_reserv_rel_time = data_list[i];
3576 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3577 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3578 			    values->sdt_reserv_rel_time);
3579 			break;
3580 		case SD_CONF_BSET_MIN_THROTTLE:
3581 			values->sdt_min_throttle = data_list[i];
3582 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3583 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3584 			    values->sdt_min_throttle);
3585 			break;
3586 		case SD_CONF_BSET_DISKSORT_DISABLED:
3587 			values->sdt_disk_sort_dis = data_list[i];
3588 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3589 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3590 			    values->sdt_disk_sort_dis);
3591 			break;
3592 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3593 			values->sdt_lun_reset_enable = data_list[i];
3594 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3595 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3596 			    "\n", values->sdt_lun_reset_enable);
3597 			break;
3598 		}
3599 	}
3600 }
3601 
3602 /*
3603  *    Function: sd_process_sdconf_table
3604  *
3605  * Description: Search the static configuration table for a match on the
3606  *		inquiry vid/pid and update the driver soft state structure
3607  *		according to the table property values for the device.
3608  *
3609  *		The form of a configuration table entry is:
3610  *		  <vid+pid>,<flags>,<property-data>
3611  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3612  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3613  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3614  *
3615  *   Arguments: un - driver soft state (unit) structure
3616  */
3617 
3618 static void
3619 sd_process_sdconf_table(struct sd_lun *un)
3620 {
3621 	char	*id = NULL;
3622 	int	table_index;
3623 	int	idlen;
3624 
3625 	ASSERT(un != NULL);
3626 	for (table_index = 0; table_index < sd_disk_table_size;
3627 	    table_index++) {
3628 		id = sd_disk_table[table_index].device_id;
3629 		idlen = strlen(id);
3630 		if (idlen == 0) {
3631 			continue;
3632 		}
3633 
3634 		/*
3635 		 * The static configuration table currently does not
3636 		 * implement version 10 properties. Additionally,
3637 		 * multiple data-property-name entries are not
3638 		 * implemented in the static configuration table.
3639 		 */
3640 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3641 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3642 			    "sd_process_sdconf_table: disk %s\n", id);
3643 			sd_set_vers1_properties(un,
3644 			    sd_disk_table[table_index].flags,
3645 			    sd_disk_table[table_index].properties);
3646 			break;
3647 		}
3648 	}
3649 }
3650 
3651 
3652 /*
3653  *    Function: sd_sdconf_id_match
3654  *
3655  * Description: This local function implements a case sensitive vid/pid
3656  *		comparison as well as the boundary cases of wild card and
3657  *		multiple blanks.
3658  *
3659  *		Note: An implicit assumption made here is that the scsi
3660  *		inquiry structure will always keep the vid, pid and
3661  *		revision strings in consecutive sequence, so they can be
3662  *		read as a single string. If this assumption is not the
3663  *		case, a separate string, to be used for the check, needs
3664  *		to be built with these strings concatenated.
3665  *
3666  *   Arguments: un - driver soft state (unit) structure
3667  *		id - table or config file vid/pid
3668  *		idlen  - length of the vid/pid (bytes)
3669  *
3670  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3671  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3672  */
3673 
3674 static int
3675 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3676 {
3677 	struct scsi_inquiry	*sd_inq;
3678 	int 			rval = SD_SUCCESS;
3679 
3680 	ASSERT(un != NULL);
3681 	sd_inq = un->un_sd->sd_inq;
3682 	ASSERT(id != NULL);
3683 
3684 	/*
3685 	 * We use the inq_vid as a pointer to a buffer containing the
3686 	 * vid and pid and use the entire vid/pid length of the table
3687 	 * entry for the comparison. This works because the inq_pid
3688 	 * data member follows inq_vid in the scsi_inquiry structure.
3689 	 */
3690 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3691 		/*
3692 		 * The user id string is compared to the inquiry vid/pid
3693 		 * using a case insensitive comparison and ignoring
3694 		 * multiple spaces.
3695 		 */
3696 		rval = sd_blank_cmp(un, id, idlen);
3697 		if (rval != SD_SUCCESS) {
3698 			/*
3699 			 * User id strings that start and end with a "*"
3700 			 * are a special case. These do not have a
3701 			 * specific vendor, and the product string can
3702 			 * appear anywhere in the 16 byte PID portion of
3703 			 * the inquiry data. This is a simple strstr()
3704 			 * type search for the user id in the inquiry data.
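			 * For example, a (hypothetical) table entry of
			 * "*SUPERDISK*" would match any device whose
			 * inq_pid contains "SUPERDISK" at any offset.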
3705 			 */
3706 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3707 				char	*pidptr = &id[1];
3708 				int	i;
3709 				int	j;
3710 				int	pidstrlen = idlen - 2;
3711 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3712 				    pidstrlen;
3713 
3714 				if (j < 0) {
3715 					return (SD_FAILURE);
3716 				}
3717 				for (i = 0; i < j; i++) {
3718 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3719 					    pidptr, pidstrlen) == 0) {
3720 						rval = SD_SUCCESS;
3721 						break;
3722 					}
3723 				}
3724 			}
3725 		}
3726 	}
3727 	return (rval);
3728 }
3729 
3730 
3731 /*
3732  *    Function: sd_blank_cmp
3733  *
3734  * Description: If the id string starts and ends with a space, treat
3735  *		multiple consecutive spaces as equivalent to a single
3736  *		space. For example, this causes a sd_disk_table entry
3737  *		of " NEC CDROM " to match a device's id string of
3738  *		"NEC       CDROM".
3739  *
3740  *		Note: The success exit condition for this routine is if
3741  *		the pointer to the table entry is '\0' and the cnt of
3742  *		the inquiry length is zero. This will happen if the inquiry
3743  *		string returned by the device is padded with spaces to be
3744  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3745  *		SCSI spec states that the inquiry string is to be padded with
3746  *		spaces.
3747  *
3748  *   Arguments: un - driver soft state (unit) structure
3749  *		id - table or config file vid/pid
3750  *		idlen  - length of the vid/pid (bytes)
3751  *
3752  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3753  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3754  */
3755 
3756 static int
3757 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3758 {
3759 	char		*p1;
3760 	char		*p2;
3761 	int		cnt;
3762 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3763 	    sizeof (SD_INQUIRY(un)->inq_pid);
3764 
3765 	ASSERT(un != NULL);
3766 	p2 = un->un_sd->sd_inq->inq_vid;
3767 	ASSERT(id != NULL);
3768 	p1 = id;
3769 
3770 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3771 		/*
3772 		 * Note: string p1 is terminated by a NUL but string p2
3773 		 * isn't.  The end of p2 is determined by cnt.
3774 		 */
3775 		for (;;) {
3776 			/* skip over any extra blanks in both strings */
3777 			while ((*p1 != '\0') && (*p1 == ' ')) {
3778 				p1++;
3779 			}
3780 			while ((cnt != 0) && (*p2 == ' ')) {
3781 				p2++;
3782 				cnt--;
3783 			}
3784 
3785 			/* compare the two strings */
3786 			if ((cnt == 0) ||
3787 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3788 				break;
3789 			}
3790 			while ((cnt > 0) &&
3791 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3792 				p1++;
3793 				p2++;
3794 				cnt--;
3795 			}
3796 		}
3797 	}
3798 
3799 	/* return SD_SUCCESS if both strings match */
3800 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3801 }
3802 
3803 
3804 /*
3805  *    Function: sd_chk_vers1_data
3806  *
3807  * Description: Verify the version 1 device properties provided by the
3808  *		user via the configuration file
3809  *
3810  *   Arguments: un	     - driver soft state (unit) structure
3811  *		flags	     - integer mask indicating properties to be set
3812  *		prop_list    - integer list of property values
3813  *		list_len     - length of user provided data
3814  *
3815  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3816  *		SD_FAILURE - Indicates the user provided data is invalid
3817  */
3818 
3819 static int
3820 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3821     int list_len, char *dataname_ptr)
3822 {
3823 	int i;
3824 	int mask = 1;
3825 	int index = 0;
3826 
3827 	ASSERT(un != NULL);
3828 
3829 	/* Check for a NULL property name and list */
3830 	if (dataname_ptr == NULL) {
3831 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3832 		    "sd_chk_vers1_data: NULL data property name.");
3833 		return (SD_FAILURE);
3834 	}
3835 	if (prop_list == NULL) {
3836 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3837 		    "sd_chk_vers1_data: %s NULL data property list.",
3838 		    dataname_ptr);
3839 		return (SD_FAILURE);
3840 	}
3841 
3842 	/* Display a warning if undefined bits are set in the flags */
3843 	if (flags & ~SD_CONF_BIT_MASK) {
3844 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3845 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3846 		    "Properties not set.",
3847 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3848 		return (SD_FAILURE);
3849 	}
3850 
3851 	/*
3852 	 * Verify the length of the list by counting the bits set in the
3853 	 * flags and validating that the property list holds at least that
3854 	 * many values, plus the version and flags words.
3855 	 */
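	/*
	 * For example, flags = 0x5 has two bits set, so index becomes 2
	 * and the check below requires at least index + 2 = 4 ints in
	 * the list (version, flags, and the property values).
	 */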
3856 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3857 		if (flags & mask) {
3858 			index++;
3859 		}
3860 		mask <<= 1;
3861 	}
3862 	if ((list_len / sizeof (int)) < (index + 2)) {
3863 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3864 		    "sd_chk_vers1_data: "
3865 		    "Data property list %s size is incorrect. "
3866 		    "Properties not set.", dataname_ptr);
3867 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3868 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3869 		return (SD_FAILURE);
3870 	}
3871 	return (SD_SUCCESS);
3872 }
3873 
3874 
3875 /*
3876  *    Function: sd_set_vers1_properties
3877  *
3878  * Description: Set version 1 device properties based on a property list
3879  *		retrieved from the driver configuration file or static
3880  *		configuration table. Version 1 properties have the format:
3881  *
3882  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3883  *
3884  *		where the prop0 value will be used to set prop0 if bit0
3885  *		is set in the flags
3886  *
3887  *   Arguments: un	     - driver soft state (unit) structure
3888  *		flags	     - integer mask indicating properties to be set
3889  *		prop_list    - integer list of property values
3890  */
3891 
3892 static void
3893 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3894 {
3895 	ASSERT(un != NULL);
3896 
3897 	/*
3898 	 * Set the flag to indicate cache is to be disabled. An attempt
3899 	 * to disable the cache via sd_cache_control() will be made
3900 	 * later during attach once the basic initialization is complete.
3901 	 */
3902 	if (flags & SD_CONF_BSET_NOCACHE) {
3903 		un->un_f_opt_disable_cache = TRUE;
3904 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3905 		    "sd_set_vers1_properties: caching disabled flag set\n");
3906 	}
3907 
3908 	/* CD-specific configuration parameters */
3909 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3910 		un->un_f_cfg_playmsf_bcd = TRUE;
3911 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3912 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3913 	}
3914 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3915 		un->un_f_cfg_readsub_bcd = TRUE;
3916 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3917 		    "sd_set_vers1_properties: readsub_bcd set\n");
3918 	}
3919 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3920 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3921 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3922 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3923 	}
3924 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3925 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3926 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3927 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3928 	}
3929 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3930 		un->un_f_cfg_no_read_header = TRUE;
3931 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3932 		    "sd_set_vers1_properties: no_read_header set\n");
3933 	}
3934 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3935 		un->un_f_cfg_read_cd_xd4 = TRUE;
3936 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3937 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3938 	}
3939 
3940 	/* Support for devices which do not have valid/unique serial numbers */
3941 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3942 		un->un_f_opt_fab_devid = TRUE;
3943 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3944 		    "sd_set_vers1_properties: fab_devid bit set\n");
3945 	}
3946 
3947 	/* Support for user throttle configuration */
3948 	if (flags & SD_CONF_BSET_THROTTLE) {
3949 		ASSERT(prop_list != NULL);
3950 		un->un_saved_throttle = un->un_throttle =
3951 		    prop_list->sdt_throttle;
3952 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3953 		    "sd_set_vers1_properties: throttle set to %d\n",
3954 		    prop_list->sdt_throttle);
3955 	}
3956 
3957 	/* Set the per disk retry count according to the conf file or table. */
3958 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3959 		ASSERT(prop_list != NULL);
3960 		if (prop_list->sdt_not_rdy_retries) {
3961 			un->un_notready_retry_count =
3962 			    prop_list->sdt_not_rdy_retries;
3963 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3964 			    "sd_set_vers1_properties: not ready retry count"
3965 			    " set to %d\n", un->un_notready_retry_count);
3966 		}
3967 	}
3968 
3969 	/* The controller type is reported for generic disk driver ioctls */
3970 	if (flags & SD_CONF_BSET_CTYPE) {
3971 		ASSERT(prop_list != NULL);
3972 		switch (prop_list->sdt_ctype) {
3973 		case CTYPE_CDROM:
3974 			un->un_ctype = prop_list->sdt_ctype;
3975 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3976 			    "sd_set_vers1_properties: ctype set to "
3977 			    "CTYPE_CDROM\n");
3978 			break;
3979 		case CTYPE_CCS:
3980 			un->un_ctype = prop_list->sdt_ctype;
3981 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3982 			    "sd_set_vers1_properties: ctype set to "
3983 			    "CTYPE_CCS\n");
3984 			break;
3985 		case CTYPE_ROD:		/* RW optical */
3986 			un->un_ctype = prop_list->sdt_ctype;
3987 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3988 			    "sd_set_vers1_properties: ctype set to "
3989 			    "CTYPE_ROD\n");
3990 			break;
3991 		default:
3992 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3993 			    "sd_set_vers1_properties: Could not set "
3994 			    "invalid ctype value (%d)",
3995 			    prop_list->sdt_ctype);
3996 		}
3997 	}
3998 
3999 	/* Purple failover timeout */
4000 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4001 		ASSERT(prop_list != NULL);
4002 		un->un_busy_retry_count =
4003 		    prop_list->sdt_busy_retries;
4004 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4005 		    "sd_set_vers1_properties: "
4006 		    "busy retry count set to %d\n",
4007 		    un->un_busy_retry_count);
4008 	}
4009 
4010 	/* Purple reset retry count */
4011 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4012 		ASSERT(prop_list != NULL);
4013 		un->un_reset_retry_count =
4014 		    prop_list->sdt_reset_retries;
4015 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4016 		    "sd_set_vers1_properties: "
4017 		    "reset retry count set to %d\n",
4018 		    un->un_reset_retry_count);
4019 	}
4020 
4021 	/* Purple reservation release timeout */
4022 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4023 		ASSERT(prop_list != NULL);
4024 		un->un_reserve_release_time =
4025 		    prop_list->sdt_reserv_rel_time;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 		    "sd_set_vers1_properties: "
4028 		    "reservation release timeout set to %d\n",
4029 		    un->un_reserve_release_time);
4030 	}
4031 
4032 	/*
4033 	 * Driver flag telling the driver to verify that no commands are pending
4034 	 * for a device before issuing a Test Unit Ready. This is a workaround
4035 	 * for a firmware bug in some Seagate eliteI drives.
4036 	 */
4037 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4038 		un->un_f_cfg_tur_check = TRUE;
4039 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4040 		    "sd_set_vers1_properties: tur queue check set\n");
4041 	}
4042 
4043 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4044 		un->un_min_throttle = prop_list->sdt_min_throttle;
4045 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4046 		    "sd_set_vers1_properties: min throttle set to %d\n",
4047 		    un->un_min_throttle);
4048 	}
4049 
4050 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4051 		un->un_f_disksort_disabled =
4052 		    (prop_list->sdt_disk_sort_dis != 0) ?
4053 		    TRUE : FALSE;
4054 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4055 		    "sd_set_vers1_properties: disksort disabled "
4056 		    "flag set to %d\n",
4057 		    prop_list->sdt_disk_sort_dis);
4058 	}
4059 
4060 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4061 		un->un_f_lun_reset_enabled =
4062 		    (prop_list->sdt_lun_reset_enable != 0) ?
4063 		    TRUE : FALSE;
4064 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4065 		    "sd_set_vers1_properties: lun reset enabled "
4066 		    "flag set to %d\n",
4067 		    prop_list->sdt_lun_reset_enable);
4068 	}
4069 
4070 	/*
4071 	 * Validate the throttle values.
4072 	 * If any of the numbers are invalid, set everything to defaults.
4073 	 */
4074 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4075 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4076 	    (un->un_min_throttle > un->un_throttle)) {
4077 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4078 		un->un_min_throttle = sd_min_throttle;
4079 	}
4080 }
4081 
4082 /*
4083  *   Function: sd_is_lsi()
4084  *
4085  *   Description: Check for LSI devices by stepping through the static
4086  *	device table to match the vid/pid.
4087  *
4088  *   Args: un - ptr to sd_lun
4089  *
4090  *   Notes:  When creating a new LSI property, it must also be added
4091  *		to this function.
4092  */
4093 static void
4094 sd_is_lsi(struct sd_lun *un)
4095 {
4096 	char	*id = NULL;
4097 	int	table_index;
4098 	int	idlen;
4099 	void	*prop;
4100 
4101 	ASSERT(un != NULL);
4102 	for (table_index = 0; table_index < sd_disk_table_size;
4103 	    table_index++) {
4104 		id = sd_disk_table[table_index].device_id;
4105 		idlen = strlen(id);
4106 		if (idlen == 0) {
4107 			continue;
4108 		}
4109 
4110 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4111 			prop = sd_disk_table[table_index].properties;
4112 			if (prop == &lsi_properties ||
4113 			    prop == &lsi_oem_properties ||
4114 			    prop == &lsi_properties_scsi ||
4115 			    prop == &symbios_properties) {
4116 				un->un_f_cfg_is_lsi = TRUE;
4117 			}
4118 			break;
4119 		}
4120 	}
4121 }
4122 
4123 
4124 /*
4125  * The following routines support reading and interpretation of disk labels,
4126  * including Solaris BE (8-slice) VTOCs, Solaris LE (16-slice) VTOCs, and
4127  * fdisk tables.
4128  */
4129 
4130 /*
4131  *    Function: sd_validate_geometry
4132  *
4133  * Description: Read the label from the disk (if present). Update the unit's
4134  *		geometry and vtoc information from the data in the label.
4135  *		Verify that the label is valid.
4136  *
4137  *   Arguments: un - driver soft state (unit) structure
4138  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4139  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4140  *			to use the USCSI "direct" chain and bypass the normal
4141  *			command waitq.
4142  *
4143  * Return Code: 0 - Successful completion
4144  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4145  *			  un->un_blockcount; or label on disk is corrupted
4146  *			  or unreadable.
4147  *		EACCES  - Reservation conflict at the device.
4148  *		ENOMEM  - Resource allocation error
4149  *		ENOTSUP - geometry not applicable
4150  *
4151  *     Context: Kernel thread only (can sleep).
4152  */
4153 
4154 static int
4155 sd_validate_geometry(struct sd_lun *un, int path_flag)
4156 {
4157 	static	char		labelstring[128];
4158 	static	char		buf[256];
4159 	char	*label		= NULL;
4160 	int	label_error = 0;
4161 	int	gvalid		= un->un_f_geometry_is_valid;
4162 	int	lbasize;
4163 	uint_t	capacity;
4164 	int	count;
4165 
4166 	ASSERT(un != NULL);
4167 	ASSERT(mutex_owned(SD_MUTEX(un)));
4168 
4169 	/*
4170 	 * If the required values are not valid, then try getting them
4171 	 * once via read capacity. If that fails, then fail this call.
4172 	 * This is necessary with the new mpxio failover behavior in
4173 	 * the T300 where we can get an attach for the inactive path
4174 	 * before the active path. The inactive path fails commands with
4175 	 * sense data of 02,04,88 which happens to the read capacity
4176 	 * before mpxio has had sufficient knowledge to know if it should
4177 	 * force a fail over or not. (Which it won't do at attach anyhow).
4178 	 * If the read capacity at attach time fails, un_tgt_blocksize and
4179 	 * un_blockcount won't be valid.
4180 	 */
4181 	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4182 	    (un->un_f_blockcount_is_valid != TRUE)) {
4183 		uint64_t	cap;
4184 		uint32_t	lbasz;
4185 		int		rval;
4186 
4187 		mutex_exit(SD_MUTEX(un));
4188 		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
4189 		    &lbasz, SD_PATH_DIRECT);
4190 		mutex_enter(SD_MUTEX(un));
4191 		if (rval == 0) {
4192 			/*
4193 			 * The following relies on
4194 			 * sd_send_scsi_READ_CAPACITY never
4195 			 * returning 0 for capacity and/or lbasize.
4196 			 */
4197 			sd_update_block_info(un, lbasz, cap);
4198 		}
4199 
4200 		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4201 		    (un->un_f_blockcount_is_valid != TRUE)) {
4202 			return (EINVAL);
4203 		}
4204 	}
4205 
4206 	/*
4207 	 * Copy the lbasize and capacity so that if they're reset while we're
4208 	 * not holding the SD_MUTEX, we will continue to use valid values
4209 	 * after the SD_MUTEX is reacquired. (4119659)
4210 	 */
4211 	lbasize  = un->un_tgt_blocksize;
4212 	capacity = un->un_blockcount;
4213 
4214 #if defined(_SUNOS_VTOC_16)
4215 	/*
4216 	 * Set up the "whole disk" fdisk partition; this should always
4217 	 * exist, regardless of whether the disk contains an fdisk table
4218 	 * or vtoc.
4219 	 */
4220 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
4221 	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
4222 #endif
4223 
4224 	/*
4225 	 * Refresh the logical and physical geometry caches.
4226 	 * (data from MODE SENSE format/rigid disk geometry pages,
4227 	 * and scsi_ifgetcap("geometry")).
4228 	 */
4229 	sd_resync_geom_caches(un, capacity, lbasize, path_flag);
4230 
4231 	label_error = sd_use_efi(un, path_flag);
4232 	if (label_error == 0) {
4233 		/* found a valid EFI label */
4234 		SD_TRACE(SD_LOG_IO_PARTITION, un,
4235 		    "sd_validate_geometry: found EFI label\n");
4236 		un->un_solaris_offset = 0;
4237 		un->un_solaris_size = capacity;
4238 		return (ENOTSUP);
4239 	}
4240 	if (un->un_blockcount > DK_MAX_BLOCKS) {
4241 		if (label_error == ESRCH) {
4242 			/*
4243 			 * they've configured a LUN over 1TB, but used
4244 			 * format.dat to restrict format's view of the
4245 			 * capacity to be under 1TB
4246 			 */
4247 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4248 "is >1TB and has a VTOC label: use format(1M) to either decrease the");
4249 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
4250 "size to be < 1TB or relabel the disk with an EFI label");
4251 		} else {
4252 			/* unlabeled disk over 1TB */
4253 #if defined(__i386) || defined(__amd64)
4254 			/*
4255 			 * Refer to comments on off-by-1 at the head of the file.
4256 			 * A 1TB disk was treated as (1TB - 512)B in the past,
4257 			 * so it might have a valid Solaris partition. We will
4258 			 * return ENOTSUP later only if this disk has no valid
4259 			 * Solaris partition.
4260 			 */
4261 			if ((un->un_tgt_blocksize != un->un_sys_blocksize) ||
4262 			    (un->un_blockcount - 1 > DK_MAX_BLOCKS) ||
4263 			    un->un_f_has_removable_media ||
4264 			    un->un_f_is_hotpluggable)
4265 #endif
4266 				return (ENOTSUP);
4267 		}
4268 	}
4269 	label_error = 0;
4270 
4271 	/*
4272 	 * at this point it is either labeled with a VTOC or it is
4273 	 * under 1TB (<= 1TB actually for off-by-1)
4274 	 */
4275 	if (un->un_f_vtoc_label_supported) {
4276 		struct	dk_label *dkl;
4277 		offset_t dkl1;
4278 		offset_t label_addr, real_addr;
4279 		int	rval;
4280 		size_t	buffer_size;
4281 
4282 		/*
4283 		 * Note: This will set up un->un_solaris_size and
4284 		 * un->un_solaris_offset.
4285 		 */
4286 		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
4287 		case SD_CMD_RESERVATION_CONFLICT:
4288 			ASSERT(mutex_owned(SD_MUTEX(un)));
4289 			return (EACCES);
4290 		case SD_CMD_FAILURE:
4291 			ASSERT(mutex_owned(SD_MUTEX(un)));
4292 			return (ENOMEM);
4293 		}
4294 
4295 		if (un->un_solaris_size <= DK_LABEL_LOC) {
4296 
4297 #if defined(__i386) || defined(__amd64)
4298 			/*
4299 			 * Refer to comments on off-by-1 at the head of the file.
4300 			 * This is for the 1TB disk only. Since there is no
4301 			 * Solaris partition, return ENOTSUP as we do for a
4302 			 * >1TB disk.
4303 			 */
4304 			if (un->un_blockcount > DK_MAX_BLOCKS)
4305 				return (ENOTSUP);
4306 #endif
4307 			/*
4308 			 * Found fdisk table but no Solaris partition entry,
4309 			 * so don't call sd_uselabel() and don't create
4310 			 * a default label.
4311 			 */
4312 			label_error = 0;
4313 			un->un_f_geometry_is_valid = TRUE;
4314 			goto no_solaris_partition;
4315 		}
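		/*
		 * The VTOC label lives DK_LABEL_LOC blocks into the
		 * Solaris partition, in system-blocksize units.
		 */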
4316 		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);
4317 
4318 #if defined(__i386) || defined(__amd64)
4319 		/*
4320 		 * Refer to comments on off-by-1 at the head of the file.
4321 		 * This 1TB disk has a valid Solaris partition. It must
4322 		 * have been created by a previous sd driver, so we have
4323 		 * to treat it as (1TB - 512)B.
4324 		 */
4325 		if (un->un_blockcount > DK_MAX_BLOCKS) {
4326 			un->un_f_capacity_adjusted = 1;
4327 			un->un_blockcount = DK_MAX_BLOCKS;
4328 			un->un_map[P0_RAW_DISK].dkl_nblk  = DK_MAX_BLOCKS;
4329 
4330 			/*
4331 			 * Refer to sd_read_fdisk: when there is no
4332 			 * fdisk partition table, un_solaris_size is
4333 			 * set to the disk's capacity. In this case,
4334 			 * we need to adjust it.
4335 			 */
4336 			if (un->un_solaris_size > DK_MAX_BLOCKS)
4337 				un->un_solaris_size = DK_MAX_BLOCKS;
4338 			sd_resync_geom_caches(un, DK_MAX_BLOCKS,
4339 			    lbasize, path_flag);
4340 		}
4341 #endif
4342 
4343 		/*
4344 		 * If sys_blocksize != tgt_blocksize, we need to re-adjust
4345 		 * the blkno and save the offset to the start of dk_label.
4346 		 */
4347 		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
4348 		buffer_size = SD_REQBYTES2TGTBYTES(un,
4349 		    sizeof (struct dk_label));
4350 
4351 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
4352 		    "label_addr: 0x%x allocation size: 0x%x\n",
4353 		    label_addr, buffer_size);
4354 		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
4355 		if (dkl == NULL) {
4356 			return (ENOMEM);
4357 		}
4358 
4359 		mutex_exit(SD_MUTEX(un));
4360 		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
4361 		    path_flag);
4362 		mutex_enter(SD_MUTEX(un));
4363 
4364 		switch (rval) {
4365 		case 0:
4366 			/*
4367 			 * sd_uselabel will establish that the geometry
4368 			 * is valid.
4369 			 * For sys_blocksize != tgt_blocksize, need
4370 			 * to index into the beginning of dk_label
4371 			 */
4372 			dkl1 = (daddr_t)dkl
4373 				+ SD_TGTBYTEOFFSET(un, label_addr, real_addr);
4374 			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
4375 			    path_flag) != SD_LABEL_IS_VALID) {
4376 				label_error = EINVAL;
4377 			}
4378 			break;
4379 		case EACCES:
4380 			label_error = EACCES;
4381 			break;
4382 		default:
4383 			label_error = EINVAL;
4384 			break;
4385 		}
4386 
4387 		kmem_free(dkl, buffer_size);
4388 
4389 #if defined(_SUNOS_VTOC_8)
4390 		label = (char *)un->un_asciilabel;
4391 #elif defined(_SUNOS_VTOC_16)
4392 		label = (char *)un->un_vtoc.v_asciilabel;
4393 #else
4394 #error "No VTOC format defined."
4395 #endif
4396 	}
4397 
4398 	/*
4399 	 * If a valid label was not found, AND if no reservation conflict
4400 	 * was detected, then go ahead and create a default label (4069506).
4401 	 */
4402 	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
4403 		if (un->un_f_geometry_is_valid == FALSE) {
4404 			sd_build_default_label(un);
4405 		}
4406 		label_error = 0;
4407 	}
4408 
4409 no_solaris_partition:
4410 	if ((!un->un_f_has_removable_media ||
4411 	    (un->un_f_has_removable_media &&
4412 		un->un_mediastate == DKIO_EJECTED)) &&
4413 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
4414 		/*
4415 		 * Print out a message indicating who and what we are.
4416 		 * We do this only when we happen to really validate the
4417 		 * geometry. We may call sd_validate_geometry() at other
4418 		 * times, e.g., ioctl()'s like Get VTOC in which case we
4419 		 * don't want to print the label.
4420 		 * If the geometry is valid, print the label string,
4421 		 * else print vendor and product info, if available
4422 		 */
4423 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
4424 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
4425 		} else {
4426 			mutex_enter(&sd_label_mutex);
4427 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
4428 			    labelstring);
4429 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
4430 			    &labelstring[64]);
4431 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
4432 			    labelstring, &labelstring[64]);
4433 			if (un->un_f_blockcount_is_valid == TRUE) {
4434 				(void) sprintf(&buf[strlen(buf)],
4435 				    ", %llu %u byte blocks\n",
4436 				    (longlong_t)un->un_blockcount,
4437 				    un->un_tgt_blocksize);
4438 			} else {
4439 				(void) sprintf(&buf[strlen(buf)],
4440 				    ", (unknown capacity)\n");
4441 			}
4442 			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
4443 			mutex_exit(&sd_label_mutex);
4444 		}
4445 	}
4446 
4447 #if defined(_SUNOS_VTOC_16)
4448 	/*
4449 	 * If we have valid geometry, set up the remaining fdisk partitions.
4450 	 * Note that dkl_cylno is not used for the fdisk map entries, so
4451 	 * we set it to an entirely bogus value.
4452 	 */
4453 	for (count = 0; count < FD_NUMPART; count++) {
4454 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
4455 		un->un_map[FDISK_P1 + count].dkl_nblk =
4456 		    un->un_fmap[count].fmap_nblk;
4457 
4458 		un->un_offset[FDISK_P1 + count] =
4459 		    un->un_fmap[count].fmap_start;
4460 	}
4461 #endif
4462 
4463 	for (count = 0; count < NDKMAP; count++) {
4464 #if defined(_SUNOS_VTOC_8)
4465 		struct dk_map *lp  = &un->un_map[count];
4466 		un->un_offset[count] =
4467 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
4468 #elif defined(_SUNOS_VTOC_16)
4469 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
4470 
4471 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
4472 #else
4473 #error "No VTOC format defined."
4474 #endif
4475 	}
4476 
4477 	return (label_error);
4478 }
4479 
4480 
4481 #if defined(_SUNOS_VTOC_16)
4482 /*
4483  * Macro: MAX_BLKS
4484  *
4485  *	This macro is used for table entries where we need to have the largest
4486  *	possible sector value for that head & SPT (sectors per track)
4487  *	combination.  Other entries for some smaller disk sizes are set by
4488  *	convention to match those used by X86 BIOS usage.
4489  */
4490 #define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt
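/*
 * For example, MAX_BLKS(255, 63) expands to the initializers
 * "65535 * 255 * 63, 255, 63", i.e. a max_cap of 1,052,819,775
 * blocks (about 502.02GB at 512 bytes per block).
 */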
4491 
4492 /*
4493  *    Function: sd_convert_geometry
4494  *
4495  * Description: Convert physical geometry into a dk_geom structure. In
4496  *		other words, make sure we don't wrap 16-bit values.
4497  *		e.g. converting from geom_cache to dk_geom
4498  *
4499  *     Context: Kernel thread only
4500  */
4501 static void
4502 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4503 {
4504 	int i;
4505 	static const struct chs_values {
4506 		uint_t max_cap;		/* Max Capacity for this HS. */
4507 		uint_t nhead;		/* Heads to use. */
4508 		uint_t nsect;		/* SPT to use. */
4509 	} CHS_values[] = {
4510 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4511 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4512 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4513 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4514 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4515 	};
4516 
4517 	/* Unlabeled SCSI floppy device */
4518 	if (capacity <= 0x1000) {
4519 		un_g->dkg_nhead = 2;
4520 		un_g->dkg_ncyl = 80;
4521 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4522 		return;
4523 	}
4524 
4525 	/*
4526 	 * For all devices we calculate cylinders using the
4527 	 * heads and sectors we assign based on capacity of the
4528 	 * device.  The table is designed to be compatible with the
4529 	 * way other operating systems lay out fdisk tables for X86
4530 	 * and to ensure that the cylinders never exceed 65535 to
4531 	 * prevent problems with X86 ioctls that report geometry.
4532 	 * Other OSes, which are not limited to 16 bits for cylinders,
4533 	 * stop at 63 SPT; since we are so limited, we make do by using
4534 	 * SPT values that are multiples of 63.
4535 	 *
4536 	 * Note that capacities greater than or equal to 1TB will simply
4537 	 * get the largest geometry from the table. This should be okay
4538 	 * since disks this large shouldn't be using CHS values anyway.
4539 	 */
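	/*
	 * Worked example (arithmetic only): a 2,097,152-block (1GB)
	 * disk matches the first table entry (max_cap 0x00200000),
	 * giving nhead = 64 and nsect = 32; a cylinder count derived
	 * from these is 2,097,152 / (64 * 32) = 1024, well under 65535.
	 */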
4540 	for (i = 0; CHS_values[i].max_cap < capacity &&
4541 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4542 		;
4543 
4544 	un_g->dkg_nhead = CHS_values[i].nhead;
4545 	un_g->dkg_nsect = CHS_values[i].nsect;
4546 }
4547 #endif
4548 
4549 
4550 /*
4551  *    Function: sd_resync_geom_caches
4552  *
4553  * Description: (Re)initialize both geometry caches: the virtual geometry
4554  *		information is extracted from the HBA (the "geometry"
4555  *		capability), and the physical geometry cache data is
4556  *		generated by issuing MODE SENSE commands.
4557  *
4558  *   Arguments: un - driver soft state (unit) structure
4559  *		capacity - disk capacity in #blocks
4560  *		lbasize - disk block size in bytes
4561  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4562  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4563  *			to use the USCSI "direct" chain and bypass the normal
4564  *			command waitq.
4565  *
4566  *     Context: Kernel thread only (can sleep).
4567  */
4568 
4569 static void
4570 sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4571 	int path_flag)
4572 {
4573 	struct 	geom_cache 	pgeom;
4574 	struct 	geom_cache	*pgeom_p = &pgeom;
4575 	int 	spc;
4576 	unsigned short nhead;
4577 	unsigned short nsect;
4578 
4579 	ASSERT(un != NULL);
4580 	ASSERT(mutex_owned(SD_MUTEX(un)));
4581 
4582 	/*
4583 	 * Ask the controller for its logical geometry.
4584 	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4585 	 * then the lgeom cache will be invalid.
4586 	 */
4587 	sd_get_virtual_geometry(un, capacity, lbasize);
4588 
4589 	/*
4590 	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4591 	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4592 	 */
4593 	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4594 		/*
4595 		 * Note: Perhaps this needs to be more adaptive? The rationale
4596 		 * is that, if there's no HBA geometry from the HBA driver, any
4597 		 * guess is good, since this is the physical geometry. If MODE
4598 		 * SENSE fails, this gives a max cylinder size for non-LBA access.
4599 		 */
4600 		nhead = 255;
4601 		nsect = 63;
4602 	} else {
4603 		nhead = un->un_lgeom.g_nhead;
4604 		nsect = un->un_lgeom.g_nsect;
4605 	}
4606 
4607 	if (ISCD(un)) {
4608 		pgeom_p->g_nhead = 1;
4609 		pgeom_p->g_nsect = nsect * nhead;
4610 	} else {
4611 		pgeom_p->g_nhead = nhead;
4612 		pgeom_p->g_nsect = nsect;
4613 	}
4614 
4615 	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4616 	pgeom_p->g_capacity = capacity;
4617 	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4618 	pgeom_p->g_acyl = 0;
4619 
4620 	/*
4621 	 * Retrieve fresh geometry data from the hardware, stash it
4622 	 * here temporarily before we rebuild the incore label.
4623 	 *
4624 	 * We want to use the MODE SENSE commands to derive the
4625 	 * physical geometry of the device, but if either command
4626 	 * fails, the logical geometry is used as the fallback for
4627 	 * disk label geometry.
4628 	 */
4629 	mutex_exit(SD_MUTEX(un));
4630 	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4631 	mutex_enter(SD_MUTEX(un));
4632 
4633 	/*
4634 	 * Now update the real copy while holding the mutex. This
4635 	 * way the global copy is never in an inconsistent state.
4636 	 */
4637 	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4638 
4639 	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4640 	    "(cached from lgeom)\n");
4641 	SD_INFO(SD_LOG_COMMON, un,
4642 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4643 	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4644 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4645 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4646 	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4647 	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4648 	    un->un_pgeom.g_rpm);
4649 }
4650 
4651 
4652 /*
4653  *    Function: sd_read_fdisk
4654  *
4655  * Description: Utility routine to read the fdisk table.
4656  *
4657  *   Arguments: un - driver soft state (unit) structure
4658  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4659  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4660  *			to use the USCSI "direct" chain and bypass the normal
4661  *			command waitq.
4662  *
4663  * Return Code: SD_CMD_SUCCESS
4664  *		SD_CMD_FAILURE
4665  *
4666  *     Context: Kernel thread only (can sleep).
4667  */
4668 /* ARGSUSED */
4669 static int
4670 sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
4671 {
4672 #if defined(_NO_FDISK_PRESENT)
4673 
4674 	un->un_solaris_offset = 0;
4675 	un->un_solaris_size = capacity;
4676 	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4677 	return (SD_CMD_SUCCESS);
4678 
4679 #elif defined(_FIRMWARE_NEEDS_FDISK)
4680 
4681 	struct ipart	*fdp;
4682 	struct mboot	*mbp;
4683 	struct ipart	fdisk[FD_NUMPART];
4684 	int		i;
4685 	char		sigbuf[2];
4686 	caddr_t		bufp;
4687 	int		uidx;
4688 	int		rval;
4689 	int		lba = 0;
4690 	uint_t		solaris_offset;	/* offset to solaris part. */
4691 	daddr_t		solaris_size;	/* size of solaris partition */
4692 	uint32_t	blocksize;
4693 
4694 	ASSERT(un != NULL);
4695 	ASSERT(mutex_owned(SD_MUTEX(un)));
4696 	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);
4697 
4698 	blocksize = un->un_tgt_blocksize;
4699 
4700 	/*
4701 	 * Start off assuming no fdisk table
4702 	 */
4703 	solaris_offset = 0;
4704 	solaris_size   = capacity;
4705 
4706 	mutex_exit(SD_MUTEX(un));
4707 	bufp = kmem_zalloc(blocksize, KM_SLEEP);
4708 	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
4709 	mutex_enter(SD_MUTEX(un));
4710 
4711 	if (rval != 0) {
4712 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4713 		    "sd_read_fdisk: fdisk read err\n");
4714 		kmem_free(bufp, blocksize);
4715 		return (SD_CMD_FAILURE);
4716 	}
4717 
4718 	mbp = (struct mboot *)bufp;
4719 
4720 	/*
4721 	 * The fdisk table does not begin on a 4-byte boundary within the
4722 	 * master boot record, so we copy it to an aligned structure to avoid
4723 	 * alignment exceptions on some processors.
4724 	 */
4725 	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
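
	/*
	 * Layout sketch of the master boot sector parsed here (byte offsets
	 * per the standard PC MBR format):
	 *
	 *	  0..445   boot code
	 *	446..509   four 16-byte struct ipart entries (446 is not
	 *		   4-byte aligned, hence the bcopy above)
	 *	510..511   the two signature bytes checked against
	 *		   MBB_MAGIC further below
	 */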
4726 
4727 	/*
4728 	 * Check for lba support before verifying sig; sig might not be
4729 	 * there, say on a blank disk, but the max_chs mark may still
4730 	 * be present.
4731 	 *
4732 	 * Note: LBA support and BEFs are an x86-only concept but this
4733 	 * code should work OK on SPARC as well.
4734 	 */
4735 
4736 	/*
4737 	 * First, check for lba-access-ok on the root node (or prom root node);
4738 	 * if present there, we don't need to search the fdisk table.
4739 	 */
4740 	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
4741 	    "lba-access-ok", 0) != 0) {
4742 		/* All drives do LBA; don't search fdisk table */
4743 		lba = 1;
4744 	} else {
4745 		/* Okay, look for mark in fdisk table */
4746 		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4747 			/* accumulate "lba" value from all partitions */
4748 			lba = (lba || sd_has_max_chs_vals(fdp));
4749 		}
4750 	}
4751 
4752 	if (lba != 0) {
4753 		dev_t dev = sd_make_device(SD_DEVINFO(un));
4754 
4755 		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
4756 		    "lba-access-ok", 0) == 0) {
4757 			/* not found; create it */
4758 			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
4759 			    "lba-access-ok", (caddr_t)NULL, 0) !=
4760 			    DDI_PROP_SUCCESS) {
4761 				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4762 				    "sd_read_fdisk: Can't create lba property "
4763 				    "for instance %d\n",
4764 				    ddi_get_instance(SD_DEVINFO(un)));
4765 			}
4766 		}
4767 	}
4768 
4769 	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));
4770 
4771 	/*
4772 	 * Endian-independent signature check
4773 	 */
4774 	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
4775 	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
4776 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4777 		    "sd_read_fdisk: no fdisk\n");
4778 		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4779 		rval = SD_CMD_SUCCESS;
4780 		goto done;
4781 	}
4782 
4783 #ifdef SDDEBUG
4784 	if (sd_level_mask & SD_LOGMASK_INFO) {
4785 		fdp = fdisk;
4786 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
4787 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
4788 		    "numsect         sysid       bootid\n");
4789 		for (i = 0; i < FD_NUMPART; i++, fdp++) {
4790 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4791 			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
4792 			    i, fdp->relsect, fdp->numsect,
4793 			    fdp->systid, fdp->bootid);
4794 		}
4795 	}
4796 #endif
4797 
4798 	/*
4799 	 * Try to find the unix partition
4800 	 */
4801 	uidx = -1;
4802 	solaris_offset = 0;
4803 	solaris_size   = 0;
4804 
4805 	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4806 		int	relsect;
4807 		int	numsect;
4808 
4809 		if (fdp->numsect == 0) {
4810 			un->un_fmap[i].fmap_start = 0;
4811 			un->un_fmap[i].fmap_nblk  = 0;
4812 			continue;
4813 		}
4814 
4815 		/*
4816 		 * Data in the fdisk table is little-endian.
4817 		 */
4818 		relsect = LE_32(fdp->relsect);
4819 		numsect = LE_32(fdp->numsect);
4820 
4821 		un->un_fmap[i].fmap_start = relsect;
4822 		un->un_fmap[i].fmap_nblk  = numsect;
4823 
4824 		if (fdp->systid != SUNIXOS &&
4825 		    fdp->systid != SUNIXOS2 &&
4826 		    fdp->systid != EFI_PMBR) {
4827 			continue;
4828 		}
4829 
4830 		/*
4831 		 * Use the last active Solaris partition found (there should
4832 		 * be only one active partition).
4833 		 *
4834 		 * If there is no active Solaris partition, then use the
4835 		 * first inactive Solaris partition found.
4836 		 */
4837 		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
4838 			uidx = i;
4839 			solaris_offset = relsect;
4840 			solaris_size   = numsect;
4841 		}
4842 	}
4843 
4844 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
4845 	    un->un_solaris_offset, un->un_solaris_size);
4846 
4847 	rval = SD_CMD_SUCCESS;
4848 
4849 done:
4850 
4851 	/*
4852 	 * Clear the VTOC info only if the Solaris partition entry
4853 	 * has moved, changed size, been deleted, or if the size of
4854 	 * the partition is too small to even fit the label sector.
4855 	 */
4856 	if ((un->un_solaris_offset != solaris_offset) ||
4857 	    (un->un_solaris_size != solaris_size) ||
4858 	    solaris_size <= DK_LABEL_LOC) {
4859 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
4860 		    solaris_offset, solaris_size);
4861 		bzero(&un->un_g, sizeof (struct dk_geom));
4862 		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
4863 		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
4864 		un->un_f_geometry_is_valid = FALSE;
4865 	}
4866 	un->un_solaris_offset = solaris_offset;
4867 	un->un_solaris_size = solaris_size;
4868 	kmem_free(bufp, blocksize);
4869 	return (rval);
4870 
4871 #else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
4872 #error "fdisk table presence undetermined for this platform."
4873 #endif	/* #if defined(_NO_FDISK_PRESENT) */
4874 }
4875 
4876 
4877 /*
4878  *    Function: sd_get_physical_geometry
4879  *
4880  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4881  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4882  *		target, and use this information to initialize the physical
4883  *		geometry cache specified by pgeom_p.
4884  *
4885  *		MODE SENSE is an optional command, so failure in this case
4886  *		does not necessarily denote an error. We want to use the
4887  *		MODE SENSE commands to derive the physical geometry of the
4888  *		device, but if either command fails, the logical geometry is
4889  *		used as the fallback for disk label geometry.
4890  *
4891  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4892  *		have already been initialized for the current target and
4893  *		that the current values be passed as args so that we don't
4894  *		end up ever trying to use -1 as a valid value. This could
4895  *		happen if either value is reset while we're not holding
4896  *		the mutex.
4897  *
4898  *   Arguments: un - driver soft state (unit) structure
4899  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4900  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4901  *			to use the USCSI "direct" chain and bypass the normal
4902  *			command waitq.
4903  *
4904  *     Context: Kernel thread only (can sleep).
4905  */
4906 
4907 static void
4908 sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
4909 	int capacity, int lbasize, int path_flag)
4910 {
4911 	struct	mode_format	*page3p;
4912 	struct	mode_geometry	*page4p;
4913 	struct	mode_header	*headerp;
4914 	int	sector_size;
4915 	int	nsect;
4916 	int	nhead;
4917 	int	ncyl;
4918 	int	intrlv;
4919 	int	spc;
4920 	int	modesense_capacity;
4921 	int	rpm;
4922 	int	bd_len;
4923 	int	mode_header_length;
4924 	uchar_t	*p3bufp;
4925 	uchar_t	*p4bufp;
4926 	int	cdbsize;
4927 
4928 	ASSERT(un != NULL);
4929 	ASSERT(!(mutex_owned(SD_MUTEX(un))));
4930 
4931 	if (un->un_f_blockcount_is_valid != TRUE) {
4932 		return;
4933 	}
4934 
4935 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
4936 		return;
4937 	}
4938 
4939 	if (lbasize == 0) {
4940 		if (ISCD(un)) {
4941 			lbasize = 2048;
4942 		} else {
4943 			lbasize = un->un_sys_blocksize;
4944 		}
4945 	}
4946 	pgeom_p->g_secsize = (unsigned short)lbasize;
4947 
4948 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4949 
4950 	/*
4951 	 * Retrieve MODE SENSE page 3 - Format Device Page
4952 	 */
4953 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4954 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4955 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4956 	    != 0) {
4957 		SD_ERROR(SD_LOG_COMMON, un,
4958 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4959 		goto page3_exit;
4960 	}
4961 
4962 	/*
4963 	 * Determine size of Block Descriptors in order to locate the mode
4964 	 * page data.  ATAPI devices return 0, SCSI devices should return
4965 	 * MODE_BLK_DESC_LENGTH.
4966 	 */
4967 	headerp = (struct mode_header *)p3bufp;
4968 	if (un->un_f_cfg_is_atapi == TRUE) {
4969 		struct mode_header_grp2 *mhp =
4970 		    (struct mode_header_grp2 *)headerp;
4971 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4972 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4973 	} else {
4974 		mode_header_length = MODE_HEADER_LENGTH;
4975 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4976 	}
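
	/*
	 * Sketch of the MODE SENSE reply buffer parsed here; page3p below
	 * is simply headerp advanced past the header and any block
	 * descriptors:
	 *
	 *	+--------------------+  <- headerp
	 *	| mode header        |  mode_header_length bytes
	 *	+--------------------+
	 *	| block descriptors  |  bd_len bytes (0 for ATAPI)
	 *	+--------------------+
	 *	| page 3 mode page   |  <- page3p
	 *	+--------------------+
	 */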
4977 
4978 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4979 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4980 		    "received unexpected bd_len of %d, page3\n", bd_len);
4981 		goto page3_exit;
4982 	}
4983 
4984 	page3p = (struct mode_format *)
4985 	    ((caddr_t)headerp + mode_header_length + bd_len);
4986 
4987 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4988 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4989 		    "mode sense pg3 code mismatch %d\n",
4990 		    page3p->mode_page.code);
4991 		goto page3_exit;
4992 	}
4993 
4994 	/*
4995 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4996 	 * complete successfully; otherwise, revert to the logical geometry.
4997 	 * So, we need to save everything in temporary variables.
4998 	 */
4999 	sector_size = BE_16(page3p->data_bytes_sect);
5000 
5001 	/*
5002 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
5003 	 */
5004 	if (sector_size == 0) {
5005 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
5006 	} else {
5007 		sector_size &= ~(un->un_sys_blocksize - 1);
5008 	}
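
	/*
	 * For example (assuming a 512-byte system block size), a reported
	 * sector size of 516 would be masked down to 512 by the statement
	 * above.
	 */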
5009 
5010 	nsect  = BE_16(page3p->sect_track);
5011 	intrlv = BE_16(page3p->interleave);
5012 
5013 	SD_INFO(SD_LOG_COMMON, un,
5014 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
5015 	SD_INFO(SD_LOG_COMMON, un,
5016 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
5017 	    page3p->mode_page.code, nsect, sector_size);
5018 	SD_INFO(SD_LOG_COMMON, un,
5019 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
5020 	    BE_16(page3p->track_skew),
5021 	    BE_16(page3p->cylinder_skew));
5022 
5023 
5024 	/*
5025 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
5026 	 */
5027 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
5028 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
5029 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
5030 	    != 0) {
5031 		SD_ERROR(SD_LOG_COMMON, un,
5032 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
5033 		goto page4_exit;
5034 	}
5035 
5036 	/*
5037 	 * Determine size of Block Descriptors in order to locate the mode
5038 	 * page data.  ATAPI devices return 0, SCSI devices should return
5039 	 * MODE_BLK_DESC_LENGTH.
5040 	 */
5041 	headerp = (struct mode_header *)p4bufp;
5042 	if (un->un_f_cfg_is_atapi == TRUE) {
5043 		struct mode_header_grp2 *mhp =
5044 		    (struct mode_header_grp2 *)headerp;
5045 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5046 	} else {
5047 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5048 	}
5049 
5050 	if (bd_len > MODE_BLK_DESC_LENGTH) {
5051 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5052 		    "received unexpected bd_len of %d, page4\n", bd_len);
5053 		goto page4_exit;
5054 	}
5055 
5056 	page4p = (struct mode_geometry *)
5057 	    ((caddr_t)headerp + mode_header_length + bd_len);
5058 
5059 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
5060 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5061 		    "mode sense pg4 code mismatch %d\n",
5062 		    page4p->mode_page.code);
5063 		goto page4_exit;
5064 	}
5065 
5066 	/*
5067 	 * Stash the data now, after we know that both commands completed.
5068 	 */
5069 
5070 	mutex_enter(SD_MUTEX(un));
5071 
5072 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
5073 	spc   = nhead * nsect;
5074 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
5075 	rpm   = BE_16(page4p->rpm);
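
	/*
	 * ncyl above is assembled from a 24-bit big-endian quantity; e.g.
	 * the hypothetical values cyl_ub = 0x01, cyl_mb = 0x86, cyl_lb = 0xa0
	 * yield (0x01 << 16) + (0x86 << 8) + 0xa0 = 0x0186a0 = 100000.
	 */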
5076 
5077 	modesense_capacity = spc * ncyl;
5078 
5079 	SD_INFO(SD_LOG_COMMON, un,
5080 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
5081 	SD_INFO(SD_LOG_COMMON, un,
5082 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
5083 	SD_INFO(SD_LOG_COMMON, un,
5084 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
5085 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
5086 	    (void *)pgeom_p, capacity);
5087 
5088 	/*
5089 	 * Compensate if the drive's geometry is not rectangular, i.e.,
5090 	 * the product of C * H * S returned by MODE SENSE >= that returned
5091 	 * by read capacity. This is an idiosyncrasy of the original x86
5092 	 * disk subsystem.
5093 	 */
5094 	if (modesense_capacity >= capacity) {
5095 		SD_INFO(SD_LOG_COMMON, un,
5096 		    "sd_get_physical_geometry: adjusting acyl; "
5097 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5098 		    (modesense_capacity - capacity + spc - 1) / spc);
5099 		if (sector_size != 0) {
5100 			/* 1243403: NEC D38x7 drives don't support sec size */
5101 			pgeom_p->g_secsize = (unsigned short)sector_size;
5102 		}
5103 		pgeom_p->g_nsect    = (unsigned short)nsect;
5104 		pgeom_p->g_nhead    = (unsigned short)nhead;
5105 		pgeom_p->g_capacity = capacity;
5106 		pgeom_p->g_acyl	    =
5107 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5108 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
5109 	}
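
	/*
	 * Worked example of the alternate-cylinder math above, using
	 * hypothetical numbers: spc = 16065, a MODE SENSE capacity of
	 * 2100000 blocks and a READ CAPACITY of 2000000 blocks give
	 *
	 *	g_acyl = (2100000 - 2000000 + 16064) / 16065 = 7
	 *
	 * i.e. the excess is rounded up to whole alternate cylinders and
	 * g_ncyl is reduced by the same amount.
	 */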
5110 
5111 	pgeom_p->g_rpm    = (unsigned short)rpm;
5112 	pgeom_p->g_intrlv = (unsigned short)intrlv;
5113 
5114 	SD_INFO(SD_LOG_COMMON, un,
5115 	    "sd_get_physical_geometry: mode sense geometry:\n");
5116 	SD_INFO(SD_LOG_COMMON, un,
5117 	    "   nsect: %d; sector size: %d; interlv: %d\n",
5118 	    nsect, sector_size, intrlv);
5119 	SD_INFO(SD_LOG_COMMON, un,
5120 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5121 	    nhead, ncyl, rpm, modesense_capacity);
5122 	SD_INFO(SD_LOG_COMMON, un,
5123 	    "sd_get_physical_geometry: (cached)\n");
5124 	SD_INFO(SD_LOG_COMMON, un,
5125 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5126 	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
5127 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
5128 	SD_INFO(SD_LOG_COMMON, un,
5129 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5130 	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
5131 	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);
5132 
5133 	mutex_exit(SD_MUTEX(un));
5134 
5135 page4_exit:
5136 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5137 page3_exit:
5138 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5139 }
5140 
5141 
5142 /*
5143  *    Function: sd_get_virtual_geometry
5144  *
5145  * Description: Ask the controller to tell us about the target device.
5146  *
5147  *   Arguments: un - pointer to softstate
5148  *		capacity - disk capacity in #blocks
5149  *		lbasize - disk block size in bytes
5150  *
5151  *     Context: Kernel thread only
5152  */
5153 
5154 static void
5155 sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5156 {
5157 	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5158 	uint_t	geombuf;
5159 	int	spc;
5160 
5161 	ASSERT(un != NULL);
5162 	ASSERT(mutex_owned(SD_MUTEX(un)));
5163 
5164 	mutex_exit(SD_MUTEX(un));
5165 
5166 	/* Set sector size, and total number of sectors */
5167 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5168 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5169 
5170 	/* Let the HBA tell us its geometry */
5171 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5172 
5173 	mutex_enter(SD_MUTEX(un));
5174 
5175 	/* A value of -1 indicates an undefined "geometry" property */
5176 	if (geombuf == (-1)) {
5177 		return;
5178 	}
5179 
5180 	/* Initialize the logical geometry cache. */
5181 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5182 	lgeom_p->g_nsect   = geombuf & 0xffff;
5183 	lgeom_p->g_secsize = un->un_sys_blocksize;
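
	/*
	 * For example, the hypothetical value geombuf == 0x00ff003f decodes
	 * to nhead = 0x00ff = 255 and nsect = 0x003f = 63.
	 */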
5184 
5185 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5186 
5187 	/*
5188 	 * Note: The driver originally converted the capacity value from
5189 	 * target blocks to system blocks. However, the capacity value passed
5190 	 * to this routine is already in terms of system blocks (this scaling
5191 	 * is done when the READ CAPACITY command is issued and processed).
5192 	 * This 'error' may have gone undetected because the usage of g_ncyl
5193 	 * (which is based upon g_capacity) is very limited within the driver.
5194 	 */
5195 	lgeom_p->g_capacity = capacity;
5196 
5197 	/*
5198 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value.
5199 	 * The HBA may return zero values if the device has been removed.
5200 	 */
5201 	if (spc == 0) {
5202 		lgeom_p->g_ncyl = 0;
5203 	} else {
5204 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5205 	}
5206 	lgeom_p->g_acyl = 0;
5207 
5208 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5209 	SD_INFO(SD_LOG_COMMON, un,
5210 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5211 	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5212 	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5213 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5214 	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5215 	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5216 }
5217 
5218 
5219 /*
5220  *    Function: sd_update_block_info
5221  *
5222  * Description: Store the new target sector size and capacity in the
5223  *		soft state, marking each value valid when it is non-zero.
5224  *
5225  *   Arguments: un: unit struct.
5226  *		lbasize: new target sector size
5227  *		capacity: new target capacity, ie. block count
5228  *
5229  *     Context: Kernel thread context
5230  */
5231 
5232 static void
5233 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5234 {
5235 	if (lbasize != 0) {
5236 		un->un_tgt_blocksize = lbasize;
5237 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5238 	}
5239 
5240 	if (capacity != 0) {
5241 		un->un_blockcount		= capacity;
5242 		un->un_f_blockcount_is_valid	= TRUE;
5243 	}
5244 }
5245 
5246 
5247 static void
5248 sd_swap_efi_gpt(efi_gpt_t *e)
5249 {
5250 	_NOTE(ASSUMING_PROTECTED(*e))
5251 	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5252 	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5253 	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5254 	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5255 	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5256 	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5257 	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5258 	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5259 	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5260 	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5261 	e->efi_gpt_NumberOfPartitionEntries =
5262 	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5263 	e->efi_gpt_SizeOfPartitionEntry =
5264 	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5265 	e->efi_gpt_PartitionEntryArrayCRC32 =
5266 	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5267 }
5268 
5269 static void
5270 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5271 {
5272 	int i;
5273 
5274 	_NOTE(ASSUMING_PROTECTED(*p))
5275 	for (i = 0; i < nparts; i++) {
5276 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5277 		    p[i].efi_gpe_PartitionTypeGUID);
5278 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5279 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5280 		/* PartitionAttrs */
5281 	}
5282 }
5283 
5284 static int
5285 sd_validate_efi(efi_gpt_t *labp)
5286 {
5287 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5288 		return (EINVAL);
5289 	/* at least 96 bytes in this version of the spec. */
5290 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5291 	    labp->efi_gpt_HeaderSize)
5292 		return (EINVAL);
5293 	/* this should be 128 bytes */
5294 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5295 		return (EINVAL);
5296 	return (0);
5297 }
5298 
5299 static int
5300 sd_use_efi(struct sd_lun *un, int path_flag)
5301 {
5302 	int		i;
5303 	int		rval = 0;
5304 	efi_gpe_t	*partitions;
5305 	uchar_t		*buf;
5306 	uint_t		lbasize;
5307 	uint64_t	cap;
5308 	uint_t		nparts;
5309 	diskaddr_t	gpe_lba;
5310 
5311 	ASSERT(mutex_owned(SD_MUTEX(un)));
5312 	lbasize = un->un_tgt_blocksize;
	cap = un->un_blockcount;	/* default; re-read below if needed */
5313 
5314 	mutex_exit(SD_MUTEX(un));
5315 
5316 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5317 
5318 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5319 		rval = EINVAL;
5320 		goto done_err;
5321 	}
5322 
5323 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5324 	if (rval) {
5325 		goto done_err;
5326 	}
5327 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5328 		/* not ours */
5329 		rval = ESRCH;
5330 		goto done_err;
5331 	}
5332 
5333 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5334 	if (rval) {
5335 		goto done_err;
5336 	}
5337 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5338 
5339 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5340 		/*
5341 		 * Couldn't read the primary, try the backup.  Our
5342 		 * capacity at this point could be based on CHS, so
5343 		 * check what the device reports.
5344 		 */
5345 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5346 		    path_flag);
5347 		if (rval) {
5348 			goto done_err;
5349 		}
5350 
5351 		/*
5352 		 * The MMC standard allows READ CAPACITY to be
5353 		 * inaccurate by a bounded amount (in the interest of
5354 		 * response latency).  As a result, failed READs are
5355 		 * commonplace (due to the reading of metadata and not
5356 		 * data). Depending on the per-Vendor/drive Sense data,
5357 		 * the failed READ can cause many (unnecessary) retries.
5358 		 */
5359 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5360 		    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5361 		    path_flag)) != 0) {
5362 			goto done_err;
5363 		}
5364 
5365 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5366 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5367 
5368 			/*
5369 			 * Refer to comments related to off-by-1 at the
5370 			 * header of this file. Search the next to last
5371 			 * block for backup EFI label.
5372 			 */
5373 			if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5374 			    cap - 2, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5375 			    path_flag)) != 0) {
5376 				goto done_err;
5377 			}
5378 			sd_swap_efi_gpt((efi_gpt_t *)buf);
5379 			if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5380 				goto done_err;
5381 		}
5382 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5383 		    "primary label corrupt; using backup\n");
5384 	}
5385 
5386 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5387 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5388 
5389 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5390 	    path_flag);
5391 	if (rval) {
5392 		goto done_err;
5393 	}
5394 	partitions = (efi_gpe_t *)buf;
5395 
5396 	if (nparts > MAXPART) {
5397 		nparts = MAXPART;
5398 	}
5399 	sd_swap_efi_gpe(nparts, partitions);
5400 
5401 	mutex_enter(SD_MUTEX(un));
5402 
5403 	/* Fill in partition table. */
5404 	for (i = 0; i < nparts; i++) {
5405 		if (partitions->efi_gpe_StartingLBA != 0 ||
5406 		    partitions->efi_gpe_EndingLBA != 0) {
5407 			un->un_map[i].dkl_cylno =
5408 			    partitions->efi_gpe_StartingLBA;
5409 			un->un_map[i].dkl_nblk =
5410 			    partitions->efi_gpe_EndingLBA -
5411 			    partitions->efi_gpe_StartingLBA + 1;
5412 			un->un_offset[i] =
5413 			    partitions->efi_gpe_StartingLBA;
5414 		}
5415 		if (i == WD_NODE) {
5416 			/*
5417 			 * minor number 7 corresponds to the whole disk
5418 			 */
5419 			un->un_map[i].dkl_cylno = 0;
5420 			un->un_map[i].dkl_nblk = un->un_blockcount;
5421 			un->un_offset[i] = 0;
5422 		}
5423 		partitions++;
5424 	}
5425 	un->un_solaris_offset = 0;
5426 	un->un_solaris_size = cap;
5427 	un->un_f_geometry_is_valid = TRUE;
5428 
5429 	/* clear the vtoc label */
5430 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5431 
5432 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5433 	return (0);
5434 
5435 done_err:
5436 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5437 	mutex_enter(SD_MUTEX(un));
5438 	/*
5439 	 * If we didn't find something that could look like a VTOC
5440 	 * and the disk is over 1TB, we know there isn't a valid label.
5441 	 * Otherwise let sd_uselabel decide what to do.  We only
5442 	 * want to invalidate this if we're certain the label isn't
5443 	 * valid because sd_prop_op will now fail, which in turn
5444 	 * causes things like opens and stats on the partition to fail.
5445 	 */
5446 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5447 		un->un_f_geometry_is_valid = FALSE;
5448 	}
5449 	return (rval);
5450 }
5451 
5452 
5453 /*
5454  *    Function: sd_uselabel
5455  *
5456  * Description: Validate the disk label and update the relevant data (geometry,
5457  *		partition, vtoc, and capacity data) in the sd_lun struct.
5458  *		Marks the geometry of the unit as being valid.
5459  *
5460  *   Arguments: un: unit struct.
5461  *		dk_label: disk label
5462  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5463  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5464  *			to use the USCSI "direct" chain and bypass the normal
5465  *			command waitq.
5466  *
5467  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5468  *		partition, vtoc, and capacity data are good.
5469  *
5470  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5471  *		label; or computed capacity does not jibe with capacity
5472  *		reported from the READ CAPACITY command.
5473  *
5474  *     Context: Kernel thread only (can sleep).
5475  */
5476 
5477 static int
5478 sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
5479 {
5480 	short	*sp;
5481 	short	sum;
5482 	short	count;
5483 	int	label_error = SD_LABEL_IS_VALID;
5484 	int	i;
5485 	int	capacity;
5486 	int	part_end;
5487 	int	track_capacity;
5488 	int	err;
5489 #if defined(_SUNOS_VTOC_16)
5490 	struct	dkl_partition	*vpartp;
5491 #endif
5492 	ASSERT(un != NULL);
5493 	ASSERT(mutex_owned(SD_MUTEX(un)));
5494 
5495 	/* Validate the magic number of the label. */
5496 	if (labp->dkl_magic != DKL_MAGIC) {
5497 #if defined(__sparc)
5498 		if ((un->un_state == SD_STATE_NORMAL) &&
5499 		    un->un_f_vtoc_errlog_supported) {
5500 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5501 			    "Corrupt label; wrong magic number\n");
5502 		}
5503 #endif
5504 		return (SD_LABEL_IS_INVALID);
5505 	}
5506 
5507 	/* Validate the checksum of the label. */
5508 	sp  = (short *)labp;
5509 	sum = 0;
5510 	count = sizeof (struct dk_label) / sizeof (short);
5511 	while (count--)	 {
5512 		sum ^= *sp++;
5513 	}
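
	/*
	 * A valid label is written with its checksum word chosen so that
	 * XOR-ing every short in the sector yields zero, hence the
	 * sum-must-be-zero test below.
	 */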
5514 
5515 	if (sum != 0) {
5516 #if	defined(_SUNOS_VTOC_16)
5517 		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
5518 #elif defined(_SUNOS_VTOC_8)
5519 		if ((un->un_state == SD_STATE_NORMAL) &&
5520 		    un->un_f_vtoc_errlog_supported) {
5521 #endif
5522 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5523 			    "Corrupt label - label checksum failed\n");
5524 		}
5525 		return (SD_LABEL_IS_INVALID);
5526 	}
5527 
5528 
5529 	/*
5530 	 * Fill in geometry structure with data from label.
5531 	 */
5532 	bzero(&un->un_g, sizeof (struct dk_geom));
5533 	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
5534 	un->un_g.dkg_acyl   = labp->dkl_acyl;
5535 	un->un_g.dkg_bcyl   = 0;
5536 	un->un_g.dkg_nhead  = labp->dkl_nhead;
5537 	un->un_g.dkg_nsect  = labp->dkl_nsect;
5538 	un->un_g.dkg_intrlv = labp->dkl_intrlv;
5539 
5540 #if defined(_SUNOS_VTOC_8)
5541 	un->un_g.dkg_gap1   = labp->dkl_gap1;
5542 	un->un_g.dkg_gap2   = labp->dkl_gap2;
5543 	un->un_g.dkg_bhead  = labp->dkl_bhead;
5544 #endif
5545 #if defined(_SUNOS_VTOC_16)
5546 	un->un_dkg_skew = labp->dkl_skew;
5547 #endif
5548 
5549 #if defined(__i386) || defined(__amd64)
5550 	un->un_g.dkg_apc = labp->dkl_apc;
5551 #endif
5552 
5553 	/*
5554 	 * Currently we rely on the values in the label being accurate. If
5555 	 * dkl_rpm or dkl_pcyl are zero in the label, use a default value.
5556 	 *
5557 	 * Note: In the future a MODE SENSE may be used to retrieve this data,
5558 	 * although this command is optional in SCSI-2.
5559 	 */
5560 	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
5561 	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
5562 	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);
5563 
5564 	/*
5565 	 * The Read and Write reinstruct values may not be valid
5566 	 * for older disks.
5567 	 */
5568 	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
5569 	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;
5570 
5571 	/* Fill in partition table. */
5572 #if defined(_SUNOS_VTOC_8)
5573 	for (i = 0; i < NDKMAP; i++) {
5574 		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
5575 		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
5576 	}
5577 #endif
5578 #if  defined(_SUNOS_VTOC_16)
5579 	vpartp		= labp->dkl_vtoc.v_part;
5580 	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;
5581 
5582 	/* Prevent divide by zero */
5583 	if (track_capacity == 0) {
5584 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5585 		    "Corrupt label - zero nhead or nsect value\n");
5586 
5587 		return (SD_LABEL_IS_INVALID);
5588 	}
5589 
5590 	for (i = 0; i < NDKMAP; i++, vpartp++) {
5591 		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
5592 		un->un_map[i].dkl_nblk  = vpartp->p_size;
5593 	}
5594 #endif
5595 
5596 	/* Fill in VTOC Structure. */
5597 	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
5598 #if defined(_SUNOS_VTOC_8)
5599 	/*
5600 	 * The 8-slice vtoc does not include the ascii label; save it into
5601 	 * the device's soft state structure here.
5602 	 */
5603 	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
5604 #endif
5605 
5606 	/* Now look for a valid capacity. */
5607 	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
5608 	capacity	= (un->un_g.dkg_ncyl  * track_capacity);
5609 
5610 	if (un->un_g.dkg_acyl) {
5611 #if defined(__i386) || defined(__amd64)
5612 		/* we may have > 1 alts cylinder */
5613 		capacity += (track_capacity * un->un_g.dkg_acyl);
5614 #else
5615 		capacity += track_capacity;
5616 #endif
5617 	}
5618 
5619 	/*
5620 	 * Force check here to ensure the computed capacity is valid.
5621 	 * If capacity is zero, it indicates an invalid label and
5622 	 * we should abort updating the relevant data.
5623 	 */
5624 	if (capacity == 0) {
5625 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5626 		    "Corrupt label - no valid capacity could be retrieved\n");
5627 
5628 		return (SD_LABEL_IS_INVALID);
5629 	}
5630 
5631 	/* Mark the geometry as valid. */
5632 	un->un_f_geometry_is_valid = TRUE;
5633 
5634 	/*
5635 	 * At this point, un->un_blockcount should contain valid data from
5636 	 * the READ CAPACITY command.
5637 	 */
5638 	if (un->un_f_blockcount_is_valid != TRUE) {
5639 		/*
5640 		 * We have a situation where the target didn't give us a good
5641 		 * READ CAPACITY value, yet there appears to be a valid label.
5642 		 * In this case, we'll fake the capacity.
5643 		 */
5644 		un->un_blockcount = capacity;
5645 		un->un_f_blockcount_is_valid = TRUE;
5646 		goto done;
5647 	}
5648 
5649 
5650 	if ((capacity <= un->un_blockcount) ||
5651 	    (un->un_state != SD_STATE_NORMAL)) {
5652 #if defined(_SUNOS_VTOC_8)
5653 		/*
5654 		 * We can't let this happen on drives that are subdivided
5655 		 * into logical disks (i.e., that have an fdisk table).
5656 		 * The un_blockcount field should always hold the full media
5657 		 * size in sectors, period.  This code would overwrite
5658 		 * un_blockcount with the size of the Solaris fdisk partition.
5659 		 */
5660 		SD_ERROR(SD_LOG_COMMON, un,
5661 		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
5662 		    capacity, un->un_blockcount);
5663 		un->un_blockcount = capacity;
5664 		un->un_f_blockcount_is_valid = TRUE;
5665 #endif	/* defined(_SUNOS_VTOC_8) */
5666 		goto done;
5667 	}
5668 
5669 	if (ISCD(un)) {
5670 		/* For CDROMs, we trust that the data in the label is OK. */
5671 #if defined(_SUNOS_VTOC_8)
5672 		for (i = 0; i < NDKMAP; i++) {
5673 			part_end = labp->dkl_nhead * labp->dkl_nsect *
5674 			    labp->dkl_map[i].dkl_cylno +
5675 			    labp->dkl_map[i].dkl_nblk  - 1;
5676 
5677 			if ((labp->dkl_map[i].dkl_nblk) &&
5678 			    (part_end > un->un_blockcount)) {
5679 				un->un_f_geometry_is_valid = FALSE;
5680 				break;
5681 			}
5682 		}
5683 #endif
5684 #if defined(_SUNOS_VTOC_16)
5685 		vpartp = &(labp->dkl_vtoc.v_part[0]);
5686 		for (i = 0; i < NDKMAP; i++, vpartp++) {
5687 			part_end = vpartp->p_start + vpartp->p_size;
5688 			if ((vpartp->p_size > 0) &&
5689 			    (part_end > un->un_blockcount)) {
5690 				un->un_f_geometry_is_valid = FALSE;
5691 				break;
5692 			}
5693 		}
5694 #endif
5695 	} else {
5696 		uint64_t t_capacity;
5697 		uint32_t t_lbasize;
5698 
5699 		mutex_exit(SD_MUTEX(un));
5700 		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
5701 		    path_flag);
5702 		ASSERT(t_capacity <= DK_MAX_BLOCKS);
5703 		mutex_enter(SD_MUTEX(un));
5704 
5705 		if (err == 0) {
5706 			sd_update_block_info(un, t_lbasize, t_capacity);
5707 		}
5708 
5709 		if (capacity > un->un_blockcount) {
5710 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5711 			    "Corrupt label - bad geometry\n");
5712 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
5713 			    "Label says %u blocks; Drive says %llu blocks\n",
5714 			    capacity, (unsigned long long)un->un_blockcount);
5715 			un->un_f_geometry_is_valid = FALSE;
5716 			label_error = SD_LABEL_IS_INVALID;
5717 		}
5718 	}
5719 
5720 done:
5721 
5722 	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
5723 	SD_INFO(SD_LOG_COMMON, un,
5724 	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
5725 	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
5726 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5727 	SD_INFO(SD_LOG_COMMON, un,
5728 	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
5729 	    un->un_tgt_blocksize, un->un_blockcount,
5730 	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
5731 	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
5732 	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);
5733 
5734 	ASSERT(mutex_owned(SD_MUTEX(un)));
5735 
5736 	return (label_error);
5737 }
5738 
5739 
5740 /*
5741  *    Function: sd_build_default_label
5742  *
5743  * Description: Generate a default label for those devices that do not have
5744  *		one, e.g., new media, removable cartridges, etc.
5745  *
5746  *     Context: Kernel thread only
5747  */
5748 
5749 static void
5750 sd_build_default_label(struct sd_lun *un)
5751 {
5752 #if defined(_SUNOS_VTOC_16)
5753 	uint_t	phys_spc;
5754 	uint_t	disksize;
5755 	struct	dk_geom un_g;
5756 	uint64_t capacity;
5757 #endif
5758 
5759 	ASSERT(un != NULL);
5760 	ASSERT(mutex_owned(SD_MUTEX(un)));
5761 
5762 #if defined(_SUNOS_VTOC_8)
5763 	/*
5764 	 * Note: This is a legacy check for non-removable devices on VTOC_8
5765 	 * only. This may be a valid check for VTOC_16 as well.
5766 	 * Once we understand why there is this difference between SPARC and
5767 	 * x86 platforms, we could remove this legacy check.
5768 	 */
5769 	ASSERT(un->un_f_default_vtoc_supported);
5770 #endif
5771 
5772 	bzero(&un->un_g, sizeof (struct dk_geom));
5773 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5774 	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
5775 
5776 #if defined(_SUNOS_VTOC_8)
5777 
5778 	/*
5779 	 * It's REMOVABLE media, therefore no label (on SPARC, anyway).
5780 	 * But it is still necessary to set up various geometry information,
5781 	 * and we are doing this here.
5782 	 */
5783 
5784 	/*
5785 	 * For the rpm, we use the minimum for the disk.  For heads,
5786 	 * cylinders, and sectors per track: if the capacity is <= 1GB,
5787 	 * head = 64 and sect = 32; else head = 255 and sect = 63.  Note:
5788 	 * the capacity should equal the C*H*S value, which causes some
5789 	 * truncation of size due to round-off errors. For CD-ROMs, this
5790 	 * truncation can have adverse side effects, so we return ncyl and
5791 	 * nhead as 1; nsect would overflow as it is a ushort. (4190569)
5792 	 */
5793 	if (ISCD(un)) {
5794 		/*
5795 		 * Preserve the old behavior for non-writable
5796 		 * media. Since dkg_nsect is a ushort, it
5797 		 * will lose bits, as CD-ROMs have more than
5798 		 * 65536 sectors. So if we recalculated the
5799 		 * capacity, it would become much smaller.
5800 		 * But the dkg_* information is not used for
5801 		 * CD-ROMs, so that is OK. For writable CDs,
5802 		 * however, we need this information to be
5803 		 * valid (for newfs, say). So we make nsect
5804 		 * and nhead > 1; that way nsect can stay
5805 		 * within the ushort limit without losing
5806 		 * any bits.
5807 		 */
5808 		if (un->un_f_mmc_writable_media == TRUE) {
5809 			un->un_g.dkg_nhead = 64;
5810 			un->un_g.dkg_nsect = 32;
5811 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
5812 			un->un_blockcount = un->un_g.dkg_ncyl *
5813 			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5814 		} else {
5815 			un->un_g.dkg_ncyl  = 1;
5816 			un->un_g.dkg_nhead = 1;
5817 			un->un_g.dkg_nsect = un->un_blockcount;
5818 		}
5819 	} else {
5820 		if (un->un_blockcount <= 0x1000) {
5821 			/* unlabeled SCSI floppy device */
5822 			un->un_g.dkg_nhead = 2;
5823 			un->un_g.dkg_ncyl = 80;
5824 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
5825 		} else if (un->un_blockcount <= 0x200000) {
5826 			un->un_g.dkg_nhead = 64;
5827 			un->un_g.dkg_nsect = 32;
5828 			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
5829 		} else {
5830 			un->un_g.dkg_nhead = 255;
5831 			un->un_g.dkg_nsect = 63;
5832 			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
5833 		}
5834 		un->un_blockcount =
5835 		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5836 	}
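
	/*
	 * Worked example of the branch above, with a hypothetical capacity:
	 * un_blockcount = 1000000 falls in the <= 0x200000 (2097152-block,
	 * i.e. 1GB) range, so nhead = 64, nsect = 32, and
	 * ncyl = 1000000 / (64 * 32) = 488; un_blockcount is then truncated
	 * to 488 * 64 * 32 = 999424.
	 */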
5837 
5838 	un->un_g.dkg_acyl	= 0;
5839 	un->un_g.dkg_bcyl	= 0;
5840 	un->un_g.dkg_rpm	= 200;
5841 	un->un_asciilabel[0]	= '\0';
5842 	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;
5843 
5844 	un->un_map[0].dkl_cylno = 0;
5845 	un->un_map[0].dkl_nblk  = un->un_blockcount;
5846 	un->un_map[2].dkl_cylno = 0;
5847 	un->un_map[2].dkl_nblk  = un->un_blockcount;
5848 
5849 #elif defined(_SUNOS_VTOC_16)
5850 
5851 	if (un->un_solaris_size == 0) {
5852 		/*
5853 		 * Got an fdisk table but no Solaris entry; therefore
5854 		 * don't create a default label.
5855 		 */
5856 		un->un_f_geometry_is_valid = TRUE;
5857 		return;
5858 	}
5859 
5860 	/*
5861 	 * For CDs we continue to use the physical geometry to calculate
5862 	 * number of cylinders. All other devices must convert the
5863 	 * physical geometry (geom_cache) to values that will fit
5864 	 * in a dk_geom structure.
5865 	 */
5866 	if (ISCD(un)) {
5867 		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
5868 	} else {
5869 		/* Convert physical geometry to disk geometry */
5870 		bzero(&un_g, sizeof (struct dk_geom));
5871 
5872 		/*
5873 		 * Refer to comments related to off-by-1 at the
5874 		 * header of this file.
5875 		 * Before calculating geometry, the capacity should be
5876 		 * decreased by 1. If un_f_capacity_adjusted is TRUE,
5877 		 * we are treating a 1TB disk as (1TB - 512B), and the
5878 		 * capacity has already been decreased by 1, so nothing
5879 		 * more is subtracted here.
5880 		 */
5881 		if (!un->un_f_capacity_adjusted &&
5882 		    !un->un_f_has_removable_media &&
5883 		    !un->un_f_is_hotpluggable &&
5884 		    un->un_tgt_blocksize == un->un_sys_blocksize)
5885 			capacity = un->un_blockcount - 1;
5886 		else
5887 			capacity = un->un_blockcount;
5888 
5889 		sd_convert_geometry(capacity, &un_g);
5890 		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
5891 		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5892 	}
5893 
5894 	ASSERT(phys_spc != 0);
5895 	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
5896 	un->un_g.dkg_acyl = DK_ACYL;
5897 	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
5898 	disksize = un->un_g.dkg_ncyl * phys_spc;
5899 
5900 	if (ISCD(un)) {
5901 		/*
5902 		 * CDs don't use the "heads * sectors * cyls" type of
5903 		 * geometry, but instead use the entire capacity of the media.
5904 		 */
5905 		disksize = un->un_solaris_size;
5906 		un->un_g.dkg_nhead = 1;
5907 		un->un_g.dkg_nsect = 1;
5908 		un->un_g.dkg_rpm =
5909 		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;
5910 
5911 		un->un_vtoc.v_part[0].p_start = 0;
5912 		un->un_vtoc.v_part[0].p_size  = disksize;
5913 		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
5914 		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;
5915 
5916 		un->un_map[0].dkl_cylno = 0;
5917 		un->un_map[0].dkl_nblk  = disksize;
5918 		un->un_offset[0] = 0;
5919 
5920 	} else {
5921 		/*
5922 		 * Hard disks and removable media cartridges
5923 		 */
5924 		un->un_g.dkg_rpm =
5925 		    (un->un_pgeom.g_rpm == 0) ? 3600: un->un_pgeom.g_rpm;
5926 		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;
5927 
5928 		/* Add boot slice */
5929 		un->un_vtoc.v_part[8].p_start = 0;
5930 		un->un_vtoc.v_part[8].p_size  = phys_spc;
5931 		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
5932 		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;
5933 
5934 		un->un_map[8].dkl_cylno = 0;
5935 		un->un_map[8].dkl_nblk  = phys_spc;
5936 		un->un_offset[8] = 0;
5937 	}
5938 
5939 	un->un_g.dkg_apc = 0;
5940 	un->un_vtoc.v_nparts = V_NUMPAR;
5941 	un->un_vtoc.v_version = V_VERSION;
5942 
5943 	/* Add backup slice */
5944 	un->un_vtoc.v_part[2].p_start = 0;
5945 	un->un_vtoc.v_part[2].p_size  = disksize;
5946 	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
5947 	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;
5948 
5949 	un->un_map[2].dkl_cylno = 0;
5950 	un->un_map[2].dkl_nblk  = disksize;
5951 	un->un_offset[2] = 0;
5952 
5953 	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
5954 	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
5955 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
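
	/*
	 * For example, a hypothetical geometry of ncyl = 488, acyl = 2,
	 * nhead = 64, nsect = 32 produces the ascii label
	 * "DEFAULT cyl 488 alt 2 hd 64 sec 32".
	 */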
5956 
5957 #else
5958 #error "No VTOC format defined."
5959 #endif
5960 
5961 	un->un_g.dkg_read_reinstruct  = 0;
5962 	un->un_g.dkg_write_reinstruct = 0;
5963 
5964 	un->un_g.dkg_intrlv = 1;
5965 
5966 	un->un_vtoc.v_sanity  = VTOC_SANE;
5967 
5968 	un->un_f_geometry_is_valid = TRUE;
5969 
5970 	SD_INFO(SD_LOG_COMMON, un,
5971 	    "sd_build_default_label: Default label created: "
5972 	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
5973 	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
5974 	    un->un_g.dkg_nsect, un->un_blockcount);
5975 }
5976 
5977 
5978 #if defined(_FIRMWARE_NEEDS_FDISK)
5979 /*
5980  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5981  */
5982 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5983 #define	LBA_MAX_CYL	(1022 & 0xFF)
5984 #define	LBA_MAX_HEAD	(254)
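
/*
 * In the packed fdisk CHS encoding, the sector byte carries the sector
 * number in its low 6 bits and bits 8-9 of the cylinder in its top 2
 * bits, while the cylinder byte holds only the low 8 cylinder bits.
 * Hence LBA_MAX_SECT above is 63 | ((1022 & 0x300) >> 2) = 0xff, and
 * LBA_MAX_CYL is 1022 & 0xff = 0xfe.
 */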
5985 
5986 
5987 /*
5988  *    Function: sd_has_max_chs_vals
5989  *
5990  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
5991  *
5992  *   Arguments: fdp - ptr to CHS info
5993  *
5994  * Return Code: True or false
5995  *
5996  *     Context: Any.
5997  */
5998 
5999 static int
6000 sd_has_max_chs_vals(struct ipart *fdp)
6001 {
6002 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
6003 	    (fdp->beghead == LBA_MAX_HEAD)	&&
6004 	    (fdp->begsect == LBA_MAX_SECT)	&&
6005 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
6006 	    (fdp->endhead == LBA_MAX_HEAD)	&&
6007 	    (fdp->endsect == LBA_MAX_SECT));
6008 }
6009 #endif
6010 
6011 
6012 /*
6013  *    Function: sd_inq_fill
6014  *
6015  * Description: Copy a piece of inquiry data into the destination buffer,
6016  *		replacing non-printable characters and stopping at the first
6017  *		space character after the beginning of the passed string.
6018  *
6019  *   Arguments: p - source string
6020  *		l - maximum length to copy
6021  *		s - destination string
6022  *
6023  *     Context: Any.
6024  */
6025 
6026 static void
6027 sd_inq_fill(char *p, int l, char *s)
6028 {
6029 	unsigned i = 0;
6030 	char c;
6031 
6032 	while (i++ < l) {
6033 		if ((c = *p++) < ' ' || c >= 0x7F) {
6034 			c = '*';
6035 		} else if (i != 1 && c == ' ') {
6036 			break;
6037 		}
6038 		*s++ = c;
6039 	}
6040 	*s++ = 0;
6041 }
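
/*
 * For example, an 8-byte vendor field of "SEAGATE " is copied out as
 * "SEAGATE" (the trailing space terminates the copy), while any control
 * character in the field would be replaced by '*'.
 */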
6042 
6043 
6044 /*
6045  *    Function: sd_register_devid
6046  *
6047  * Description: This routine will obtain the device id information from the
6048  *		target, obtain the serial number, and register the device
6049  *		id with the ddi framework.
6050  *
6051  *   Arguments: devi - the system's dev_info_t for the device.
6052  *		un - driver soft state (unit) structure
6053  *		reservation_flag - indicates if a reservation conflict
6054  *		occurred during attach
6055  *
6056  *     Context: Kernel Thread
6057  */
6058 static void
6059 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
6060 {
6061 	int		rval		= 0;
6062 	uchar_t		*inq80		= NULL;
6063 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
6064 	size_t		inq80_resid	= 0;
6065 	uchar_t		*inq83		= NULL;
6066 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
6067 	size_t		inq83_resid	= 0;
6068 
6069 	ASSERT(un != NULL);
6070 	ASSERT(mutex_owned(SD_MUTEX(un)));
6071 	ASSERT((SD_DEVINFO(un)) == devi);
6072 
6073 	/*
6074 	 * This is the case of antiquated Sun disk drives that have the
6075 	 * FAB_DEVID property set in the disk_table.  These drives
6076 	 * manage their devids by storing them in the last 2 available sectors
6077 	 * on the drive and have them fabricated by the ddi layer by calling
6078 	 * ddi_devid_init and passing the DEVID_FAB flag.
6079 	 */
6080 	if (un->un_f_opt_fab_devid == TRUE) {
6081 		/*
6082 		 * Depending on EINVAL isn't reliable, since a reserved disk
6083 		 * may result in invalid geometry, so check to make sure a
6084 		 * reservation conflict did not occur during attach.
6085 		 */
6086 		if ((sd_get_devid(un) == EINVAL) &&
6087 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
6088 			/*
6089 			 * The devid is invalid AND there is no reservation
6090 			 * conflict.  Fabricate a new devid.
6091 			 */
6092 			(void) sd_create_devid(un);
6093 		}
6094 
6095 		/* Register the devid if it exists */
6096 		if (un->un_devid != NULL) {
6097 			(void) ddi_devid_register(SD_DEVINFO(un),
6098 			    un->un_devid);
6099 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6100 			    "sd_register_devid: Devid Fabricated\n");
6101 		}
6102 		return;
6103 	}
6104 
6105 	/*
6106 	 * We check the availability of the World Wide Name (0x83) and Unit
6107 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
6108 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
6109 	 * 0x83 is available, that is the best choice.  Our next choice is
6110 	 * 0x80.  If neither is available, we munge the devid from the device
6111 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
6112 	 * to fabricate a devid for non-Sun qualified disks.
6113 	 */
6114 	if (sd_check_vpd_page_support(un) == 0) {
6115 		/* collect page 80 data if available */
6116 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
6117 
6118 			mutex_exit(SD_MUTEX(un));
6119 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
6120 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
6121 			    0x01, 0x80, &inq80_resid);
6122 
6123 			if (rval != 0) {
6124 				kmem_free(inq80, inq80_len);
6125 				inq80 = NULL;
6126 				inq80_len = 0;
6127 			}
6128 			mutex_enter(SD_MUTEX(un));
6129 		}
6130 
6131 		/* collect page 83 data if available */
6132 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
6133 			mutex_exit(SD_MUTEX(un));
6134 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
6135 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
6136 			    0x01, 0x83, &inq83_resid);
6137 
6138 			if (rval != 0) {
6139 				kmem_free(inq83, inq83_len);
6140 				inq83 = NULL;
6141 				inq83_len = 0;
6142 			}
6143 			mutex_enter(SD_MUTEX(un));
6144 		}
6145 	}
6146 
6147 	/* encode best devid possible based on data available */
6148 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
6149 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
6150 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
6151 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
6152 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
6153 
6154 		/* devid successfully encoded, register devid */
6155 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
6156 
6157 	} else {
6158 		/*
6159 		 * Unable to encode a devid based on data available.
6160 		 * This is not a Sun qualified disk.  Older Sun disk
6161 		 * drives that have the SD_FAB_DEVID property
6162 		 * set in the disk_table and non-Sun qualified
6163 		 * disks are treated in the same manner.  These
6164 		 * drives manage their devids by storing them in
6165 		 * the last 2 available sectors on the drive and
6166 		 * have them fabricated by the ddi layer by
6167 		 * calling ddi_devid_init and passing the
6168 		 * DEVID_FAB flag.
6169 		 * Create a fabricated devid only if no
6170 		 * fabricated devid already exists.
6171 		 */
6172 		if (sd_get_devid(un) == EINVAL) {
6173 			(void) sd_create_devid(un);
6174 			un->un_f_opt_fab_devid = TRUE;
6175 		}
6176 
6177 		/* Register the devid if it exists */
6178 		if (un->un_devid != NULL) {
6179 			(void) ddi_devid_register(SD_DEVINFO(un),
6180 			    un->un_devid);
6181 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6182 			    "sd_register_devid: devid fabricated using "
6183 			    "ddi framework\n");
6184 		}
6185 	}
6186 
6187 	/* clean up resources */
6188 	if (inq80 != NULL) {
6189 		kmem_free(inq80, inq80_len);
6190 	}
6191 	if (inq83 != NULL) {
6192 		kmem_free(inq83, inq83_len);
6193 	}
6194 }
6195 
6196 static daddr_t
6197 sd_get_devid_block(struct sd_lun *un)
6198 {
6199 	daddr_t			spc, blk, head, cyl;
6200 
6201 	if (un->un_blockcount <= DK_MAX_BLOCKS) {
6202 		/* this geometry doesn't allow us to write a devid */
6203 		if (un->un_g.dkg_acyl < 2) {
6204 			return (-1);
6205 		}
6206 
6207 		/*
6208 		 * Subtracting 2 guarantees that the next-to-last cylinder
6209 		 * is used.
6210 		 */
6211 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6212 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6213 		head = un->un_g.dkg_nhead - 1;
6214 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6215 		    (head * un->un_g.dkg_nsect) + 1;
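
		/*
		 * Worked example with a hypothetical geometry: ncyl = 1000,
		 * acyl = 2, nhead = 16, nsect = 63, apc = 0 give cyl = 1000,
		 * spc = 1008, head = 15, and
		 *
		 *	blk = 1000 * 1008 + 15 * 63 + 1 = 1008946
		 *
		 * i.e. the second sector of the last track of the
		 * next-to-last cylinder.
		 */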
6216 	} else {
6217 		if (un->un_reserved != -1) {
6218 			blk = un->un_map[un->un_reserved].dkl_cylno + 1;
6219 		} else {
6220 			return (-1);
6221 		}
6222 	}
6223 	return (blk);
6224 }
6225 
6226 /*
6227  *    Function: sd_get_devid
6228  *
6229  * Description: This routine will return 0 if a valid device id has been
6230  *		obtained from the target and stored in the soft state. If a
6231  *		valid device id has not been previously read and stored, a
6232  *		read attempt will be made.
6233  *
6234  *   Arguments: un - driver soft state (unit) structure
6235  *
6236  * Return Code: 0 if we successfully get the device id
6237  *
6238  *     Context: Kernel Thread
6239  */
6240 
6241 static int
6242 sd_get_devid(struct sd_lun *un)
6243 {
6244 	struct dk_devid		*dkdevid;
6245 	ddi_devid_t		tmpid;
6246 	uint_t			*ip;
6247 	size_t			sz;
6248 	daddr_t			blk;
6249 	int			status;
6250 	int			chksum;
6251 	int			i;
6252 	size_t			buffer_size;
6253 
6254 	ASSERT(un != NULL);
6255 	ASSERT(mutex_owned(SD_MUTEX(un)));
6256 
6257 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6258 	    un);
6259 
6260 	if (un->un_devid != NULL) {
6261 		return (0);
6262 	}
6263 
6264 	blk = sd_get_devid_block(un);
6265 	if (blk < 0)
6266 		return (EINVAL);
6267 
6268 	/*
6269 	 * Read and verify device id, stored in the reserved cylinders at the
6270 	 * end of the disk. Backup label is on the odd sectors of the last
6271 	 * track of the last cylinder. The device id will be on a track of
6272 	 * the next-to-last cylinder.
6273 	 */
6274 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6275 	mutex_exit(SD_MUTEX(un));
6276 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6277 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6278 	    SD_PATH_DIRECT);
6279 	if (status != 0) {
6280 		goto error;
6281 	}
6282 
6283 	/* Validate the revision */
6284 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6285 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6286 		status = EINVAL;
6287 		goto error;
6288 	}
6289 
6290 	/* Calculate the checksum */
6291 	chksum = 0;
6292 	ip = (uint_t *)dkdevid;
6293 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6294 	    i++) {
6295 		chksum ^= ip[i];
6296 	}
6297 
6298 	/* Compare the checksums */
6299 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6300 		status = EINVAL;
6301 		goto error;
6302 	}
6303 
6304 	/* Validate the device id */
6305 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6306 		status = EINVAL;
6307 		goto error;
6308 	}
6309 
6310 	/*
6311 	 * Store the device id in the driver soft state
6312 	 */
6313 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6314 	tmpid = kmem_alloc(sz, KM_SLEEP);
6315 
6316 	mutex_enter(SD_MUTEX(un));
6317 
6318 	un->un_devid = tmpid;
6319 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6320 
6321 	kmem_free(dkdevid, buffer_size);
6322 
6323 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6324 
6325 	return (status);
6326 error:
6327 	mutex_enter(SD_MUTEX(un));
6328 	kmem_free(dkdevid, buffer_size);
6329 	return (status);
6330 }
6331 
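/*
 * A minimal user-land sketch of the XOR checksum verification performed
 * above (assumptions: the checksum occupies the final 32-bit word of the
 * block, as the loop bound suggests, and the byte-order handling done by
 * the real DKD_GETCHKSUM macro is ignored). Illustrative only.
 */
#include <stdint.h>
#include <stddef.h>

static int
dk_devid_verify(const uint32_t *blk, size_t blocksize)
{
	uint32_t chksum = 0;
	size_t i, nwords = blocksize / sizeof (uint32_t);

	/* XOR every word except the last, which holds the stored checksum */
	for (i = 0; i < nwords - 1; i++)
		chksum ^= blk[i];

	return (chksum == blk[nwords - 1] ? 0 : -1);
}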
6332 
6333 /*
6334  *    Function: sd_create_devid
6335  *
6336  * Description: This routine will fabricate the device id and write it
6337  *		to the disk.
6338  *
6339  *   Arguments: un - driver soft state (unit) structure
6340  *
6341  * Return Code: value of the fabricated device id
6342  *
6343  *     Context: Kernel Thread
6344  */
6345 
6346 static ddi_devid_t
6347 sd_create_devid(struct sd_lun *un)
6348 {
6349 	ASSERT(un != NULL);
6350 
6351 	/* Fabricate the devid */
6352 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6353 	    == DDI_FAILURE) {
6354 		return (NULL);
6355 	}
6356 
6357 	/* Write the devid to disk */
6358 	if (sd_write_deviceid(un) != 0) {
6359 		ddi_devid_free(un->un_devid);
6360 		un->un_devid = NULL;
6361 	}
6362 
6363 	return (un->un_devid);
6364 }
6365 
6366 
6367 /*
6368  *    Function: sd_write_deviceid
6369  *
6370  * Description: This routine will write the device id to the disk
6371  *		reserved sector.
6372  *
6373  *   Arguments: un - driver soft state (unit) structure
6374  *
6375  * Return Code: EINVAL
6376  *		value returned by sd_send_scsi_cmd
6377  *
6378  *     Context: Kernel Thread
6379  */
6380 
6381 static int
6382 sd_write_deviceid(struct sd_lun *un)
6383 {
6384 	struct dk_devid		*dkdevid;
6385 	daddr_t			blk;
6386 	uint_t			*ip, chksum;
6387 	int			status;
6388 	int			i;
6389 
6390 	ASSERT(mutex_owned(SD_MUTEX(un)));
6391 
6392 	blk = sd_get_devid_block(un);
6393 	if (blk < 0)
6394 		return (-1);
6395 	mutex_exit(SD_MUTEX(un));
6396 
6397 	/* Allocate the buffer */
6398 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6399 
6400 	/* Fill in the revision */
6401 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6402 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6403 
6404 	/* Copy in the device id */
6405 	mutex_enter(SD_MUTEX(un));
6406 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6407 	    ddi_devid_sizeof(un->un_devid));
6408 	mutex_exit(SD_MUTEX(un));
6409 
6410 	/* Calculate the checksum */
6411 	chksum = 0;
6412 	ip = (uint_t *)dkdevid;
6413 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6414 	    i++) {
6415 		chksum ^= ip[i];
6416 	}
6417 
6418 	/* Fill-in checksum */
6419 	DKD_FORMCHKSUM(chksum, dkdevid);
6420 
6421 	/* Write the reserved sector */
6422 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6423 	    SD_PATH_DIRECT);
6424 
6425 	kmem_free(dkdevid, un->un_sys_blocksize);
6426 
6427 	mutex_enter(SD_MUTEX(un));
6428 	return (status);
6429 }
6430 
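/*
 * For orientation, the on-disk sector layout implied by the read and
 * write paths above (the authoritative struct dk_devid lives in the
 * system headers; the offsets here are a sketch, not a definition):
 *
 *	byte 0:		dkd_rev_hi	- revision, MSB
 *	byte 1:		dkd_rev_lo	- revision, LSB
 *	following:	dkd_devid	- the ddi_devid_t payload
 *	last 4 bytes:	checksum	- XOR of the preceding words,
 *					  filled in via DKD_FORMCHKSUM and
 *					  read back via DKD_GETCHKSUM
 */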
6431 
6432 /*
6433  *    Function: sd_check_vpd_page_support
6434  *
6435  * Description: This routine sends an inquiry command with the EVPD bit set and
6436  *		a page code of 0x00 to the device. It is used to determine which
6437  *		vital product data (VPD) pages are available for locating
6438  *		the devid. We are looking for pages 0x83 or 0x80. If we
6439  *		return -1, the device does not support that command.
6440  *
6441  *   Arguments: un  - driver soft state (unit) structure
6442  *
6443  * Return Code: 0 - success; supported pages are noted in un_vpd_page_mask
6444  *		-1 - the device does not support VPD pages
6445  *
6446  *     Context: This routine can sleep.
6447  */
6448 
6449 static int
6450 sd_check_vpd_page_support(struct sd_lun *un)
6451 {
6452 	uchar_t	*page_list	= NULL;
6453 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6454 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6455 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6456 	int    	rval		= 0;
6457 	int	counter;
6458 
6459 	ASSERT(un != NULL);
6460 	ASSERT(mutex_owned(SD_MUTEX(un)));
6461 
6462 	mutex_exit(SD_MUTEX(un));
6463 
6464 	/*
6465 	 * We'll set the page length to the maximum to save figuring it out
6466 	 * with an additional call.
6467 	 */
6468 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6469 
6470 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6471 	    page_code, NULL);
6472 
6473 	mutex_enter(SD_MUTEX(un));
6474 
6475 	/*
6476 	 * Now we must validate that the device accepted the command, as some
6477 	 * drives do not support it.  If the drive does support it, we will
6478 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6479 	 * not, we return -1.
6480 	 */
6481 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6482 		/* Loop to find one of the 2 pages we need */
6483 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6484 
6485 		/*
6486 		 * Pages are returned in ascending order, and 0x83 is what we
6487 		 * are hoping for.
6488 		 */
6489 		while ((page_list[counter] <= 0x83) &&
6490 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6491 		    VPD_HEAD_OFFSET))) {
6492 			/*
6493 			 * page_list[3] is the count of page codes that
6494 			 * follow the 4-byte header (hence the loop bound)
6495 			 */
6496 
6497 			switch (page_list[counter]) {
6498 			case 0x00:
6499 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6500 				break;
6501 			case 0x80:
6502 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6503 				break;
6504 			case 0x81:
6505 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6506 				break;
6507 			case 0x82:
6508 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6509 				break;
6510 			case 0x83:
6511 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6512 				break;
6513 			}
6514 			counter++;
6515 		}
6516 
6517 	} else {
6518 		rval = -1;
6519 
6520 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6521 		    "sd_check_vpd_page_support: This drive does not implement "
6522 		    "VPD pages.\n");
6523 	}
6524 
6525 	kmem_free(page_list, page_length);
6526 
6527 	return (rval);
6528 }
6529 
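/*
 * A hedged user-land sketch of parsing the Supported VPD Pages response
 * consumed above: byte 1 is the page code (0x00), byte 3 the page length
 * (the count of page codes that follow), and the supported page codes
 * follow in ascending order from byte 4. The mask bits below are
 * illustrative stand-ins for the driver's SD_VPD_*_PG flags.
 */
#include <stdint.h>

#define	EX_VPD_UNIT_SERIAL	0x01	/* stand-in for SD_VPD_UNIT_SERIAL_PG */
#define	EX_VPD_DEVID_WWN	0x02	/* stand-in for SD_VPD_DEVID_WWN_PG */

static uint32_t
vpd_page0_to_mask(const uint8_t *page_list)
{
	uint32_t mask = 0;
	int i, npages = page_list[3];	/* page length: codes that follow */

	for (i = 0; i < npages; i++) {
		switch (page_list[4 + i]) {
		case 0x80:
			mask |= EX_VPD_UNIT_SERIAL;
			break;
		case 0x83:
			mask |= EX_VPD_DEVID_WWN;
			break;
		}
	}
	return (mask);
}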
6530 
6531 /*
6532  *    Function: sd_setup_pm
6533  *
6534  * Description: Initialize Power Management on the device
6535  *
6536  *     Context: Kernel Thread
6537  */
6538 
6539 static void
6540 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6541 {
6542 	uint_t	log_page_size;
6543 	uchar_t	*log_page_data;
6544 	int	rval;
6545 
6546 	/*
6547 	 * Since we are called from attach, holding a mutex for
6548 	 * un is unnecessary. Because some of the routines called
6549 	 * from here require SD_MUTEX to not be held, assert this
6550 	 * right up front.
6551 	 */
6552 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6553 	/*
6554 	 * Since the sd device does not have the 'reg' property,
6555 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6556 	 * The following code is to tell cpr that this device
6557 	 * DOES need to be suspended and resumed.
6558 	 */
6559 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6560 	    "pm-hardware-state", "needs-suspend-resume");
6561 
6562 	/*
6563 	 * This complies with the new power management framework
6564 	 * for certain desktop machines. Create the pm_components
6565 	 * property as a string array property.
6566 	 */
6567 	if (un->un_f_pm_supported) {
6568 		/*
6569 		 * Not all devices have a motor, so try it first. Some
6570 		 * devices may return ILLEGAL REQUEST and some
6571 		 * may hang.
6572 		 * The following START_STOP_UNIT is used to check whether
6573 		 * the target device has a motor.
6574 		 */
6575 		un->un_f_start_stop_supported = TRUE;
6576 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6577 		    SD_PATH_DIRECT) != 0) {
6578 			un->un_f_start_stop_supported = FALSE;
6579 		}
6580 
6581 		/*
6582 		 * Create the pm properties anyway; otherwise the parent
6583 		 * can't go to sleep.
6584 		 */
6585 		(void) sd_create_pm_components(devi, un);
6586 		un->un_f_pm_is_enabled = TRUE;
6587 		return;
6588 	}
6589 
6590 	if (!un->un_f_log_sense_supported) {
6591 		un->un_power_level = SD_SPINDLE_ON;
6592 		un->un_f_pm_is_enabled = FALSE;
6593 		return;
6594 	}
6595 
6596 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6597 
6598 #ifdef	SDDEBUG
6599 	if (sd_force_pm_supported) {
6600 		/* Force a successful result */
6601 		rval = 1;
6602 	}
6603 #endif
6604 
6605 	/*
6606 	 * If the start-stop cycle counter log page is not supported
6607 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6608 	 * then we should not create the pm_components property.
6609 	 */
6610 	if (rval == -1) {
6611 		/*
6612 		 * Error.
6613 		 * Reading log sense failed, most likely this is
6614 		 * an older drive that does not support log sense.
6615 		 * If this fails, auto-pm is not supported.
6616 		 */
6617 		un->un_power_level = SD_SPINDLE_ON;
6618 		un->un_f_pm_is_enabled = FALSE;
6619 
6620 	} else if (rval == 0) {
6621 		/*
6622 		 * Page not found.
6623 		 * The start stop cycle counter is implemented as page
6624 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
6625 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6626 		 */
6627 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6628 			/*
6629 			 * Page found, use this one.
6630 			 */
6631 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6632 			un->un_f_pm_is_enabled = TRUE;
6633 		} else {
6634 			/*
6635 			 * Error or page not found.
6636 			 * auto-pm is not supported for this device.
6637 			 */
6638 			un->un_power_level = SD_SPINDLE_ON;
6639 			un->un_f_pm_is_enabled = FALSE;
6640 		}
6641 	} else {
6642 		/*
6643 		 * Page found, use it.
6644 		 */
6645 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6646 		un->un_f_pm_is_enabled = TRUE;
6647 	}
6648 
6649 
6650 	if (un->un_f_pm_is_enabled == TRUE) {
6651 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6652 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6653 
6654 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6655 		    log_page_size, un->un_start_stop_cycle_page,
6656 		    0x01, 0, SD_PATH_DIRECT);
6657 #ifdef	SDDEBUG
6658 		if (sd_force_pm_supported) {
6659 			/* Force a successful result */
6660 			rval = 0;
6661 		}
6662 #endif
6663 
6664 		/*
6665 		 * If the LOG SENSE for the start/stop cycle counter page
6666 		 * succeeds, then power management is supported and we can
6667 		 * enable auto-pm.
6668 		 */
6669 		if (rval == 0)  {
6670 			(void) sd_create_pm_components(devi, un);
6671 		} else {
6672 			un->un_power_level = SD_SPINDLE_ON;
6673 			un->un_f_pm_is_enabled = FALSE;
6674 		}
6675 
6676 		kmem_free(log_page_data, log_page_size);
6677 	}
6678 }
6679 
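/*
 * The page-probe order above, condensed into a hedged sketch. It assumes
 * the same contract as sd_log_page_supported(): 1 = page supported,
 * 0 = page list readable but the page is absent, -1 = LOG SENSE itself
 * failed. The helper is hypothetical, not part of the driver:
 */
static int
choose_start_stop_page(int std_rval, int vu_rval)
{
	if (std_rval == 1)
		return (0x0E);	/* START_STOP_CYCLE_PAGE, newer disks */
	if (std_rval == 0 && vu_rval == 1)
		return (0x31);	/* START_STOP_CYCLE_VU_PAGE, older disks */
	return (-1);		/* neither page found: auto-pm unsupported */
}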
6680 
6681 /*
6682  *    Function: sd_create_pm_components
6683  *
6684  * Description: Initialize PM property.
6685  *
6686  *     Context: Kernel thread context
6687  */
6688 
6689 static void
6690 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6691 {
6692 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6693 
6694 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6695 
6696 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6697 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6698 		/*
6699 		 * When components are initially created they are idle;
6700 		 * power up any non-removables.
6701 		 * Note: the return value of pm_raise_power can't be used
6702 		 * for determining if PM should be enabled for this device.
6703 		 * Even if you check the return values and remove this
6704 		 * property created above, the PM framework will not honor the
6705 		 * change after the first call to pm_raise_power. Hence,
6706 		 * removal of that property does not help if pm_raise_power
6707 		 * fails. In the case of removable media, the start/stop
6708 		 * will fail if the media is not present.
6709 		 */
6710 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6711 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6712 			mutex_enter(SD_MUTEX(un));
6713 			un->un_power_level = SD_SPINDLE_ON;
6714 			mutex_enter(&un->un_pm_mutex);
6715 			/* Set to on and not busy. */
6716 			un->un_pm_count = 0;
6717 		} else {
6718 			mutex_enter(SD_MUTEX(un));
6719 			un->un_power_level = SD_SPINDLE_OFF;
6720 			mutex_enter(&un->un_pm_mutex);
6721 			/* Set to off. */
6722 			un->un_pm_count = -1;
6723 		}
6724 		mutex_exit(&un->un_pm_mutex);
6725 		mutex_exit(SD_MUTEX(un));
6726 	} else {
6727 		un->un_power_level = SD_SPINDLE_ON;
6728 		un->un_f_pm_is_enabled = FALSE;
6729 	}
6730 }
6731 
6732 
6733 /*
6734  *    Function: sd_ddi_suspend
6735  *
6736  * Description: Performs system power-down operations. This includes
6737  *		setting the drive state to indicate it is suspended so
6738  *		that no new commands will be accepted. Also, wait for
6739  *		all commands that are in transport or queued to a timer
6740  *		for retry to complete. All timeout threads are cancelled.
6741  *
6742  * Return Code: DDI_FAILURE or DDI_SUCCESS
6743  *
6744  *     Context: Kernel thread context
6745  */
6746 
6747 static int
6748 sd_ddi_suspend(dev_info_t *devi)
6749 {
6750 	struct	sd_lun	*un;
6751 	clock_t		wait_cmds_complete;
6752 
6753 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6754 	if (un == NULL) {
6755 		return (DDI_FAILURE);
6756 	}
6757 
6758 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6759 
6760 	mutex_enter(SD_MUTEX(un));
6761 
6762 	/* Return success if the device is already suspended. */
6763 	if (un->un_state == SD_STATE_SUSPENDED) {
6764 		mutex_exit(SD_MUTEX(un));
6765 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6766 		    "device already suspended, exiting\n");
6767 		return (DDI_SUCCESS);
6768 	}
6769 
6770 	/* Return failure if the device is being used by HA */
6771 	if (un->un_resvd_status &
6772 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6773 		mutex_exit(SD_MUTEX(un));
6774 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6775 		    "device in use by HA, exiting\n");
6776 		return (DDI_FAILURE);
6777 	}
6778 
6779 	/*
6780 	 * Return failure if the device is in a resource wait
6781 	 * or power changing state.
6782 	 */
6783 	if ((un->un_state == SD_STATE_RWAIT) ||
6784 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6785 		mutex_exit(SD_MUTEX(un));
6786 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6787 		    "device in resource wait state, exiting\n");
6788 		return (DDI_FAILURE);
6789 	}
6790 
6791 
6792 	un->un_save_state = un->un_last_state;
6793 	New_state(un, SD_STATE_SUSPENDED);
6794 
6795 	/*
6796 	 * Wait for all commands that are in transport or queued to a timer
6797 	 * for retry to complete.
6798 	 *
6799 	 * While waiting, no new commands will be accepted or sent because of
6800 	 * the new state we set above.
6801 	 *
6802 	 * Wait till current operation has completed. If we are in the resource
6803 	 * wait state (with an intr outstanding) then we need to wait till the
6804 	 * intr completes and starts the next cmd. We want to wait for
6805 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6806 	 */
6807 	wait_cmds_complete = ddi_get_lbolt() +
6808 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6809 
6810 	while (un->un_ncmds_in_transport != 0) {
6811 		/*
6812 		 * Fail if commands do not finish in the specified time.
6813 		 */
6814 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6815 		    wait_cmds_complete) == -1) {
6816 			/*
6817 			 * Undo the state changes made above. Everything
6818 			 * must go back to its original value.
6819 			 */
6820 			Restore_state(un);
6821 			un->un_last_state = un->un_save_state;
6822 			/* Wake up any threads that might be waiting. */
6823 			cv_broadcast(&un->un_suspend_cv);
6824 			mutex_exit(SD_MUTEX(un));
6825 			SD_ERROR(SD_LOG_IO_PM, un,
6826 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6827 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6828 			return (DDI_FAILURE);
6829 		}
6830 	}
6831 
6832 	/*
6833 	 * Cancel SCSI watch thread and timeouts, if any are active
6834 	 */
6835 
6836 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6837 		opaque_t temp_token = un->un_swr_token;
6838 		mutex_exit(SD_MUTEX(un));
6839 		scsi_watch_suspend(temp_token);
6840 		mutex_enter(SD_MUTEX(un));
6841 	}
6842 
6843 	if (un->un_reset_throttle_timeid != NULL) {
6844 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6845 		un->un_reset_throttle_timeid = NULL;
6846 		mutex_exit(SD_MUTEX(un));
6847 		(void) untimeout(temp_id);
6848 		mutex_enter(SD_MUTEX(un));
6849 	}
6850 
6851 	if (un->un_dcvb_timeid != NULL) {
6852 		timeout_id_t temp_id = un->un_dcvb_timeid;
6853 		un->un_dcvb_timeid = NULL;
6854 		mutex_exit(SD_MUTEX(un));
6855 		(void) untimeout(temp_id);
6856 		mutex_enter(SD_MUTEX(un));
6857 	}
6858 
6859 	mutex_enter(&un->un_pm_mutex);
6860 	if (un->un_pm_timeid != NULL) {
6861 		timeout_id_t temp_id = un->un_pm_timeid;
6862 		un->un_pm_timeid = NULL;
6863 		mutex_exit(&un->un_pm_mutex);
6864 		mutex_exit(SD_MUTEX(un));
6865 		(void) untimeout(temp_id);
6866 		mutex_enter(SD_MUTEX(un));
6867 	} else {
6868 		mutex_exit(&un->un_pm_mutex);
6869 	}
6870 
6871 	if (un->un_retry_timeid != NULL) {
6872 		timeout_id_t temp_id = un->un_retry_timeid;
6873 		un->un_retry_timeid = NULL;
6874 		mutex_exit(SD_MUTEX(un));
6875 		(void) untimeout(temp_id);
6876 		mutex_enter(SD_MUTEX(un));
6877 	}
6878 
6879 	if (un->un_direct_priority_timeid != NULL) {
6880 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6881 		un->un_direct_priority_timeid = NULL;
6882 		mutex_exit(SD_MUTEX(un));
6883 		(void) untimeout(temp_id);
6884 		mutex_enter(SD_MUTEX(un));
6885 	}
6886 
6887 	if (un->un_f_is_fibre == TRUE) {
6888 		/*
6889 		 * Remove callbacks for insert and remove events
6890 		 */
6891 		if (un->un_insert_event != NULL) {
6892 			mutex_exit(SD_MUTEX(un));
6893 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6894 			mutex_enter(SD_MUTEX(un));
6895 			un->un_insert_event = NULL;
6896 		}
6897 
6898 		if (un->un_remove_event != NULL) {
6899 			mutex_exit(SD_MUTEX(un));
6900 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6901 			mutex_enter(SD_MUTEX(un));
6902 			un->un_remove_event = NULL;
6903 		}
6904 	}
6905 
6906 	mutex_exit(SD_MUTEX(un));
6907 
6908 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6909 
6910 	return (DDI_SUCCESS);
6911 }
6912 
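/*
 * A note on the pattern repeated above for each timeout: untimeout(9F)
 * may block waiting for an in-flight handler, and that handler may itself
 * need SD_MUTEX, so the id is captured and cleared under the mutex, the
 * mutex is dropped for the untimeout() call, then re-acquired. A generic
 * sketch of the idiom (the helper is hypothetical, not part of the
 * driver):
 */
static void
sd_cancel_timeout_locked(kmutex_t *mp, timeout_id_t *idp)
{
	/* caller holds *mp on entry and on return */
	if (*idp != NULL) {
		timeout_id_t tid = *idp;

		*idp = NULL;		/* prevent re-arming races */
		mutex_exit(mp);
		(void) untimeout(tid);	/* may wait for the handler */
		mutex_enter(mp);
	}
}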
6913 
6914 /*
6915  *    Function: sd_ddi_pm_suspend
6916  *
6917  * Description: Set the drive state to low power.
6918  *		Someone else is required to actually change the drive
6919  *		power level.
6920  *
6921  *   Arguments: un - driver soft state (unit) structure
6922  *
6923  * Return Code: DDI_FAILURE or DDI_SUCCESS
6924  *
6925  *     Context: Kernel thread context
6926  */
6927 
6928 static int
6929 sd_ddi_pm_suspend(struct sd_lun *un)
6930 {
6931 	ASSERT(un != NULL);
6932 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6933 
6934 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6935 	mutex_enter(SD_MUTEX(un));
6936 
6937 	/*
6938 	 * Exit if power management is not enabled for this device, or if
6939 	 * the device is being used by HA.
6940 	 */
6941 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6942 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6943 		mutex_exit(SD_MUTEX(un));
6944 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6945 		return (DDI_SUCCESS);
6946 	}
6947 
6948 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6949 	    un->un_ncmds_in_driver);
6950 
6951 	/*
6952 	 * See if the device is not busy, i.e.:
6953 	 *    - we have no commands in the driver for this device
6954 	 *    - not waiting for resources
6955 	 */
6956 	if ((un->un_ncmds_in_driver == 0) &&
6957 	    (un->un_state != SD_STATE_RWAIT)) {
6958 		/*
6959 		 * The device is not busy, so it is OK to go to low power state.
6960 		 * Indicate low power, but rely on someone else to actually
6961 		 * change it.
6962 		 */
6963 		mutex_enter(&un->un_pm_mutex);
6964 		un->un_pm_count = -1;
6965 		mutex_exit(&un->un_pm_mutex);
6966 		un->un_power_level = SD_SPINDLE_OFF;
6967 	}
6968 
6969 	mutex_exit(SD_MUTEX(un));
6970 
6971 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6972 
6973 	return (DDI_SUCCESS);
6974 }
6975 
6976 
6977 /*
6978  *    Function: sd_ddi_resume
6979  *
6980  * Description: Performs system power-up operations.
6981  *
6982  * Return Code: DDI_SUCCESS
6983  *		DDI_FAILURE
6984  *
6985  *     Context: Kernel thread context
6986  */
6987 
6988 static int
6989 sd_ddi_resume(dev_info_t *devi)
6990 {
6991 	struct	sd_lun	*un;
6992 
6993 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6994 	if (un == NULL) {
6995 		return (DDI_FAILURE);
6996 	}
6997 
6998 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6999 
7000 	mutex_enter(SD_MUTEX(un));
7001 	Restore_state(un);
7002 
7003 	/*
7004 	 * Restore the state which was saved to give
7005 	 * the right state in un_last_state
7006 	 */
7007 	un->un_last_state = un->un_save_state;
7008 	/*
7009 	 * Note: throttle comes back at full.
7010 	 * Also note: this MUST be done before calling pm_raise_power
7011 	 * otherwise the system can get hung in biowait. The scenario where
7012 	 * this'll happen is under cpr suspend. Writing of the system
7013 	 * state goes through sddump, which writes 0 to un_throttle. If
7014 	 * writing the system state then fails, for example if the partition is
7015 	 * too small, then cpr attempts a resume. If throttle isn't restored
7016 	 * from the saved value until after calling pm_raise_power then
7017 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
7018 	 * in biowait.
7019 	 */
7020 	un->un_throttle = un->un_saved_throttle;
7021 
7022 	/*
7023 	 * The chance of failure is very rare, as the only command issued in
7024 	 * the power entry point is START, on the transition from 0->1 or
7025 	 * unknown->1. Put the device in the SPINDLE ON state irrespective of
7026 	 * the state at which suspend was done. Ignore the return value, as
7027 	 * the resume should not be failed. In the case of removable media
7028 	 * the media need not be inserted, and hence there is a chance that
7029 	 * raise power will fail with media not present.
7030 	 */
7031 	if (un->un_f_attach_spinup) {
7032 		mutex_exit(SD_MUTEX(un));
7033 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
7034 		mutex_enter(SD_MUTEX(un));
7035 	}
7036 
7037 	/*
7038 	 * Don't broadcast to the suspend cv and therefore possibly
7039 	 * start I/O until after power has been restored.
7040 	 */
7041 	cv_broadcast(&un->un_suspend_cv);
7042 	cv_broadcast(&un->un_state_cv);
7043 
7044 	/* restart thread */
7045 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
7046 		scsi_watch_resume(un->un_swr_token);
7047 	}
7048 
7049 #if (defined(__fibre))
7050 	if (un->un_f_is_fibre == TRUE) {
7051 		/*
7052 		 * Add callbacks for insert and remove events
7053 		 */
7054 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7055 			sd_init_event_callbacks(un);
7056 		}
7057 	}
7058 #endif
7059 
7060 	/*
7061 	 * Transport any pending commands to the target.
7062 	 *
7063 	 * If this is a low-activity device, commands in queue will have to
7064 	 * wait until new commands come in, which may take a while. Also, we
7065 	 * specifically don't check un_ncmds_in_transport because we know that
7066 	 * there really are no commands in progress after the unit was
7067 	 * suspended and we could have reached the throttle level, been
7068 	 * suspended, and have no new commands coming in for awhile. Highly
7069 	 * unlikely, but so is the low-activity disk scenario.
7070 	 */
7071 	ddi_xbuf_dispatch(un->un_xbuf_attr);
7072 
7073 	sd_start_cmds(un, NULL);
7074 	mutex_exit(SD_MUTEX(un));
7075 
7076 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
7077 
7078 	return (DDI_SUCCESS);
7079 }
7080 
7081 
7082 /*
7083  *    Function: sd_ddi_pm_resume
7084  *
7085  * Description: Set the drive state to powered on.
7086  *		Someone else is required to actually change the drive
7087  *		power level.
7088  *
7089  *   Arguments: un - driver soft state (unit) structure
7090  *
7091  * Return Code: DDI_SUCCESS
7092  *
7093  *     Context: Kernel thread context
7094  */
7095 
7096 static int
7097 sd_ddi_pm_resume(struct sd_lun *un)
7098 {
7099 	ASSERT(un != NULL);
7100 
7101 	ASSERT(!mutex_owned(SD_MUTEX(un)));
7102 	mutex_enter(SD_MUTEX(un));
7103 	un->un_power_level = SD_SPINDLE_ON;
7104 
7105 	ASSERT(!mutex_owned(&un->un_pm_mutex));
7106 	mutex_enter(&un->un_pm_mutex);
7107 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
7108 		un->un_pm_count++;
7109 		ASSERT(un->un_pm_count == 0);
7110 		/*
7111 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
7112 		 * un_suspend_cv is for a system resume, not a power management
7113 		 * device resume. (4297749)
7114 		 *	 cv_broadcast(&un->un_suspend_cv);
7115 		 */
7116 	}
7117 	mutex_exit(&un->un_pm_mutex);
7118 	mutex_exit(SD_MUTEX(un));
7119 
7120 	return (DDI_SUCCESS);
7121 }
7122 
7123 
7124 /*
7125  *    Function: sd_pm_idletimeout_handler
7126  *
7127  * Description: A timer routine that's active only while a device is busy.
7128  *		The purpose is to extend slightly the pm framework's busy
7129  *		view of the device to prevent busy/idle thrashing for
7130  *		back-to-back commands. Do this by comparing the current time
7131  *		to the time at which the last command completed and when the
7132  *		difference is greater than sd_pm_idletime, call
7133  *		pm_idle_component. In addition to indicating idle to the pm
7134  *		framework, update the chain type to again use the internal pm
7135  *		layers of the driver.
7136  *
7137  *   Arguments: arg - driver soft state (unit) structure
7138  *
7139  *     Context: Executes in a timeout(9F) thread context
7140  */
7141 
7142 static void
7143 sd_pm_idletimeout_handler(void *arg)
7144 {
7145 	struct sd_lun *un = arg;
7146 
7147 	time_t	now;
7148 
7149 	mutex_enter(&sd_detach_mutex);
7150 	if (un->un_detach_count != 0) {
7151 		/* Abort if the instance is detaching */
7152 		mutex_exit(&sd_detach_mutex);
7153 		return;
7154 	}
7155 	mutex_exit(&sd_detach_mutex);
7156 
7157 	now = ddi_get_time();
7158 	/*
7159 	 * Grab both mutexes, in the proper order, since we're accessing
7160 	 * both PM and softstate variables.
7161 	 */
7162 	mutex_enter(SD_MUTEX(un));
7163 	mutex_enter(&un->un_pm_mutex);
7164 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7165 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7166 		/*
7167 		 * Update the chain types.
7168 		 * This takes effect on the next new command received.
7169 		 */
7170 		if (un->un_f_non_devbsize_supported) {
7171 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7172 		} else {
7173 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7174 		}
7175 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7176 
7177 		SD_TRACE(SD_LOG_IO_PM, un,
7178 		    "sd_pm_idletimeout_handler: idling device\n");
7179 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7180 		un->un_pm_idle_timeid = NULL;
7181 	} else {
7182 		un->un_pm_idle_timeid =
7183 		    timeout(sd_pm_idletimeout_handler, un,
7184 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
7185 	}
7186 	mutex_exit(&un->un_pm_mutex);
7187 	mutex_exit(SD_MUTEX(un));
7188 }
7189 
7190 
7191 /*
7192  *    Function: sd_pm_timeout_handler
7193  *
7194  * Description: Callback to tell framework we are idle.
7195  *
7196  *     Context: timeout(9f) thread context.
7197  */
7198 
7199 static void
7200 sd_pm_timeout_handler(void *arg)
7201 {
7202 	struct sd_lun *un = arg;
7203 
7204 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7205 	mutex_enter(&un->un_pm_mutex);
7206 	un->un_pm_timeid = NULL;
7207 	mutex_exit(&un->un_pm_mutex);
7208 }
7209 
7210 
7211 /*
7212  *    Function: sdpower
7213  *
7214  * Description: PM entry point.
7215  *
7216  * Return Code: DDI_SUCCESS
7217  *		DDI_FAILURE
7218  *
7219  *     Context: Kernel thread context
7220  */
7221 
7222 static int
7223 sdpower(dev_info_t *devi, int component, int level)
7224 {
7225 	struct sd_lun	*un;
7226 	int		instance;
7227 	int		rval = DDI_SUCCESS;
7228 	uint_t		i, log_page_size, maxcycles, ncycles;
7229 	uchar_t		*log_page_data;
7230 	int		log_sense_page;
7231 	int		medium_present;
7232 	time_t		intvlp;
7233 	dev_t		dev;
7234 	struct pm_trans_data	sd_pm_tran_data;
7235 	uchar_t		save_state;
7236 	int		sval;
7237 	uchar_t		state_before_pm;
7238 	int		got_semaphore_here;
7239 
7240 	instance = ddi_get_instance(devi);
7241 
7242 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7243 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7244 	    component != 0) {
7245 		return (DDI_FAILURE);
7246 	}
7247 
7248 	dev = sd_make_device(SD_DEVINFO(un));
7249 
7250 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7251 
7252 	/*
7253 	 * Must synchronize power down with close.
7254 	 * Attempt to decrement/acquire the open/close semaphore,
7255 	 * but do NOT wait on it. If it's not greater than zero,
7256 	 * ie. it can't be decremented without waiting, then
7257 	 * i.e. it can't be decremented without waiting, then
7258 	 * and the try returns 0. Use that knowledge here to determine
7259 	 * if it's OK to change the device power level.
7260 	 * Also, only increment it on exit if it was decremented, ie. gotten,
7261 	 * here.
7262 	 */
7263 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7264 
7265 	mutex_enter(SD_MUTEX(un));
7266 
7267 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7268 	    un->un_ncmds_in_driver);
7269 
7270 	/*
7271 	 * If un_ncmds_in_driver is non-zero, commands are already being
7272 	 * processed in the driver; if the semaphore was not obtained
7273 	 * here, an open or close is being processed. Either way, a
7274 	 * concurrent request to go to low power can't be honored,
7275 	 * therefore we need to return failure.
7276 	 */
7277 	if ((level == SD_SPINDLE_OFF) &&
7278 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7279 		mutex_exit(SD_MUTEX(un));
7280 
7281 		if (got_semaphore_here != 0) {
7282 			sema_v(&un->un_semoclose);
7283 		}
7284 		SD_TRACE(SD_LOG_IO_PM, un,
7285 		    "sdpower: exit, device has queued cmds.\n");
7286 		return (DDI_FAILURE);
7287 	}
7288 
7289 	/*
7290 	 * If the state is OFFLINE the disk is completely dead; changing its
7291 	 * power level would require sending commands, which would fail
7292 	 * anyway, so return here.
7293 	 *
7294 	 * Power changes to a device that's OFFLINE or SUSPENDED
7295 	 * are not allowed.
7296 	 */
7297 	if ((un->un_state == SD_STATE_OFFLINE) ||
7298 	    (un->un_state == SD_STATE_SUSPENDED)) {
7299 		mutex_exit(SD_MUTEX(un));
7300 
7301 		if (got_semaphore_here != 0) {
7302 			sema_v(&un->un_semoclose);
7303 		}
7304 		SD_TRACE(SD_LOG_IO_PM, un,
7305 		    "sdpower: exit, device is off-line.\n");
7306 		return (DDI_FAILURE);
7307 	}
7308 
7309 	/*
7310 	 * Change the device's state to indicate its power level
7311 	 * is being changed. Do this to prevent a power off in the
7312 	 * middle of commands, which is especially bad on devices
7313 	 * that are really powered off instead of just spun down.
7314 	 */
7315 	state_before_pm = un->un_state;
7316 	un->un_state = SD_STATE_PM_CHANGING;
7317 
7318 	mutex_exit(SD_MUTEX(un));
7319 
7320 	/*
7321 	 * If "pm-capable" property is set to TRUE by HBA drivers,
7322 	 * bypass the following checking, otherwise, check the log
7323 	 * sense information for this device
7324 	 */
7325 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
7326 		/*
7327 		 * Get the log sense information to understand whether the
7328 		 * power-cycle counts have gone beyond the threshold.
7329 		 */
7330 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7331 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7332 
7333 		mutex_enter(SD_MUTEX(un));
7334 		log_sense_page = un->un_start_stop_cycle_page;
7335 		mutex_exit(SD_MUTEX(un));
7336 
7337 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7338 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7339 #ifdef	SDDEBUG
7340 		if (sd_force_pm_supported) {
7341 			/* Force a successful result */
7342 			rval = 0;
7343 		}
7344 #endif
7345 		if (rval != 0) {
7346 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7347 			    "Log Sense Failed\n");
7348 			kmem_free(log_page_data, log_page_size);
7349 			/* Cannot support power management on those drives */
7350 
7351 			if (got_semaphore_here != 0) {
7352 				sema_v(&un->un_semoclose);
7353 			}
7354 			/*
7355 			 * On exit put the state back to its original value
7356 			 * and broadcast to anyone waiting for the power
7357 			 * change completion.
7358 			 */
7359 			mutex_enter(SD_MUTEX(un));
7360 			un->un_state = state_before_pm;
7361 			cv_broadcast(&un->un_suspend_cv);
7362 			mutex_exit(SD_MUTEX(un));
7363 			SD_TRACE(SD_LOG_IO_PM, un,
7364 			    "sdpower: exit, Log Sense Failed.\n");
7365 			return (DDI_FAILURE);
7366 		}
7367 
7368 		/*
7369 		 * From the page data - Convert the essential information to
7370 		 * pm_trans_data
7371 		 */
7372 		maxcycles =
7373 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7374 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7375 
7376 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7377 
7378 		ncycles =
7379 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7380 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7381 
7382 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7383 
7384 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7385 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7386 			    log_page_data[8+i];
7387 		}
7388 
7389 		kmem_free(log_page_data, log_page_size);
7390 
7391 		/*
7392 		 * Call pm_trans_check routine to get the Ok from
7393 		 * the global policy
7394 		 */
7395 
7396 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7397 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7398 
7399 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7400 #ifdef	SDDEBUG
7401 		if (sd_force_pm_supported) {
7402 			/* Force a successful result */
7403 			rval = 1;
7404 		}
7405 #endif
7406 		switch (rval) {
7407 		case 0:
7408 			/*
7409 			 * Not OK to power cycle, or an error in the parameters
7410 			 * passed. pm_trans_check() has supplied the advised
7411 			 * time to wait before considering a power cycle. Based
7412 			 * on the new intvlp parameter we are supposed to
7413 			 * pretend we are busy so that the pm framework will
7414 			 * never call our power entry point. Because of that,
7415 			 * install a timeout handler and wait for the
7416 			 * recommended time to elapse so that power management
7417 			 *
7418 			 * To effect this behavior, call pm_busy_component to
7419 			 * indicate to the framework this device is busy.
7420 			 * By not adjusting un_pm_count the rest of PM in
7421 			 * the driver will function normally, independent of
7422 			 * this; but because the framework is told the device
7423 			 * is busy it won't attempt powering down until it gets
7424 			 * a matching idle. The timeout handler sends this.
7425 			 * Note: sd_pm_entry can't be called here to do this
7426 			 * because sdpower may have been called as a result
7427 			 * of a call to pm_raise_power from within sd_pm_entry.
7428 			 *
7429 			 * If a timeout handler is already active then
7430 			 * don't install another.
7431 			 */
7432 			mutex_enter(&un->un_pm_mutex);
7433 			if (un->un_pm_timeid == NULL) {
7434 				un->un_pm_timeid =
7435 				    timeout(sd_pm_timeout_handler,
7436 				    un, intvlp * drv_usectohz(1000000));
7437 				mutex_exit(&un->un_pm_mutex);
7438 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7439 			} else {
7440 				mutex_exit(&un->un_pm_mutex);
7441 			}
7442 			if (got_semaphore_here != 0) {
7443 				sema_v(&un->un_semoclose);
7444 			}
7445 			/*
7446 			 * On exit put the state back to its original value
7447 			 * and broadcast to anyone waiting for the power
7448 			 * change completion.
7449 			 */
7450 			mutex_enter(SD_MUTEX(un));
7451 			un->un_state = state_before_pm;
7452 			cv_broadcast(&un->un_suspend_cv);
7453 			mutex_exit(SD_MUTEX(un));
7454 
7455 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7456 			    "trans check Failed, not ok to power cycle.\n");
7457 			return (DDI_FAILURE);
7458 
7459 		case -1:
7460 			if (got_semaphore_here != 0) {
7461 				sema_v(&un->un_semoclose);
7462 			}
7463 			/*
7464 			 * On exit put the state back to its original value
7465 			 * and broadcast to anyone waiting for the power
7466 			 * change completion.
7467 			 */
7468 			mutex_enter(SD_MUTEX(un));
7469 			un->un_state = state_before_pm;
7470 			cv_broadcast(&un->un_suspend_cv);
7471 			mutex_exit(SD_MUTEX(un));
7472 			SD_TRACE(SD_LOG_IO_PM, un,
7473 			    "sdpower: exit, trans check command Failed.\n");
7474 			return (DDI_FAILURE);
7475 		}
7476 	}
7477 
7478 	if (level == SD_SPINDLE_OFF) {
7479 		/*
7480 		 * Save the last state... if the STOP FAILS we need it
7481 		 * for restoring
7482 		 */
7483 		mutex_enter(SD_MUTEX(un));
7484 		save_state = un->un_last_state;
7485 		/*
7486 		 * There must not be any cmds getting processed
7487 		 * in the driver when we get here. Power to the
7488 		 * device is potentially going off.
7489 		 */
7490 		ASSERT(un->un_ncmds_in_driver == 0);
7491 		mutex_exit(SD_MUTEX(un));
7492 
7493 		/*
7494 		 * For now suspend the device completely before spindle is
7495 		 * turned off
7496 		 */
7497 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7498 			if (got_semaphore_here != 0) {
7499 				sema_v(&un->un_semoclose);
7500 			}
7501 			/*
7502 			 * On exit put the state back to its original value
7503 			 * and broadcast to anyone waiting for the power
7504 			 * change completion.
7505 			 */
7506 			mutex_enter(SD_MUTEX(un));
7507 			un->un_state = state_before_pm;
7508 			cv_broadcast(&un->un_suspend_cv);
7509 			mutex_exit(SD_MUTEX(un));
7510 			SD_TRACE(SD_LOG_IO_PM, un,
7511 			    "sdpower: exit, PM suspend Failed.\n");
7512 			return (DDI_FAILURE);
7513 		}
7514 	}
7515 
7516 	/*
7517 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7518 	 * close, or strategy. Dump no longer uses this routine; it uses its
7519 	 * own code so it can be done in polled mode.
7520 	 */
7521 
7522 	medium_present = TRUE;
7523 
7524 	/*
7525 	 * When powering up, issue a TUR in case the device is at unit
7526 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7527 	 * a deadlock on un_pm_busy_cv will occur.
7528 	 */
7529 	if (level == SD_SPINDLE_ON) {
7530 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7531 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7532 	}
7533 
7534 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7535 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7536 
7537 	sval = sd_send_scsi_START_STOP_UNIT(un,
7538 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7539 	    SD_PATH_DIRECT);
7540 	/* Command failed, check for media present. */
7541 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7542 		medium_present = FALSE;
7543 	}
7544 
7545 	/*
7546 	 * The conditions of interest here are:
7547 	 *   if a spindle off with media present fails,
7548 	 *	then restore the state and return an error.
7549 	 *   else if a spindle on fails,
7550 	 *	then return an error (there's no state to restore).
7551 	 * In all other cases we setup for the new state
7552 	 * and return success.
7553 	 */
7554 	switch (level) {
7555 	case SD_SPINDLE_OFF:
7556 		if ((medium_present == TRUE) && (sval != 0)) {
7557 			/* The stop command from above failed */
7558 			rval = DDI_FAILURE;
7559 			/*
7560 			 * The stop command failed, and we have media
7561 			 * present. Put the level back by calling
7562 			 * sd_ddi_pm_resume() and set the state back to
7563 			 * its previous value.
7564 			 */
7565 			(void) sd_ddi_pm_resume(un);
7566 			mutex_enter(SD_MUTEX(un));
7567 			un->un_last_state = save_state;
7568 			mutex_exit(SD_MUTEX(un));
7569 			break;
7570 		}
7571 		/*
7572 		 * The stop command from above succeeded.
7573 		 */
7574 		if (un->un_f_monitor_media_state) {
7575 			/*
7576 			 * Terminate watch thread in case of removable media
7577 			 * devices going into low power state. This is as per
7578 			 * the requirements of pm framework, otherwise commands
7579 			 * will be generated for the device (through watch
7580 			 * thread), even when the device is in low power state.
7581 			 */
7582 			mutex_enter(SD_MUTEX(un));
7583 			un->un_f_watcht_stopped = FALSE;
7584 			if (un->un_swr_token != NULL) {
7585 				opaque_t temp_token = un->un_swr_token;
7586 				un->un_f_watcht_stopped = TRUE;
7587 				un->un_swr_token = NULL;
7588 				mutex_exit(SD_MUTEX(un));
7589 				(void) scsi_watch_request_terminate(temp_token,
7590 				    SCSI_WATCH_TERMINATE_WAIT);
7591 			} else {
7592 				mutex_exit(SD_MUTEX(un));
7593 			}
7594 		}
7595 		break;
7596 
7597 	default:	/* The level requested is spindle on... */
7598 		/*
7599 		 * Legacy behavior: return success on a failed spinup
7600 		 * if there is no media in the drive.
7601 		 * Do this by looking at medium_present here.
7602 		 */
7603 		if ((sval != 0) && medium_present) {
7604 			/* The start command from above failed */
7605 			rval = DDI_FAILURE;
7606 			break;
7607 		}
7608 		/*
7609 		 * The start command from above succeeded.
7610 		 * Resume the devices now that we have
7611 		 * started the disks.
7612 		 */
7613 		(void) sd_ddi_pm_resume(un);
7614 
7615 		/*
7616 		 * Resume the watch thread since it was suspended
7617 		 * when the device went into low power mode.
7618 		 */
7619 		if (un->un_f_monitor_media_state) {
7620 			mutex_enter(SD_MUTEX(un));
7621 			if (un->un_f_watcht_stopped == TRUE) {
7622 				opaque_t temp_token;
7623 
7624 				un->un_f_watcht_stopped = FALSE;
7625 				mutex_exit(SD_MUTEX(un));
7626 				temp_token = scsi_watch_request_submit(
7627 				    SD_SCSI_DEVP(un),
7628 				    sd_check_media_time,
7629 				    SENSE_LENGTH, sd_media_watch_cb,
7630 				    (caddr_t)dev);
7631 				mutex_enter(SD_MUTEX(un));
7632 				un->un_swr_token = temp_token;
7633 			}
7634 			mutex_exit(SD_MUTEX(un));
7635 		}
7636 	}
7637 	if (got_semaphore_here != 0) {
7638 		sema_v(&un->un_semoclose);
7639 	}
7640 	/*
7641 	 * On exit put the state back to its original value
7642 	 * and broadcast to anyone waiting for the power
7643 	 * change completion.
7644 	 */
7645 	mutex_enter(SD_MUTEX(un));
7646 	un->un_state = state_before_pm;
7647 	cv_broadcast(&un->un_suspend_cv);
7648 	mutex_exit(SD_MUTEX(un));
7649 
7650 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7651 
7652 	return (rval);
7653 }
7654 
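/*
 * The big-endian unpacking in sdpower() above, as a hedged standalone
 * sketch: the lifetime-maximum and accumulated start/stop cycle counts
 * are read as 32-bit big-endian values at offsets 0x1c and 0x24 into the
 * log page buffer. The offsets assume the same page layout the code
 * above relies on; the helper is illustrative, not part of the driver.
 */
#include <stdint.h>

static uint32_t
be32_at(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	    (uint32_t)p[2] << 8 | (uint32_t)p[3]);
}

/*
 * usage:	maxcycles = be32_at(&log_page_data[0x1c]);
 *		ncycles   = be32_at(&log_page_data[0x24]);
 */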
7655 
7656 
7657 /*
7658  *    Function: sdattach
7659  *
7660  * Description: Driver's attach(9e) entry point function.
7661  *
7662  *   Arguments: devi - opaque device info handle
7663  *		cmd  - attach  type
7664  *
7665  * Return Code: DDI_SUCCESS
7666  *		DDI_FAILURE
7667  *
7668  *     Context: Kernel thread context
7669  */
7670 
7671 static int
7672 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7673 {
7674 	switch (cmd) {
7675 	case DDI_ATTACH:
7676 		return (sd_unit_attach(devi));
7677 	case DDI_RESUME:
7678 		return (sd_ddi_resume(devi));
7679 	default:
7680 		break;
7681 	}
7682 	return (DDI_FAILURE);
7683 }
7684 
7685 
7686 /*
7687  *    Function: sddetach
7688  *
7689  * Description: Driver's detach(9E) entry point function.
7690  *
7691  *   Arguments: devi - opaque device info handle
7692  *		cmd  - detach  type
7693  *
7694  * Return Code: DDI_SUCCESS
7695  *		DDI_FAILURE
7696  *
7697  *     Context: Kernel thread context
7698  */
7699 
7700 static int
7701 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7702 {
7703 	switch (cmd) {
7704 	case DDI_DETACH:
7705 		return (sd_unit_detach(devi));
7706 	case DDI_SUSPEND:
7707 		return (sd_ddi_suspend(devi));
7708 	default:
7709 		break;
7710 	}
7711 	return (DDI_FAILURE);
7712 }
7713 
7714 
7715 /*
7716  *     Function: sd_sync_with_callback
7717  *
7718  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7719  *		 state while the callback routine is active.
7720  *
7721  *    Arguments: un: softstate structure for the instance
7722  *
7723  *	Context: Kernel thread context
7724  */
7725 
7726 static void
7727 sd_sync_with_callback(struct sd_lun *un)
7728 {
7729 	ASSERT(un != NULL);
7730 
7731 	mutex_enter(SD_MUTEX(un));
7732 
7733 	ASSERT(un->un_in_callback >= 0);
7734 
7735 	while (un->un_in_callback > 0) {
7736 		mutex_exit(SD_MUTEX(un));
7737 		delay(2);
7738 		mutex_enter(SD_MUTEX(un));
7739 	}
7740 
7741 	mutex_exit(SD_MUTEX(un));
7742 }
7743 
7744 /*
7745  *    Function: sd_unit_attach
7746  *
7747  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7748  *		the soft state structure for the device and performs
7749  *		all necessary structure and device initializations.
7750  *
7751  *   Arguments: devi: the system's dev_info_t for the device.
7752  *
7753  * Return Code: DDI_SUCCESS if attach is successful.
7754  *		DDI_FAILURE if any part of the attach fails.
7755  *
7756  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7757  *		Kernel thread context only.  Can sleep.
7758  */
7759 
7760 static int
7761 sd_unit_attach(dev_info_t *devi)
7762 {
7763 	struct	scsi_device	*devp;
7764 	struct	sd_lun		*un;
7765 	char			*variantp;
7766 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7767 	int	instance;
7768 	int	rval;
7769 	int	wc_enabled;
7770 	uint64_t	capacity;
7771 	uint_t		lbasize;
7772 
7773 	/*
7774 	 * Retrieve the target driver's private data area. This was set
7775 	 * up by the HBA.
7776 	 */
7777 	devp = ddi_get_driver_private(devi);
7778 
7779 	/*
7780 	 * Since we have no idea what state things were left in by the last
7781 	 * user of the device, set up some 'default' settings, i.e. turn 'em
7782 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7783 	 * Do this before the scsi_probe, which sends an inquiry.
7784 	 * This is a fix for bug (4430280).
7785 	 * Of special importance is wide-xfer. The drive could have been left
7786 	 * in wide transfer mode by the last driver to communicate with it,
7787 	 * this includes us. If that's the case, and if the following is not
7788 	 * setup properly or we don't re-negotiate with the drive prior to
7789 	 * transferring data to/from the drive, it causes bus parity errors,
7790 	 * data overruns, and unexpected interrupts. This first occurred when
7791 	 * the fix for bug (4378686) was made.
7792 	 */
7793 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7794 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7795 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7796 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7797 
7798 	/*
7799 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7800 	 * This call will allocate and fill in the scsi_inquiry structure
7801 	 * and point the sd_inq member of the scsi_device structure to it.
7802 	 * If the attach succeeds, then this memory will not be de-allocated
7803 	 * (via scsi_unprobe()) until the instance is detached.
7804 	 */
7805 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7806 		goto probe_failed;
7807 	}
7808 
7809 	/*
7810 	 * Check the device type as specified in the inquiry data and
7811 	 * claim it if it is of a type that we support.
7812 	 */
7813 	switch (devp->sd_inq->inq_dtype) {
7814 	case DTYPE_DIRECT:
7815 		break;
7816 	case DTYPE_RODIRECT:
7817 		break;
7818 	case DTYPE_OPTICAL:
7819 		break;
7820 	case DTYPE_NOTPRESENT:
7821 	default:
7822 		/* Unsupported device type; fail the attach. */
7823 		goto probe_failed;
7824 	}
7825 
7826 	/*
7827 	 * Allocate the soft state structure for this unit.
7828 	 *
7829 	 * We rely upon this memory being set to all zeroes by
7830 	 * ddi_soft_state_zalloc().  We assume that any member of the
7831 	 * soft state structure that is not explicitly initialized by
7832 	 * this routine will have a value of zero.
7833 	 */
7834 	instance = ddi_get_instance(devp->sd_dev);
7835 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7836 		goto probe_failed;
7837 	}
7838 
7839 	/*
7840 	 * Retrieve a pointer to the newly-allocated soft state.
7841 	 *
7842 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7843 	 * was successful, unless something has gone horribly wrong and the
7844 	 * ddi's soft state internals are corrupt (in which case it is
7845 	 * probably better to halt here than just fail the attach....)
7846 	 */
7847 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7848 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7849 		    instance);
7850 		/*NOTREACHED*/
7851 	}
7852 
7853 	/*
7854 	 * Link the back ptr of the driver soft state to the scsi_device
7855 	 * struct for this lun.
7856 	 * Save a pointer to the softstate in the driver-private area of
7857 	 * the scsi_device struct.
7858 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7859 	 * we first set un->un_sd below.
7860 	 */
7861 	un->un_sd = devp;
7862 	devp->sd_private = (opaque_t)un;
7863 
7864 	/*
7865 	 * The following must be after devp is stored in the soft state struct.
7866 	 */
7867 #ifdef SDDEBUG
7868 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7869 	    "%s_unit_attach: un:0x%p instance:%d\n",
7870 	    ddi_driver_name(devi), un, instance);
7871 #endif
7872 
7873 	/*
7874 	 * Set up the device type and node type (for the minor nodes).
7875 	 * By default we assume that the device can at least support the
7876 	 * Common Command Set. Call it a CD-ROM if it reports itself
7877 	 * as a RODIRECT device.
7878 	 */
7879 	switch (devp->sd_inq->inq_dtype) {
7880 	case DTYPE_RODIRECT:
7881 		un->un_node_type = DDI_NT_CD_CHAN;
7882 		un->un_ctype	 = CTYPE_CDROM;
7883 		break;
7884 	case DTYPE_OPTICAL:
7885 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7886 		un->un_ctype	 = CTYPE_ROD;
7887 		break;
7888 	default:
7889 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7890 		un->un_ctype	 = CTYPE_CCS;
7891 		break;
7892 	}
7893 
7894 	/*
7895 	 * Try to read the interconnect type from the HBA.
7896 	 *
7897 	 * Note: This driver is currently compiled as two binaries, a parallel
7898 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7899 	 * differences are determined at compile time. In the future a single
7900 	 * binary will be provided and the interconnect type will be used to
7901 	 * differentiate between fibre and parallel scsi behaviors. At that time
7902 	 * it will be necessary for all fibre channel HBAs to support this
7903 	 * property.
7904 	 *
7905 	 * Set un_f_is_fibre to TRUE (default fibre).
7906 	 */
7907 	un->un_f_is_fibre = TRUE;
7908 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7909 	case INTERCONNECT_SSA:
7910 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7911 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7912 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7913 		break;
7914 	case INTERCONNECT_PARALLEL:
7915 		un->un_f_is_fibre = FALSE;
7916 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7917 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7918 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7919 		break;
7920 	case INTERCONNECT_SATA:
7921 		un->un_f_is_fibre = FALSE;
7922 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7923 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7924 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7925 		break;
7926 	case INTERCONNECT_FIBRE:
7927 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7928 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7929 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7930 		break;
7931 	case INTERCONNECT_FABRIC:
7932 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7933 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7934 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7935 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7936 		break;
7937 	default:
7938 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7939 		/*
7940 		 * The HBA does not support the "interconnect-type" property
7941 		 * (or did not provide a recognized type).
7942 		 *
7943 		 * Note: This will be obsoleted when a single fibre channel
7944 		 * and parallel scsi driver is delivered. In the meantime the
7945 		 * interconnect type will be set to the platform default. If that
7946 		 * type is not parallel SCSI, it means that we should be
7947 		 * assuming "ssd" semantics. However, here this also means that
7948 		 * the FC HBA is not supporting the "interconnect-type" property
7949 		 * like we expect it to, so log this occurrence.
7950 		 */
7951 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7952 		if (!SD_IS_PARALLEL_SCSI(un)) {
7953 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7954 			    "sd_unit_attach: un:0x%p Assuming "
7955 			    "INTERCONNECT_FIBRE\n", un);
7956 		} else {
7957 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7958 			    "sd_unit_attach: un:0x%p Assuming "
7959 			    "INTERCONNECT_PARALLEL\n", un);
7960 			un->un_f_is_fibre = FALSE;
7961 		}
7962 #else
7963 		/*
7964 		 * Note: This source will be implemented when a single fibre
7965 		 * channel and parallel scsi driver is delivered. The default
7966 		 * will be to assume that if a device does not support the
7967 		 * "interconnect-type" property it is a parallel SCSI HBA and
7968 		 * we will set the interconnect type for parallel scsi.
7969 		 */
7970 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7971 		un->un_f_is_fibre = FALSE;
7972 #endif
7973 		break;
7974 	}
7975 
7976 	if (un->un_f_is_fibre == TRUE) {
7977 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7978 		    SCSI_VERSION_3) {
7979 			switch (un->un_interconnect_type) {
7980 			case SD_INTERCONNECT_FIBRE:
7981 			case SD_INTERCONNECT_SSA:
7982 				un->un_node_type = DDI_NT_BLOCK_WWN;
7983 				break;
7984 			default:
7985 				break;
7986 			}
7987 		}
7988 	}
7989 
7990 	/*
7991 	 * Initialize the Request Sense command for the target
7992 	 */
7993 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7994 		goto alloc_rqs_failed;
7995 	}
7996 
7997 	/*
7998 	 * Set un_retry_count with SD_RETRY_COUNT; this is OK for SPARC,
7999 	 * which has separate binaries for sd and ssd.
8000 	 *
8001 	 * x86 has one binary, so un_retry_count is set based on the
8002 	 * connection type. The hardcoded values will go away when SPARC
8003 	 * uses one binary for sd and ssd. These hardcoded values need to
8004 	 * match SD_RETRY_COUNT in sddef.h.
8005 	 * The value used is based on interconnect type:
8006 	 * fibre = 3, parallel = 5.
8007 	 */
8008 #if defined(__i386) || defined(__amd64)
8009 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
8010 #else
8011 	un->un_retry_count = SD_RETRY_COUNT;
8012 #endif
8013 
8014 	/*
8015 	 * Set the per disk retry count to the default number of retries
8016 	 * for disks and CDROMs. This value can be overridden by the
8017 	 * disk property list or an entry in sd.conf.
8018 	 */
8019 	un->un_notready_retry_count =
8020 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
8021 	    : DISK_NOT_READY_RETRY_COUNT(un);
8022 
8023 	/*
8024 	 * Set the busy retry count to the default value of un_retry_count.
8025 	 * This can be overridden by entries in sd.conf or the device
8026 	 * config table.
8027 	 */
8028 	un->un_busy_retry_count = un->un_retry_count;
8029 
8030 	/*
8031 	 * Init the reset threshold for retries.  This number determines
8032 	 * how many retries must be performed before a reset can be issued
8033 	 * (for certain error conditions). This can be overridden by entries
8034 	 * in sd.conf or the device config table.
8035 	 */
8036 	un->un_reset_retry_count = (un->un_retry_count / 2);
8037 
8038 	/*
8039 	 * Set the victim_retry_count to the default un_retry_count
8040 	 */
8041 	un->un_victim_retry_count = (2 * un->un_retry_count);
8042 
8043 	/*
8044 	 * Set the reservation release timeout to the default value of
8045 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
8046 	 * device config table.
8047 	 */
8048 	un->un_reserve_release_time = 5;
8049 
8050 	/*
8051 	 * Set up the default maximum transfer size. Note that this may
8052 	 * get updated later in the attach, when setting up default wide
8053 	 * operations for disks.
8054 	 */
8055 #if defined(__i386) || defined(__amd64)
8056 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
8057 #else
8058 	un->un_max_xfer_size = (uint_t)maxphys;
8059 #endif
8060 
8061 	/*
8062 	 * Get "allow bus device reset" property (defaults to "enabled" if
8063 	 * the property was not defined). This is to disable bus resets for
8064 	 * certain kinds of error recovery. Note: In the future when a run-time
8065 	 * fibre check is available the soft state flag should default to
8066 	 * enabled.
8067 	 */
8068 	if (un->un_f_is_fibre == TRUE) {
8069 		un->un_f_allow_bus_device_reset = TRUE;
8070 	} else {
8071 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8072 		    "allow-bus-device-reset", 1) != 0) {
8073 			un->un_f_allow_bus_device_reset = TRUE;
8074 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8075 			    "sd_unit_attach: un:0x%p Bus device reset enabled\n",
8076 			    un);
8077 		} else {
8078 			un->un_f_allow_bus_device_reset = FALSE;
8079 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8080 			    "sd_unit_attach: un:0x%p Bus device reset disabled\n",
8081 			    un);
8082 		}
8083 	}
8084 
8085 	/*
8086 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
8087 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
8088 	 *
8089 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
8090 	 * property. The new "variant" property with a value of "atapi" has been
8091 	 * introduced so that future 'variants' of standard SCSI behavior (like
8092 	 * atapi) could be specified by the underlying HBA drivers by supplying
8093 	 * a new value for the "variant" property, instead of having to define a
8094 	 * new property.
8095 	 */
8096 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
8097 		un->un_f_cfg_is_atapi = TRUE;
8098 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8099 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
8100 	}
8101 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
8102 	    &variantp) == DDI_PROP_SUCCESS) {
8103 		if (strcmp(variantp, "atapi") == 0) {
8104 			un->un_f_cfg_is_atapi = TRUE;
8105 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8106 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
8107 		}
8108 		ddi_prop_free(variantp);
8109 	}
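	/*
	 * Illustrative sketch (not part of this driver): an HBA nexus
	 * driver could export the "variant" property on the child node
	 * with a call such as
	 *
	 *	(void) ddi_prop_update_string(DDI_DEV_T_NONE, child_dip,
	 *	    "variant", "atapi");
	 *
	 * where "child_dip" is a hypothetical pointer to the target's
	 * devinfo node.
	 */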
8110 
8111 	un->un_cmd_timeout	= SD_IO_TIME;
8112 
8113 	/* Info on current states, statuses, etc. (Updated frequently) */
8114 	un->un_state		= SD_STATE_NORMAL;
8115 	un->un_last_state	= SD_STATE_NORMAL;
8116 
8117 	/* Control & status info for command throttling */
8118 	un->un_throttle		= sd_max_throttle;
8119 	un->un_saved_throttle	= sd_max_throttle;
8120 	un->un_min_throttle	= sd_min_throttle;
8121 
8122 	if (un->un_f_is_fibre == TRUE) {
8123 		un->un_f_use_adaptive_throttle = TRUE;
8124 	} else {
8125 		un->un_f_use_adaptive_throttle = FALSE;
8126 	}
8127 
8128 	/* Removable media support. */
8129 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
8130 	un->un_mediastate		= DKIO_NONE;
8131 	un->un_specified_mediastate	= DKIO_NONE;
8132 
8133 	/* CVs for suspend/resume (PM or DR) */
8134 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
8135 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
8136 
8137 	/* Power management support. */
8138 	un->un_power_level = SD_SPINDLE_UNINIT;
8139 
8140 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8141 	un->un_f_wcc_inprog = 0;
8142 
8143 	/*
8144 	 * The open/close semaphore is used to serialize threads executing
8145 	 * in the driver's open & close entry point routines for a given
8146 	 * instance.
8147 	 */
8148 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
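	/*
	 * (A semaphore initialized with a count of 1 serves as a binary
	 * lock; one reason a semaphore rather than a mutex is used here
	 * is that it may be held across operations that block.)
	 */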
8149 
8150 	/*
8151 	 * The conf file entry and softstate variable are a forceful override,
8152 	 * meaning a non-zero value must be entered to change the default.
8153 	 */
8154 	un->un_f_disksort_disabled = FALSE;
8155 
8156 	/*
8157 	 * Retrieve the properties from the static driver table or the driver
8158 	 * configuration file (.conf) for this unit and update the soft state
8159 	 * for the device as needed for the indicated properties.
8160 	 * Note: the property configuration needs to occur here as some of the
8161 	 * following routines may have dependencies on soft state flags set
8162 	 * as part of the driver property configuration.
8163 	 */
8164 	sd_read_unit_properties(un);
8165 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8166 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8167 
8168 	/*
8169 	 * A device is treated as hotpluggable only if it has the
8170 	 * "hotpluggable" property; otherwise it is regarded as
8171 	 * non-hotpluggable.
8172 	 */
8173 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8174 	    -1) != -1) {
8175 		un->un_f_is_hotpluggable = TRUE;
8176 	}
8177 
8178 	/*
8179 	 * Set the unit's attributes (flags) according to "hotpluggable" and
8180 	 * RMB bit in INQUIRY data.
8181 	 */
8182 	sd_set_unit_attributes(un, devi);
8183 
8184 	/*
8185 	 * By default, we mark the capacity, lbasize, and geometry
8186 	 * as invalid. Only if we successfully read a valid capacity
8187 	 * will we update the un_blockcount and un_tgt_blocksize with the
8188 	 * valid values (the geometry will be validated later).
8189 	 */
8190 	un->un_f_blockcount_is_valid	= FALSE;
8191 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8192 	un->un_f_geometry_is_valid	= FALSE;
8193 
8194 	/*
8195 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8196 	 * otherwise.
8197 	 */
8198 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8199 	un->un_blockcount = 0;
8200 
8201 	/*
8202 	 * Set up the per-instance info needed to determine the correct
8203 	 * CDBs and other info for issuing commands to the target.
8204 	 */
8205 	sd_init_cdb_limits(un);
8206 
8207 	/*
8208 	 * Set up the IO chains to use, based upon the target type.
8209 	 */
8210 	if (un->un_f_non_devbsize_supported) {
8211 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8212 	} else {
8213 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8214 	}
8215 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8216 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8217 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8218 
8219 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8220 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8221 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8222 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
8223 
8224 
8225 	if (ISCD(un)) {
8226 		un->un_additional_codes = sd_additional_codes;
8227 	} else {
8228 		un->un_additional_codes = NULL;
8229 	}
8230 
8231 	/*
8232 	 * Create the kstats here so they can be available for attach-time
8233 	 * routines that send commands to the unit (either polled or via
8234 	 * sd_send_scsi_cmd).
8235 	 *
8236 	 * Note: This is a critical sequence that needs to be maintained:
8237 	 *	1) Instantiate the kstats here, before any routines using the
8238 	 *	   iopath (i.e. sd_send_scsi_cmd).
8239 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8240 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8241 	 *	   sd_register_devid(), and sd_cache_control().
8242 	 */
8243 
8244 	un->un_stats = kstat_create(sd_label, instance,
8245 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8246 	if (un->un_stats != NULL) {
8247 		un->un_stats->ks_lock = SD_MUTEX(un);
8248 		kstat_install(un->un_stats);
8249 	}
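	/*
	 * Note: this KSTAT_TYPE_IO kstat is what utilities such as
	 * iostat(1M) read for per-device throughput reporting; the error
	 * kstats created just below feed the 'iostat -E' display.
	 */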
8250 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8251 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8252 
8253 	sd_create_errstats(un, instance);
8254 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8255 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8256 
8257 	/*
8258 	 * The following if/else code was relocated here from below as part
8259 	 * of the fix for bug (4430280). However with the default setup added
8260 	 * on entry to this routine, it's no longer absolutely necessary for
8261 	 * this to be before the call to sd_spin_up_unit.
8262 	 */
8263 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
8264 		/*
8265 		 * If SCSI-2 tagged queueing is supported by the target
8266 		 * and by the host adapter then we will enable it.
8267 		 */
8268 		un->un_tagflags = 0;
8269 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8270 		    (devp->sd_inq->inq_cmdque) &&
8271 		    (un->un_f_arq_enabled == TRUE)) {
8272 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8273 			    1, 1) == 1) {
8274 				un->un_tagflags = FLAG_STAG;
8275 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8276 				    "sd_unit_attach: un:0x%p tag queueing "
8277 				    "enabled\n", un);
8278 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8279 			    "untagged-qing", 0) == 1) {
8280 				un->un_f_opt_queueing = TRUE;
8281 				un->un_saved_throttle = un->un_throttle =
8282 				    min(un->un_throttle, 3);
8283 			} else {
8284 				un->un_f_opt_queueing = FALSE;
8285 				un->un_saved_throttle = un->un_throttle = 1;
8286 			}
8287 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8288 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8289 			/* The Host Adapter supports internal queueing. */
8290 			un->un_f_opt_queueing = TRUE;
8291 			un->un_saved_throttle = un->un_throttle =
8292 			    min(un->un_throttle, 3);
8293 		} else {
8294 			un->un_f_opt_queueing = FALSE;
8295 			un->un_saved_throttle = un->un_throttle = 1;
8296 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8297 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8298 		}
8299 
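		/*
		 * To summarize the policy above: with tagged queueing the
		 * throttle stays at sd_max_throttle; with only untagged
		 * (host adapter) queueing it is capped at 3; with neither,
		 * it drops to 1 so that a single command is outstanding.
		 */
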
8300 		/*
8301 		 * Enable large transfers for SATA/SAS drives
8302 		 */
8303 		if (SD_IS_SERIAL(un)) {
8304 			un->un_max_xfer_size =
8305 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8306 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8307 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8308 			    "sd_unit_attach: un:0x%p max transfer "
8309 			    "size=0x%x\n", un, un->un_max_xfer_size);
8310 
8311 		}
8312 
8313 		/* Setup or tear down default wide operations for disks */
8314 
8315 		/*
8316 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8317 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8318 		 * system and be set to different values. In the future this
8319 		 * code may need to be updated when the ssd module is
8320 		 * obsoleted and removed from the system. (4299588)
8321 		 */
8322 		if (SD_IS_PARALLEL_SCSI(un) &&
8323 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8324 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8325 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8326 			    1, 1) == 1) {
8327 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8328 				    "sd_unit_attach: un:0x%p Wide Transfer "
8329 				    "enabled\n", un);
8330 			}
8331 
8332 			/*
8333 			 * If tagged queuing has also been enabled, then
8334 			 * enable large xfers
8335 			 */
8336 			if (un->un_saved_throttle == sd_max_throttle) {
8337 				un->un_max_xfer_size =
8338 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8339 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8340 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8341 				    "sd_unit_attach: un:0x%p max transfer "
8342 				    "size=0x%x\n", un, un->un_max_xfer_size);
8343 			}
8344 		} else {
8345 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8346 			    0, 1) == 1) {
8347 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8348 				    "sd_unit_attach: un:0x%p "
8349 				    "Wide Transfer disabled\n", un);
8350 			}
8351 		}
8352 	} else {
8353 		un->un_tagflags = FLAG_STAG;
8354 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8355 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8356 	}
8357 
8358 	/*
8359 	 * If this target supports LUN reset, try to enable it.
8360 	 */
8361 	if (un->un_f_lun_reset_enabled) {
8362 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8363 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8364 			    "un:0x%p lun_reset capability set\n", un);
8365 		} else {
8366 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8367 			    "un:0x%p lun-reset capability not set\n", un);
8368 		}
8369 	}
8370 
8371 	/*
8372 	 * At this point in the attach, we have enough info in the
8373 	 * soft state to be able to issue commands to the target.
8374 	 *
8375 	 * All command paths used below MUST issue their commands as
8376 	 * SD_PATH_DIRECT. This is important as intermediate layers
8377 	 * are not all initialized yet (such as PM).
8378 	 */
8379 
8380 	/*
8381 	 * Send a TEST UNIT READY command to the device. This should clear
8382 	 * any outstanding UNIT ATTENTION that may be present.
8383 	 *
8384 	 * Note: Don't check for success, just track if there is a reservation,
8385 	 * this is a throw away command to clear any unit attentions.
8386 	 *
8387 	 * Note: This MUST be the first command issued to the target during
8388 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8389 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8390 	 * with attempts at spinning up a device with no media.
8391 	 */
8392 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8393 		reservation_flag = SD_TARGET_IS_RESERVED;
8394 	}
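	/*
	 * (EACCES is how the sd_send_scsi_* wrappers report a SCSI
	 * reservation conflict status from the target; the same mapping
	 * appears in the spin-up and READ CAPACITY handling below.)
	 */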
8395 
8396 	/*
8397 	 * If the device is NOT a removable media device, attempt to spin
8398 	 * it up (using the START_STOP_UNIT command) and read its capacity
8399 	 * (using the READ CAPACITY command).  Note, however, that either
8400 	 * of these could fail and in some cases we would continue with
8401 	 * the attach despite the failure (see below).
8402 	 */
8403 	if (un->un_f_descr_format_supported) {
8404 		switch (sd_spin_up_unit(un)) {
8405 		case 0:
8406 			/*
8407 			 * Spin-up was successful; now try to read the
8408 			 * capacity.  If successful then save the results
8409 			 * and mark the capacity & lbasize as valid.
8410 			 */
8411 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8412 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8413 
8414 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8415 			    &lbasize, SD_PATH_DIRECT)) {
8416 			case 0: {
8417 				if (capacity > DK_MAX_BLOCKS) {
8418 #ifdef _LP64
8419 					if (capacity + 1 >
8420 					    SD_GROUP1_MAX_ADDRESS) {
8421 						/*
8422 						 * Enable descriptor format
8423 						 * sense data so that we can
8424 						 * get 64 bit sense data
8425 						 * fields.
8426 						 */
8427 						sd_enable_descr_sense(un);
8428 					}
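					/*
					 * (Descriptor-format sense is needed
					 * above because fixed-format sense
					 * carries only a 32-bit information
					 * field and cannot report LBAs past
					 * the Group 1 CDB limit.)
					 */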
8429 #else
8430 					/* 32-bit kernels can't handle this */
8431 					scsi_log(SD_DEVINFO(un),
8432 					    sd_label, CE_WARN,
8433 					    "disk has %llu blocks, which "
8434 					    "is too large for a 32-bit "
8435 					    "kernel", capacity);
8436 
8437 #if defined(__i386) || defined(__amd64)
8438 					/*
8439 					 * Refer to comments related to off-by-1
8440 					 * at the header of this file.
8441 					 * A 1TB disk was treated as
8442 					 * (1T - 512)B in the past, so it
8443 					 * might have a valid VTOC and
8444 					 * Solaris partitions; we have to
8445 					 * allow it to continue to work.
8446 					 */
8447 					if (capacity - 1 > DK_MAX_BLOCKS)
8448 #endif
8449 					goto spinup_failed;
8450 #endif
8451 				}
8452 
8453 				/*
8454 				 * It is not necessary here to check
8455 				 * whether the device capacity exceeds
8456 				 * what the max HBA CDB can support:
8457 				 * sd_send_scsi_READ_CAPACITY retrieves
8458 				 * the capacity via a USCSI command,
8459 				 * which is itself constrained by the
8460 				 * max HBA CDB. It returns EINVAL when
8461 				 * a bigger CDB than the supported
8462 				 * length would be required; that case
8463 				 * is handled in "case EINVAL" below.
8464 				 */
8465 
8466 				/*
8467 				 * The following relies on
8468 				 * sd_send_scsi_READ_CAPACITY never
8469 				 * returning 0 for capacity and/or lbasize.
8470 				 */
8471 				sd_update_block_info(un, lbasize, capacity);
8472 
8473 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8474 				    "sd_unit_attach: un:0x%p capacity = %ld "
8475 				    "blocks; lbasize= %ld.\n", un,
8476 				    un->un_blockcount, un->un_tgt_blocksize);
8477 
8478 				break;
8479 			}
8480 			case EINVAL:
8481 				/*
8482 				 * In the case where the max-cdb-length property
8483 				 * is smaller than the required CDB length for
8484 				 * a SCSI device, a target driver can fail to
8485 				 * attach to that device.
8486 				 */
8487 				scsi_log(SD_DEVINFO(un),
8488 				    sd_label, CE_WARN,
8489 				    "disk capacity is too large "
8490 				    "for current cdb length");
8491 				goto spinup_failed;
8492 			case EACCES:
8493 				/*
8494 				 * Should never get here if the spin-up
8495 				 * succeeded, but code it in anyway.
8496 				 * From here, just continue with the attach...
8497 				 */
8498 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8499 				    "sd_unit_attach: un:0x%p "
8500 				    "sd_send_scsi_READ_CAPACITY "
8501 				    "returned reservation conflict\n", un);
8502 				reservation_flag = SD_TARGET_IS_RESERVED;
8503 				break;
8504 			default:
8505 				/*
8506 				 * Likewise, should never get here if the
8507 				 * spin-up succeeded. Just continue with
8508 				 * the attach...
8509 				 */
8510 				break;
8511 			}
8512 			break;
8513 		case EACCES:
8514 			/*
8515 			 * Device is reserved by another host.  In this case
8516 			 * we could not spin it up or read the capacity, but
8517 			 * we continue with the attach anyway.
8518 			 */
8519 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8520 			    "sd_unit_attach: un:0x%p spin-up reservation "
8521 			    "conflict.\n", un);
8522 			reservation_flag = SD_TARGET_IS_RESERVED;
8523 			break;
8524 		default:
8525 			/* Fail the attach if the spin-up failed. */
8526 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8527 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8528 			goto spinup_failed;
8529 		}
8530 	}
8531 
8532 	/*
8533 	 * Check to see if this is a MMC drive
8534 	 */
8535 	if (ISCD(un)) {
8536 		sd_set_mmc_caps(un);
8537 	}
8538 
8539 	/*
8540 	 * Create the minor nodes for the device.
8541 	 * Note: If we want to support fdisk on both sparc and intel, this will
8542 	 * have to separate out the notion that VTOC8 is always sparc, and
8543 	 * VTOC16 is always intel (though these can be the defaults).  The vtoc
8544 	 * type will have to be determined at run-time, and the fdisk
8545 	 * partitioning will have to have been read & set up before we
8546 	 * create the minor nodes. (any other inits (such as kstats) that
8547 	 * also ought to be done before creating the minor nodes?) (Doesn't
8548 	 * setting up the minor nodes kind of imply that we're ready to
8549 	 * handle an open from userland?)
8550 	 */
8551 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8552 		goto create_minor_nodes_failed;
8553 	}
8554 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8555 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8556 
8557 	/*
8558 	 * Add a zero-length attribute to tell the world we support
8559 	 * kernel ioctls (for layered drivers)
8560 	 */
8561 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8562 	    DDI_KERNEL_IOCTL, NULL, 0);
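	/*
	 * Illustrative sketch (not from this file): a layered driver can
	 * test for this capability before issuing kernel ioctls with
	 * something like
	 *
	 *	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	 *	    DDI_KERNEL_IOCTL) == 1)
	 *
	 * where "dip" is a hypothetical handle to this device node.
	 */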
8563 
8564 	/*
8565 	 * Add a boolean property to tell the world we support
8566 	 * the B_FAILFAST flag (for layered drivers)
8567 	 */
8568 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8569 	    "ddi-failfast-supported", NULL, 0);
8570 
8571 	/*
8572 	 * Initialize power management
8573 	 */
8574 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8575 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8576 	sd_setup_pm(un, devi);
8577 	if (un->un_f_pm_is_enabled == FALSE) {
8578 		/*
8579 		 * For performance, point to a jump table that does
8580 		 * not include pm.
8581 		 * The direct and priority chains don't change with PM.
8582 		 *
8583 		 * Note: this is currently done based on individual device
8584 		 * capabilities. When an interface for determining system
8585 		 * power enabled state becomes available, or when additional
8586 		 * layers are added to the command chain, these values will
8587 		 * have to be re-evaluated for correctness.
8588 		 */
8589 		if (un->un_f_non_devbsize_supported) {
8590 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8591 		} else {
8592 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8593 		}
8594 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8595 	}
8596 
8597 	/*
8598 	 * This property is set to 0 by HA software to avoid retries
8599 	 * on a reserved disk. (The preferred property name is
8600 	 * "retry-on-reservation-conflict") (1189689)
8601 	 *
8602 	 * Note: The use of a global here can have unintended consequences. A
8603 	 * per instance variable is preferable to match the capabilities of
8604 	 * different underlying hba's (4402600)
8605 	 */
8606 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8607 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8608 	    sd_retry_on_reservation_conflict);
8609 	if (sd_retry_on_reservation_conflict != 0) {
8610 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8611 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8612 		    sd_retry_on_reservation_conflict);
8613 	}
8614 
8615 	/* Set up options for QFULL handling. */
8616 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8617 	    "qfull-retries", -1)) != -1) {
8618 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8619 		    rval, 1);
8620 	}
8621 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8622 	    "qfull-retry-interval", -1)) != -1) {
8623 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8624 		    rval, 1);
8625 	}
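	/*
	 * Note: "qfull-retries" and "qfull-retry-interval" are standard
	 * scsi_ifsetcap(9F) capabilities. A hypothetical driver.conf
	 * entry such as "qfull-retries=20;" would be picked up here and
	 * pushed down to the HBA.
	 */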
8626 
8627 	/*
8628 	 * This just prints a message that announces the existence of the
8629 	 * device. The message is always printed in the system logfile, but
8630 	 * only appears on the console if the system is booted with the
8631 	 * -v (verbose) argument.
8632 	 */
8633 	ddi_report_dev(devi);
8634 
8635 	/*
8636 	 * The framework calls driver attach routines single-threaded
8637 	 * for a given instance.  However, we still acquire SD_MUTEX here
8638 	 * because this is required for calling the sd_validate_geometry()
8639 	 * and sd_register_devid() functions.
8640 	 */
8641 	mutex_enter(SD_MUTEX(un));
8642 	un->un_f_geometry_is_valid = FALSE;
8643 	un->un_mediastate = DKIO_NONE;
8644 	un->un_reserved = -1;
8645 
8646 	/*
8647 	 * Read and validate the device's geometry (ie, disk label)
8648 	 * A new unformatted drive will not have a valid geometry, but
8649 	 * the driver needs to successfully attach to this device so
8650 	 * the drive can be formatted via ioctls.
8651 	 */
8652 	if (((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8653 	    ENOTSUP)) &&
8654 	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8655 		/*
8656 		 * We found a small disk with an EFI label on it;
8657 		 * we need to fix up the minor nodes accordingly.
8658 		 */
8659 		ddi_remove_minor_node(devi, "h");
8660 		ddi_remove_minor_node(devi, "h,raw");
8661 		(void) ddi_create_minor_node(devi, "wd",
8662 		    S_IFBLK,
8663 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8664 		    un->un_node_type, NULL);
8665 		(void) ddi_create_minor_node(devi, "wd,raw",
8666 		    S_IFCHR,
8667 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8668 		    un->un_node_type, NULL);
8669 	}
8670 #if defined(__i386) || defined(__amd64)
8671 	else if (un->un_f_capacity_adjusted == 1) {
8672 		/*
8673 		 * Refer to comments related to off-by-1 at the
8674 		 * header of this file.
8675 		 * Adjust minor node for 1TB disk.
8676 		 */
8677 		ddi_remove_minor_node(devi, "wd");
8678 		ddi_remove_minor_node(devi, "wd,raw");
8679 		(void) ddi_create_minor_node(devi, "h",
8680 		    S_IFBLK,
8681 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8682 		    un->un_node_type, NULL);
8683 		(void) ddi_create_minor_node(devi, "h,raw",
8684 		    S_IFCHR,
8685 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8686 		    un->un_node_type, NULL);
8687 	}
8688 #endif
8689 	/*
8690 	 * Read and initialize the devid for the unit.
8691 	 */
8692 	ASSERT(un->un_errstats != NULL);
8693 	if (un->un_f_devid_supported) {
8694 		sd_register_devid(un, devi, reservation_flag);
8695 	}
8696 	mutex_exit(SD_MUTEX(un));
8697 
8698 #if (defined(__fibre))
8699 	/*
8700 	 * Register callbacks for fibre only.  You can't do this solely
8701 	 * on the basis of the devid_type because this is hba specific.
8702 	 * We need to query our hba capabilities to find out whether to
8703 	 * register or not.
8704 	 */
8705 	if (un->un_f_is_fibre) {
8706 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8707 			sd_init_event_callbacks(un);
8708 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8709 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8710 		}
8711 	}
8712 #endif
8713 
8714 	if (un->un_f_opt_disable_cache == TRUE) {
8715 		/*
8716 		 * Disable both read cache and write cache.  This is
8717 		 * the historic behavior of the keywords in the config file.
8718 		 */
8719 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8720 		    0) {
8721 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8722 			    "sd_unit_attach: un:0x%p Could not disable "
8723 			    "caching", un);
8724 			goto devid_failed;
8725 		}
8726 	}
8727 
8728 	/*
8729 	 * Check the value of the WCE bit now and
8730 	 * set un_f_write_cache_enabled accordingly.
8731 	 */
8732 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8733 	mutex_enter(SD_MUTEX(un));
8734 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8735 	mutex_exit(SD_MUTEX(un));
8736 
8737 	/*
8738 	 * Set the pstat and error stat values here, so data obtained during the
8739 	 * previous attach-time routines is available.
8740 	 *
8741 	 * Note: This is a critical sequence that needs to be maintained:
8742 	 *	1) Instantiate the kstats before any routines using the iopath
8743 	 *	   (i.e. sd_send_scsi_cmd).
8744 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8745 	 *	   stats (sd_set_pstats)here, following sd_validate_geometry(),
8746 	 *	   sd_register_devid(), and sd_cache_control().
8747 	 */
8748 	if (un->un_f_pkstats_enabled) {
8749 		sd_set_pstats(un);
8750 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8751 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8752 	}
8753 
8754 	sd_set_errstats(un);
8755 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8756 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8757 
8758 	/*
8759 	 * Find out what type of reservation this disk supports.
8760 	 */
8761 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8762 	case 0:
8763 		/*
8764 		 * SCSI-3 reservations are supported.
8765 		 */
8766 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8767 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8768 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8769 		break;
8770 	case ENOTSUP:
8771 		/*
8772 		 * The PERSISTENT RESERVE IN command would not be recognized by
8773 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8774 		 */
8775 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8776 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8777 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8778 		break;
8779 	default:
8780 		/*
8781 		 * default to SCSI-3 reservations
8782 		 */
8783 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8784 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8785 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8786 		break;
8787 	}
8788 
8789 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8790 	    "sd_unit_attach: un:0x%p exit success\n", un);
8791 
8792 	return (DDI_SUCCESS);
8793 
8794 	/*
8795 	 * An error occurred during the attach; clean up & return failure.
8796 	 */
8797 
8798 devid_failed:
8799 
8800 setup_pm_failed:
8801 	ddi_remove_minor_node(devi, NULL);
8802 
8803 create_minor_nodes_failed:
8804 	/*
8805 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8806 	 */
8807 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8808 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8809 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8810 
8811 	if (un->un_f_is_fibre == FALSE) {
8812 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8813 	}
8814 
8815 spinup_failed:
8816 
8817 	mutex_enter(SD_MUTEX(un));
8818 
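	/*
	 * Each cancellation below follows the same pattern: copy the
	 * timeout id to a local, clear the soft state field, and drop
	 * SD_MUTEX around the untimeout() call.  untimeout(9F) may wait
	 * for a callback that is already running, and these callbacks
	 * take SD_MUTEX themselves, so holding the mutex across the call
	 * could deadlock; the local copy guards against the callback
	 * nulling the field while the mutex is dropped (see the similar
	 * note in sd_unit_detach below).
	 */
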
8819 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8820 	if (un->un_direct_priority_timeid != NULL) {
8821 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8822 		un->un_direct_priority_timeid = NULL;
8823 		mutex_exit(SD_MUTEX(un));
8824 		(void) untimeout(temp_id);
8825 		mutex_enter(SD_MUTEX(un));
8826 	}
8827 
8828 	/* Cancel any pending start/stop timeouts */
8829 	if (un->un_startstop_timeid != NULL) {
8830 		timeout_id_t temp_id = un->un_startstop_timeid;
8831 		un->un_startstop_timeid = NULL;
8832 		mutex_exit(SD_MUTEX(un));
8833 		(void) untimeout(temp_id);
8834 		mutex_enter(SD_MUTEX(un));
8835 	}
8836 
8837 	/* Cancel any pending reset-throttle timeouts */
8838 	if (un->un_reset_throttle_timeid != NULL) {
8839 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8840 		un->un_reset_throttle_timeid = NULL;
8841 		mutex_exit(SD_MUTEX(un));
8842 		(void) untimeout(temp_id);
8843 		mutex_enter(SD_MUTEX(un));
8844 	}
8845 
8846 	/* Cancel any pending retry timeouts */
8847 	if (un->un_retry_timeid != NULL) {
8848 		timeout_id_t temp_id = un->un_retry_timeid;
8849 		un->un_retry_timeid = NULL;
8850 		mutex_exit(SD_MUTEX(un));
8851 		(void) untimeout(temp_id);
8852 		mutex_enter(SD_MUTEX(un));
8853 	}
8854 
8855 	/* Cancel any pending delayed cv broadcast timeouts */
8856 	if (un->un_dcvb_timeid != NULL) {
8857 		timeout_id_t temp_id = un->un_dcvb_timeid;
8858 		un->un_dcvb_timeid = NULL;
8859 		mutex_exit(SD_MUTEX(un));
8860 		(void) untimeout(temp_id);
8861 		mutex_enter(SD_MUTEX(un));
8862 	}
8863 
8864 	mutex_exit(SD_MUTEX(un));
8865 
8866 	/* There should not be any in-progress I/O so ASSERT this check */
8867 	ASSERT(un->un_ncmds_in_transport == 0);
8868 	ASSERT(un->un_ncmds_in_driver == 0);
8869 
8870 	/* Do not free the softstate if the callback routine is active */
8871 	sd_sync_with_callback(un);
8872 
8873 	/*
8874 	 * Partition stats apparently are not used with removables. These would
8875 	 * not have been created during attach, so no need to clean them up...
8876 	 */
8877 	if (un->un_stats != NULL) {
8878 		kstat_delete(un->un_stats);
8879 		un->un_stats = NULL;
8880 	}
8881 	if (un->un_errstats != NULL) {
8882 		kstat_delete(un->un_errstats);
8883 		un->un_errstats = NULL;
8884 	}
8885 
8886 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8887 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8888 
8889 	ddi_prop_remove_all(devi);
8890 	sema_destroy(&un->un_semoclose);
8891 	cv_destroy(&un->un_state_cv);
8892 
8893 getrbuf_failed:
8894 
8895 	sd_free_rqs(un);
8896 
8897 alloc_rqs_failed:
8898 
8899 	devp->sd_private = NULL;
8900 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8901 
8902 get_softstate_failed:
8903 	/*
8904 	 * Note: the man pages are unclear as to whether or not doing a
8905 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8906 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8907 	 * ddi_get_soft_state() fails.  The implication seems to be
8908 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8909 	 */
8910 	ddi_soft_state_free(sd_state, instance);
8911 
8912 probe_failed:
8913 	scsi_unprobe(devp);
8914 #ifdef SDDEBUG
8915 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8916 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8917 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8918 		    (void *)un);
8919 	}
8920 #endif
8921 	return (DDI_FAILURE);
8922 }
8923 
8924 
8925 /*
8926  *    Function: sd_unit_detach
8927  *
8928  * Description: Performs DDI_DETACH processing for sddetach().
8929  *
8930  * Return Code: DDI_SUCCESS
8931  *		DDI_FAILURE
8932  *
8933  *     Context: Kernel thread context
8934  */
8935 
8936 static int
8937 sd_unit_detach(dev_info_t *devi)
8938 {
8939 	struct scsi_device	*devp;
8940 	struct sd_lun		*un;
8941 	int			i;
8942 	dev_t			dev;
8943 	int			instance = ddi_get_instance(devi);
8944 
8945 	mutex_enter(&sd_detach_mutex);
8946 
8947 	/*
8948 	 * Fail the detach for any of the following:
8949 	 *  - Unable to get the sd_lun struct for the instance
8950 	 *  - A layered driver has an outstanding open on the instance
8951 	 *  - Another thread is already detaching this instance
8952 	 *  - Another thread is currently performing an open
8953 	 */
8954 	devp = ddi_get_driver_private(devi);
8955 	if ((devp == NULL) ||
8956 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8957 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8958 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8959 		mutex_exit(&sd_detach_mutex);
8960 		return (DDI_FAILURE);
8961 	}
8962 
8963 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8964 
8965 	/*
8966 	 * Mark this instance as currently in a detach, to inhibit any
8967 	 * opens from a layered driver.
8968 	 */
8969 	un->un_detach_count++;
8970 	mutex_exit(&sd_detach_mutex);
8971 
8972 	dev = sd_make_device(SD_DEVINFO(un));
8973 
8974 	_NOTE(COMPETING_THREADS_NOW);
8975 
8976 	mutex_enter(SD_MUTEX(un));
8977 
8978 	/*
8979 	 * Fail the detach if there are any outstanding layered
8980 	 * opens on this device.
8981 	 */
8982 	for (i = 0; i < NDKMAP; i++) {
8983 		if (un->un_ocmap.lyropen[i] != 0) {
8984 			goto err_notclosed;
8985 		}
8986 	}
8987 
8988 	/*
8989 	 * Verify there are NO outstanding commands issued to this device.
8990 	 * i.e., un_ncmds_in_transport == 0.
8991 	 * It's possible to have outstanding commands through the physio
8992 	 * code path, even though everything's closed.
8993 	 */
8994 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8995 	    (un->un_direct_priority_timeid != NULL) ||
8996 	    (un->un_state == SD_STATE_RWAIT)) {
8997 		mutex_exit(SD_MUTEX(un));
8998 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8999 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
9000 		goto err_stillbusy;
9001 	}
9002 
9003 	/*
9004 	 * If we have the device reserved, release the reservation.
9005 	 */
9006 	if ((un->un_resvd_status & SD_RESERVE) &&
9007 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
9008 		mutex_exit(SD_MUTEX(un));
9009 		/*
9010 		 * Note: sd_reserve_release sends a command to the device
9011 		 * via the sd_ioctlcmd() path, and can sleep.
9012 		 */
9013 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
9014 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9015 			    "sd_dr_detach: Cannot release reservation \n");
9016 		}
9017 	} else {
9018 		mutex_exit(SD_MUTEX(un));
9019 	}
9020 
9021 	/*
9022 	 * Untimeout any reserve recover, throttle reset, restart unit
9023 	 * and delayed broadcast timeout threads. Protect the timeout pointer
9024 	 * from getting nulled by their callback functions.
9025 	 */
9026 	mutex_enter(SD_MUTEX(un));
9027 	if (un->un_resvd_timeid != NULL) {
9028 		timeout_id_t temp_id = un->un_resvd_timeid;
9029 		un->un_resvd_timeid = NULL;
9030 		mutex_exit(SD_MUTEX(un));
9031 		(void) untimeout(temp_id);
9032 		mutex_enter(SD_MUTEX(un));
9033 	}
9034 
9035 	if (un->un_reset_throttle_timeid != NULL) {
9036 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
9037 		un->un_reset_throttle_timeid = NULL;
9038 		mutex_exit(SD_MUTEX(un));
9039 		(void) untimeout(temp_id);
9040 		mutex_enter(SD_MUTEX(un));
9041 	}
9042 
9043 	if (un->un_startstop_timeid != NULL) {
9044 		timeout_id_t temp_id = un->un_startstop_timeid;
9045 		un->un_startstop_timeid = NULL;
9046 		mutex_exit(SD_MUTEX(un));
9047 		(void) untimeout(temp_id);
9048 		mutex_enter(SD_MUTEX(un));
9049 	}
9050 
9051 	if (un->un_dcvb_timeid != NULL) {
9052 		timeout_id_t temp_id = un->un_dcvb_timeid;
9053 		un->un_dcvb_timeid = NULL;
9054 		mutex_exit(SD_MUTEX(un));
9055 		(void) untimeout(temp_id);
9056 	} else {
9057 		mutex_exit(SD_MUTEX(un));
9058 	}
9059 
9060 	/* Remove any pending reservation reclaim requests for this device */
9061 	sd_rmv_resv_reclaim_req(dev);
9062 
9063 	mutex_enter(SD_MUTEX(un));
9064 
9065 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
9066 	if (un->un_direct_priority_timeid != NULL) {
9067 		timeout_id_t temp_id = un->un_direct_priority_timeid;
9068 		un->un_direct_priority_timeid = NULL;
9069 		mutex_exit(SD_MUTEX(un));
9070 		(void) untimeout(temp_id);
9071 		mutex_enter(SD_MUTEX(un));
9072 	}
9073 
9074 	/* Cancel any active multi-host disk watch thread requests */
9075 	if (un->un_mhd_token != NULL) {
9076 		mutex_exit(SD_MUTEX(un));
9077 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
9078 		if (scsi_watch_request_terminate(un->un_mhd_token,
9079 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9080 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9081 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
9082 			/*
9083 			 * Note: We are returning here after having removed
9084 			 * some driver timeouts above. This is consistent with
9085 			 * the legacy implementation but perhaps the watch
9086 			 * terminate call should be made with the wait flag set.
9087 			 */
9088 			goto err_stillbusy;
9089 		}
9090 		mutex_enter(SD_MUTEX(un));
9091 		un->un_mhd_token = NULL;
9092 	}
9093 
9094 	if (un->un_swr_token != NULL) {
9095 		mutex_exit(SD_MUTEX(un));
9096 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
9097 		if (scsi_watch_request_terminate(un->un_swr_token,
9098 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9099 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9100 			    "sd_dr_detach: Cannot cancel swr watch request\n");
9101 			/*
9102 			 * Note: We are returning here after having removed
9103 			 * some driver timeouts above. This is consistent with
9104 			 * the legacy implementation but perhaps the watch
9105 			 * terminate call should be made with the wait flag set.
9106 			 */
9107 			goto err_stillbusy;
9108 		}
9109 		mutex_enter(SD_MUTEX(un));
9110 		un->un_swr_token = NULL;
9111 	}
9112 
9113 	mutex_exit(SD_MUTEX(un));
9114 
9115 	/*
9116 	 * Clear any scsi_reset_notifies. This is done even if we have
9117 	 * not registered one; the cancel is harmless in that case.
9118 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
9119 	 */
9120 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
9121 	    sd_mhd_reset_notify_cb, (caddr_t)un);
9122 
9123 	/*
9124 	 * Protect the timeout pointers from getting nulled by
9125 	 * their callback functions during the cancellation process.
9126 	 * In such a scenario untimeout can be invoked with a null value.
9127 	 */
9128 	_NOTE(NO_COMPETING_THREADS_NOW);
9129 
9130 	mutex_enter(&un->un_pm_mutex);
9131 	if (un->un_pm_idle_timeid != NULL) {
9132 		timeout_id_t temp_id = un->un_pm_idle_timeid;
9133 		un->un_pm_idle_timeid = NULL;
9134 		mutex_exit(&un->un_pm_mutex);
9135 
9136 		/*
9137 		 * Timeout is active; cancel it.
9138 		 * Note that it'll never be active on a device
9139 		 * that does not support PM; therefore we don't
9140 		 * have to check before calling pm_idle_component.
9141 		 */
9142 		(void) untimeout(temp_id);
9143 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9144 		mutex_enter(&un->un_pm_mutex);
9145 	}
9146 
9147 	/*
9148 	 * Check whether there is already a timeout scheduled for power
9149 	 * management. If yes, then don't lower the power here; that's
9150 	 * the timeout handler's job.
9151 	 */
9152 	if (un->un_pm_timeid != NULL) {
9153 		timeout_id_t temp_id = un->un_pm_timeid;
9154 		un->un_pm_timeid = NULL;
9155 		mutex_exit(&un->un_pm_mutex);
9156 		/*
9157 		 * Timeout is active; cancel it.
9158 		 * Note that it'll never be active on a device
9159 		 * that does not support PM; therefore we don't
9160 		 * have to check before calling pm_idle_component.
9161 		 */
9162 		(void) untimeout(temp_id);
9163 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9164 
9165 	} else {
9166 		mutex_exit(&un->un_pm_mutex);
9167 		if ((un->un_f_pm_is_enabled == TRUE) &&
9168 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
9169 		    DDI_SUCCESS)) {
9170 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9171 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
9172 			/*
9173 			 * Fix for bug: 4297749, item # 13
9174 			 * The above test now includes a check to see if PM is
9175 			 * supported by this device before calling
9176 			 * pm_lower_power().
9177 			 * Note, the following is not dead code. The call to
9178 			 * pm_lower_power above will generate a call back into
9179 			 * our sdpower routine which might result in a timeout
9180 			 * handler getting activated. Therefore the following
9181 			 * code is valid and necessary.
9182 			 */
9183 			mutex_enter(&un->un_pm_mutex);
9184 			if (un->un_pm_timeid != NULL) {
9185 				timeout_id_t temp_id = un->un_pm_timeid;
9186 				un->un_pm_timeid = NULL;
9187 				mutex_exit(&un->un_pm_mutex);
9188 				(void) untimeout(temp_id);
9189 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9190 			} else {
9191 				mutex_exit(&un->un_pm_mutex);
9192 			}
9193 		}
9194 	}
9195 
9196 	/*
9197 	 * Cleanup from the scsi_ifsetcap() calls (437868)
9198 	 * Relocated here from above to be after the call to
9199 	 * pm_lower_power, which was getting errors.
9200 	 */
9201 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9202 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9203 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9204 
9205 	if (un->un_f_is_fibre == FALSE) {
9206 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9207 	}
9208 
9209 	/*
9210 	 * Remove any event callbacks, fibre only
9211 	 */
9212 	if (un->un_f_is_fibre == TRUE) {
9213 		if ((un->un_insert_event != NULL) &&
9214 			(ddi_remove_event_handler(un->un_insert_cb_id) !=
9215 				DDI_SUCCESS)) {
9216 			/*
9217 			 * Note: We are returning here after having done
9218 			 * substantial cleanup above. This is consistent
9219 			 * with the legacy implementation but this may not
9220 			 * be the right thing to do.
9221 			 */
9222 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9223 				"sd_dr_detach: Cannot cancel insert event\n");
9224 			goto err_remove_event;
9225 		}
9226 		un->un_insert_event = NULL;
9227 
9228 		if ((un->un_remove_event != NULL) &&
9229 			(ddi_remove_event_handler(un->un_remove_cb_id) !=
9230 				DDI_SUCCESS)) {
9231 			/*
9232 			 * Note: We are returning here after having done
9233 			 * substantial cleanup above. This is consistent
9234 			 * with the legacy implementation but this may not
9235 			 * be the right thing to do.
9236 			 */
9237 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9238 				"sd_dr_detach: Cannot cancel remove event\n");
9239 			goto err_remove_event;
9240 		}
9241 		un->un_remove_event = NULL;
9242 	}
9243 
9244 	/* Do not free the softstate if the callback routine is active */
9245 	sd_sync_with_callback(un);
9246 
9247 	/*
9248 	 * Hold the detach mutex here, to make sure that no other threads ever
9249 	 * can access a (partially) freed soft state structure.
9250 	 */
9251 	mutex_enter(&sd_detach_mutex);
9252 
9253 	/*
9254 	 * Clean up the soft state struct.
9255 	 * Cleanup is done in reverse order of allocs/inits.
9256 	 * At this point there should be no competing threads anymore.
9257 	 */
9258 
9259 	/* Unregister and free device id. */
9260 	ddi_devid_unregister(devi);
9261 	if (un->un_devid) {
9262 		ddi_devid_free(un->un_devid);
9263 		un->un_devid = NULL;
9264 	}
9265 
9266 	/*
9267 	 * Destroy wmap cache if it exists.
9268 	 */
9269 	if (un->un_wm_cache != NULL) {
9270 		kmem_cache_destroy(un->un_wm_cache);
9271 		un->un_wm_cache = NULL;
9272 	}
9273 
9274 	/* Remove minor nodes */
9275 	ddi_remove_minor_node(devi, NULL);
9276 
9277 	/*
9278 	 * kstat cleanup is done in detach for all device types (4363169).
9279 	 * We do not want to fail detach if the device kstats are not deleted
9280 	 * since there is confusion about the devo_refcnt for the device.
9281 	 * We just delete the kstats and let detach complete successfully.
9282 	 */
9283 	if (un->un_stats != NULL) {
9284 		kstat_delete(un->un_stats);
9285 		un->un_stats = NULL;
9286 	}
9287 	if (un->un_errstats != NULL) {
9288 		kstat_delete(un->un_errstats);
9289 		un->un_errstats = NULL;
9290 	}
9291 
9292 	/* Remove partition stats */
9293 	if (un->un_f_pkstats_enabled) {
9294 		for (i = 0; i < NSDMAP; i++) {
9295 			if (un->un_pstats[i] != NULL) {
9296 				kstat_delete(un->un_pstats[i]);
9297 				un->un_pstats[i] = NULL;
9298 			}
9299 		}
9300 	}
9301 
9302 	/* Remove xbuf registration */
9303 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9304 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9305 
9306 	/* Remove driver properties */
9307 	ddi_prop_remove_all(devi);
9308 
9309 	mutex_destroy(&un->un_pm_mutex);
9310 	cv_destroy(&un->un_pm_busy_cv);
9311 
9312 	cv_destroy(&un->un_wcc_cv);
9313 
9314 	/* Open/close semaphore */
9315 	sema_destroy(&un->un_semoclose);
9316 
9317 	/* Removable media condvar. */
9318 	cv_destroy(&un->un_state_cv);
9319 
9320 	/* Suspend/resume condvar. */
9321 	cv_destroy(&un->un_suspend_cv);
9322 	cv_destroy(&un->un_disk_busy_cv);
9323 
9324 	sd_free_rqs(un);
9325 
9326 	/* Free up soft state */
9327 	devp->sd_private = NULL;
9328 	bzero(un, sizeof (struct sd_lun));
9329 	ddi_soft_state_free(sd_state, instance);
9330 
9331 	mutex_exit(&sd_detach_mutex);
9332 
9333 	/* This frees up the INQUIRY data associated with the device. */
9334 	scsi_unprobe(devp);
9335 
9336 	return (DDI_SUCCESS);
9337 
9338 err_notclosed:
9339 	mutex_exit(SD_MUTEX(un));
9340 
9341 err_stillbusy:
9342 	_NOTE(NO_COMPETING_THREADS_NOW);
9343 
9344 err_remove_event:
9345 	mutex_enter(&sd_detach_mutex);
9346 	un->un_detach_count--;
9347 	mutex_exit(&sd_detach_mutex);
9348 
9349 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9350 	return (DDI_FAILURE);
9351 }
9352 
9353 
9354 /*
9355  * Driver minor node structure and data table
9356  */
9357 struct driver_minor_data {
9358 	char	*name;
9359 	minor_t	minor;
9360 	int	type;
9361 };
9362 
9363 static struct driver_minor_data sd_minor_data[] = {
9364 	{"a", 0, S_IFBLK},
9365 	{"b", 1, S_IFBLK},
9366 	{"c", 2, S_IFBLK},
9367 	{"d", 3, S_IFBLK},
9368 	{"e", 4, S_IFBLK},
9369 	{"f", 5, S_IFBLK},
9370 	{"g", 6, S_IFBLK},
9371 	{"h", 7, S_IFBLK},
9372 #if defined(_SUNOS_VTOC_16)
9373 	{"i", 8, S_IFBLK},
9374 	{"j", 9, S_IFBLK},
9375 	{"k", 10, S_IFBLK},
9376 	{"l", 11, S_IFBLK},
9377 	{"m", 12, S_IFBLK},
9378 	{"n", 13, S_IFBLK},
9379 	{"o", 14, S_IFBLK},
9380 	{"p", 15, S_IFBLK},
9381 #endif			/* defined(_SUNOS_VTOC_16) */
9382 #if defined(_FIRMWARE_NEEDS_FDISK)
9383 	{"q", 16, S_IFBLK},
9384 	{"r", 17, S_IFBLK},
9385 	{"s", 18, S_IFBLK},
9386 	{"t", 19, S_IFBLK},
9387 	{"u", 20, S_IFBLK},
9388 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9389 	{"a,raw", 0, S_IFCHR},
9390 	{"b,raw", 1, S_IFCHR},
9391 	{"c,raw", 2, S_IFCHR},
9392 	{"d,raw", 3, S_IFCHR},
9393 	{"e,raw", 4, S_IFCHR},
9394 	{"f,raw", 5, S_IFCHR},
9395 	{"g,raw", 6, S_IFCHR},
9396 	{"h,raw", 7, S_IFCHR},
9397 #if defined(_SUNOS_VTOC_16)
9398 	{"i,raw", 8, S_IFCHR},
9399 	{"j,raw", 9, S_IFCHR},
9400 	{"k,raw", 10, S_IFCHR},
9401 	{"l,raw", 11, S_IFCHR},
9402 	{"m,raw", 12, S_IFCHR},
9403 	{"n,raw", 13, S_IFCHR},
9404 	{"o,raw", 14, S_IFCHR},
9405 	{"p,raw", 15, S_IFCHR},
9406 #endif			/* defined(_SUNOS_VTOC_16) */
9407 #if defined(_FIRMWARE_NEEDS_FDISK)
9408 	{"q,raw", 16, S_IFCHR},
9409 	{"r,raw", 17, S_IFCHR},
9410 	{"s,raw", 18, S_IFCHR},
9411 	{"t,raw", 19, S_IFCHR},
9412 	{"u,raw", 20, S_IFCHR},
9413 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9414 	{0}
9415 };
9416 
9417 static struct driver_minor_data sd_minor_data_efi[] = {
9418 	{"a", 0, S_IFBLK},
9419 	{"b", 1, S_IFBLK},
9420 	{"c", 2, S_IFBLK},
9421 	{"d", 3, S_IFBLK},
9422 	{"e", 4, S_IFBLK},
9423 	{"f", 5, S_IFBLK},
9424 	{"g", 6, S_IFBLK},
9425 	{"wd", 7, S_IFBLK},
9426 #if defined(_FIRMWARE_NEEDS_FDISK)
9427 	{"q", 16, S_IFBLK},
9428 	{"r", 17, S_IFBLK},
9429 	{"s", 18, S_IFBLK},
9430 	{"t", 19, S_IFBLK},
9431 	{"u", 20, S_IFBLK},
9432 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9433 	{"a,raw", 0, S_IFCHR},
9434 	{"b,raw", 1, S_IFCHR},
9435 	{"c,raw", 2, S_IFCHR},
9436 	{"d,raw", 3, S_IFCHR},
9437 	{"e,raw", 4, S_IFCHR},
9438 	{"f,raw", 5, S_IFCHR},
9439 	{"g,raw", 6, S_IFCHR},
9440 	{"wd,raw", 7, S_IFCHR},
9441 #if defined(_FIRMWARE_NEEDS_FDISK)
9442 	{"q,raw", 16, S_IFCHR},
9443 	{"r,raw", 17, S_IFCHR},
9444 	{"s,raw", 18, S_IFCHR},
9445 	{"t,raw", 19, S_IFCHR},
9446 	{"u,raw", 20, S_IFCHR},
9447 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9448 	{0}
9449 };
9450 
9451 
9452 /*
9453  *    Function: sd_create_minor_nodes
9454  *
9455  * Description: Create the minor device nodes for the instance.
9456  *
9457  *   Arguments: un - driver soft state (unit) structure
9458  *		devi - pointer to device info structure
9459  *
9460  * Return Code: DDI_SUCCESS
9461  *		DDI_FAILURE
9462  *
9463  *     Context: Kernel thread context
9464  */
9465 
9466 static int
9467 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9468 {
9469 	struct driver_minor_data	*dmdp;
9470 	struct scsi_device		*devp;
9471 	int				instance;
9472 	char				name[48];
9473 
9474 	ASSERT(un != NULL);
9475 	devp = ddi_get_driver_private(devi);
9476 	instance = ddi_get_instance(devp->sd_dev);
9477 
9478 	/*
9479 	 * Create all the minor nodes for this target.
9480 	 */
9481 	if (un->un_blockcount > DK_MAX_BLOCKS)
9482 		dmdp = sd_minor_data_efi;
9483 	else
9484 		dmdp = sd_minor_data;
9485 	while (dmdp->name != NULL) {
9486 
9487 		(void) sprintf(name, "%s", dmdp->name);
9488 
9489 		if (ddi_create_minor_node(devi, name, dmdp->type,
9490 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9491 		    un->un_node_type, NULL) == DDI_FAILURE) {
9492 			/*
9493 			 * Clean up any nodes that may have been created, in
9494 			 * case this fails in the middle of the loop.
9495 			 */
9496 			ddi_remove_minor_node(devi, NULL);
9497 			return (DDI_FAILURE);
9498 		}
9499 		dmdp++;
9500 	}
9501 
9502 	return (DDI_SUCCESS);
9503 }
9504 
9505 
9506 /*
9507  *    Function: sd_create_errstats
9508  *
9509  * Description: This routine instantiates the device error stats.
9510  *
9511  *		Note: During attach the stats are instantiated first so they are
9512  *		available for attach-time routines that utilize the driver
9513  *		iopath to send commands to the device. The stats are initialized
9514  *		separately so data obtained during some attach-time routines is
9515  *		available. (4362483)
9516  *
9517  *   Arguments: un - driver soft state (unit) structure
9518  *		instance - driver instance
9519  *
9520  *     Context: Kernel thread context
9521  */
9522 
9523 static void
9524 sd_create_errstats(struct sd_lun *un, int instance)
9525 {
9526 	struct	sd_errstats	*stp;
9527 	char	kstatmodule_err[KSTAT_STRLEN];
9528 	char	kstatname[KSTAT_STRLEN];
9529 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9530 
9531 	ASSERT(un != NULL);
9532 
9533 	if (un->un_errstats != NULL) {
9534 		return;
9535 	}
9536 
9537 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9538 	    "%serr", sd_label);
9539 	(void) snprintf(kstatname, sizeof (kstatname),
9540 	    "%s%d,err", sd_label, instance);
9541 
9542 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9543 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9544 
9545 	if (un->un_errstats == NULL) {
9546 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9547 		    "sd_create_errstats: Failed kstat_create\n");
9548 		return;
9549 	}
9550 
9551 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9552 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9553 	    KSTAT_DATA_UINT32);
9554 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9555 	    KSTAT_DATA_UINT32);
9556 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9557 	    KSTAT_DATA_UINT32);
9558 	kstat_named_init(&stp->sd_vid,		"Vendor",
9559 	    KSTAT_DATA_CHAR);
9560 	kstat_named_init(&stp->sd_pid,		"Product",
9561 	    KSTAT_DATA_CHAR);
9562 	kstat_named_init(&stp->sd_revision,	"Revision",
9563 	    KSTAT_DATA_CHAR);
9564 	kstat_named_init(&stp->sd_serial,	"Serial No",
9565 	    KSTAT_DATA_CHAR);
9566 	kstat_named_init(&stp->sd_capacity,	"Size",
9567 	    KSTAT_DATA_ULONGLONG);
9568 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9569 	    KSTAT_DATA_UINT32);
9570 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9571 	    KSTAT_DATA_UINT32);
9572 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9573 	    KSTAT_DATA_UINT32);
9574 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9575 	    KSTAT_DATA_UINT32);
9576 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9577 	    KSTAT_DATA_UINT32);
9578 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9579 	    KSTAT_DATA_UINT32);
9580 
9581 	un->un_errstats->ks_private = un;
9582 	un->un_errstats->ks_update  = nulldev;
9583 
9584 	kstat_install(un->un_errstats);
9585 }
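
/*
 * Note: the named statistics created above (Soft/Hard/Transport Errors,
 * Vendor, Product, Revision, Serial No, Size, and the sense-key derived
 * counts) are the per-device fields reported by 'iostat -E'.
 */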
9586 
9587 
9588 /*
9589  *    Function: sd_set_errstats
9590  *
9591  * Description: This routine sets the value of the vendor id, product id,
9592  *		revision, serial number, and capacity device error stats.
9593  *
9594  *		Note: During attach the stats are instantiated first so they are
9595  *		available for attach-time routines that utilize the driver
9596  *		iopath to send commands to the device. The stats are initialized
9597  *		separately so data obtained during some attach-time routines is
9598  *		available. (4362483)
9599  *
9600  *   Arguments: un - driver soft state (unit) structure
9601  *
9602  *     Context: Kernel thread context
9603  */
9604 
9605 static void
9606 sd_set_errstats(struct sd_lun *un)
9607 {
9608 	struct	sd_errstats	*stp;
9609 
9610 	ASSERT(un != NULL);
9611 	ASSERT(un->un_errstats != NULL);
9612 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9613 	ASSERT(stp != NULL);
9614 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9615 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9616 	(void) strncpy(stp->sd_revision.value.c,
9617 	    un->un_sd->sd_inq->inq_revision, 4);
9618 
9619 	/*
9620 	 * All the errstats are persistent across detach/attach,
9621 	 * so reset all the errstats here to handle hot replacement of
9622 	 * disk drives, except when the same Sun-qualified drive is
9623 	 * still present.
9624 	 */
9625 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
9626 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9627 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
9628 		stp->sd_softerrs.value.ui32 = 0;
9629 		stp->sd_harderrs.value.ui32 = 0;
9630 		stp->sd_transerrs.value.ui32 = 0;
9631 		stp->sd_rq_media_err.value.ui32 = 0;
9632 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
9633 		stp->sd_rq_nodev_err.value.ui32 = 0;
9634 		stp->sd_rq_recov_err.value.ui32 = 0;
9635 		stp->sd_rq_illrq_err.value.ui32 = 0;
9636 		stp->sd_rq_pfa_err.value.ui32 = 0;
9637 	}
9638 
9639 	/*
9640 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9641 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9642 	 * (4376302))
9643 	 */
9644 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9645 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9646 		    sizeof (SD_INQUIRY(un)->inq_serial));
9647 	}
9648 
9649 	if (un->un_f_blockcount_is_valid != TRUE) {
9650 		/*
9651 		 * Set capacity error stat to 0 for no media. This ensures
9652 		 * a valid capacity is displayed in response to 'iostat -E'
9653 		 * when no media is present in the device.
9654 		 */
9655 		stp->sd_capacity.value.ui64 = 0;
9656 	} else {
9657 		/*
9658 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9659 		 * capacity.
9660 		 *
9661 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9662 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9663 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9664 		 */
9665 		stp->sd_capacity.value.ui64 = (uint64_t)
9666 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9667 	}
9668 }
9669 
9670 
9671 /*
9672  *    Function: sd_set_pstats
9673  *
9674  * Description: This routine instantiates and initializes the partition
9675  *              stats for each partition with more than zero blocks.
9676  *		(4363169)
9677  *
9678  *   Arguments: un - driver soft state (unit) structure
9679  *
9680  *     Context: Kernel thread context
9681  */
9682 
9683 static void
9684 sd_set_pstats(struct sd_lun *un)
9685 {
9686 	char	kstatname[KSTAT_STRLEN];
9687 	int	instance;
9688 	int	i;
9689 
9690 	ASSERT(un != NULL);
9691 
9692 	instance = ddi_get_instance(SD_DEVINFO(un));
9693 
9694 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9695 	for (i = 0; i < NSDMAP; i++) {
9696 		if ((un->un_pstats[i] == NULL) &&
9697 		    (un->un_map[i].dkl_nblk != 0)) {
9698 			(void) snprintf(kstatname, sizeof (kstatname),
9699 			    "%s%d,%s", sd_label, instance,
9700 			    sd_minor_data[i].name);
9701 			un->un_pstats[i] = kstat_create(sd_label,
9702 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9703 			    1, KSTAT_FLAG_PERSISTENT);
9704 			if (un->un_pstats[i] != NULL) {
9705 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9706 				kstat_install(un->un_pstats[i]);
9707 			}
9708 		}
9709 	}
9710 }
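
/*
 * Note: each partition kstat created above is named
 * "<label><instance>,<slice>" (e.g. "sd0,a"), which is how per-partition
 * I/O statistics are exposed to kstat consumers such as iostat(1M).
 */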
9711 
9712 
9713 #if (defined(__fibre))
9714 /*
9715  *    Function: sd_init_event_callbacks
9716  *
9717  * Description: This routine initializes the insertion and removal event
9718  *		callbacks. (fibre only)
9719  *
9720  *   Arguments: un - driver soft state (unit) structure
9721  *
9722  *     Context: Kernel thread context
9723  */
9724 
9725 static void
9726 sd_init_event_callbacks(struct sd_lun *un)
9727 {
9728 	ASSERT(un != NULL);
9729 
9730 	if ((un->un_insert_event == NULL) &&
9731 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9732 	    &un->un_insert_event) == DDI_SUCCESS)) {
9733 		/*
9734 		 * Add the callback for an insertion event
9735 		 */
9736 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9737 		    un->un_insert_event, sd_event_callback, (void *)un,
9738 		    &(un->un_insert_cb_id));
9739 	}
9740 
9741 	if ((un->un_remove_event == NULL) &&
9742 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9743 	    &un->un_remove_event) == DDI_SUCCESS)) {
9744 		/*
9745 		 * Add the callback for a removal event
9746 		 */
9747 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9748 		    un->un_remove_event, sd_event_callback, (void *)un,
9749 		    &(un->un_remove_cb_id));
9750 	}
9751 }
9752 
9753 
9754 /*
9755  *    Function: sd_event_callback
9756  *
9757  * Description: This routine handles insert/remove events (photon). The
9758  *		state is changed to OFFLINE, which can be used to suppress
9759  *		error messages. (fibre only)
9760  *
9761  *   Arguments: un - driver soft state (unit) structure
9762  *
9763  *     Context: Callout thread context
9764  */
9765 /* ARGSUSED */
9766 static void
9767 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9768     void *bus_impldata)
9769 {
9770 	struct sd_lun *un = (struct sd_lun *)arg;
9771 
9772 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9773 	if (event == un->un_insert_event) {
9774 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9775 		mutex_enter(SD_MUTEX(un));
9776 		if (un->un_state == SD_STATE_OFFLINE) {
9777 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9778 				un->un_state = un->un_last_state;
9779 			} else {
9780 				/*
9781 				 * We have gone through SUSPEND/RESUME while
9782 				 * we were offline. Restore the last state
9783 				 */
9784 				un->un_state = un->un_save_state;
9785 			}
9786 		}
9787 		mutex_exit(SD_MUTEX(un));
9788 
9789 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9790 	} else if (event == un->un_remove_event) {
9791 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9792 		mutex_enter(SD_MUTEX(un));
9793 		/*
9794 		 * We need to handle an event callback that occurs during
9795 		 * the suspend operation, since we don't prevent it.
9796 		 */
9797 		if (un->un_state != SD_STATE_OFFLINE) {
9798 			if (un->un_state != SD_STATE_SUSPENDED) {
9799 				New_state(un, SD_STATE_OFFLINE);
9800 			} else {
9801 				un->un_last_state = SD_STATE_OFFLINE;
9802 			}
9803 		}
9804 		mutex_exit(SD_MUTEX(un));
9805 	} else {
9806 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9807 		    "!Unknown event\n");
9808 	}
9809 
9810 }
9811 #endif
9812 
9813 /*
9814  *    Function: sd_cache_control()
9815  *
9816  * Description: This routine is the driver entry point for setting
9817  *		read and write caching by modifying the WCE (write cache
9818  *		enable) and RCD (read cache disable) bits of mode
9819  *		page 8 (MODEPAGE_CACHING).
9820  *
9821  *   Arguments: un - driver soft state (unit) structure
9822  *		rcd_flag - flag for controlling the read cache
9823  *		wce_flag - flag for controlling the write cache
9824  *
9825  * Return Code: EIO
9826  *		code returned by sd_send_scsi_MODE_SENSE and
9827  *		sd_send_scsi_MODE_SELECT
9828  *
9829  *     Context: Kernel Thread
9830  */
9831 
9832 static int
9833 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
9834 {
9835 	struct mode_caching	*mode_caching_page;
9836 	uchar_t			*header;
9837 	size_t			buflen;
9838 	int			hdrlen;
9839 	int			bd_len;
9840 	int			rval = 0;
9841 	struct mode_header_grp2	*mhp;
9842 
9843 	ASSERT(un != NULL);
9844 
9845 	/*
9846 	 * Do a test unit ready, otherwise a mode sense may not work if this
9847 	 * is the first command sent to the device after boot.
9848 	 */
9849 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9850 
9851 	if (un->un_f_cfg_is_atapi == TRUE) {
9852 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9853 	} else {
9854 		hdrlen = MODE_HEADER_LENGTH;
9855 	}
9856 
9857 	/*
9858 	 * Allocate memory for the retrieved mode page and its headers.  Set
9859 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
9860 	 * we get all of the mode sense data; otherwise, the mode select
9861 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9862 	 */
9863 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9864 	    sizeof (struct mode_cache_scsi3);
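	/*
	 * Reference sketch of the resulting buffer layout (sizes are the
	 * ones summed above):
	 *
	 *	header + 0:			mode header (hdrlen bytes)
	 *	header + hdrlen:		block descriptor
	 *					(MODE_BLK_DESC_LENGTH bytes)
	 *	header + hdrlen + bd_len:	caching mode page
	 *					(struct mode_cache_scsi3)
	 */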
9865 
9866 	header = kmem_zalloc(buflen, KM_SLEEP);
9867 
9868 	/* Get the information from the device. */
9869 	if (un->un_f_cfg_is_atapi == TRUE) {
9870 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9871 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9872 	} else {
9873 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9874 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9875 	}
9876 	if (rval != 0) {
9877 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9878 		    "sd_cache_control: Mode Sense Failed\n");
9879 		kmem_free(header, buflen);
9880 		return (rval);
9881 	}
9882 
9883 	/*
9884 	 * Determine size of Block Descriptors in order to locate
9885 	 * the mode page data. ATAPI devices return 0, SCSI devices
9886 	 * should return MODE_BLK_DESC_LENGTH.
9887 	 */
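	/*
	 * Reference example: the group-2 header carries the length as two
	 * big-endian bytes, so bdesc_length_hi == 0x00 and
	 * bdesc_length_lo == 0x08 reassemble below to a bd_len of 8.
	 */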
9888 	if (un->un_f_cfg_is_atapi == TRUE) {
9889 		mhp	= (struct mode_header_grp2 *)header;
9890 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9891 	} else {
9892 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9893 	}
9894 
9895 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9896 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9897 		    "sd_cache_control: Mode Sense returned invalid "
9898 		    "block descriptor length\n");
9899 		kmem_free(header, buflen);
9900 		return (EIO);
9901 	}
9902 
9903 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9904 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9905 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
9906 		    " caching page code mismatch %d\n",
9907 		    mode_caching_page->mode_page.code);
9908 		kmem_free(header, buflen);
9909 		return (EIO);
9910 	}
9911 
9912 	/* Check the relevant bits on successful mode sense. */
9913 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9914 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9915 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9916 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9917 
9918 		size_t sbuflen;
9919 		uchar_t save_pg;
9920 
9921 		/*
9922 		 * Construct select buffer length based on the
9923 		 * length of the sense data returned.
9924 		 */
9925 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
9926 		    sizeof (struct mode_page) +
9927 		    (int)mode_caching_page->mode_page.length;
9928 
9929 		/*
9930 		 * Set the caching bits as requested.
9931 		 */
9932 		if (rcd_flag == SD_CACHE_ENABLE)
9933 			mode_caching_page->rcd = 0;
9934 		else if (rcd_flag == SD_CACHE_DISABLE)
9935 			mode_caching_page->rcd = 1;
9936 
9937 		if (wce_flag == SD_CACHE_ENABLE)
9938 			mode_caching_page->wce = 1;
9939 		else if (wce_flag == SD_CACHE_DISABLE)
9940 			mode_caching_page->wce = 0;
9941 
9942 		/*
9943 		 * Save the page if the mode sense says the
9944 		 * drive supports it.
9945 		 */
9946 		save_pg = mode_caching_page->mode_page.ps ?
9947 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9948 
9949 		/* Clear reserved bits before mode select. */
9950 		mode_caching_page->mode_page.ps = 0;
9951 
9952 		/*
9953 		 * Clear out mode header for mode select.
9954 		 * The rest of the retrieved page will be reused.
9955 		 */
9956 		bzero(header, hdrlen);
9957 
9958 		if (un->un_f_cfg_is_atapi == TRUE) {
9959 			mhp = (struct mode_header_grp2 *)header;
9960 			mhp->bdesc_length_hi = bd_len >> 8;
9961 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9962 		} else {
9963 			((struct mode_header *)header)->bdesc_length = bd_len;
9964 		}
9965 
9966 		/* Issue mode select to change the cache settings */
9967 		if (un->un_f_cfg_is_atapi == TRUE) {
9968 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
9969 			    sbuflen, save_pg, SD_PATH_DIRECT);
9970 		} else {
9971 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
9972 			    sbuflen, save_pg, SD_PATH_DIRECT);
9973 		}
9974 	}
9975 
9976 	kmem_free(header, buflen);
9977 	return (rval);
9978 }
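
/*
 * Usage sketch (illustrative only, not code from this driver): a caller
 * wanting both caches on would invoke sd_cache_control() as below; only
 * the SD_CACHE_ENABLE/SD_CACHE_DISABLE values used above are assumed.
 *
 *	if (sd_cache_control(un, SD_CACHE_ENABLE, SD_CACHE_ENABLE) != 0) {
 *		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
 *		    "failed to enable caching\n");
 *	}
 */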
9979 
9980 
9981 /*
9982  *    Function: sd_get_write_cache_enabled()
9983  *
9984  * Description: This routine is the driver entry point for determining if
9985  *		write caching is enabled.  It examines the WCE (write cache
9986  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9987  *
9988  *   Arguments: un - driver soft state (unit) structure
9989  *   		is_enabled - pointer to int where write cache enabled state
9990  *   			is returned (non-zero -> write cache enabled)
9991  *
9992  *
9993  * Return Code: EIO
9994  *		code returned by sd_send_scsi_MODE_SENSE
9995  *
9996  *     Context: Kernel Thread
9997  *
9998  * NOTE: If an ioctl is added to disable the write cache, this sequence should
9999  * be followed so that no locking is required for accesses to
10000  * un->un_f_write_cache_enabled:
10001  * 	do mode select to clear wce
10002  * 	do synchronize cache to flush cache
10003  * 	set un->un_f_write_cache_enabled = FALSE
10004  *
10005  * Conversely, an ioctl to enable the write cache should be done
10006  * in this order:
10007  * 	set un->un_f_write_cache_enabled = TRUE
10008  * 	do mode select to set wce
10009  */
10010 
10011 static int
10012 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
10013 {
10014 	struct mode_caching	*mode_caching_page;
10015 	uchar_t			*header;
10016 	size_t			buflen;
10017 	int			hdrlen;
10018 	int			bd_len;
10019 	int			rval = 0;
10020 
10021 	ASSERT(un != NULL);
10022 	ASSERT(is_enabled != NULL);
10023 
10024 	/* in case of error, flag as enabled */
10025 	*is_enabled = TRUE;
10026 
10027 	/*
10028 	 * Do a test unit ready, otherwise a mode sense may not work if this
10029 	 * is the first command sent to the device after boot.
10030 	 */
10031 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10032 
10033 	if (un->un_f_cfg_is_atapi == TRUE) {
10034 		hdrlen = MODE_HEADER_LENGTH_GRP2;
10035 	} else {
10036 		hdrlen = MODE_HEADER_LENGTH;
10037 	}
10038 
10039 	/*
10040 	 * Allocate memory for the retrieved mode page and its headers.  Set
10041 	 * a pointer to the page itself.
10042 	 */
10043 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
10044 	header = kmem_zalloc(buflen, KM_SLEEP);
10045 
10046 	/* Get the information from the device. */
10047 	if (un->un_f_cfg_is_atapi == TRUE) {
10048 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
10049 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10050 	} else {
10051 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
10052 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10053 	}
10054 	if (rval != 0) {
10055 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
10056 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
10057 		kmem_free(header, buflen);
10058 		return (rval);
10059 	}
10060 
10061 	/*
10062 	 * Determine size of Block Descriptors in order to locate
10063 	 * the mode page data. ATAPI devices return 0, SCSI devices
10064 	 * should return MODE_BLK_DESC_LENGTH.
10065 	 */
10066 	if (un->un_f_cfg_is_atapi == TRUE) {
10067 		struct mode_header_grp2	*mhp;
10068 		mhp	= (struct mode_header_grp2 *)header;
10069 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
10070 	} else {
10071 		bd_len  = ((struct mode_header *)header)->bdesc_length;
10072 	}
10073 
10074 	if (bd_len > MODE_BLK_DESC_LENGTH) {
10075 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10076 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
10077 		    "block descriptor length\n");
10078 		kmem_free(header, buflen);
10079 		return (EIO);
10080 	}
10081 
10082 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
10083 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
10084 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_write_cache_enabled:"
10085 		    " Mode Sense caching page code mismatch %d\n",
10086 		    mode_caching_page->mode_page.code);
10087 		kmem_free(header, buflen);
10088 		return (EIO);
10089 	}
10090 	*is_enabled = mode_caching_page->wce;
10091 
10092 	kmem_free(header, buflen);
10093 	return (0);
10094 }
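
/*
 * Sketch of the ordering described in the NOTE above (illustrative only;
 * no such ioctl exists here yet). SD_CACHE_NOACTION stands for any value
 * other than SD_CACHE_ENABLE/SD_CACHE_DISABLE, which, per the checks in
 * sd_cache_control(), leaves the RCD bit untouched.
 *
 *	Disable:
 *		(void) sd_cache_control(un, SD_CACHE_NOACTION,
 *		    SD_CACHE_DISABLE);
 *		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
 *		un->un_f_write_cache_enabled = FALSE;
 *
 *	Enable:
 *		un->un_f_write_cache_enabled = TRUE;
 *		(void) sd_cache_control(un, SD_CACHE_NOACTION,
 *		    SD_CACHE_ENABLE);
 */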
10095 
10096 
10097 /*
10098  *    Function: sd_make_device
10099  *
10100  * Description: Utility routine to return the Solaris device number from
10101  *		the data in the device's dev_info structure.
10102  *
10103  * Return Code: The Solaris device number
10104  *
10105  *     Context: Any
10106  */
10107 
10108 static dev_t
10109 sd_make_device(dev_info_t *devi)
10110 {
10111 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
10112 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
10113 }
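
/*
 * Reference example: for instance 2 the minor number base is
 * (2 << SDUNIT_SHIFT); SDUNIT() recovers the instance and SDPART() the
 * partition from any dev_t built on that base.
 */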
10114 
10115 
10116 /*
10117  *    Function: sd_pm_entry
10118  *
10119  * Description: Called at the start of a new command to manage power
10120  *		and busy status of a device. This includes determining whether
10121  *		the current power state of the device is sufficient for
10122  *		performing the command or whether it must be changed.
10123  *		The PM framework is notified appropriately.
10124  *		Only with a return status of DDI_SUCCESS will the
10125  *		component be busy to the framework.
10126  *
10127  *		All callers of sd_pm_entry must check the return status
10128  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
10129  *		of DDI_FAILURE indicates the device failed to power up.
10130  *		In this case un_pm_count has been adjusted so the result
10131  *		on exit is still powered down, i.e. count is less than 0.
10132  *		Calling sd_pm_exit with this count value hits an ASSERT.
10133  *
10134  * Return Code: DDI_SUCCESS or DDI_FAILURE
10135  *
10136  *     Context: Kernel thread context.
10137  */
10138 
10139 static int
10140 sd_pm_entry(struct sd_lun *un)
10141 {
10142 	int return_status = DDI_SUCCESS;
10143 
10144 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10145 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10146 
10147 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
10148 
10149 	if (un->un_f_pm_is_enabled == FALSE) {
10150 		SD_TRACE(SD_LOG_IO_PM, un,
10151 		    "sd_pm_entry: exiting, PM not enabled\n");
10152 		return (return_status);
10153 	}
10154 
10155 	/*
10156 	 * Just increment a counter if PM is enabled. On the transition from
10157 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
10158 	 * the count with each IO and mark the device as idle when the count
10159 	 * hits 0.
10160 	 *
10161 	 * If the count is less than 0 the device is powered down. If a powered
10162 	 * down device is successfully powered up then the count must be
10163 	 * incremented to reflect the power up. Note that it'll get incremented
10164 	 * a second time to become busy.
10165 	 *
10166 	 * Because the following has the potential to change the device state
10167 	 * and must release the un_pm_mutex to do so, only one thread can be
10168 	 * allowed through at a time.
10169 	 */
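	/*
	 * Worked example (reference only): a powered-down unit sits at
	 * un_pm_count == -1.  A successful pm_raise_power() brings the
	 * count to 0 (see the ASSERT below), and the increment at the
	 * bottom of this routine makes it 1, i.e. busy.  The matching
	 * sd_pm_exit() later drops it back to 0 and idles the component.
	 */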
10170 
10171 	mutex_enter(&un->un_pm_mutex);
10172 	while (un->un_pm_busy == TRUE) {
10173 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
10174 	}
10175 	un->un_pm_busy = TRUE;
10176 
10177 	if (un->un_pm_count < 1) {
10178 
10179 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
10180 
10181 		/*
10182 		 * Indicate we are now busy so the framework won't attempt to
10183 		 * power down the device. This call will only fail if either
10184 		 * we passed a bad component number or the device has no
10185 		 * components. Neither of these should ever happen.
10186 		 */
10187 		mutex_exit(&un->un_pm_mutex);
10188 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
10189 		ASSERT(return_status == DDI_SUCCESS);
10190 
10191 		mutex_enter(&un->un_pm_mutex);
10192 
10193 		if (un->un_pm_count < 0) {
10194 			mutex_exit(&un->un_pm_mutex);
10195 
10196 			SD_TRACE(SD_LOG_IO_PM, un,
10197 			    "sd_pm_entry: power up component\n");
10198 
10199 			/*
10200 			 * pm_raise_power will cause sdpower to be called
10201 			 * which brings the device power level to the
10202 			 * desired state, ON in this case. If successful,
10203 			 * un_pm_count and un_power_level will be updated
10204 			 * appropriately.
10205 			 */
10206 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
10207 			    SD_SPINDLE_ON);
10208 
10209 			mutex_enter(&un->un_pm_mutex);
10210 
10211 			if (return_status != DDI_SUCCESS) {
10212 				/*
10213 				 * Power up failed.
10214 				 * Idle the device and adjust the count
10215 				 * so the result on exit is that we're
10216 				 * still powered down, i.e. count is less than 0.
10217 				 */
10218 				SD_TRACE(SD_LOG_IO_PM, un,
10219 				    "sd_pm_entry: power up failed,"
10220 				    " idle the component\n");
10221 
10222 				(void) pm_idle_component(SD_DEVINFO(un), 0);
10223 				un->un_pm_count--;
10224 			} else {
10225 				/*
10226 				 * Device is powered up, verify the
10227 				 * count is non-negative.
10228 				 * This is debug only.
10229 				 */
10230 				ASSERT(un->un_pm_count == 0);
10231 			}
10232 		}
10233 
10234 		if (return_status == DDI_SUCCESS) {
10235 			/*
10236 			 * For performance, now that the device has been tagged
10237 			 * as busy, and it's known to be powered up, update the
10238 			 * chain types to use jump tables that do not include
10239 			 * pm. This significantly lowers the overhead and
10240 			 * therefore improves performance.
10241 			 */
10242 
10243 			mutex_exit(&un->un_pm_mutex);
10244 			mutex_enter(SD_MUTEX(un));
10245 			SD_TRACE(SD_LOG_IO_PM, un,
10246 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10247 			    un->un_uscsi_chain_type);
10248 
10249 			if (un->un_f_non_devbsize_supported) {
10250 				un->un_buf_chain_type =
10251 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10252 			} else {
10253 				un->un_buf_chain_type =
10254 				    SD_CHAIN_INFO_DISK_NO_PM;
10255 			}
10256 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10257 
10258 			SD_TRACE(SD_LOG_IO_PM, un,
10259 			    "             changed  uscsi_chain_type to   %d\n",
10260 			    un->un_uscsi_chain_type);
10261 			mutex_exit(SD_MUTEX(un));
10262 			mutex_enter(&un->un_pm_mutex);
10263 
10264 			if (un->un_pm_idle_timeid == NULL) {
10265 				/* 300 ms. */
10266 				un->un_pm_idle_timeid =
10267 				    timeout(sd_pm_idletimeout_handler, un,
10268 				    (drv_usectohz((clock_t)300000)));
10269 				/*
10270 				 * Include an extra call to busy which keeps the
10271 				 * device busy with respect to the PM layer
10272 				 * until the timer fires, at which time it'll
10273 				 * get the extra idle call.
10274 				 */
10275 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10276 			}
10277 		}
10278 	}
10279 	un->un_pm_busy = FALSE;
10280 	/* Next... */
10281 	cv_signal(&un->un_pm_busy_cv);
10282 
10283 	un->un_pm_count++;
10284 
10285 	SD_TRACE(SD_LOG_IO_PM, un,
10286 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10287 
10288 	mutex_exit(&un->un_pm_mutex);
10289 
10290 	return (return_status);
10291 }
10292 
10293 
10294 /*
10295  *    Function: sd_pm_exit
10296  *
10297  * Description: Called at the completion of a command to manage busy
10298  *		status for the device. If the device becomes idle the
10299  *		PM framework is notified.
10300  *
10301  *     Context: Kernel thread context
10302  */
10303 
10304 static void
10305 sd_pm_exit(struct sd_lun *un)
10306 {
10307 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10308 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10309 
10310 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10311 
10312 	/*
10313 	 * After attach the following flag is only read, so don't
10314 	 * take the penalty of acquiring a mutex for it.
10315 	 */
10316 	if (un->un_f_pm_is_enabled == TRUE) {
10317 
10318 		mutex_enter(&un->un_pm_mutex);
10319 		un->un_pm_count--;
10320 
10321 		SD_TRACE(SD_LOG_IO_PM, un,
10322 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10323 
10324 		ASSERT(un->un_pm_count >= 0);
10325 		if (un->un_pm_count == 0) {
10326 			mutex_exit(&un->un_pm_mutex);
10327 
10328 			SD_TRACE(SD_LOG_IO_PM, un,
10329 			    "sd_pm_exit: idle component\n");
10330 
10331 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10332 
10333 		} else {
10334 			mutex_exit(&un->un_pm_mutex);
10335 		}
10336 	}
10337 
10338 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10339 }
10340 
10341 
10342 /*
10343  *    Function: sdopen
10344  *
10345  * Description: Driver's open(9e) entry point function.
10346  *
10347  *   Arguments: dev_p   - pointer to device number
10348  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10349  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10350  *		cred_p  - user credential pointer
10351  *
10352  * Return Code: EINVAL
10353  *		ENXIO
10354  *		EIO
10355  *		EROFS
10356  *		EBUSY
10357  *
10358  *     Context: Kernel thread context
10359  */
10360 /* ARGSUSED */
10361 static int
10362 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10363 {
10364 	struct sd_lun	*un;
10365 	int		nodelay;
10366 	int		part;
10367 	uint64_t	partmask;
10368 	int		instance;
10369 	dev_t		dev;
10370 	int		rval = EIO;
10371 
10372 	/* Validate the open type */
10373 	if (otyp >= OTYPCNT) {
10374 		return (EINVAL);
10375 	}
10376 
10377 	dev = *dev_p;
10378 	instance = SDUNIT(dev);
10379 	mutex_enter(&sd_detach_mutex);
10380 
10381 	/*
10382 	 * Fail the open if there is no softstate for the instance, or
10383 	 * if another thread somewhere is trying to detach the instance.
10384 	 */
10385 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10386 	    (un->un_detach_count != 0)) {
10387 		mutex_exit(&sd_detach_mutex);
10388 		/*
10389 		 * The probe cache only needs to be cleared when open (9e) fails
10390 		 * with ENXIO (4238046).
10391 		 */
10392 		/*
10393 		 * Unconditionally clearing the probe cache is OK with
10394 		 * separate sd/ssd binaries; on x86, where both parallel
10395 		 * SCSI and fibre can live in one binary, it could be
10396 		 * an issue.
10397 		 */
10398 		sd_scsi_clear_probe_cache();
10399 		return (ENXIO);
10400 	}
10401 
10402 	/*
10403 	 * The un_layer_count is to prevent another thread in specfs from
10404 	 * trying to detach the instance, which can happen when we are
10405 	 * called from a higher-layer driver instead of through specfs.
10406 	 * This will not be needed when DDI provides a layered driver
10407 	 * interface that allows specfs to know that an instance is in
10408 	 * use by a layered driver & should not be detached.
10409 	 *
10410 	 * Note: the semantics for layered driver opens are exactly one
10411 	 * close for every open.
10412 	 */
10413 	if (otyp == OTYP_LYR) {
10414 		un->un_layer_count++;
10415 	}
10416 
10417 	/*
10418 	 * Keep a count of the current # of opens in progress. This is because
10419 	 * some layered drivers try to call us as a regular open. This can
10420 	 * cause problems that we cannot prevent; however, by keeping this count
10421 	 * we can at least keep our open and detach routines from racing against
10422 	 * each other under such conditions.
10423 	 */
10424 	un->un_opens_in_progress++;
10425 	mutex_exit(&sd_detach_mutex);
10426 
10427 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10428 	part	 = SDPART(dev);
10429 	partmask = 1 << part;
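	/*
	 * Reference example: part 2 yields partmask 0x4; un_exclopen and
	 * un_ocmap.regopen[] below track opens as one bit per partition.
	 */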
10430 
10431 	/*
10432 	 * We use a semaphore here in order to serialize
10433 	 * open and close requests on the device.
10434 	 */
10435 	sema_p(&un->un_semoclose);
10436 
10437 	mutex_enter(SD_MUTEX(un));
10438 
10439 	/*
10440 	 * All device accesses go through sdstrategy(), where we check
10441 	 * on suspend status but there could be a scsi_poll command,
10442 	 * which bypasses sdstrategy(), so we need to check pm
10443 	 * status.
10444 	 */
10445 
10446 	if (!nodelay) {
10447 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10448 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10449 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10450 		}
10451 
10452 		mutex_exit(SD_MUTEX(un));
10453 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10454 			rval = EIO;
10455 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10456 			    "sdopen: sd_pm_entry failed\n");
10457 			goto open_failed_with_pm;
10458 		}
10459 		mutex_enter(SD_MUTEX(un));
10460 	}
10461 
10462 	/* check for previous exclusive open */
10463 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10464 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10465 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10466 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10467 
10468 	if (un->un_exclopen & (partmask)) {
10469 		goto excl_open_fail;
10470 	}
10471 
10472 	if (flag & FEXCL) {
10473 		int i;
10474 		if (un->un_ocmap.lyropen[part]) {
10475 			goto excl_open_fail;
10476 		}
10477 		for (i = 0; i < (OTYPCNT - 1); i++) {
10478 			if (un->un_ocmap.regopen[i] & (partmask)) {
10479 				goto excl_open_fail;
10480 			}
10481 		}
10482 	}
10483 
10484 	/*
10485 	 * Check the write permission if this is a removable media device,
10486 	 * NDELAY has not been set, and writable permission is requested.
10487 	 *
10488 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10489 	 * attempt will fail with EIO as part of the I/O processing. This is a
10490 	 * more permissive implementation that allows the open to succeed and
10491 	 * WRITE attempts to fail when appropriate.
10492 	 */
10493 	if (un->un_f_chk_wp_open) {
10494 		if ((flag & FWRITE) && (!nodelay)) {
10495 			mutex_exit(SD_MUTEX(un));
10496 			/*
10497 			 * Defer the check for write permission on writable
10498 			 * DVD drives until sdstrategy, and do not fail the
10499 			 * open even if FWRITE is set, as writability depends
10500 			 * upon the media, which can change after the call
10501 			 * to open().
10502 			 */
10503 			if (un->un_f_dvdram_writable_device == FALSE) {
10504 				if (ISCD(un) || sr_check_wp(dev)) {
10505 					rval = EROFS;
10506 					mutex_enter(SD_MUTEX(un));
10507 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10508 					    "write to cd or write protected media\n");
10509 					goto open_fail;
10510 				}
10511 			}
10512 			mutex_enter(SD_MUTEX(un));
10513 		}
10514 	}
10515 
10516 	/*
10517 	 * If opening in NDELAY/NONBLOCK mode, just return.
10518 	 * Check if disk is ready and has a valid geometry later.
10519 	 */
10520 	if (!nodelay) {
10521 		mutex_exit(SD_MUTEX(un));
10522 		rval = sd_ready_and_valid(un);
10523 		mutex_enter(SD_MUTEX(un));
10524 		/*
10525 		 * Fail if device is not ready or if the number of disk
10526 		 * blocks is zero or negative for non-CD devices.
10527 		 */
10528 		if ((rval != SD_READY_VALID) ||
10529 		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10530 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10531 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10532 			    "device not ready or invalid disk block value\n");
10533 			goto open_fail;
10534 		}
10535 #if defined(__i386) || defined(__amd64)
10536 	} else {
10537 		uchar_t *cp;
10538 		/*
10539 		 * x86 requires special nodelay handling, so that p0 is
10540 		 * always defined and accessible.
10541 		 * Invalidate geometry only if device is not already open.
10542 		 */
10543 		cp = &un->un_ocmap.chkd[0];
10544 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10545 			if (*cp != (uchar_t)0) {
10546 				break;
10547 			}
10548 			cp++;
10549 		}
10550 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10551 			un->un_f_geometry_is_valid = FALSE;
10552 		}
10553 
10554 #endif
10555 	}
10556 
10557 	if (otyp == OTYP_LYR) {
10558 		un->un_ocmap.lyropen[part]++;
10559 	} else {
10560 		un->un_ocmap.regopen[otyp] |= partmask;
10561 	}
10562 
10563 	/* Set up open and exclusive open flags */
10564 	if (flag & FEXCL) {
10565 		un->un_exclopen |= (partmask);
10566 	}
10567 
10568 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10569 	    "open of part %d type %d\n", part, otyp);
10570 
10571 	mutex_exit(SD_MUTEX(un));
10572 	if (!nodelay) {
10573 		sd_pm_exit(un);
10574 	}
10575 
10576 	sema_v(&un->un_semoclose);
10577 
10578 	mutex_enter(&sd_detach_mutex);
10579 	un->un_opens_in_progress--;
10580 	mutex_exit(&sd_detach_mutex);
10581 
10582 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10583 	return (DDI_SUCCESS);
10584 
10585 excl_open_fail:
10586 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10587 	rval = EBUSY;
10588 
10589 open_fail:
10590 	mutex_exit(SD_MUTEX(un));
10591 
10592 	/*
10593 	 * On a failed open we must exit the pm management.
10594 	 */
10595 	if (!nodelay) {
10596 		sd_pm_exit(un);
10597 	}
10598 open_failed_with_pm:
10599 	sema_v(&un->un_semoclose);
10600 
10601 	mutex_enter(&sd_detach_mutex);
10602 	un->un_opens_in_progress--;
10603 	if (otyp == OTYP_LYR) {
10604 		un->un_layer_count--;
10605 	}
10606 	mutex_exit(&sd_detach_mutex);
10607 
10608 	return (rval);
10609 }
10610 
10611 
10612 /*
10613  *    Function: sdclose
10614  *
10615  * Description: Driver's close(9e) entry point function.
10616  *
10617  *   Arguments: dev    - device number
10618  *		flag   - file status flag, informational only
10619  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10620  *		cred_p - user credential pointer
10621  *
10622  * Return Code: ENXIO
10623  *
10624  *     Context: Kernel thread context
10625  */
10626 /* ARGSUSED */
10627 static int
10628 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10629 {
10630 	struct sd_lun	*un;
10631 	uchar_t		*cp;
10632 	int		part;
10633 	int		nodelay;
10634 	int		rval = 0;
10635 
10636 	/* Validate the open type */
10637 	if (otyp >= OTYPCNT) {
10638 		return (ENXIO);
10639 	}
10640 
10641 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10642 		return (ENXIO);
10643 	}
10644 
10645 	part = SDPART(dev);
10646 	nodelay = flag & (FNDELAY | FNONBLOCK);
10647 
10648 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10649 	    "sdclose: close of part %d type %d\n", part, otyp);
10650 
10651 	/*
10652 	 * We use a semaphore here in order to serialize
10653 	 * open and close requests on the device.
10654 	 */
10655 	sema_p(&un->un_semoclose);
10656 
10657 	mutex_enter(SD_MUTEX(un));
10658 
10659 	/* Don't proceed if power is being changed. */
10660 	while (un->un_state == SD_STATE_PM_CHANGING) {
10661 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10662 	}
10663 
10664 	if (un->un_exclopen & (1 << part)) {
10665 		un->un_exclopen &= ~(1 << part);
10666 	}
10667 
10668 	/* Update the open partition map */
10669 	if (otyp == OTYP_LYR) {
10670 		un->un_ocmap.lyropen[part] -= 1;
10671 	} else {
10672 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10673 	}
10674 
10675 	cp = &un->un_ocmap.chkd[0];
10676 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10677 		if (*cp != (uchar_t)0) {
10678 			break;
10679 		}
10680 		cp++;
10681 	}
10682 
10683 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10684 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10685 
10686 		/*
10687 		 * We avoid persistence upon the last close, and set
10688 		 * the throttle back to the maximum.
10689 		 */
10690 		un->un_throttle = un->un_saved_throttle;
10691 
10692 		if (un->un_state == SD_STATE_OFFLINE) {
10693 			if (un->un_f_is_fibre == FALSE) {
10694 				scsi_log(SD_DEVINFO(un), sd_label,
10695 					CE_WARN, "offline\n");
10696 			}
10697 			un->un_f_geometry_is_valid = FALSE;
10698 
10699 		} else {
10700 			/*
10701 			 * Flush any outstanding writes in NVRAM cache.
10702 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10703 			 * command; it may not work for non-Pluto devices.
10704 			 * SYNCHRONIZE CACHE is not required for removables,
10705 			 * except DVD-RAM drives.
10706 			 *
10707 			 * Also note: because SYNCHRONIZE CACHE is currently
10708 			 * the only command issued here that requires the
10709 			 * drive be powered up, only do the power up before
10710 			 * sending the Sync Cache command. If additional
10711 			 * commands are added which require a powered up
10712 			 * drive, the following sequence may have to change.
10713 			 *
10714 			 * And finally, note that parallel SCSI on SPARC
10715 			 * only issues a Sync Cache to DVD-RAM, a newly
10716 			 * supported device.
10717 			 */
10718 #if defined(__i386) || defined(__amd64)
10719 			if (un->un_f_sync_cache_supported ||
10720 			    un->un_f_dvdram_writable_device == TRUE) {
10721 #else
10722 			if (un->un_f_dvdram_writable_device == TRUE) {
10723 #endif
10724 				mutex_exit(SD_MUTEX(un));
10725 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10726 					rval =
10727 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10728 					    NULL);
10729 					/* ignore error if not supported */
10730 					if (rval == ENOTSUP) {
10731 						rval = 0;
10732 					} else if (rval != 0) {
10733 						rval = EIO;
10734 					}
10735 					sd_pm_exit(un);
10736 				} else {
10737 					rval = EIO;
10738 				}
10739 				mutex_enter(SD_MUTEX(un));
10740 			}
10741 
10742 			/*
10743 			 * For devices which support DOOR_LOCK, send an ALLOW
10744 			 * MEDIA REMOVAL command, but don't get upset if it
10745 			 * fails. We need to raise the power of the drive before
10746 			 * we can call sd_send_scsi_DOORLOCK().
10747 			 */
10748 			if (un->un_f_doorlock_supported) {
10749 				mutex_exit(SD_MUTEX(un));
10750 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10751 					rval = sd_send_scsi_DOORLOCK(un,
10752 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10753 
10754 					sd_pm_exit(un);
10755 					if (ISCD(un) && (rval != 0) &&
10756 					    (nodelay != 0)) {
10757 						rval = ENXIO;
10758 					}
10759 				} else {
10760 					rval = EIO;
10761 				}
10762 				mutex_enter(SD_MUTEX(un));
10763 			}
10764 
10765 			/*
10766 			 * If a device has removable media, invalidate all
10767 			 * parameters related to media, such as geometry,
10768 			 * blocksize, and blockcount.
10769 			 */
10770 			if (un->un_f_has_removable_media) {
10771 				sr_ejected(un);
10772 			}
10773 
10774 			/*
10775 			 * Destroy the cache (if it exists) which was
10776 			 * allocated for the write maps since this is
10777 			 * the last close for this media.
10778 			 */
10779 			if (un->un_wm_cache) {
10780 				/*
10781 				 * Check if there are pending commands;
10782 				 * if there are, give a warning and
10783 				 * do not destroy the cache.
10784 				 */
10785 				if (un->un_ncmds_in_driver > 0) {
10786 					scsi_log(SD_DEVINFO(un),
10787 					    sd_label, CE_WARN,
10788 					    "Unable to clean up memory "
10789 					    "because of pending I/O\n");
10790 				} else {
10791 					kmem_cache_destroy(
10792 					    un->un_wm_cache);
10793 					un->un_wm_cache = NULL;
10794 				}
10795 			}
10796 		}
10797 	}
10798 
10799 	mutex_exit(SD_MUTEX(un));
10800 	sema_v(&un->un_semoclose);
10801 
10802 	if (otyp == OTYP_LYR) {
10803 		mutex_enter(&sd_detach_mutex);
10804 		/*
10805 		 * The detach routine may run when the layer count
10806 		 * drops to zero.
10807 		 */
10808 		un->un_layer_count--;
10809 		mutex_exit(&sd_detach_mutex);
10810 	}
10811 
10812 	return (rval);
10813 }
10814 
10815 
10816 /*
10817  *    Function: sd_ready_and_valid
10818  *
10819  * Description: Test if device is ready and has a valid geometry.
10820  *
10821  *   Arguments: dev - device number
10822  *		un  - driver soft state (unit) structure
10823  *
10824  * Return Code: SD_READY_VALID		ready and valid label
10825  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10826  *		SD_NOT_READY_VALID	not ready, no label
10827  *
10828  *     Context: Never called at interrupt context.
10829  */
10830 
10831 static int
10832 sd_ready_and_valid(struct sd_lun *un)
10833 {
10834 	struct sd_errstats	*stp;
10835 	uint64_t		capacity;
10836 	uint_t			lbasize;
10837 	int			rval = SD_READY_VALID;
10838 	char			name_str[48];
10839 
10840 	ASSERT(un != NULL);
10841 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10842 
10843 	mutex_enter(SD_MUTEX(un));
10844 	/*
10845 	 * If a device has removable media, we must check if media is
10846 	 * ready when checking if this device is ready and valid.
10847 	 */
10848 	if (un->un_f_has_removable_media) {
10849 		mutex_exit(SD_MUTEX(un));
10850 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
10851 			rval = SD_NOT_READY_VALID;
10852 			mutex_enter(SD_MUTEX(un));
10853 			goto done;
10854 		}
10855 
10856 		mutex_enter(SD_MUTEX(un));
10857 		if ((un->un_f_geometry_is_valid == FALSE) ||
10858 		    (un->un_f_blockcount_is_valid == FALSE) ||
10859 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10860 
10861 			/* capacity has to be read on every open. */
10862 			mutex_exit(SD_MUTEX(un));
10863 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
10864 			    &lbasize, SD_PATH_DIRECT) != 0) {
10865 				mutex_enter(SD_MUTEX(un));
10866 				un->un_f_geometry_is_valid = FALSE;
10867 				rval = SD_NOT_READY_VALID;
10868 				goto done;
10869 			} else {
10870 				mutex_enter(SD_MUTEX(un));
10871 				sd_update_block_info(un, lbasize, capacity);
10872 			}
10873 		}
10874 
10875 		/*
10876 		 * Check if the media in the device is writable or not.
10877 		 */
10878 		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
10879 			sd_check_for_writable_cd(un);
10880 		}
10881 
10882 	} else {
10883 		/*
10884 		 * Do a test unit ready to clear any unit attention from non-cd
10885 		 * devices.
10886 		 */
10887 		mutex_exit(SD_MUTEX(un));
10888 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10889 		mutex_enter(SD_MUTEX(un));
10890 	}
10891 
10892 
10893 	/*
10894 	 * If this is a non-512 block device, allocate space for
10895 	 * the wmap cache. This is done here because this routine is
10896 	 * called every time the media is changed, and the block size
10897 	 * is a function of the media rather than the device.
10898 	 */
10899 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
10900 		if (!(un->un_wm_cache)) {
10901 			(void) snprintf(name_str, sizeof (name_str),
10902 			    "%s%d_cache",
10903 			    ddi_driver_name(SD_DEVINFO(un)),
10904 			    ddi_get_instance(SD_DEVINFO(un)));
10905 			un->un_wm_cache = kmem_cache_create(
10906 			    name_str, sizeof (struct sd_w_map),
10907 			    8, sd_wm_cache_constructor,
10908 			    sd_wm_cache_destructor, NULL,
10909 			    (void *)un, NULL, 0);
10910 			if (!(un->un_wm_cache)) {
10911 				rval = ENOMEM;
10912 				goto done;
10913 			}
10914 		}
10915 	}
10916 
10917 	if (un->un_state == SD_STATE_NORMAL) {
10918 		/*
10919 		 * If the target is not yet ready here (defined by a TUR
10920 		 * failure), invalidate the geometry and print an 'offline'
10921 		 * message. This is a legacy message, as the state of the
10922 		 * target is not actually changed to SD_STATE_OFFLINE.
10923 		 *
10924 		 * If the TUR fails for EACCES (Reservation Conflict), it
10925 		 * means there actually is nothing wrong with the target that
10926 		 * would require invalidating the geometry, so continue in
10927 		 * that case as if the TUR was successful.
10928 		 */
10929 		int err;
10930 
10931 		mutex_exit(SD_MUTEX(un));
10932 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
10933 		mutex_enter(SD_MUTEX(un));
10934 
10935 		if ((err != 0) && (err != EACCES)) {
10936 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10937 			    "offline\n");
10938 			un->un_f_geometry_is_valid = FALSE;
10939 			rval = SD_NOT_READY_VALID;
10940 			goto done;
10941 		}
10942 	}
10943 
10944 	if (un->un_f_format_in_progress == FALSE) {
10945 		/*
10946 		 * Note: sd_validate_geometry may return TRUE, but that does
10947 		 * not necessarily mean un_f_geometry_is_valid == TRUE!
10948 		 */
10949 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
10950 		if (rval == ENOTSUP) {
10951 			if (un->un_f_geometry_is_valid == TRUE) {
10952 				rval = 0;
10953 			} else {
10954 				rval = SD_READY_NOT_VALID;
10955 				goto done;
10956 			}
10957 		}
10958 		if (rval != 0) {
10959 			/*
10960 			 * We don't check the validity of geometry for
10961 			 * CDROMs. Also we assume we have a good label
10962 			 * even if sd_validate_geometry returned ENOMEM.
10963 			 */
10964 			if (!ISCD(un) && rval != ENOMEM) {
10965 				rval = SD_NOT_READY_VALID;
10966 				goto done;
10967 			}
10968 		}
10969 	}
10970 
10971 	/*
10972 	 * If this device supports the DOOR_LOCK command, try to send
10973 	 * it to PREVENT MEDIA REMOVAL, but don't get upset if it
10974 	 * fails. For a CD, however, it is an error.
10975 	 */
10976 	if (un->un_f_doorlock_supported) {
10977 		mutex_exit(SD_MUTEX(un));
10978 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
10979 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
10980 			rval = SD_NOT_READY_VALID;
10981 			mutex_enter(SD_MUTEX(un));
10982 			goto done;
10983 		}
10984 		mutex_enter(SD_MUTEX(un));
10985 	}
10986 
10987 	/* The state has changed, inform the media watch routines */
10988 	un->un_mediastate = DKIO_INSERTED;
10989 	cv_broadcast(&un->un_state_cv);
10990 	rval = SD_READY_VALID;
10991 
10992 done:
10993 
10994 	/*
10995 	 * Initialize the capacity kstat value, if no media previously
10996 	 * (capacity kstat is 0) and media has been inserted
10997 	 * (un_blockcount > 0).
10998 	 */
10999 	if (un->un_errstats != NULL) {
11000 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
11001 		if ((stp->sd_capacity.value.ui64 == 0) &&
11002 		    (un->un_f_blockcount_is_valid == TRUE)) {
11003 			stp->sd_capacity.value.ui64 =
11004 			    (uint64_t)((uint64_t)un->un_blockcount *
11005 			    un->un_sys_blocksize);
11006 		}
11007 	}
11008 
11009 	mutex_exit(SD_MUTEX(un));
11010 	return (rval);
11011 }
11012 
11013 
11014 /*
11015  *    Function: sdmin
11016  *
11017  * Description: Routine to limit the size of a data transfer. Used in
11018  *		conjunction with physio(9F).
11019  *
11020  *   Arguments: bp - pointer to the indicated buf(9S) struct.
11021  *
11022  *     Context: Kernel thread context.
11023  */
11024 
11025 static void
11026 sdmin(struct buf *bp)
11027 {
11028 	struct sd_lun	*un;
11029 	int		instance;
11030 
11031 	instance = SDUNIT(bp->b_edev);
11032 
11033 	un = ddi_get_soft_state(sd_state, instance);
11034 	ASSERT(un != NULL);
11035 
11036 	if (bp->b_bcount > un->un_max_xfer_size) {
11037 		bp->b_bcount = un->un_max_xfer_size;
11038 	}
11039 }
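
/*
 * Note (reference only): physio(9F) invokes this minphys-style routine on
 * each buf before issuing it, so a single large uio is carved into
 * successive transfers of at most un_max_xfer_size bytes.
 */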
11040 
11041 
11042 /*
11043  *    Function: sdread
11044  *
11045  * Description: Driver's read(9e) entry point function.
11046  *
11047  *   Arguments: dev   - device number
11048  *		uio   - structure pointer describing where data is to be stored
11049  *			in user's space
11050  *		cred_p  - user credential pointer
11051  *
11052  * Return Code: ENXIO
11053  *		EIO
11054  *		EINVAL
11055  *		value returned by physio
11056  *
11057  *     Context: Kernel thread context.
11058  */
11059 /* ARGSUSED */
11060 static int
11061 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
11062 {
11063 	struct sd_lun	*un = NULL;
11064 	int		secmask;
11065 	int		err;
11066 
11067 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11068 		return (ENXIO);
11069 	}
11070 
11071 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11072 
11073 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11074 		mutex_enter(SD_MUTEX(un));
11075 		/*
11076 		 * Because the call to sd_ready_and_valid will issue I/O, we
11077 		 * must wait here if either the device is suspended or
11078 		 * its power level is changing.
11079 		 */
11080 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11081 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11082 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11083 		}
11084 		un->un_ncmds_in_driver++;
11085 		mutex_exit(SD_MUTEX(un));
11086 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11087 			mutex_enter(SD_MUTEX(un));
11088 			un->un_ncmds_in_driver--;
11089 			ASSERT(un->un_ncmds_in_driver >= 0);
11090 			mutex_exit(SD_MUTEX(un));
11091 			return (EIO);
11092 		}
11093 		mutex_enter(SD_MUTEX(un));
11094 		un->un_ncmds_in_driver--;
11095 		ASSERT(un->un_ncmds_in_driver >= 0);
11096 		mutex_exit(SD_MUTEX(un));
11097 	}
11098 
11099 	/*
11100 	 * Read requests are restricted to multiples of the system block size.
11101 	 */
11102 	secmask = un->un_sys_blocksize - 1;
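	/*
	 * Reference example: with a 512-byte system block, secmask is
	 * 0x1FF; an offset of 1024 passes the check below
	 * (1024 & 0x1FF == 0), while an offset of 700 fails it.
	 */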
11103 
11104 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11105 		SD_ERROR(SD_LOG_READ_WRITE, un,
11106 		    "sdread: file offset not modulo %d\n",
11107 		    un->un_sys_blocksize);
11108 		err = EINVAL;
11109 	} else if (uio->uio_iov->iov_len & (secmask)) {
11110 		SD_ERROR(SD_LOG_READ_WRITE, un,
11111 		    "sdread: transfer length not modulo %d\n",
11112 		    un->un_sys_blocksize);
11113 		err = EINVAL;
11114 	} else {
11115 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
11116 	}
11117 	return (err);
11118 }
11119 
11120 
11121 /*
11122  *    Function: sdwrite
11123  *
11124  * Description: Driver's write(9e) entry point function.
11125  *
11126  *   Arguments: dev   - device number
11127  *		uio   - structure pointer describing where data is stored in
11128  *			user's space
11129  *		cred_p  - user credential pointer
11130  *
11131  * Return Code: ENXIO
11132  *		EIO
11133  *		EINVAL
11134  *		value returned by physio
11135  *
11136  *     Context: Kernel thread context.
11137  */
11138 /* ARGSUSED */
11139 static int
11140 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
11141 {
11142 	struct sd_lun	*un = NULL;
11143 	int		secmask;
11144 	int		err;
11145 
11146 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11147 		return (ENXIO);
11148 	}
11149 
11150 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11151 
11152 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11153 		mutex_enter(SD_MUTEX(un));
11154 		/*
11155 		 * Because the call to sd_ready_and_valid will issue I/O, we
11156 		 * must wait here if either the device is suspended or
11157 		 * its power level is changing.
11158 		 */
11159 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11160 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11161 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11162 		}
11163 		un->un_ncmds_in_driver++;
11164 		mutex_exit(SD_MUTEX(un));
11165 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11166 			mutex_enter(SD_MUTEX(un));
11167 			un->un_ncmds_in_driver--;
11168 			ASSERT(un->un_ncmds_in_driver >= 0);
11169 			mutex_exit(SD_MUTEX(un));
11170 			return (EIO);
11171 		}
11172 		mutex_enter(SD_MUTEX(un));
11173 		un->un_ncmds_in_driver--;
11174 		ASSERT(un->un_ncmds_in_driver >= 0);
11175 		mutex_exit(SD_MUTEX(un));
11176 	}
11177 
11178 	/*
11179 	 * Write requests are restricted to multiples of the system block size.
11180 	 */
11181 	secmask = un->un_sys_blocksize - 1;
11182 
11183 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11184 		SD_ERROR(SD_LOG_READ_WRITE, un,
11185 		    "sdwrite: file offset not modulo %d\n",
11186 		    un->un_sys_blocksize);
11187 		err = EINVAL;
11188 	} else if (uio->uio_iov->iov_len & (secmask)) {
11189 		SD_ERROR(SD_LOG_READ_WRITE, un,
11190 		    "sdwrite: transfer length not modulo %d\n",
11191 		    un->un_sys_blocksize);
11192 		err = EINVAL;
11193 	} else {
11194 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
11195 	}
11196 	return (err);
11197 }
11198 
11199 
11200 /*
11201  *    Function: sdaread
11202  *
11203  * Description: Driver's aread(9e) entry point function.
11204  *
11205  *   Arguments: dev   - device number
11206  *		aio   - structure pointer describing where data is to be stored
11207  *		cred_p  - user credential pointer
11208  *
11209  * Return Code: ENXIO
11210  *		EIO
11211  *		EINVAL
11212  *		value returned by aphysio
11213  *
11214  *     Context: Kernel thread context.
11215  */
11216 /* ARGSUSED */
11217 static int
11218 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11219 {
11220 	struct sd_lun	*un = NULL;
11221 	struct uio	*uio = aio->aio_uio;
11222 	int		secmask;
11223 	int		err;
11224 
11225 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11226 		return (ENXIO);
11227 	}
11228 
11229 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11230 
11231 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11232 		mutex_enter(SD_MUTEX(un));
11233 		/*
11234 		 * Because the call to sd_ready_and_valid will issue I/O, we
11235 		 * must wait here if either the device is suspended or
11236 		 * its power level is changing.
11237 		 */
11238 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11239 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11240 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11241 		}
11242 		un->un_ncmds_in_driver++;
11243 		mutex_exit(SD_MUTEX(un));
11244 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11245 			mutex_enter(SD_MUTEX(un));
11246 			un->un_ncmds_in_driver--;
11247 			ASSERT(un->un_ncmds_in_driver >= 0);
11248 			mutex_exit(SD_MUTEX(un));
11249 			return (EIO);
11250 		}
11251 		mutex_enter(SD_MUTEX(un));
11252 		un->un_ncmds_in_driver--;
11253 		ASSERT(un->un_ncmds_in_driver >= 0);
11254 		mutex_exit(SD_MUTEX(un));
11255 	}
11256 
11257 	/*
11258 	 * Read requests are restricted to multiples of the system block size.
11259 	 */
11260 	secmask = un->un_sys_blocksize - 1;
11261 
11262 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11263 		SD_ERROR(SD_LOG_READ_WRITE, un,
11264 		    "sdaread: file offset not modulo %d\n",
11265 		    un->un_sys_blocksize);
11266 		err = EINVAL;
11267 	} else if (uio->uio_iov->iov_len & (secmask)) {
11268 		SD_ERROR(SD_LOG_READ_WRITE, un,
11269 		    "sdaread: transfer length not modulo %d\n",
11270 		    un->un_sys_blocksize);
11271 		err = EINVAL;
11272 	} else {
11273 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11274 	}
11275 	return (err);
11276 }
11277 
11278 
11279 /*
11280  *    Function: sdawrite
11281  *
11282  * Description: Driver's awrite(9e) entry point function.
11283  *
11284  *   Arguments: dev   - device number
11285  *		aio   - structure pointer describing where data is stored
11286  *		cred_p  - user credential pointer
11287  *
11288  * Return Code: ENXIO
11289  *		EIO
11290  *		EINVAL
11291  *		value returned by aphysio
11292  *
11293  *     Context: Kernel thread context.
11294  */
11295 /* ARGSUSED */
11296 static int
11297 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11298 {
11299 	struct sd_lun	*un = NULL;
11300 	struct uio	*uio = aio->aio_uio;
11301 	int		secmask;
11302 	int		err;
11303 
11304 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11305 		return (ENXIO);
11306 	}
11307 
11308 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11309 
11310 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11311 		mutex_enter(SD_MUTEX(un));
11312 		/*
11313 		 * Because the call to sd_ready_and_valid will issue I/O, we
11314 		 * must wait here if either the device is suspended or
11315 		 * its power level is changing.
11316 		 */
11317 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11318 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11319 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11320 		}
11321 		un->un_ncmds_in_driver++;
11322 		mutex_exit(SD_MUTEX(un));
11323 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11324 			mutex_enter(SD_MUTEX(un));
11325 			un->un_ncmds_in_driver--;
11326 			ASSERT(un->un_ncmds_in_driver >= 0);
11327 			mutex_exit(SD_MUTEX(un));
11328 			return (EIO);
11329 		}
11330 		mutex_enter(SD_MUTEX(un));
11331 		un->un_ncmds_in_driver--;
11332 		ASSERT(un->un_ncmds_in_driver >= 0);
11333 		mutex_exit(SD_MUTEX(un));
11334 	}
11335 
11336 	/*
11337 	 * Write requests are restricted to multiples of the system block size.
11338 	 */
11339 	secmask = un->un_sys_blocksize - 1;
11340 
11341 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11342 		SD_ERROR(SD_LOG_READ_WRITE, un,
11343 		    "sdawrite: file offset not modulo %d\n",
11344 		    un->un_sys_blocksize);
11345 		err = EINVAL;
11346 	} else if (uio->uio_iov->iov_len & (secmask)) {
11347 		SD_ERROR(SD_LOG_READ_WRITE, un,
11348 		    "sdawrite: transfer length not modulo %d\n",
11349 		    un->un_sys_blocksize);
11350 		err = EINVAL;
11351 	} else {
11352 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11353 	}
11354 	return (err);
11355 }
11356 
11357 
11358 
11359 
11360 
11361 /*
11362  * Driver IO processing follows the following sequence:
11363  *
11364  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11365  *         |                |                     ^
11366  *         v                v                     |
11367  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11368  *         |                |                     |                   |
11369  *         v                |                     |                   |
11370  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11371  *         |                |                     ^                   ^
11372  *         v                v                     |                   |
11373  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11374  *         |                |                     |                   |
11375  *     +---+                |                     +------------+      +-------+
11376  *     |                    |                                  |              |
11377  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11378  *     |                    v                                  |              |
11379  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11380  *     |                    |                                  ^              |
11381  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11382  *     |                    v                                  |              |
11383  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11384  *     |                    |                                  ^              |
11385  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11386  *     |                    v                                  |              |
11387  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11388  *     |                    |                                  ^              |
11389  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11390  *     |                    v                                  |              |
11391  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11392  *     |                    |                                  ^              |
11393  *     |                    |                                  |              |
11394  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11395  *                          |                           ^
11396  *                          v                           |
11397  *                   sd_core_iostart()                  |
11398  *                          |                           |
11399  *                          |                           +------>(*destroypkt)()
11400  *                          +-> sd_start_cmds() <-+     |           |
11401  *                          |                     |     |           v
11402  *                          |                     |     |  scsi_destroy_pkt(9F)
11403  *                          |                     |     |
11404  *                          +->(*initpkt)()       +- sdintr()
11405  *                          |  |                        |  |
11406  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11407  *                          |  +-> scsi_setup_cdb(9F)   |
11408  *                          |                           |
11409  *                          +--> scsi_transport(9F)     |
11410  *                                     |                |
11411  *                                     +----> SCSA ---->+
11412  *
11413  *
11414  * This code is based upon the following presumptions:
11415  *
11416  *   - iostart and iodone functions operate on buf(9S) structures. These
11417  *     functions perform the necessary operations on the buf(9S) and pass
11418  *     them along to the next function in the chain by using the macros
11419  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11420  *     (for iodone side functions).
11421  *
11422  *   - The iostart side functions may sleep. The iodone side functions
11423  *     are called under interrupt context and may NOT sleep. Therefore
11424  *     iodone side functions also may not call iostart side functions.
11425  *     (NOTE: iostart side functions should NOT sleep for memory, as
11426  *     this could result in deadlock.)
11427  *
11428  *   - An iostart side function may call its corresponding iodone side
11429  *     function directly (if necessary).
11430  *
11431  *   - In the event of an error, an iostart side function can return a buf(9S)
11432  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11433  *     b_error in the usual way of course).
11434  *
11435  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11436  *     requests to the iostart side functions.  The iostart side functions in
11437  *     this case would be called under the context of a taskq thread, so it's
11438  *     OK for them to block/sleep/spin in this case.
11439  *
11440  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11441  *     pass them along to the next function in the chain.  The corresponding
11442  *     iodone side functions must coalesce the "shadow" bufs and return
11443  *     the "original" buf to the next higher layer.
11444  *
11445  *   - The b_private field of the buf(9S) struct holds a pointer to
11446  *     an sd_xbuf struct, which contains information needed to
11447  *     construct the scsi_pkt for the command.
11448  *
11449  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11450  *     layer must acquire & release the SD_MUTEX(un) as needed.
11451  */
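/*
 * As an illustration of the conventions above, a minimal iostart/iodone
 * layer pair follows this pattern (a hypothetical sketch, not an actual
 * layer in this driver; per-layer processing is abbreviated):
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		if (request_is_invalid(bp)) {
 *			bioerror(bp, EIO);
 *			bp->b_resid = bp->b_bcount;
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 *	static void
 *	sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		undo_iostart_side_work(bp);
 *		SD_NEXT_IODONE(index, un, bp);
 *	}
 *
 * Here request_is_invalid() and undo_iostart_side_work() stand in for
 * whatever per-layer processing is required.
 */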
11452 
11453 
11454 /*
11455  * Create taskq for all targets in the system. This is created at
11456  * _init(9E) and destroyed at _fini(9E).
11457  *
11458  * Note: here we set the minalloc to a reasonably high number to ensure that
11459  * we will have an adequate supply of task entries available at interrupt time.
11460  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11461  * sd_taskq_create().  Since we do not want to sleep for allocations at
11462  * interrupt time, set maxalloc equal to minalloc. That way we will simply
11463  * fail the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC
11464  * taskq requests at any one instant in time.
11465  */
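/*
 * In other words, a dispatch attempt at interrupt time follows this
 * pattern (a sketch only; see the read-modify-write dispatch in
 * sd_mapblocksize_iodone() for a real example):
 *
 *	if (taskq_dispatch(sd_tq, func, arg, KM_NOSLEEP) == 0) {
 *		(no preallocated task entries left: fail the
 *		command rather than sleep for memory)
 *	}
 */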
11466 #define	SD_TASKQ_NUMTHREADS	8
11467 #define	SD_TASKQ_MINALLOC	256
11468 #define	SD_TASKQ_MAXALLOC	256
11469 
11470 static taskq_t	*sd_tq = NULL;
11471 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11472 
11473 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11474 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11475 
11476 /*
11477  * The following task queue is created for the write phase of
11478  * read-modify-write on devices with a non-512 byte block size.
11479  * The number of threads is limited to 1 for now: this queue currently
11480  * applies only to DVD-RAM and MO drives, for which performance is not
11481  * the main criterion at this stage.
11482  * Note: it remains to be explored whether a single taskq could be used.
11483  */
11484 #define	SD_WMR_TASKQ_NUMTHREADS	1
11485 static taskq_t	*sd_wmr_tq = NULL;
11486 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11487 
11488 /*
11489  *    Function: sd_taskq_create
11490  *
11491  * Description: Create taskq thread(s) and preallocate task entries
11492  *
11493  * Return Code: Returns a pointer to the allocated taskq_t.
11494  *
11495  *     Context: Can sleep. Requires blockable context.
11496  *
11497  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11498  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11499  *		- taskq_create() will block for memory; it will also panic
11500  *		  if it cannot create the requested number of threads.
11501  *		- Currently taskq_create() creates threads that cannot be
11502  *		  swapped.
11503  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11504  *		  supply of taskq entries at interrupt time (ie, so that we
11505  *		  do not have to sleep for memory)
11506  */
11507 
11508 static void
11509 sd_taskq_create(void)
11510 {
11511 	char	taskq_name[TASKQ_NAMELEN];
11512 
11513 	ASSERT(sd_tq == NULL);
11514 	ASSERT(sd_wmr_tq == NULL);
11515 
11516 	(void) snprintf(taskq_name, sizeof (taskq_name),
11517 	    "%s_drv_taskq", sd_label);
11518 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11519 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11520 	    TASKQ_PREPOPULATE));
11521 
11522 	(void) snprintf(taskq_name, sizeof (taskq_name),
11523 	    "%s_rmw_taskq", sd_label);
11524 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11525 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11526 	    TASKQ_PREPOPULATE));
11527 }
11528 
11529 
11530 /*
11531  *    Function: sd_taskq_delete
11532  *
11533  * Description: Complementary cleanup routine for sd_taskq_create().
11534  *
11535  *     Context: Kernel thread context.
11536  */
11537 
11538 static void
11539 sd_taskq_delete(void)
11540 {
11541 	ASSERT(sd_tq != NULL);
11542 	ASSERT(sd_wmr_tq != NULL);
11543 	taskq_destroy(sd_tq);
11544 	taskq_destroy(sd_wmr_tq);
11545 	sd_tq = NULL;
11546 	sd_wmr_tq = NULL;
11547 }
11548 
11549 
11550 /*
11551  *    Function: sdstrategy
11552  *
11553  * Description: Driver's strategy (9E) entry point function.
11554  *
11555  *   Arguments: bp - pointer to buf(9S)
11556  *
11557  * Return Code: Always returns zero
11558  *
11559  *     Context: Kernel thread context.
11560  */
11561 
11562 static int
11563 sdstrategy(struct buf *bp)
11564 {
11565 	struct sd_lun *un;
11566 
11567 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11568 	if (un == NULL) {
11569 		bioerror(bp, EIO);
11570 		bp->b_resid = bp->b_bcount;
11571 		biodone(bp);
11572 		return (0);
11573 	}
11574 	/* As was done in the past, fail new cmds if the state is dumping. */
11575 	if (un->un_state == SD_STATE_DUMPING) {
11576 		bioerror(bp, ENXIO);
11577 		bp->b_resid = bp->b_bcount;
11578 		biodone(bp);
11579 		return (0);
11580 	}
11581 
11582 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11583 
11584 	/*
11585 	 * Commands may sneak in while we released the mutex in
11586 	 * DDI_SUSPEND, we should block new commands. However, old
11587 	 * commands that are still in the driver at this point should
11588 	 * still be allowed to drain.
11589 	 */
11590 	mutex_enter(SD_MUTEX(un));
11591 	/*
11592 	 * Must wait here if either the device is suspended or
11593 	 * if its power level is changing.
11594 	 */
11595 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11596 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11597 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11598 	}
11599 
11600 	un->un_ncmds_in_driver++;
11601 
11602 	/*
11603 	 * atapi: Since the CD is currently run in PIO mode, we must call
11604 	 * bp_mapin here to avoid having bp_mapin called in interrupt context
11605 	 * by the HBA's init_pkt routine.
11606 	 */
11607 	if (un->un_f_cfg_is_atapi == TRUE) {
11608 		mutex_exit(SD_MUTEX(un));
11609 		bp_mapin(bp);
11610 		mutex_enter(SD_MUTEX(un));
11611 	}
11612 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11613 	    un->un_ncmds_in_driver);
11614 
11615 	mutex_exit(SD_MUTEX(un));
11616 
11617 	/*
11618 	 * This will (eventually) allocate the sd_xbuf area and
11619 	 * call sd_xbuf_strategy().  We just want to return the
11620 	 * result of ddi_xbuf_qstrategy so that we have an opt-
11621 	 * imized tail call which saves us a stack frame.
11622 	 */
11623 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11624 }
11625 
11626 
11627 /*
11628  *    Function: sd_xbuf_strategy
11629  *
11630  * Description: Function for initiating IO operations via the
11631  *		ddi_xbuf_qstrategy() mechanism.
11632  *
11633  *     Context: Kernel thread context.
11634  */
11635 
11636 static void
11637 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11638 {
11639 	struct sd_lun *un = arg;
11640 
11641 	ASSERT(bp != NULL);
11642 	ASSERT(xp != NULL);
11643 	ASSERT(un != NULL);
11644 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11645 
11646 	/*
11647 	 * Initialize the fields in the xbuf and save a pointer to the
11648 	 * xbuf in bp->b_private.
11649 	 */
11650 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11651 
11652 	/* Send the buf down the iostart chain */
11653 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11654 }
11655 
11656 
11657 /*
11658  *    Function: sd_xbuf_init
11659  *
11660  * Description: Prepare the given sd_xbuf struct for use.
11661  *
11662  *   Arguments: un - ptr to softstate
11663  *		bp - ptr to associated buf(9S)
11664  *		xp - ptr to associated sd_xbuf
11665  *		chain_type - IO chain type to use:
11666  *			SD_CHAIN_NULL
11667  *			SD_CHAIN_BUFIO
11668  *			SD_CHAIN_USCSI
11669  *			SD_CHAIN_DIRECT
11670  *			SD_CHAIN_DIRECT_PRIORITY
11671  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11672  *			initialization; may be NULL if none.
11673  *
11674  *     Context: Kernel thread context
11675  */
11676 
11677 static void
11678 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11679 	uchar_t chain_type, void *pktinfop)
11680 {
11681 	int index;
11682 
11683 	ASSERT(un != NULL);
11684 	ASSERT(bp != NULL);
11685 	ASSERT(xp != NULL);
11686 
11687 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11688 	    bp, chain_type);
11689 
11690 	xp->xb_un	= un;
11691 	xp->xb_pktp	= NULL;
11692 	xp->xb_pktinfo	= pktinfop;
11693 	xp->xb_private	= bp->b_private;
11694 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11695 
11696 	/*
11697 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11698 	 * upon the specified chain type to use.
11699 	 */
11700 	switch (chain_type) {
11701 	case SD_CHAIN_NULL:
11702 		/*
11703 		 * Fall through and use the values for the buf type, even
11704 		 * though for the NULL chain these values will never be used.
11705 		 */
11706 		/* FALLTHRU */
11707 	case SD_CHAIN_BUFIO:
11708 		index = un->un_buf_chain_type;
11709 		break;
11710 	case SD_CHAIN_USCSI:
11711 		index = un->un_uscsi_chain_type;
11712 		break;
11713 	case SD_CHAIN_DIRECT:
11714 		index = un->un_direct_chain_type;
11715 		break;
11716 	case SD_CHAIN_DIRECT_PRIORITY:
11717 		index = un->un_priority_chain_type;
11718 		break;
11719 	default:
11720 		/* We're really broken if we ever get here... */
11721 		panic("sd_xbuf_init: illegal chain type!");
11722 		/*NOTREACHED*/
11723 	}
11724 
11725 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11726 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11727 
11728 	/*
11729 	 * It might be a bit easier to simply bzero the entire xbuf above,
11730 	 * but it turns out that since we init a fair number of members anyway,
11731 	 * we save a fair number of cycles by doing explicit assignment of zero.
11732 	 */
11733 	xp->xb_pkt_flags	= 0;
11734 	xp->xb_dma_resid	= 0;
11735 	xp->xb_retry_count	= 0;
11736 	xp->xb_victim_retry_count = 0;
11737 	xp->xb_ua_retry_count	= 0;
11738 	xp->xb_sense_bp		= NULL;
11739 	xp->xb_sense_status	= 0;
11740 	xp->xb_sense_state	= 0;
11741 	xp->xb_sense_resid	= 0;
11742 
11743 	bp->b_private	= xp;
11744 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11745 	bp->b_resid	= 0;
11746 	bp->av_forw	= NULL;
11747 	bp->av_back	= NULL;
11748 	bioerror(bp, 0);
11749 
11750 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11751 }
11752 
11753 
11754 /*
11755  *    Function: sd_uscsi_strategy
11756  *
11757  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11758  *
11759  *   Arguments: bp - buf struct ptr
11760  *
11761  * Return Code: Always returns 0
11762  *
11763  *     Context: Kernel thread context
11764  */
11765 
11766 static int
11767 sd_uscsi_strategy(struct buf *bp)
11768 {
11769 	struct sd_lun		*un;
11770 	struct sd_uscsi_info	*uip;
11771 	struct sd_xbuf		*xp;
11772 	uchar_t			chain_type;
11773 
11774 	ASSERT(bp != NULL);
11775 
11776 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11777 	if (un == NULL) {
11778 		bioerror(bp, EIO);
11779 		bp->b_resid = bp->b_bcount;
11780 		biodone(bp);
11781 		return (0);
11782 	}
11783 
11784 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11785 
11786 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11787 
11788 	mutex_enter(SD_MUTEX(un));
11789 	/*
11790 	 * atapi: Since the CD is currently run in PIO mode, we must call
11791 	 * bp_mapin here to avoid having bp_mapin called in interrupt context
11792 	 * by the HBA's init_pkt routine.
11793 	 */
11794 	if (un->un_f_cfg_is_atapi == TRUE) {
11795 		mutex_exit(SD_MUTEX(un));
11796 		bp_mapin(bp);
11797 		mutex_enter(SD_MUTEX(un));
11798 	}
11799 	un->un_ncmds_in_driver++;
11800 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11801 	    un->un_ncmds_in_driver);
11802 	mutex_exit(SD_MUTEX(un));
11803 
11804 	/*
11805 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11806 	 */
11807 	ASSERT(bp->b_private != NULL);
11808 	uip = (struct sd_uscsi_info *)bp->b_private;
11809 
11810 	switch (uip->ui_flags) {
11811 	case SD_PATH_DIRECT:
11812 		chain_type = SD_CHAIN_DIRECT;
11813 		break;
11814 	case SD_PATH_DIRECT_PRIORITY:
11815 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11816 		break;
11817 	default:
11818 		chain_type = SD_CHAIN_USCSI;
11819 		break;
11820 	}
11821 
11822 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
11823 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11824 
11825 	/* Use the index obtained within xbuf_init */
11826 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11827 
11828 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11829 
11830 	return (0);
11831 }
11832 
11833 
11834 /*
11835  * These routines perform raw i/o operations.
11836  */
11837 /*ARGSUSED*/
11838 static void
11839 sduscsimin(struct buf *bp)
11840 {
11841 	/*
11842 	 * Do not break up the request: the CDB transfer count would then
11843 	 * be incorrect and data underruns would result (incomplete
11844 	 * reads/writes that would be retried and then fail; see
11845 	 * sdintr()).
11846 	 */
11847 }
11848 
11849 
11850 
11851 /*
11852  *    Function: sd_send_scsi_cmd
11853  *
11854  * Description: Runs a USCSI command for user (when called thru sdioctl),
11855  *		or for the driver
11856  *
11857  *   Arguments: dev - the dev_t for the device
11858  *		incmd - ptr to a valid uscsi_cmd struct
11859  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11860  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11861  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11862  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11863  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11864  *			to use the USCSI "direct" chain and bypass the normal
11865  *			command waitq.
11866  *
11867  * Return Code: 0 -  successful completion of the given command
11868  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11869  *		ENXIO  - soft state not found for specified dev
11870  *		EINVAL
11871  *		EFAULT - copyin/copyout error
11872  *		return code of biowait(9F) or physio(9F):
11873  *			EIO - IO error, caller may check incmd->uscsi_status
11874  *			ENXIO
11875  *			EACCES - reservation conflict
11876  *
11877  *     Context: Waits for command to complete. Can sleep.
11878  */
11879 
11880 static int
11881 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
11882 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
11883 	int path_flag)
11884 {
11885 	struct sd_uscsi_info	*uip;
11886 	struct uscsi_cmd	*uscmd;
11887 	struct sd_lun	*un;
11888 	struct buf	*bp;
11889 	int	rval;
11890 	int	flags;
11891 
11892 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11893 	if (un == NULL) {
11894 		return (ENXIO);
11895 	}
11896 
11897 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11898 
11899 #ifdef SDDEBUG
11900 	switch (dataspace) {
11901 	case UIO_USERSPACE:
11902 		SD_TRACE(SD_LOG_IO, un,
11903 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
11904 		break;
11905 	case UIO_SYSSPACE:
11906 		SD_TRACE(SD_LOG_IO, un,
11907 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
11908 		break;
11909 	default:
11910 		SD_TRACE(SD_LOG_IO, un,
11911 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
11912 		break;
11913 	}
11914 #endif
11915 
11916 	/*
11917 	 * Perform resets directly; no need to generate a command to do it.
11918 	 */
11919 	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
11920 		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
11921 		    RESET_ALL : RESET_TARGET;
11922 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
11923 		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
11924 			/* Reset attempt was unsuccessful */
11925 			SD_TRACE(SD_LOG_IO, un,
11926 			    "sd_send_scsi_cmd: reset: failure\n");
11927 			return (EIO);
11928 		}
11929 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
11930 		return (0);
11931 	}
11932 
11933 	/* Perfunctory sanity check... */
11934 	if (incmd->uscsi_cdblen <= 0) {
11935 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11936 		    "invalid uscsi_cdblen, returning EINVAL\n");
11937 		return (EINVAL);
11938 	} else if (incmd->uscsi_cdblen > un->un_max_hba_cdb) {
11939 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11940 		    "unsupported uscsi_cdblen, returning EINVAL\n");
11941 		return (EINVAL);
11942 	}
11943 
11944 	/*
11945 	 * In order to not worry about where the uscsi structure came from
11946 	 * (or where the cdb it points to came from) we're going to make
11947 	 * kmem_alloc'd copies of them here. This will also allow reference
11948 	 * to the data they contain long after this process has gone to
11949 	 * sleep and its kernel stack has been unmapped, etc.
11950 	 *
11951 	 * First get some memory for the uscsi_cmd struct and copy the
11952 	 * contents of the given uscsi_cmd struct into it.
11953 	 */
11954 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
11955 	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
11956 
11957 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
11958 	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
11959 
11960 	/*
11961 	 * Now get some space for the CDB, and copy the given CDB into
11962 	 * it. Use ddi_copyin() in case the data is in user space.
11963 	 */
11964 	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
11965 	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
11966 	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
11967 	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
11968 		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
11969 		kmem_free(uscmd, sizeof (struct uscsi_cmd));
11970 		return (EFAULT);
11971 	}
11972 
11973 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
11974 	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
11975 
11976 	bp = getrbuf(KM_SLEEP);
11977 
11978 	/*
11979 	 * Allocate an sd_uscsi_info struct and fill it with the info
11980 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11981 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11982 	 * since we allocate the buf here in this function, we do not
11983 	 * need to preserve the prior contents of b_private.
11984 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11985 	 */
11986 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11987 	uip->ui_flags = path_flag;
11988 	uip->ui_cmdp  = uscmd;
11989 	bp->b_private = uip;
11990 
11991 	/*
11992 	 * Initialize Request Sense buffering, if requested.
11993 	 */
11994 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11995 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11996 		/*
11997 		 * Here uscmd->uscsi_rqbuf currently points to the caller's
11998 		 * buffer, but we replace this with a kernel buffer that
11999 		 * we allocate to use with the sense data. The sense data
12000 		 * (if present) gets copied into this new buffer before the
12001 		 * command is completed.  Then we copy the sense data from
12002 		 * our allocated buf into the caller's buffer below. Note
12003 		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
12004 		 * below to perform the copy back to the caller's buf.
12005 		 */
12006 		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
12007 		if (rqbufspace == UIO_USERSPACE) {
12008 			uscmd->uscsi_rqlen   = SENSE_LENGTH;
12009 			uscmd->uscsi_rqresid = SENSE_LENGTH;
12010 		} else {
12011 			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
12012 			uscmd->uscsi_rqlen   = rlen;
12013 			uscmd->uscsi_rqresid = rlen;
12014 		}
12015 	} else {
12016 		uscmd->uscsi_rqbuf = NULL;
12017 		uscmd->uscsi_rqlen   = 0;
12018 		uscmd->uscsi_rqresid = 0;
12019 	}
12020 
12021 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
12022 	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
12023 
12024 	if (un->un_f_is_fibre == FALSE) {
12025 		/*
12026 		 * Force asynchronous mode, if necessary.  Doing this here
12027 		 * has the unfortunate effect of running other queued
12028 		 * commands async also, but since the main purpose of this
12029 		 * capability is downloading new drive firmware, we can
12030 		 * probably live with it.
12031 		 */
12032 		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
12033 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12034 				== 1) {
12035 				if (scsi_ifsetcap(SD_ADDRESS(un),
12036 					    "synchronous", 0, 1) == 1) {
12037 					SD_TRACE(SD_LOG_IO, un,
12038 					"sd_send_scsi_cmd: forced async ok\n");
12039 				} else {
12040 					SD_TRACE(SD_LOG_IO, un,
12041 					"sd_send_scsi_cmd:\
12042 					forced async failed\n");
12043 					rval = EINVAL;
12044 					goto done;
12045 				}
12046 			}
12047 		}
12048 
12049 		/*
12050 		 * Re-enable synchronous mode, if requested
12051 		 */
12052 		if (uscmd->uscsi_flags & USCSI_SYNC) {
12053 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12054 				== 0) {
12055 				int i = scsi_ifsetcap(SD_ADDRESS(un),
12056 						"synchronous", 1, 1);
12057 				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12058 					"re-enabled sync %s\n",
12059 					(i == 1) ? "ok" : "failed");
12060 			}
12061 		}
12062 	}
12063 
12064 	/*
12065 	 * Commands sent with priority are intended for error recovery
12066 	 * situations, and do not have retries performed.
12067 	 */
12068 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12069 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12070 	}
12071 
12072 	/*
12073 	 * If we're going to do actual I/O, let physio do all the right things
12074 	 */
12075 	if (uscmd->uscsi_buflen != 0) {
12076 		struct iovec	aiov;
12077 		struct uio	auio;
12078 		struct uio	*uio = &auio;
12079 
12080 		bzero(&auio, sizeof (struct uio));
12081 		bzero(&aiov, sizeof (struct iovec));
12082 		aiov.iov_base = uscmd->uscsi_bufaddr;
12083 		aiov.iov_len  = uscmd->uscsi_buflen;
12084 		uio->uio_iov  = &aiov;
12085 
12086 		uio->uio_iovcnt  = 1;
12087 		uio->uio_resid   = uscmd->uscsi_buflen;
12088 		uio->uio_segflg  = dataspace;
12089 
12090 		/*
12091 		 * physio() will block here until the command completes....
12092 		 */
12093 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
12094 
12095 		rval = physio(sd_uscsi_strategy, bp, dev,
12096 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
12097 		    sduscsimin, uio);
12098 
12099 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12100 		    "returned from physio with 0x%x\n", rval);
12101 
12102 	} else {
12103 		/*
12104 		 * We have to mimic what physio would do here! Argh!
12105 		 */
12106 		bp->b_flags  = B_BUSY |
12107 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
12108 		bp->b_edev   = dev;
12109 		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
12110 		bp->b_bcount = 0;
12111 		bp->b_blkno  = 0;
12112 
12113 		SD_TRACE(SD_LOG_IO, un,
12114 		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
12115 
12116 		(void) sd_uscsi_strategy(bp);
12117 
12118 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
12119 
12120 		rval = biowait(bp);
12121 
12122 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12123 		    "returned from  biowait with 0x%x\n", rval);
12124 	}
12125 
12126 done:
12127 
12128 #ifdef SDDEBUG
12129 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12130 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12131 	    uscmd->uscsi_status, uscmd->uscsi_resid);
12132 	if (uscmd->uscsi_bufaddr != NULL) {
12133 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12134 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12135 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12136 		if (dataspace == UIO_SYSSPACE) {
12137 			SD_DUMP_MEMORY(un, SD_LOG_IO,
12138 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12139 			    uscmd->uscsi_buflen, SD_LOG_HEX);
12140 		}
12141 	}
12142 #endif
12143 
12144 	/*
12145 	 * Get the status and residual to return to the caller.
12146 	 */
12147 	incmd->uscsi_status = uscmd->uscsi_status;
12148 	incmd->uscsi_resid  = uscmd->uscsi_resid;
12149 
12150 	/*
12151 	 * If the caller wants sense data, copy back whatever sense data
12152 	 * we may have gotten, and update the relevant rqsense info.
12153 	 */
12154 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12155 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12156 
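		/*
		 * rqlen is the number of sense bytes actually returned
		 * (the allocated length minus the residual), clamped to
		 * the size of the caller's sense buffer.
		 */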
12157 		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
12158 		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
12159 
12160 		/* Update the Request Sense status and resid */
12161 		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
12162 		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
12163 
12164 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12165 		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
12166 		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
12167 
12168 		/* Copy out the sense data for user processes */
12169 		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
12170 			int flags =
12171 			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
12172 			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
12173 			    rqlen, flags) != 0) {
12174 				rval = EFAULT;
12175 			}
12176 			/*
12177 			 * Note: Can't touch incmd->uscsi_rqbuf so use
12178 			 * uscmd->uscsi_rqbuf instead. They're the same.
12179 			 */
12180 			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12181 			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
12182 			    incmd->uscsi_rqbuf, rqlen);
12183 			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
12184 			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
12185 		}
12186 	}
12187 
12188 	/*
12189 	 * Free allocated resources and return; mapout the buf in case it was
12190 	 * mapped in by a lower layer.
12191 	 */
12192 	bp_mapout(bp);
12193 	freerbuf(bp);
12194 	kmem_free(uip, sizeof (struct sd_uscsi_info));
12195 	if (uscmd->uscsi_rqbuf != NULL) {
12196 		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
12197 	}
12198 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
12199 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
12200 
12201 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
12202 
12203 	return (rval);
12204 }
12205 
12206 
12207 /*
12208  *    Function: sd_buf_iodone
12209  *
12210  * Description: Frees the sd_xbuf & returns the buf to its originator.
12211  *
12212  *     Context: May be called from interrupt context.
12213  */
12214 /* ARGSUSED */
12215 static void
12216 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12217 {
12218 	struct sd_xbuf *xp;
12219 
12220 	ASSERT(un != NULL);
12221 	ASSERT(bp != NULL);
12222 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12223 
12224 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12225 
12226 	xp = SD_GET_XBUF(bp);
12227 	ASSERT(xp != NULL);
12228 
12229 	mutex_enter(SD_MUTEX(un));
12230 
12231 	/*
12232 	 * Grab time when the cmd completed.
12233 	 * This is used to determine whether the device has been
12234 	 * idle long enough to be reported as idle to the PM framework.
12235 	 * This lowers the overhead, and therefore improves
12236 	 * performance per I/O operation.
12237 	 */
12238 	un->un_pm_idle_time = ddi_get_time();
12239 
12240 	un->un_ncmds_in_driver--;
12241 	ASSERT(un->un_ncmds_in_driver >= 0);
12242 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12243 	    un->un_ncmds_in_driver);
12244 
12245 	mutex_exit(SD_MUTEX(un));
12246 
12247 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
12248 	biodone(bp);				/* bp is gone after this */
12249 
12250 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12251 }
12252 
12253 
12254 /*
12255  *    Function: sd_uscsi_iodone
12256  *
12257  * Description: Frees the sd_xbuf & returns the buf to its originator.
12258  *
12259  *     Context: May be called from interrupt context.
12260  */
12261 /* ARGSUSED */
12262 static void
12263 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12264 {
12265 	struct sd_xbuf *xp;
12266 
12267 	ASSERT(un != NULL);
12268 	ASSERT(bp != NULL);
12269 
12270 	xp = SD_GET_XBUF(bp);
12271 	ASSERT(xp != NULL);
12272 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12273 
12274 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12275 
12276 	bp->b_private = xp->xb_private;
12277 
12278 	mutex_enter(SD_MUTEX(un));
12279 
12280 	/*
12281 	 * Grab time when the cmd completed.
12282 	 * This is used to determine whether the device has been
12283 	 * idle long enough to be reported as idle to the PM framework.
12284 	 * This lowers the overhead, and therefore improves
12285 	 * performance per I/O operation.
12286 	 */
12287 	un->un_pm_idle_time = ddi_get_time();
12288 
12289 	un->un_ncmds_in_driver--;
12290 	ASSERT(un->un_ncmds_in_driver >= 0);
12291 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12292 	    un->un_ncmds_in_driver);
12293 
12294 	mutex_exit(SD_MUTEX(un));
12295 
12296 	kmem_free(xp, sizeof (struct sd_xbuf));
12297 	biodone(bp);
12298 
12299 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12300 }
12301 
12302 
12303 /*
12304  *    Function: sd_mapblockaddr_iostart
12305  *
12306  * Description: Verify that the request lies within the partition limits
12307  *		for the indicated minor device.  Issues an "overrun" buf if
12308  *		the request would exceed the partition range.  Converts the
12309  *		partition-relative block address to an absolute one.
12310  *
12311  *     Context: Can sleep
12312  *
12313  *      Issues: This follows what the old code did, in terms of accessing
12314  *		some of the partition info in the unit struct without holding
12315  *		the mutex.  This is a general issue: if the partition info
12316  *		can be altered while IO is in progress... as soon as we send
12317  *		a buf, its partitioning can be invalid before it gets to the
12318  *		device.  Probably the right fix is to move partitioning out
12319  *		of the driver entirely.
12320  */
12321 
12322 static void
12323 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12324 {
12325 	daddr_t	nblocks;	/* #blocks in the given partition */
12326 	daddr_t	blocknum;	/* Block number specified by the buf */
12327 	size_t	requested_nblocks;
12328 	size_t	available_nblocks;
12329 	int	partition;
12330 	diskaddr_t	partition_offset;
12331 	struct sd_xbuf *xp;
12332 
12333 
12334 	ASSERT(un != NULL);
12335 	ASSERT(bp != NULL);
12336 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12337 
12338 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12339 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12340 
12341 	xp = SD_GET_XBUF(bp);
12342 	ASSERT(xp != NULL);
12343 
12344 	/*
12345 	 * If the geometry is not indicated as valid, attempt to access
12346 	 * the unit & verify the geometry/label. This can be the case for
12347 	 * removable-media devices, or if the device was opened in
12348 	 * NDELAY/NONBLOCK mode.
12349 	 */
12350 	if ((un->un_f_geometry_is_valid != TRUE) &&
12351 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12352 		/*
12353 		 * For removable devices it is possible to start an I/O
12354 	 * without media by opening the device in nodelay mode.
12355 	 * Also for writable CDs there can be many scenarios where
12356 	 * there is no geometry yet but the volume manager is trying to
12357 	 * issue a read() just because it can see the TOC on the CD. So
12358 		 * do not print a message for removables.
12359 		 */
12360 		if (!un->un_f_has_removable_media) {
12361 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12362 			    "i/o to invalid geometry\n");
12363 		}
12364 		bioerror(bp, EIO);
12365 		bp->b_resid = bp->b_bcount;
12366 		SD_BEGIN_IODONE(index, un, bp);
12367 		return;
12368 	}
12369 
12370 	partition = SDPART(bp->b_edev);
12371 
12372 	/* #blocks in partition */
12373 	nblocks = un->un_map[partition].dkl_nblk;
12374 
12375 	/* Use of a local variable potentially improves performance slightly */
12376 	partition_offset = un->un_offset[partition];
12377 
12378 	/*
12379 	 * blocknum is the starting block number of the request. At this
12380 	 * point it is still relative to the start of the minor device.
12381 	 */
12382 	blocknum = xp->xb_blkno;
12383 
12384 	/*
12385 	 * Legacy: If the starting block number is one past the last block
12386 	 * in the partition, do not set B_ERROR in the buf.
12387 	 */
12388 	if (blocknum == nblocks)  {
12389 		goto error_exit;
12390 	}
12391 
12392 	/*
12393 	 * Confirm that the first block of the request lies within the
12394 	 * partition limits. Also the requested number of bytes must be
12395 	 * a multiple of the system block size.
12396 	 */
12397 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12398 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12399 		bp->b_flags |= B_ERROR;
12400 		goto error_exit;
12401 	}
12402 
12403 	/*
12404 	 * If the requested # blocks exceeds the available # blocks, that
12405 	 * is an overrun of the partition.
12406 	 */
12407 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12408 	available_nblocks = (size_t)(nblocks - blocknum);
12409 	ASSERT(nblocks >= blocknum);
12410 
12411 	if (requested_nblocks > available_nblocks) {
12412 		/*
12413 		 * Allocate an "overrun" buf to allow the request to proceed
12414 		 * for the amount of space available in the partition. The
12415 		 * amount not transferred will be added into the b_resid
12416 		 * when the operation is complete. The overrun buf
12417 		 * replaces the original buf here, and the original buf
12418 		 * is saved inside the overrun buf, for later use.
12419 		 */
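		/*
		 * For example (hypothetical numbers): with nblocks = 100,
		 * blocknum = 90 and requested_nblocks = 20, available_nblocks
		 * is 10, so the overrun buf carries the first 10 blocks and
		 * the remaining 10 blocks' worth of bytes becomes the resid.
		 */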
12420 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12421 		    (offset_t)(requested_nblocks - available_nblocks));
12422 		size_t count = bp->b_bcount - resid;
12423 		/*
12424 		 * Note: count is an unsigned entity thus it'll NEVER
12425 		 * be less than 0 so ASSERT the original values are
12426 		 * correct.
12427 		 */
12428 		ASSERT(bp->b_bcount >= resid);
12429 
12430 		bp = sd_bioclone_alloc(bp, count, blocknum,
12431 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12432 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12433 		ASSERT(xp != NULL);
12434 	}
12435 
12436 	/* At this point there should be no residual for this buf. */
12437 	ASSERT(bp->b_resid == 0);
12438 
12439 	/* Convert the block number to an absolute address. */
12440 	xp->xb_blkno += partition_offset;
12441 
12442 	SD_NEXT_IOSTART(index, un, bp);
12443 
12444 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12445 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12446 
12447 	return;
12448 
12449 error_exit:
12450 	bp->b_resid = bp->b_bcount;
12451 	SD_BEGIN_IODONE(index, un, bp);
12452 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12453 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12454 }
12455 
12456 
12457 /*
12458  *    Function: sd_mapblockaddr_iodone
12459  *
12460  * Description: Completion-side processing for partition management.
12461  *
12462  *     Context: May be called under interrupt context
12463  */
12464 
12465 static void
12466 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12467 {
12468 	/* int	partition; */	/* Not used, see below. */
12469 	ASSERT(un != NULL);
12470 	ASSERT(bp != NULL);
12471 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12472 
12473 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12474 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12475 
12476 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12477 		/*
12478 		 * We have an "overrun" buf to deal with...
12479 		 */
12480 		struct sd_xbuf	*xp;
12481 		struct buf	*obp;	/* ptr to the original buf */
12482 
12483 		xp = SD_GET_XBUF(bp);
12484 		ASSERT(xp != NULL);
12485 
12486 		/* Retrieve the pointer to the original buf */
12487 		obp = (struct buf *)xp->xb_private;
12488 		ASSERT(obp != NULL);
12489 
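		/*
		 * The bytes actually transferred by the overrun buf are
		 * (bp->b_bcount - bp->b_resid); whatever the original
		 * request asked for beyond that is its residual.
		 */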
12490 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12491 		bioerror(obp, bp->b_error);
12492 
12493 		sd_bioclone_free(bp);
12494 
12495 		/*
12496 		 * Get back the original buf.
12497 		 * Note that since the restoration of xb_blkno below
12498 		 * was removed, the sd_xbuf is not needed.
12499 		 */
12500 		bp = obp;
12501 		/*
12502 		 * xp = SD_GET_XBUF(bp);
12503 		 * ASSERT(xp != NULL);
12504 		 */
12505 	}
12506 
12507 	/*
12508 	 * Convert xp->xb_blkno back to a minor-device relative value.
12509 	 * Note: this has been commented out, as it is not needed in the
12510 	 * current implementation of the driver (ie, since this function
12511 	 * is at the top of the layering chains, so the info will be
12512 	 * discarded) and it is in the "hot" IO path.
12513 	 *
12514 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12515 	 * xp->xb_blkno -= un->un_offset[partition];
12516 	 */
12517 
12518 	SD_NEXT_IODONE(index, un, bp);
12519 
12520 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12521 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12522 }
12523 
12524 
12525 /*
12526  *    Function: sd_mapblocksize_iostart
12527  *
12528  * Description: Convert between system block size (un->un_sys_blocksize)
12529  *		and target block size (un->un_tgt_blocksize).
12530  *
12531  *     Context: Can sleep to allocate resources.
12532  *
12533  * Assumptions: A higher layer has already performed any partition validation,
12534  *		and converted the xp->xb_blkno to an absolute value relative
12535  *		to the start of the device.
12536  *
12537  *		It is also assumed that the higher layer has implemented
12538  *		an "overrun" mechanism for the case where the request would
12539  *		read/write beyond the end of a partition.  In this case we
12540  *		assume (and ASSERT) that bp->b_resid == 0.
12541  *
12542  *		Note: The implementation for this routine assumes the target
12543  *		block size remains constant between allocation and transport.
12544  */
12545 
12546 static void
12547 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12548 {
12549 	struct sd_mapblocksize_info	*bsp;
12550 	struct sd_xbuf			*xp;
12551 	offset_t first_byte;
12552 	daddr_t	start_block, end_block;
12553 	daddr_t	request_bytes;
12554 	ushort_t is_aligned = FALSE;
12555 
12556 	ASSERT(un != NULL);
12557 	ASSERT(bp != NULL);
12558 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12559 	ASSERT(bp->b_resid == 0);
12560 
12561 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12562 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12563 
12564 	/*
12565 	 * For a non-writable CD, a write request is an error
12566 	 */
12567 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12568 	    (un->un_f_mmc_writable_media == FALSE)) {
12569 		bioerror(bp, EIO);
12570 		bp->b_resid = bp->b_bcount;
12571 		SD_BEGIN_IODONE(index, un, bp);
12572 		return;
12573 	}
12574 
12575 	/*
12576 	 * We do not need a shadow buf if the device is using
12577 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12578 	 * In this case there is no layer-private data block allocated.
12579 	 */
12580 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12581 	    (bp->b_bcount == 0)) {
12582 		goto done;
12583 	}
12584 
12585 #if defined(__i386) || defined(__amd64)
12586 	/* We do not support non-block-aligned transfers for ROD devices */
12587 	ASSERT(!ISROD(un));
12588 #endif
12589 
12590 	xp = SD_GET_XBUF(bp);
12591 	ASSERT(xp != NULL);
12592 
12593 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12594 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12595 	    un->un_tgt_blocksize, un->un_sys_blocksize);
12596 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12597 	    "request start block:0x%x\n", xp->xb_blkno);
12598 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12599 	    "request len:0x%x\n", bp->b_bcount);
12600 
12601 	/*
12602 	 * Allocate the layer-private data area for the mapblocksize layer.
12603 	 * Layers are allowed to use the xp_private member of the sd_xbuf
12604 	 * struct to store the pointer to their layer-private data block, but
12605 	 * each layer also has the responsibility of restoring the prior
12606 	 * contents of xb_private before returning the buf/xbuf to the
12607 	 * higher layer that sent it.
12608 	 *
12609 	 * Here we save the prior contents of xp->xb_private into the
12610 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12611 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12612 	 * the layer-private area and returning the buf/xbuf to the layer
12613 	 * that sent it.
12614 	 *
12615 	 * Note that here we use kmem_zalloc for the allocation as there are
12616 	 * parts of the mapblocksize code that expect certain fields to be
12617 	 * zero unless explicitly set to a required value.
12618 	 */
12619 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12620 	bsp->mbs_oprivate = xp->xb_private;
12621 	xp->xb_private = bsp;
12622 
12623 	/*
12624 	 * This treats the data on the disk (target) as an array of bytes.
12625 	 * first_byte is the byte offset, from the beginning of the device,
12626 	 * to the location of the request. This is converted from a
12627 	 * un->un_sys_blocksize block address to a byte offset, and then back
12628 	 * to a block address based upon a un->un_tgt_blocksize block size.
12629 	 *
12630 	 * xp->xb_blkno should be absolute upon entry into this function,
12631 	 * but it is based upon partitions that use the "system"
12632 	 * block size. It must be adjusted to reflect the block size of
12633 	 * the target.
12634 	 *
12635 	 * Note that end_block is actually the block that follows the last
12636 	 * block of the request, but that's what is needed for the computation.
12637 	 */
12638 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12639 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12640 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12641 	    un->un_tgt_blocksize;
12642 
12643 	/* request_bytes is rounded up to a multiple of the target block size */
12644 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
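	/*
	 * For example (hypothetical sizes): with un_sys_blocksize = 512,
	 * un_tgt_blocksize = 2048, xb_blkno = 5 and b_bcount = 1024:
	 * first_byte = 5 * 512 = 2560, start_block = 2560 / 2048 = 1, and
	 * end_block = (2560 + 1024 + 2047) / 2048 = 2, so the request maps
	 * onto a single 2048-byte target block (request_bytes = 2048) and
	 * is unaligned (2560 % 2048 != 0), requiring a shadow buf.
	 */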
12645 
12646 	/*
12647 	 * See if the starting address of the request and the request
12648 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12649 	 * then we do not need to allocate a shadow buf to handle the request.
12650 	 */
12651 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12652 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12653 		is_aligned = TRUE;
12654 	}
12655 
12656 	if ((bp->b_flags & B_READ) == 0) {
12657 		/*
12658 		 * Lock the range for a write operation. An aligned request is
12659 		 * considered a simple write; otherwise the request must be a
12660 		 * read-modify-write.
12661 		 */
12662 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12663 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12664 	}
12665 
12666 	/*
12667 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12668 	 * where the READ command is generated for a read-modify-write. (The
12669 	 * write phase is deferred until after the read completes.)
12670 	 */
12671 	if (is_aligned == FALSE) {
12672 
12673 		struct sd_mapblocksize_info	*shadow_bsp;
12674 		struct sd_xbuf	*shadow_xp;
12675 		struct buf	*shadow_bp;
12676 
12677 		/*
12678 		 * Allocate the shadow buf and its associated xbuf. Note that
12679 		 * after this call the xb_blkno value in both the original
12680 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12681 		 * same: absolute relative to the start of the device, and
12682 		 * adjusted for the target block size. The b_blkno in the
12683 		 * shadow buf will also be set to this value. We should never
12684 		 * change b_blkno in the original bp however.
12685 		 *
12686 		 * Note also that the shadow buf will always need to be a
12687 		 * READ command, regardless of whether the incoming command
12688 		 * is a READ or a WRITE.
12689 		 */
12690 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12691 		    xp->xb_blkno,
12692 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12693 
12694 		shadow_xp = SD_GET_XBUF(shadow_bp);
12695 
12696 		/*
12697 		 * Allocate the layer-private data for the shadow buf.
12698 		 * (No need to preserve xb_private in the shadow xbuf.)
12699 		 */
12700 		shadow_xp->xb_private = shadow_bsp =
12701 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12702 
12703 		/*
12704 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12705 		 * to figure out where the start of the user data is (based upon
12706 		 * the system block size) in the data returned by the READ
12707 		 * command (which will be based upon the target blocksize). Note
12708 		 * that this is only really used if the request is unaligned.
12709 		 */
12710 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12711 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12712 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12713 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12714 
12715 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12716 
12717 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12718 
12719 		/* Transfer the wmap (if any) to the shadow buf */
12720 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12721 		bsp->mbs_wmp = NULL;
12722 
12723 		/*
12724 		 * The shadow buf goes on from here in place of the
12725 		 * original buf.
12726 		 */
12727 		shadow_bsp->mbs_orig_bp = bp;
12728 		bp = shadow_bp;
12729 	}
12730 
12731 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12732 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12733 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12734 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12735 	    request_bytes);
12736 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12737 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
12738 
12739 done:
12740 	SD_NEXT_IOSTART(index, un, bp);
12741 
12742 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12743 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12744 }
12745 
12746 
12747 /*
12748  *    Function: sd_mapblocksize_iodone
12749  *
12750  * Description: Completion side processing for block-size mapping.
12751  *
12752  *     Context: May be called under interrupt context
12753  */
12754 
12755 static void
12756 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12757 {
12758 	struct sd_mapblocksize_info	*bsp;
12759 	struct sd_xbuf	*xp;
12760 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12761 	struct buf	*orig_bp;	/* ptr to the original buf */
12762 	offset_t	shadow_end;
12763 	offset_t	request_end;
12764 	offset_t	shadow_start;
12765 	ssize_t		copy_offset;
12766 	size_t		copy_length;
12767 	size_t		shortfall;
12768 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12769 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
12770 
12771 	ASSERT(un != NULL);
12772 	ASSERT(bp != NULL);
12773 
12774 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12775 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12776 
12777 	/*
12778 	 * There is no shadow buf or layer-private data if the target is
12779 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12780 	 */
12781 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12782 	    (bp->b_bcount == 0)) {
12783 		goto exit;
12784 	}
12785 
12786 	xp = SD_GET_XBUF(bp);
12787 	ASSERT(xp != NULL);
12788 
12789 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12790 	bsp = xp->xb_private;
12791 
12792 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12793 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12794 
12795 	if (is_write) {
12796 		/*
12797 		 * For a WRITE request we must free up the block range that
12798 		 * we have locked up.  This holds regardless of whether this is
12799 		 * an aligned write request or a read-modify-write request.
12800 		 */
12801 		sd_range_unlock(un, bsp->mbs_wmp);
12802 		bsp->mbs_wmp = NULL;
12803 	}
12804 
12805 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
12806 		/*
12807 		 * An aligned read or write command will have no shadow buf;
12808 		 * there is not much else to do with it.
12809 		 */
12810 		goto done;
12811 	}
12812 
12813 	orig_bp = bsp->mbs_orig_bp;
12814 	ASSERT(orig_bp != NULL);
12815 	orig_xp = SD_GET_XBUF(orig_bp);
12816 	ASSERT(orig_xp != NULL);
12817 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12818 
12819 	if (!is_write && has_wmap) {
12820 		/*
12821 		 * A READ with a wmap means this is the READ phase of a
12822 		 * read-modify-write. If an error occurred on the READ then
12823 		 * we do not proceed with the WRITE phase or copy any data.
12824 		 * Just release the write maps and return with an error.
12825 		 */
12826 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12827 			orig_bp->b_resid = orig_bp->b_bcount;
12828 			bioerror(orig_bp, bp->b_error);
12829 			sd_range_unlock(un, bsp->mbs_wmp);
12830 			goto freebuf_done;
12831 		}
12832 	}
12833 
12834 	/*
12835 	 * Here is where we set up to copy the data from the shadow buf
12836 	 * into the space associated with the original buf.
12837 	 *
12838 	 * To deal with the conversion between block sizes, these
12839 	 * computations treat the data as an array of bytes, with the
12840 	 * first byte (byte 0) corresponding to the first byte in the
12841 	 * first block on the disk.
12842 	 */
12843 
12844 	/*
12845 	 * shadow_start and shadow_end indicate the byte range of the
12846 	 * data returned with the shadow IO request.
12847 	 */
12848 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12849 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12850 
12851 	/*
12852 	 * copy_offset gives the offset (in bytes) from the start of the first
12853 	 * block of the READ request to the beginning of the data.  We retrieve
12854 	 * this value from mbs_copy_offset in the layer-private data, saved
12855 	 * there by sd_mapblocksize_iostart(). copy_length gives the amount of
12856 	 * data to be copied (in bytes).
12857 	 */
12858 	copy_offset  = bsp->mbs_copy_offset;
12859 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12860 	copy_length  = orig_bp->b_bcount;
12861 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
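	/*
	 * Continuing the hypothetical example from sd_mapblocksize_iostart():
	 * for a 1024-byte request at byte offset 2560 with a 2048-byte target
	 * block, shadow_start = 2048, copy_offset = 512 and copy_length =
	 * 1024. A full shadow READ gives shadow_end = 4096, which covers
	 * request_end = 2048 + 512 + 1024 = 3584, so b_resid is set to 0.
	 */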
12862 
12863 	/*
12864 	 * Set up the resid and error fields of orig_bp as appropriate.
12865 	 */
12866 	if (shadow_end >= request_end) {
12867 		/* We got all the requested data; set resid to zero */
12868 		orig_bp->b_resid = 0;
12869 	} else {
12870 		/*
12871 		 * We failed to get enough data to fully satisfy the original
12872 		 * request. Just copy back whatever data we got and set
12873 		 * up the residual and error code as required.
12874 		 *
12875 		 * 'shortfall' is the amount by which the data received with the
12876 		 * shadow buf has "fallen short" of the requested amount.
12877 		 */
12878 		shortfall = (size_t)(request_end - shadow_end);
12879 
12880 		if (shortfall > orig_bp->b_bcount) {
12881 			/*
12882 			 * We did not get enough data to even partially
12883 			 * fulfill the original request.  The residual is
12884 			 * equal to the amount requested.
12885 			 */
12886 			orig_bp->b_resid = orig_bp->b_bcount;
12887 		} else {
12888 			/*
12889 			 * We did not get all the data that we requested
12890 			 * from the device, but we will try to return what
12891 			 * portion we did get.
12892 			 */
12893 			orig_bp->b_resid = shortfall;
12894 		}
12895 		ASSERT(copy_length >= orig_bp->b_resid);
12896 		copy_length  -= orig_bp->b_resid;
12897 	}
12898 
12899 	/* Propagate the error code from the shadow buf to the original buf */
12900 	bioerror(orig_bp, bp->b_error);
12901 
12902 	if (is_write) {
12903 		goto freebuf_done;	/* No data copying for a WRITE */
12904 	}
12905 
12906 	if (has_wmap) {
12907 		/*
12908 		 * This is a READ command from the READ phase of a
12909 		 * read-modify-write request. We have to copy the data given
12910 		 * by the user OVER the data returned by the READ command,
12911 		 * then convert the command from a READ to a WRITE and send
12912 		 * it back to the target.
12913 		 */
12914 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12915 		    copy_length);
12916 
12917 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12918 
12919 		/*
12920 		 * Dispatch the WRITE command to the taskq thread, which
12921 		 * will in turn send the command to the target. When the
12922 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12923 		 * will get called again as part of the iodone chain
12924 		 * processing for it. Note that we will still be dealing
12925 		 * with the shadow buf at that point.
12926 		 */
12927 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12928 		    KM_NOSLEEP) != 0) {
12929 			/*
12930 			 * Dispatch was successful so we are done. Return
12931 			 * without going any higher up the iodone chain. Do
12932 			 * not free up any layer-private data until after the
12933 			 * WRITE completes.
12934 			 */
12935 			return;
12936 		}
12937 
12938 		/*
12939 		 * Dispatch of the WRITE command failed; set up the error
12940 		 * condition and send this IO back up the iodone chain.
12941 		 */
12942 		bioerror(orig_bp, EIO);
12943 		orig_bp->b_resid = orig_bp->b_bcount;
12944 
12945 	} else {
12946 		/*
12947 		 * This is a regular READ request (ie, not a RMW). Copy the
12948 		 * data from the shadow buf into the original buf. The
12949 		 * copy_offset compensates for any "misalignment" between the
12950 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12951 		 * original buf (with its un->un_sys_blocksize blocks).
12952 		 */
12953 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12954 		    copy_length);
12955 	}
12956 
12957 freebuf_done:
12958 
12959 	/*
12960 	 * At this point we still have both the shadow buf AND the original
12961 	 * buf to deal with, as well as the layer-private data area in each.
12962 	 * Local variables are as follows:
12963 	 *
12964 	 * bp -- points to shadow buf
12965 	 * xp -- points to xbuf of shadow buf
12966 	 * bsp -- points to layer-private data area of shadow buf
12967 	 * orig_bp -- points to original buf
12968 	 *
12969 	 * First free the shadow buf and its associated xbuf, then free the
12970 	 * layer-private data area from the shadow buf. There is no need to
12971 	 * restore xb_private in the shadow xbuf.
12972 	 */
12973 	sd_shadow_buf_free(bp);
12974 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12975 
12976 	/*
12977 	 * Now update the local variables to point to the original buf, xbuf,
12978 	 * and layer-private area.
12979 	 */
12980 	bp = orig_bp;
12981 	xp = SD_GET_XBUF(bp);
12982 	ASSERT(xp != NULL);
12983 	ASSERT(xp == orig_xp);
12984 	bsp = xp->xb_private;
12985 	ASSERT(bsp != NULL);
12986 
12987 done:
12988 	/*
12989 	 * Restore xb_private to whatever it was set to by the next higher
12990 	 * layer in the chain, then free the layer-private data area.
12991 	 */
12992 	xp->xb_private = bsp->mbs_oprivate;
12993 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12994 
12995 exit:
12996 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
12997 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
12998 
12999 	SD_NEXT_IODONE(index, un, bp);
13000 }
13001 
13002 
13003 /*
13004  *    Function: sd_checksum_iostart
13005  *
13006  * Description: A stub function for a layer that's currently not used.
13007  *		For now just a placeholder.
13008  *
13009  *     Context: Kernel thread context
13010  */
13011 
13012 static void
13013 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13014 {
13015 	ASSERT(un != NULL);
13016 	ASSERT(bp != NULL);
13017 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13018 	SD_NEXT_IOSTART(index, un, bp);
13019 }
13020 
13021 
13022 /*
13023  *    Function: sd_checksum_iodone
13024  *
13025  * Description: A stub function for a layer that's currently not used.
13026  *		For now just a placeholder.
13027  *
13028  *     Context: May be called under interrupt context
13029  */
13030 
13031 static void
13032 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13033 {
13034 	ASSERT(un != NULL);
13035 	ASSERT(bp != NULL);
13036 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13037 	SD_NEXT_IODONE(index, un, bp);
13038 }
13039 
13040 
13041 /*
13042  *    Function: sd_checksum_uscsi_iostart
13043  *
13044  * Description: A stub function for a layer that's currently not used.
13045  *		For now just a placeholder.
13046  *
13047  *     Context: Kernel thread context
13048  */
13049 
13050 static void
13051 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13052 {
13053 	ASSERT(un != NULL);
13054 	ASSERT(bp != NULL);
13055 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13056 	SD_NEXT_IOSTART(index, un, bp);
13057 }
13058 
13059 
13060 /*
13061  *    Function: sd_checksum_uscsi_iodone
13062  *
13063  * Description: A stub function for a layer that's currently not used.
13064  *		For now just a placeholder.
13065  *
13066  *     Context: May be called under interrupt context
13067  */
13068 
13069 static void
13070 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13071 {
13072 	ASSERT(un != NULL);
13073 	ASSERT(bp != NULL);
13074 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13075 	SD_NEXT_IODONE(index, un, bp);
13076 }
13077 
13078 
13079 /*
13080  *    Function: sd_pm_iostart
13081  *
13082  * Description: iostart-side routine for power management.
13083  *
13084  *     Context: Kernel thread context
13085  */
13086 
13087 static void
13088 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13089 {
13090 	ASSERT(un != NULL);
13091 	ASSERT(bp != NULL);
13092 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13093 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13094 
13095 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13096 
13097 	if (sd_pm_entry(un) != DDI_SUCCESS) {
13098 		/*
13099 		 * Set up to return the failed buf back up the 'iodone'
13100 		 * side of the calling chain.
13101 		 */
13102 		bioerror(bp, EIO);
13103 		bp->b_resid = bp->b_bcount;
13104 
13105 		SD_BEGIN_IODONE(index, un, bp);
13106 
13107 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13108 		return;
13109 	}
13110 
13111 	SD_NEXT_IOSTART(index, un, bp);
13112 
13113 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13114 }
13115 
13116 
13117 /*
13118  *    Function: sd_pm_iodone
13119  *
13120  * Description: iodone-side routine for power management.
13121  *
13122  *     Context: May be called from interrupt context
13123  */
13124 
13125 static void
13126 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13127 {
13128 	ASSERT(un != NULL);
13129 	ASSERT(bp != NULL);
13130 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13131 
13132 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13133 
13134 	/*
13135 	 * After attach the following flag is only read, so don't
13136 	 * take the penalty of acquiring a mutex for it.
13137 	 */
13138 	if (un->un_f_pm_is_enabled == TRUE) {
13139 		sd_pm_exit(un);
13140 	}
13141 
13142 	SD_NEXT_IODONE(index, un, bp);
13143 
13144 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13145 }
13146 
13147 
13148 /*
13149  *    Function: sd_core_iostart
13150  *
13151  * Description: Primary driver function for enqueuing buf(9S) structs from
13152  *		the system and initiating IO to the target device
13153  *
13154  *     Context: Kernel thread context. Can sleep.
13155  *
13156  * Assumptions:  - The given xp->xb_blkno is absolute
13157  *		   (ie, relative to the start of the device).
13158  *		 - The IO is to be done using the native blocksize of
13159  *		   the device, as specified in un->un_tgt_blocksize.
13160  */
13161 /* ARGSUSED */
13162 static void
13163 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13164 {
13165 	struct sd_xbuf *xp;
13166 
13167 	ASSERT(un != NULL);
13168 	ASSERT(bp != NULL);
13169 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13170 	ASSERT(bp->b_resid == 0);
13171 
13172 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13173 
13174 	xp = SD_GET_XBUF(bp);
13175 	ASSERT(xp != NULL);
13176 
13177 	mutex_enter(SD_MUTEX(un));
13178 
13179 	/*
13180 	 * If we are currently in the failfast state, fail any new IO
13181 	 * that has B_FAILFAST set, then return.
13182 	 */
13183 	if ((bp->b_flags & B_FAILFAST) &&
13184 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13185 		mutex_exit(SD_MUTEX(un));
13186 		bioerror(bp, EIO);
13187 		bp->b_resid = bp->b_bcount;
13188 		SD_BEGIN_IODONE(index, un, bp);
13189 		return;
13190 	}
13191 
13192 	if (SD_IS_DIRECT_PRIORITY(xp)) {
13193 		/*
13194 		 * Priority command -- transport it immediately.
13195 		 *
13196 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13197 		 * because all direct priority commands should be associated
13198 		 * with error recovery actions which we don't want to retry.
13199 		 */
13200 		sd_start_cmds(un, bp);
13201 	} else {
13202 		/*
13203 		 * Normal command -- add it to the wait queue, then start
13204 		 * transporting commands from the wait queue.
13205 		 */
13206 		sd_add_buf_to_waitq(un, bp);
13207 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13208 		sd_start_cmds(un, NULL);
13209 	}
13210 
13211 	mutex_exit(SD_MUTEX(un));
13212 
13213 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13214 }
13215 
13216 
13217 /*
13218  *    Function: sd_init_cdb_limits
13219  *
13220  * Description: This is to handle scsi_pkt initialization differences
13221  *		between the driver platforms.
13222  *
13223  *		Legacy behaviors:
13224  *
13225  *		If the block number or the sector count exceeds the
13226  *		capabilities of a Group 0 command, shift over to a
13227  *		Group 1 command. We don't blindly use Group 1
13228  *		commands because a) some drives (CDC Wren IVs) get a
13229  *		bit confused, and b) there is probably a fair amount
13230  *		of speed difference for a target to receive and decode
13231  *		a 10 byte command instead of a 6 byte command.
13232  *
13233  *		The xfer time difference of 6 vs 10 byte CDBs is
13234  *		still significant so this code is still worthwhile.
13235  *		10 byte CDBs are very inefficient with the fas HBA driver
13236  *		and older disks. Each CDB byte took 1 usec with some
13237  *		popular disks.
13238  *
13239  *     Context: Must be called at attach time
13240  */
13241 
13242 static void
13243 sd_init_cdb_limits(struct sd_lun *un)
13244 {
13245 	int hba_cdb_limit;
13246 
13247 	/*
13248 	 * Use CDB_GROUP1 commands for most devices except for
13249 	 * parallel SCSI fixed drives, in which case we get better
13250 	 * performance using CDB_GROUP0 commands (where applicable).
13251 	 */
13252 	un->un_mincdb = SD_CDB_GROUP1;
13253 #if !defined(__fibre)
13254 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13255 	    !un->un_f_has_removable_media) {
13256 		un->un_mincdb = SD_CDB_GROUP0;
13257 	}
13258 #endif
13259 
13260 	/*
13261 	 * Try to read the max-cdb-length supported by the HBA.
13262 	 */
13263 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13264 	if (0 >= un->un_max_hba_cdb) {
13265 		un->un_max_hba_cdb = CDB_GROUP4;
13266 		hba_cdb_limit = SD_CDB_GROUP4;
13267 	} else if (0 < un->un_max_hba_cdb &&
13268 	    un->un_max_hba_cdb < CDB_GROUP1) {
13269 		hba_cdb_limit = SD_CDB_GROUP0;
13270 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13271 	    un->un_max_hba_cdb < CDB_GROUP5) {
13272 		hba_cdb_limit = SD_CDB_GROUP1;
13273 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13274 	    un->un_max_hba_cdb < CDB_GROUP4) {
13275 		hba_cdb_limit = SD_CDB_GROUP5;
13276 	} else {
13277 		hba_cdb_limit = SD_CDB_GROUP4;
13278 	}
13279 
13280 	/*
13281 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13282 	 * commands for fixed disks unless we are building for a 32 bit
13283 	 * kernel.
13284 	 */
13285 #ifdef _LP64
13286 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13287 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13288 #else
13289 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13290 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13291 #endif
13292 
13293 	/*
13294 	 * x86 systems require the PKT_DMA_PARTIAL flag
13295 	 */
13296 #if defined(__x86)
13297 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13298 #else
13299 	un->un_pkt_flags = 0;
13300 #endif
13301 
13302 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13303 	    ? sizeof (struct scsi_arq_status) : 1);
13304 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13305 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13306 }
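
/*
 * The mapping from the HBA's reported max-cdb-length to a CDB group
 * limit above is easier to see with the standard CDB byte counts
 * substituted in (CDB_GROUP0 is 6 bytes, CDB_GROUP1 is 10, CDB_GROUP5
 * is 12, and CDB_GROUP4 is 16).  Below is a stand-alone sketch of the
 * same decision chain; the names are hypothetical and it is not part
 * of the driver.
 */
#if 0	/* illustrative sketch -- not compiled as part of the driver */
enum { EX_GROUP0, EX_GROUP1, EX_GROUP5, EX_GROUP4 };

static int
example_cdb_limit(int max_hba_cdb)
{
	if (max_hba_cdb <= 0)	/* cap not reported; assume the largest */
		return (EX_GROUP4);
	if (max_hba_cdb < 10)	/* < CDB_GROUP1: only 6-byte CDBs fit */
		return (EX_GROUP0);
	if (max_hba_cdb < 12)	/* < CDB_GROUP5 */
		return (EX_GROUP1);
	if (max_hba_cdb < 16)	/* < CDB_GROUP4 */
		return (EX_GROUP5);
	return (EX_GROUP4);
}
#endif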
13307 
13308 
13309 /*
13310  *    Function: sd_initpkt_for_buf
13311  *
13312  * Description: Allocate and initialize for transport a scsi_pkt struct,
13313  *		based upon the info specified in the given buf struct.
13314  *
13315  *		Assumes the xb_blkno in the request is absolute (ie,
13316  *		relative to the start of the device, NOT the partition).
13317  *		Also assumes that the request is using the native block
13318  *		size of the device (as returned by the READ CAPACITY
13319  *		command).
13320  *
13321  * Return Code: SD_PKT_ALLOC_SUCCESS
13322  *		SD_PKT_ALLOC_FAILURE
13323  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13324  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13325  *
13326  *     Context: Kernel thread and may be called from software interrupt context
13327  *		as part of a sdrunout callback. This function may not block or
13328  *		call routines that block
13329  */
13330 
13331 static int
13332 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13333 {
13334 	struct sd_xbuf	*xp;
13335 	struct scsi_pkt *pktp = NULL;
13336 	struct sd_lun	*un;
13337 	size_t		blockcount;
13338 	daddr_t		startblock;
13339 	int		rval;
13340 	int		cmd_flags;
13341 
13342 	ASSERT(bp != NULL);
13343 	ASSERT(pktpp != NULL);
13344 	xp = SD_GET_XBUF(bp);
13345 	ASSERT(xp != NULL);
13346 	un = SD_GET_UN(bp);
13347 	ASSERT(un != NULL);
13348 	ASSERT(mutex_owned(SD_MUTEX(un)));
13349 	ASSERT(bp->b_resid == 0);
13350 
13351 	SD_TRACE(SD_LOG_IO_CORE, un,
13352 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13353 
13354 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13355 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13356 		/*
13357 		 * Already have a scsi_pkt -- just need DMA resources.
13358 		 * We must recompute the CDB in case the mapping returns
13359 		 * a nonzero pkt_resid.
13360 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13361 		 * that is being retried, the unmap/remap of the DMA resources
13362 		 * will result in the entire transfer starting over again
13363 		 * from the very first block.
13364 		 */
13365 		ASSERT(xp->xb_pktp != NULL);
13366 		pktp = xp->xb_pktp;
13367 	} else {
13368 		pktp = NULL;
13369 	}
13370 #endif /* __i386 || __amd64 */
13371 
13372 	startblock = xp->xb_blkno;	/* Absolute block num. */
13373 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13374 
13375 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13376 
13377 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13378 
13379 #else
13380 
13381 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13382 
13383 #endif
13384 
13385 	/*
13386 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13387 	 * call scsi_init_pkt, and build the CDB.
13388 	 */
13389 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13390 	    cmd_flags, sdrunout, (caddr_t)un,
13391 	    startblock, blockcount);
13392 
13393 	if (rval == 0) {
13394 		/*
13395 		 * Success.
13396 		 *
13397 		 * If partial DMA is being used and required for this transfer,
13398 		 * set it up here.
13399 		 */
13400 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13401 		    (pktp->pkt_resid != 0)) {
13402 
13403 			/*
13404 			 * Save the pkt_resid for the next transfer; the
13405 			 * same size CDB is reused for each portion.
13406 			 */
13407 			xp->xb_dma_resid = pktp->pkt_resid;
13408 
13409 			/* rezero resid */
13410 			pktp->pkt_resid = 0;
13411 
13412 		} else {
13413 			xp->xb_dma_resid = 0;
13414 		}
13415 
13416 		pktp->pkt_flags = un->un_tagflags;
13417 		pktp->pkt_time  = un->un_cmd_timeout;
13418 		pktp->pkt_comp  = sdintr;
13419 
13420 		pktp->pkt_private = bp;
13421 		*pktpp = pktp;
13422 
13423 		SD_TRACE(SD_LOG_IO_CORE, un,
13424 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13425 
13426 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13427 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13428 #endif
13429 
13430 		return (SD_PKT_ALLOC_SUCCESS);
13431 
13432 	}
13433 
13434 	/*
13435 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13436 	 * from sd_setup_rw_pkt.
13437 	 */
13438 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13439 
13440 	if (rval == SD_PKT_ALLOC_FAILURE) {
13441 		*pktpp = NULL;
13442 		/*
13443 		 * Set the driver state to RWAIT to indicate the driver
13444 		 * is waiting on resource allocations. The driver will not
13445 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13446 		 */
13447 		New_state(un, SD_STATE_RWAIT);
13448 
13449 		SD_ERROR(SD_LOG_IO_CORE, un,
13450 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13451 
13452 		if ((bp->b_flags & B_ERROR) != 0) {
13453 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13454 		}
13455 		return (SD_PKT_ALLOC_FAILURE);
13456 	} else {
13457 		/*
13458 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13459 		 *
13460 		 * This should never happen.  Maybe someone messed with the
13461 		 * kernel's minphys?
13462 		 */
13463 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13464 		    "Request rejected: too large for CDB: "
13465 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13466 		SD_ERROR(SD_LOG_IO_CORE, un,
13467 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13468 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13469 
13470 	}
13471 }
13472 
13473 
13474 /*
13475  *    Function: sd_destroypkt_for_buf
13476  *
13477  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13478  *
13479  *     Context: Kernel thread or interrupt context
13480  */
13481 
13482 static void
13483 sd_destroypkt_for_buf(struct buf *bp)
13484 {
13485 	ASSERT(bp != NULL);
13486 	ASSERT(SD_GET_UN(bp) != NULL);
13487 
13488 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13489 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13490 
13491 	ASSERT(SD_GET_PKTP(bp) != NULL);
13492 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13493 
13494 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13495 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13496 }
13497 
13498 /*
13499  *    Function: sd_setup_rw_pkt
13500  *
13501  * Description: Determines appropriate CDB group for the requested LBA
13502  *		and transfer length, calls scsi_init_pkt, and builds
13503  *		the CDB.  Do not use for partial DMA transfers except
13504  *		for the initial transfer since the CDB size must
13505  *		remain constant.
13506  *
13507  *     Context: Kernel thread and may be called from software interrupt
13508  *		context as part of a sdrunout callback. This function may not
13509  *		block or call routines that block
13510  */
13511 
13512 
13513 int
13514 sd_setup_rw_pkt(struct sd_lun *un,
13515     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13516     int (*callback)(caddr_t), caddr_t callback_arg,
13517     diskaddr_t lba, uint32_t blockcount)
13518 {
13519 	struct scsi_pkt *return_pktp;
13520 	union scsi_cdb *cdbp;
13521 	struct sd_cdbinfo *cp = NULL;
13522 	int i;
13523 
13524 	/*
13525 	 * See which size CDB to use, based upon the request.
13526 	 */
13527 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13528 
13529 		/*
13530 		 * Check lba and block count against sd_cdbtab limits.
13531 		 * In the partial DMA case, we have to use the same size
13532 		 * CDB for all the transfers.  Check lba + blockcount
13533 		 * against the max LBA so we know that segment of the
13534 		 * transfer can use the CDB we select.
13535 		 */
13536 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13537 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13538 
13539 			/*
13540 			 * The command will fit into the CDB type
13541 			 * specified by sd_cdbtab[i].
13542 			 */
13543 			cp = sd_cdbtab + i;
13544 
13545 			/*
13546 			 * Call scsi_init_pkt so we can fill in the
13547 			 * CDB.
13548 			 */
13549 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13550 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13551 			    flags, callback, callback_arg);
13552 
13553 			if (return_pktp != NULL) {
13554 
13555 				/*
13556 				 * Return new value of pkt
13557 				 */
13558 				*pktpp = return_pktp;
13559 
13560 				/*
13561 				 * To be safe, zero the CDB ensuring there is
13562 				 * no leftover data from a previous command.
13563 				 */
13564 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13565 
13566 				/*
13567 				 * Handle partial DMA mapping
13568 				 */
13569 				if (return_pktp->pkt_resid != 0) {
13570 
13571 					/*
13572 					 * Not going to xfer as many blocks as
13573 					 * originally expected
13574 					 */
13575 					blockcount -=
13576 					    SD_BYTES2TGTBLOCKS(un,
13577 						return_pktp->pkt_resid);
13578 				}
13579 
13580 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13581 
13582 				/*
13583 				 * Set command byte based on the CDB
13584 				 * type we matched.
13585 				 */
13586 				cdbp->scc_cmd = cp->sc_grpmask |
13587 				    ((bp->b_flags & B_READ) ?
13588 					SCMD_READ : SCMD_WRITE);
13589 
13590 				SD_FILL_SCSI1_LUN(un, return_pktp);
13591 
13592 				/*
13593 				 * Fill in LBA and length
13594 				 */
13595 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13596 				    (cp->sc_grpcode == CDB_GROUP4) ||
13597 				    (cp->sc_grpcode == CDB_GROUP0) ||
13598 				    (cp->sc_grpcode == CDB_GROUP5));
13599 
13600 				if (cp->sc_grpcode == CDB_GROUP1) {
13601 					FORMG1ADDR(cdbp, lba);
13602 					FORMG1COUNT(cdbp, blockcount);
13603 					return (0);
13604 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13605 					FORMG4LONGADDR(cdbp, lba);
13606 					FORMG4COUNT(cdbp, blockcount);
13607 					return (0);
13608 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13609 					FORMG0ADDR(cdbp, lba);
13610 					FORMG0COUNT(cdbp, blockcount);
13611 					return (0);
13612 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13613 					FORMG5ADDR(cdbp, lba);
13614 					FORMG5COUNT(cdbp, blockcount);
13615 					return (0);
13616 				}
13617 
13618 				/*
13619 				 * It should be impossible to not match one
13620 				 * of the CDB types above, so we should never
13621 				 * reach this point.  Set the CDB command byte
13622 				 * to test-unit-ready to avoid writing
13623 				 * to somewhere we don't intend.
13624 				 */
13625 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13626 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13627 			} else {
13628 				/*
13629 				 * Couldn't get scsi_pkt
13630 				 */
13631 				return (SD_PKT_ALLOC_FAILURE);
13632 			}
13633 		}
13634 	}
13635 
13636 	/*
13637 	 * None of the available CDB types were suitable.  This really
13638 	 * should never happen:  on a 64 bit system we support
13639 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13640 	 * and on a 32 bit system we will refuse to bind to a device
13641 	 * larger than 2TB so addresses will never be larger than 32 bits.
13642 	 */
13643 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13644 }
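
/*
 * The selection loop above picks the smallest CDB group whose maximum
 * LBA and maximum transfer length can describe the request.  The
 * stand-alone sketch below models that search with the standard SCSI
 * READ/WRITE limits filled in; the table layout is hypothetical (the
 * driver's actual limits live in sd_cdbtab/struct sd_cdbinfo).
 */
#if 0	/* illustrative sketch -- not compiled as part of the driver */
#include <stdint.h>

struct ex_cdbinfo {
	int		ex_grpcode;	/* CDB length in bytes */
	uint64_t	ex_maxlba;	/* largest addressable LBA */
	uint32_t	ex_maxlen;	/* largest block count */
};

static const struct ex_cdbinfo ex_cdbtab[] = {
	{  6, 0x1FFFFF,    0x100 },		/* Group 0: READ/WRITE(6) */
	{ 10, 0xFFFFFFFF,  0xFFFF },		/* Group 1: READ/WRITE(10) */
	{ 12, 0xFFFFFFFF,  0xFFFFFFFF },	/* Group 5: READ/WRITE(12) */
	{ 16, UINT64_MAX,  0xFFFFFFFF },	/* Group 4: READ/WRITE(16) */
};

static int
example_pick_cdb(uint64_t lba, uint32_t blockcount)
{
	int i;

	for (i = 0; i < 4; i++) {
		if ((lba + blockcount - 1 <= ex_cdbtab[i].ex_maxlba) &&
		    (blockcount <= ex_cdbtab[i].ex_maxlen))
			return (ex_cdbtab[i].ex_grpcode);
	}
	return (-1);	/* request cannot be described by any CDB */
}
#endif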
13645 
13646 #if defined(__i386) || defined(__amd64)
13647 /*
13648  *    Function: sd_setup_next_rw_pkt
13649  *
13650  * Description: Setup packet for partial DMA transfers, except for the
13651  * 		initial transfer.  sd_setup_rw_pkt should be used for
13652  *		the initial transfer.
13653  *
13654  *     Context: Kernel thread and may be called from interrupt context.
13655  */
13656 
13657 int
13658 sd_setup_next_rw_pkt(struct sd_lun *un,
13659     struct scsi_pkt *pktp, struct buf *bp,
13660     diskaddr_t lba, uint32_t blockcount)
13661 {
13662 	uchar_t com;
13663 	union scsi_cdb *cdbp;
13664 	uchar_t cdb_group_id;
13665 
13666 	ASSERT(pktp != NULL);
13667 	ASSERT(pktp->pkt_cdbp != NULL);
13668 
13669 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13670 	com = cdbp->scc_cmd;
13671 	cdb_group_id = CDB_GROUPID(com);
13672 
13673 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13674 	    (cdb_group_id == CDB_GROUPID_1) ||
13675 	    (cdb_group_id == CDB_GROUPID_4) ||
13676 	    (cdb_group_id == CDB_GROUPID_5));
13677 
13678 	/*
13679 	 * Move pkt to the next portion of the xfer.
13680 	 * func is NULL_FUNC so we do not have to release
13681 	 * the disk mutex here.
13682 	 */
13683 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13684 	    NULL_FUNC, NULL) == pktp) {
13685 		/* Success.  Handle partial DMA */
13686 		if (pktp->pkt_resid != 0) {
13687 			blockcount -=
13688 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13689 		}
13690 
13691 		cdbp->scc_cmd = com;
13692 		SD_FILL_SCSI1_LUN(un, pktp);
13693 		if (cdb_group_id == CDB_GROUPID_1) {
13694 			FORMG1ADDR(cdbp, lba);
13695 			FORMG1COUNT(cdbp, blockcount);
13696 			return (0);
13697 		} else if (cdb_group_id == CDB_GROUPID_4) {
13698 			FORMG4LONGADDR(cdbp, lba);
13699 			FORMG4COUNT(cdbp, blockcount);
13700 			return (0);
13701 		} else if (cdb_group_id == CDB_GROUPID_0) {
13702 			FORMG0ADDR(cdbp, lba);
13703 			FORMG0COUNT(cdbp, blockcount);
13704 			return (0);
13705 		} else if (cdb_group_id == CDB_GROUPID_5) {
13706 			FORMG5ADDR(cdbp, lba);
13707 			FORMG5COUNT(cdbp, blockcount);
13708 			return (0);
13709 		}
13710 
13711 		/* Unreachable */
13712 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13713 	}
13714 
13715 	/*
13716 	 * Error setting up next portion of cmd transfer.
13717 	 * Something is definitely very wrong and this
13718 	 * should not happen.
13719 	 */
13720 	return (SD_PKT_ALLOC_FAILURE);
13721 }
13722 #endif /* defined(__i386) || defined(__amd64) */
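
/*
 * Between calls to sd_setup_rw_pkt()/sd_setup_next_rw_pkt(), the caller
 * advances the transfer by however many blocks the previous DMA window
 * actually covered.  The sketch below is a schematic of that bookkeeping
 * (the byte-to-block conversion mirrors what SD_BYTES2TGTBLOCKS does);
 * all of the names here are hypothetical and it is not driver code.
 */
#if 0	/* illustrative sketch -- not compiled as part of the driver */
#include <stdint.h>

static void
example_advance_window(uint64_t *lba, uint32_t *blockcount,
    uint64_t window_bytes, uint64_t dma_resid_bytes,
    uint32_t tgt_blocksize)
{
	/* Blocks actually covered by the window just transported */
	uint64_t done_blocks =
	    (window_bytes - dma_resid_bytes) / tgt_blocksize;

	*lba += done_blocks;
	*blockcount -= (uint32_t)done_blocks;
}
#endif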
13723 
13724 /*
13725  *    Function: sd_initpkt_for_uscsi
13726  *
13727  * Description: Allocate and initialize for transport a scsi_pkt struct,
13728  *		based upon the info specified in the given uscsi_cmd struct.
13729  *
13730  * Return Code: SD_PKT_ALLOC_SUCCESS
13731  *		SD_PKT_ALLOC_FAILURE
13732  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13733  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13734  *
13735  *     Context: Kernel thread and may be called from software interrupt context
13736  *		as part of a sdrunout callback. This function may not block or
13737  *		call routines that block
13738  */
13739 
13740 static int
13741 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13742 {
13743 	struct uscsi_cmd *uscmd;
13744 	struct sd_xbuf	*xp;
13745 	struct scsi_pkt	*pktp;
13746 	struct sd_lun	*un;
13747 	uint32_t	flags = 0;
13748 
13749 	ASSERT(bp != NULL);
13750 	ASSERT(pktpp != NULL);
13751 	xp = SD_GET_XBUF(bp);
13752 	ASSERT(xp != NULL);
13753 	un = SD_GET_UN(bp);
13754 	ASSERT(un != NULL);
13755 	ASSERT(mutex_owned(SD_MUTEX(un)));
13756 
13757 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13758 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13759 	ASSERT(uscmd != NULL);
13760 
13761 	SD_TRACE(SD_LOG_IO_CORE, un,
13762 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13763 
13764 	/*
13765 	 * Allocate the scsi_pkt for the command.
13766 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13767 	 *	 during scsi_init_pkt time and will continue to use the
13768 	 *	 same path as long as the same scsi_pkt is used without
13769 	 *	 intervening scsi_dma_free(). Since uscsi command does
13770 	 *	 intervening scsi_dmafree(). Since a uscsi command does
13771 	 *	 not call scsi_dmafree() before retrying a failed command,
13772 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
13773 	 *	 NOT set, so that scsi_vhci can use another available path
13774 	 *	 for the retry. Besides, a uscsi command does not allow DMA
13775 	 *	 breakup, so there is no need to set the PKT_DMA_PARTIAL flag.
13776 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13777 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13778 	    sizeof (struct scsi_arq_status), 0,
13779 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13780 	    sdrunout, (caddr_t)un);
13781 
13782 	if (pktp == NULL) {
13783 		*pktpp = NULL;
13784 		/*
13785 		 * Set the driver state to RWAIT to indicate the driver
13786 		 * is waiting on resource allocations. The driver will not
13787 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13788 		 */
13789 		New_state(un, SD_STATE_RWAIT);
13790 
13791 		SD_ERROR(SD_LOG_IO_CORE, un,
13792 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13793 
13794 		if ((bp->b_flags & B_ERROR) != 0) {
13795 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13796 		}
13797 		return (SD_PKT_ALLOC_FAILURE);
13798 	}
13799 
13800 	/*
13801 	 * We do not do DMA breakup for USCSI commands, so return failure
13802 	 * here if all the needed DMA resources were not allocated.
13803 	 */
13804 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13805 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13806 		scsi_destroy_pkt(pktp);
13807 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13808 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13809 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13810 	}
13811 
13812 	/* Init the cdb from the given uscsi struct */
13813 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13814 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13815 
13816 	SD_FILL_SCSI1_LUN(un, pktp);
13817 
13818 	/*
13819 	 * Set up the optional USCSI flags. See the uscsi(7I) man page
13820 	 * for a listing of the supported flags.
13821 	 */
13822 
13823 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13824 		flags |= FLAG_SILENT;
13825 	}
13826 
13827 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13828 		flags |= FLAG_DIAGNOSE;
13829 	}
13830 
13831 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13832 		flags |= FLAG_ISOLATE;
13833 	}
13834 
13835 	if (un->un_f_is_fibre == FALSE) {
13836 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13837 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13838 		}
13839 	}
13840 
13841 	/*
13842 	 * Set the pkt flags here so we save time later.
13843 	 * Note: These flags are NOT in the uscsi man page!!!
13844 	 */
13845 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13846 		flags |= FLAG_HEAD;
13847 	}
13848 
13849 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13850 		flags |= FLAG_NOINTR;
13851 	}
13852 
13853 	/*
13854 	 * For tagged queueing, things get a bit complicated.
13855 	 * Check first for head of queue and last for ordered queue.
13856 	 * If neither head nor order, use the default driver tag flags.
13857 	 * If neither head nor ordered, use the default driver tag flags.
13858 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13859 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13860 			flags |= FLAG_HTAG;
13861 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13862 			flags |= FLAG_OTAG;
13863 		} else {
13864 			flags |= un->un_tagflags & FLAG_TAGMASK;
13865 		}
13866 	}
13867 
13868 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13869 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13870 	}
13871 
13872 	pktp->pkt_flags = flags;
13873 
13874 	/* Copy the caller's CDB into the pkt... */
13875 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13876 
13877 	if (uscmd->uscsi_timeout == 0) {
13878 		pktp->pkt_time = un->un_uscsi_timeout;
13879 	} else {
13880 		pktp->pkt_time = uscmd->uscsi_timeout;
13881 	}
13882 
13883 	/* need it later to identify USCSI request in sdintr */
13884 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13885 
13886 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13887 
13888 	pktp->pkt_private = bp;
13889 	pktp->pkt_comp = sdintr;
13890 	*pktpp = pktp;
13891 
13892 	SD_TRACE(SD_LOG_IO_CORE, un,
13893 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13894 
13895 	return (SD_PKT_ALLOC_SUCCESS);
13896 }
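
/*
 * For reference, the requests that reach this function originate from
 * the uscsi(7I) interface.  The user-level sketch below shows a minimal
 * consumer issuing a TEST UNIT READY through the USCSICMD ioctl, with
 * auto request sense enabled (the USCSI_RQENABLE path handled above).
 * Error handling is pared down to the essentials.
 */
#if 0	/* illustrative sketch -- not compiled as part of the driver */
#include <sys/scsi/impl/uscsi.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

static int
example_test_unit_ready(const char *devpath)
{
	struct uscsi_cmd ucmd;
	char cdb[6] = { 0 };	/* opcode 0x00 == TEST UNIT READY */
	char rqbuf[255];
	int fd, rv;

	if ((fd = open(devpath, O_RDONLY)) < 0)
		return (-1);

	(void) memset(&ucmd, 0, sizeof (ucmd));
	ucmd.uscsi_cdb	   = cdb;
	ucmd.uscsi_cdblen  = sizeof (cdb);
	ucmd.uscsi_bufaddr = NULL;	/* no data phase */
	ucmd.uscsi_buflen  = 0;
	ucmd.uscsi_rqbuf   = rqbuf;	/* sense data, if any, lands here */
	ucmd.uscsi_rqlen   = sizeof (rqbuf);
	ucmd.uscsi_timeout = 30;
	ucmd.uscsi_flags   = USCSI_SILENT | USCSI_RQENABLE;

	if ((rv = ioctl(fd, USCSICMD, &ucmd)) < 0) {
		(void) fprintf(stderr, "TUR failed, scsi status 0x%x\n",
		    ucmd.uscsi_status);
	}
	(void) close(fd);
	return (rv);
}
#endif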
13897 
13898 
13899 /*
13900  *    Function: sd_destroypkt_for_uscsi
13901  *
13902  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13903  *		IOs.. Also saves relevant info into the associated uscsi_cmd
13904  *		IOs. Also saves relevant info into the associated uscsi_cmd
13905  *
13906  *     Context: May be called under interrupt context
13907  */
13908 
13909 static void
13910 sd_destroypkt_for_uscsi(struct buf *bp)
13911 {
13912 	struct uscsi_cmd *uscmd;
13913 	struct sd_xbuf	*xp;
13914 	struct scsi_pkt	*pktp;
13915 	struct sd_lun	*un;
13916 
13917 	ASSERT(bp != NULL);
13918 	xp = SD_GET_XBUF(bp);
13919 	ASSERT(xp != NULL);
13920 	un = SD_GET_UN(bp);
13921 	ASSERT(un != NULL);
13922 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13923 	pktp = SD_GET_PKTP(bp);
13924 	ASSERT(pktp != NULL);
13925 
13926 	SD_TRACE(SD_LOG_IO_CORE, un,
13927 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13928 
13929 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13930 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13931 	ASSERT(uscmd != NULL);
13932 
13933 	/* Save the status and the residual into the uscsi_cmd struct */
13934 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13935 	uscmd->uscsi_resid  = bp->b_resid;
13936 
13937 	/*
13938 	 * If enabled, copy any saved sense data into the area specified
13939 	 * by the uscsi command.
13940 	 */
13941 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13942 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13943 		/*
13944 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13945 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13946 		 */
13947 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13948 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13949 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13950 	}
13951 
13952 	/* We are done with the scsi_pkt; free it now */
13953 	ASSERT(SD_GET_PKTP(bp) != NULL);
13954 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13955 
13956 	SD_TRACE(SD_LOG_IO_CORE, un,
13957 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13958 }
13959 
13960 
13961 /*
13962  *    Function: sd_bioclone_alloc
13963  *
13964  * Description: Allocate a buf(9S) and init it as per the given buf
13965  *		and the various arguments.  The associated sd_xbuf
13966  *		struct is (nearly) duplicated.  The struct buf *bp
13967  *		argument is saved in new_xp->xb_private.
13968  *
13969  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13970  *		datalen - size of data area for the shadow bp
13971  *		blkno - starting LBA
13972  *		func - function pointer for b_iodone in the shadow buf. (May
13973  *			be NULL if none.)
13974  *
13975  * Return Code: Pointer to the allocated buf(9S) struct
13976  *
13977  *     Context: Can sleep.
13978  */
13979 
13980 static struct buf *
13981 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13982 	daddr_t blkno, int (*func)(struct buf *))
13983 {
13984 	struct	sd_lun	*un;
13985 	struct	sd_xbuf	*xp;
13986 	struct	sd_xbuf	*new_xp;
13987 	struct	buf	*new_bp;
13988 
13989 	ASSERT(bp != NULL);
13990 	xp = SD_GET_XBUF(bp);
13991 	ASSERT(xp != NULL);
13992 	un = SD_GET_UN(bp);
13993 	ASSERT(un != NULL);
13994 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13995 
13996 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13997 	    NULL, KM_SLEEP);
13998 
13999 	new_bp->b_lblkno	= blkno;
14000 
14001 	/*
14002 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14003 	 * original xbuf into it.
14004 	 */
14005 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14006 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14007 
14008 	/*
14009 	 * The given bp is automatically saved in the xb_private member
14010 	 * of the new xbuf.  Callers are allowed to depend on this.
14011 	 */
14012 	new_xp->xb_private = bp;
14013 
14014 	new_bp->b_private  = new_xp;
14015 
14016 	return (new_bp);
14017 }
14018 
14019 /*
14020  *    Function: sd_shadow_buf_alloc
14021  *
14022  * Description: Allocate a buf(9S) and init it as per the given buf
14023  *		and the various arguments.  The associated sd_xbuf
14024  *		struct is (nearly) duplicated.  The struct buf *bp
14025  *		argument is saved in new_xp->xb_private.
14026  *
14027  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14028  *		datalen - size of data area for the shadow bp
14029  *		bflags - B_READ or B_WRITE (pseudo flag)
14030  *		blkno - starting LBA
14031  *		func - function pointer for b_iodone in the shadow buf. (May
14032  *			be NULL if none.)
14033  *
14034  * Return Code: Pointer to the allocated buf(9S) struct
14035  *
14036  *     Context: Can sleep.
14037  */
14038 
14039 static struct buf *
14040 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
14041 	daddr_t blkno, int (*func)(struct buf *))
14042 {
14043 	struct	sd_lun	*un;
14044 	struct	sd_xbuf	*xp;
14045 	struct	sd_xbuf	*new_xp;
14046 	struct	buf	*new_bp;
14047 
14048 	ASSERT(bp != NULL);
14049 	xp = SD_GET_XBUF(bp);
14050 	ASSERT(xp != NULL);
14051 	un = SD_GET_UN(bp);
14052 	ASSERT(un != NULL);
14053 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14054 
14055 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
14056 		bp_mapin(bp);
14057 	}
14058 
14059 	bflags &= (B_READ | B_WRITE);
14060 #if defined(__i386) || defined(__amd64)
14061 	new_bp = getrbuf(KM_SLEEP);
14062 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
14063 	new_bp->b_bcount = datalen;
14064 	new_bp->b_flags = bflags |
14065 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
14066 #else
14067 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
14068 	    datalen, bflags, SLEEP_FUNC, NULL);
14069 #endif
14070 	new_bp->av_forw	= NULL;
14071 	new_bp->av_back	= NULL;
14072 	new_bp->b_dev	= bp->b_dev;
14073 	new_bp->b_blkno	= blkno;
14074 	new_bp->b_iodone = func;
14075 	new_bp->b_edev	= bp->b_edev;
14076 	new_bp->b_resid	= 0;
14077 
14078 	/* We need to preserve the B_FAILFAST flag */
14079 	if (bp->b_flags & B_FAILFAST) {
14080 		new_bp->b_flags |= B_FAILFAST;
14081 	}
14082 
14083 	/*
14084 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14085 	 * original xbuf into it.
14086 	 */
14087 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14088 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14089 
14090 	/* Needed later to copy data between the shadow buf & original buf! */
14091 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
14092 
14093 	/*
14094 	 * The given bp is automatically saved in the xb_private member
14095 	 * of the new xbuf.  Callers are allowed to depend on this.
14096 	 */
14097 	new_xp->xb_private = bp;
14098 
14099 	new_bp->b_private  = new_xp;
14100 
14101 	return (new_bp);
14102 }
14103 
14104 /*
14105  *    Function: sd_bioclone_free
14106  *
14107  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14108  *		when a request was larger than the partition.
14109  *
14110  *     Context: May be called under interrupt context
14111  */
14112 
14113 static void
14114 sd_bioclone_free(struct buf *bp)
14115 {
14116 	struct sd_xbuf	*xp;
14117 
14118 	ASSERT(bp != NULL);
14119 	xp = SD_GET_XBUF(bp);
14120 	ASSERT(xp != NULL);
14121 
14122 	/*
14123 	 * Call bp_mapout() before freeing the buf, in case a lower
14124 	 * layer or HBA had done a bp_mapin(). We must do this here
14125 	 * as we are the "originator" of the shadow buf.
14126 	 */
14127 	bp_mapout(bp);
14128 
14129 	/*
14130 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14131 	 * never gets confused by a stale value in this field. (Just a little
14132 	 * extra defensiveness here.)
14133 	 */
14134 	bp->b_iodone = NULL;
14135 
14136 	freerbuf(bp);
14137 
14138 	kmem_free(xp, sizeof (struct sd_xbuf));
14139 }
14140 
14141 /*
14142  *    Function: sd_shadow_buf_free
14143  *
14144  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14145  *
14146  *     Context: May be called under interrupt context
14147  */
14148 
14149 static void
14150 sd_shadow_buf_free(struct buf *bp)
14151 {
14152 	struct sd_xbuf	*xp;
14153 
14154 	ASSERT(bp != NULL);
14155 	xp = SD_GET_XBUF(bp);
14156 	ASSERT(xp != NULL);
14157 
14158 #if defined(__sparc)
14159 	/*
14160 	 * Call bp_mapout() before freeing the buf, in case a lower
14161 	 * layer or HBA had done a bp_mapin(). We must do this here
14162 	 * as we are the "originator" of the shadow buf.
14163 	 */
14164 	bp_mapout(bp);
14165 #endif
14166 
14167 	/*
14168 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14169 	 * never gets confused by a stale value in this field. (Just a little
14170 	 * extra defensiveness here.)
14171 	 */
14172 	bp->b_iodone = NULL;
14173 
14174 #if defined(__i386) || defined(__amd64)
14175 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
14176 	freerbuf(bp);
14177 #else
14178 	scsi_free_consistent_buf(bp);
14179 #endif
14180 
14181 	kmem_free(xp, sizeof (struct sd_xbuf));
14182 }
14183 
14184 
14185 /*
14186  *    Function: sd_print_transport_rejected_message
14187  *
14188  * Description: This implements the ludicrously complex rules for printing
14189  *		a "transport rejected" message.  This is to address the
14190  *		specific problem of having a flood of this error message
14191  *		produced when a failover occurs.
14192  *
14193  *     Context: Any.
14194  */
14195 
14196 static void
14197 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14198 	int code)
14199 {
14200 	ASSERT(un != NULL);
14201 	ASSERT(mutex_owned(SD_MUTEX(un)));
14202 	ASSERT(xp != NULL);
14203 
14204 	/*
14205 	 * Print the "transport rejected" message under the following
14206 	 * conditions:
14207 	 *
14208 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14209 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14210 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14211 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14212 	 *   scsi_transport(9F) (which indicates that the target might have
14213 	 *   gone off-line).  This uses the un->un_tran_fatal_count
14214 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14215 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
14216  *   received, and reset to zero whenever a TRAN_ACCEPT is returned
14217 	 *
14218 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14219 	 * the preceeding cases in order for the message to be printed.
14220  * the preceding cases in order for the message to be printed.
14221 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
14222 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14223 		    (code != TRAN_FATAL_ERROR) ||
14224 		    (un->un_tran_fatal_count == 1)) {
14225 			switch (code) {
14226 			case TRAN_BADPKT:
14227 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14228 				    "transport rejected bad packet\n");
14229 				break;
14230 			case TRAN_FATAL_ERROR:
14231 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14232 				    "transport rejected fatal error\n");
14233 				break;
14234 			default:
14235 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14236 				    "transport rejected (%d)\n", code);
14237 				break;
14238 			}
14239 		}
14240 	}
14241 }
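
/*
 * Stripped of the logging calls, the predicate implemented above can be
 * written in one expression, which makes the "ludicrously complex rules"
 * easier to audit.  The stand-alone restatement below is illustrative
 * only; the parameter names are hypothetical.
 */
#if 0	/* illustrative sketch -- not compiled as part of the driver */
static int
example_should_print(int flag_silent_set, int diag_mask_set, int code,
    int tran_fatal_count)
{
	/* A TRAN_FATAL_ERROR is only reported the first consecutive time */
	return (!flag_silent_set &&
	    (diag_mask_set || (code != TRAN_FATAL_ERROR) ||
	    (tran_fatal_count == 1)));
}
#endif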
14242 
14243 
14244 /*
14245  *    Function: sd_add_buf_to_waitq
14246  *
14247  * Description: Add the given buf(9S) struct to the wait queue for the
14248  *		instance.  If sorting is enabled, then the buf is added
14249  *		to the queue via an elevator sort algorithm (a la
14250  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14251  *		If sorting is not enabled, then the buf is just added
14252  *		to the end of the wait queue.
14253  *
14254  * Return Code: void
14255  *
14256  *     Context: Does not sleep/block, therefore technically can be called
14257  *		from any context.  However if sorting is enabled then the
14258  *		execution time is indeterminate and may become long if
14259  *		the wait queue grows large.
14260  */
14261 
14262 static void
14263 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14264 {
14265 	struct buf *ap;
14266 
14267 	ASSERT(bp != NULL);
14268 	ASSERT(un != NULL);
14269 	ASSERT(mutex_owned(SD_MUTEX(un)));
14270 
14271 	/* If the queue is empty, add the buf as the only entry & return. */
14272 	if (un->un_waitq_headp == NULL) {
14273 		ASSERT(un->un_waitq_tailp == NULL);
14274 		un->un_waitq_headp = un->un_waitq_tailp = bp;
14275 		bp->av_forw = NULL;
14276 		return;
14277 	}
14278 
14279 	ASSERT(un->un_waitq_tailp != NULL);
14280 
14281 	/*
14282 	 * If sorting is disabled, just add the buf to the tail end of
14283 	 * the wait queue and return.
14284 	 */
14285 	if (un->un_f_disksort_disabled) {
14286 		un->un_waitq_tailp->av_forw = bp;
14287 		un->un_waitq_tailp = bp;
14288 		bp->av_forw = NULL;
14289 		return;
14290 	}
14291 
14292 	/*
14293 	 * Sort through the list of requests currently on the wait queue
14294 	 * and add the new buf request at the appropriate position.
14295 	 *
14296 	 * The un->un_waitq_headp is an activity chain pointer on which
14297 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14298 	 * first queue holds those requests which are positioned after
14299 	 * the current SD_GET_BLKNO() (in the first request); the second holds
14300 	 * requests which came in after their SD_GET_BLKNO() number was passed.
14301 	 * Thus we implement a one way scan, retracting after reaching
14302 	 * the end of the drive to the first request on the second
14303 	 * queue, at which time it becomes the first queue.
14304 	 * A one-way scan is natural because of the way UNIX read-ahead
14305 	 * blocks are allocated.
14306 	 *
14307 	 * If our block number has already been passed in this sweep, we
14308 	 * must locate the second request list and add ourselves to it.
14309 	 */
14310 	ap = un->un_waitq_headp;
14311 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14312 		while (ap->av_forw != NULL) {
14313 			/*
14314 			 * Look for an "inversion" in the (normally
14315 			 * ascending) block numbers. This indicates
14316 			 * the start of the second request list.
14317 			 */
14318 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14319 				/*
14320 				 * Search the second request list for the
14321 				 * first request at a larger block number.
14322 				 * We go before that; however if there is
14323 				 * no such request, we go at the end.
14324 				 */
14325 				do {
14326 					if (SD_GET_BLKNO(bp) <
14327 					    SD_GET_BLKNO(ap->av_forw)) {
14328 						goto insert;
14329 					}
14330 					ap = ap->av_forw;
14331 				} while (ap->av_forw != NULL);
14332 				goto insert;		/* after last */
14333 			}
14334 			ap = ap->av_forw;
14335 		}
14336 
14337 		/*
14338 		 * No inversions... we will go after the last, and
14339 		 * be the first request in the second request list.
14340 		 */
14341 		goto insert;
14342 	}
14343 
14344 	/*
14345 	 * Request is at/after the current request...
14346 	 * sort in the first request list.
14347 	 */
14348 	while (ap->av_forw != NULL) {
14349 		/*
14350 		 * We want to go after the current request (1) if
14351 		 * there is an inversion after it (i.e. it is the end
14352 		 * of the first request list), or (2) if the next
14353 		 * request is a larger block no. than our request.
14354 		 */
14355 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14356 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14357 			goto insert;
14358 		}
14359 		ap = ap->av_forw;
14360 	}
14361 
14362 	/*
14363 	 * Neither a second list nor a larger request, therefore
14364 	 * we go at the end of the first list (which is the same
14365 	 * as the end of the whole shebang).
14366 	 */
14367 insert:
14368 	bp->av_forw = ap->av_forw;
14369 	ap->av_forw = bp;
14370 
14371 	/*
14372 	 * If we inserted onto the tail end of the waitq, make sure the
14373 	 * tail pointer is updated.
14374 	 */
14375 	if (ap == un->un_waitq_tailp) {
14376 		un->un_waitq_tailp = bp;
14377 	}
14378 }
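
/*
 * The one-way elevator insert above is easier to follow on a plain
 * singly-linked list of block numbers.  The queue always holds at most
 * two runs, each sorted ascending: the current sweep, then (after a
 * single inversion) the next sweep.  The stand-alone model below is
 * illustrative only; it omits the tail-pointer bookkeeping and the
 * disksort-disabled fast path, and none of these names are the driver's.
 */
#if 0	/* illustrative sketch -- not compiled as part of the driver */
struct ex_req {
	unsigned long	ex_blkno;
	struct ex_req	*ex_next;
};

static void
example_elevator_insert(struct ex_req **headp, struct ex_req *req)
{
	struct ex_req *ap = *headp;

	if (ap == NULL) {			/* empty queue */
		req->ex_next = NULL;
		*headp = req;
		return;
	}

	if (req->ex_blkno < ap->ex_blkno) {
		/* Belongs in the second run: skip ahead to the inversion */
		while (ap->ex_next != NULL &&
		    ap->ex_next->ex_blkno >= ap->ex_blkno)
			ap = ap->ex_next;
		/* Then sort into the second run (or land at the end) */
		while (ap->ex_next != NULL &&
		    req->ex_blkno >= ap->ex_next->ex_blkno)
			ap = ap->ex_next;
	} else {
		/* Sort into the first run, stopping at the inversion */
		while (ap->ex_next != NULL &&
		    ap->ex_next->ex_blkno >= ap->ex_blkno &&
		    req->ex_blkno >= ap->ex_next->ex_blkno)
			ap = ap->ex_next;
	}

	req->ex_next = ap->ex_next;
	ap->ex_next = req;
}
#endif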
14379 
14380 
14381 /*
14382  *    Function: sd_start_cmds
14383  *
14384  * Description: Remove and transport cmds from the driver queues.
14385  *
14386  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14387  *
14388  *		immed_bp - ptr to a buf to be transported immediately. Only
14389  *		the immed_bp is transported; bufs on the waitq are not
14390  *		processed and the un_retry_bp is not checked.  If immed_bp is
14391  *		NULL, then normal queue processing is performed.
14392  *
14393  *     Context: May be called from kernel thread context, interrupt context,
14394  *		or runout callback context. This function may not block or
14395  *		call routines that block.
14396  */
14397 
14398 static void
14399 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14400 {
14401 	struct	sd_xbuf	*xp;
14402 	struct	buf	*bp;
14403 	void	(*statp)(kstat_io_t *);
14404 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14405 	void	(*saved_statp)(kstat_io_t *);
14406 #endif
14407 	int	rval;
14408 
14409 	ASSERT(un != NULL);
14410 	ASSERT(mutex_owned(SD_MUTEX(un)));
14411 	ASSERT(un->un_ncmds_in_transport >= 0);
14412 	ASSERT(un->un_throttle >= 0);
14413 
14414 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14415 
14416 	do {
14417 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14418 		saved_statp = NULL;
14419 #endif
14420 
14421 		/*
14422 		 * If we are syncing or dumping, fail the command to
14423 		 * avoid recursively calling back into scsi_transport().
14424 		 * The dump I/O itself uses a separate code path so this
14425 		 * only prevents non-dump I/O from being sent while dumping.
14426 		 * File system sync takes place before dumping begins.
14427 		 * During panic, filesystem I/O is allowed provided
14428 		 * un_in_callback is <= 1.  This is to prevent recursion
14429 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14430 		 * sd_start_cmds and so on.  See panic.c for more information
14431 		 * about the states the system can be in during panic.
14432 		 */
14433 		if ((un->un_state == SD_STATE_DUMPING) ||
14434 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14435 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14436 			    "sd_start_cmds: panicking\n");
14437 			goto exit;
14438 		}
14439 
14440 		if ((bp = immed_bp) != NULL) {
14441 			/*
14442 			 * We have a bp that must be transported immediately.
14443 			 * It's OK to transport the immed_bp here without doing
14444 			 * the throttle limit check because the immed_bp is
14445 			 * always used in a retry/recovery case. This means
14446 			 * that we know we are not at the throttle limit by
14447 			 * virtue of the fact that to get here we must have
14448 			 * already gotten a command back via sdintr(). This also
14449 			 * relies on (1) the command on un_retry_bp preventing
14450 			 * further commands from the waitq from being issued;
14451 			 * and (2) the code in sd_retry_command checking the
14452 			 * throttle limit before issuing a delayed or immediate
14453 			 * retry. This holds even if the throttle limit is
14454 			 * currently ratcheted down from its maximum value.
14455 			 */
14456 			statp = kstat_runq_enter;
14457 			if (bp == un->un_retry_bp) {
14458 				ASSERT((un->un_retry_statp == NULL) ||
14459 				    (un->un_retry_statp == kstat_waitq_enter) ||
14460 				    (un->un_retry_statp ==
14461 				    kstat_runq_back_to_waitq));
14462 				/*
14463 				 * If the waitq kstat was incremented when
14464 				 * sd_set_retry_bp() queued this bp for a retry,
14465 				 * then we must set up statp so that the waitq
14466 				 * count will get decremented correctly below.
14467 				 * Also we must clear un->un_retry_statp to
14468 				 * ensure that we do not act on a stale value
14469 				 * in this field.
14470 				 */
14471 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14472 				    (un->un_retry_statp ==
14473 				    kstat_runq_back_to_waitq)) {
14474 					statp = kstat_waitq_to_runq;
14475 				}
14476 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14477 				saved_statp = un->un_retry_statp;
14478 #endif
14479 				un->un_retry_statp = NULL;
14480 
14481 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14482 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14483 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14484 				    un, un->un_retry_bp, un->un_throttle,
14485 				    un->un_ncmds_in_transport);
14486 			} else {
14487 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14488 				    "processing priority bp:0x%p\n", bp);
14489 			}
14490 
14491 		} else if ((bp = un->un_waitq_headp) != NULL) {
14492 			/*
14493 			 * A command on the waitq is ready to go, but do not
14494 			 * send it if:
14495 			 *
14496 			 * (1) the throttle limit has been reached, or
14497 			 * (2) a retry is pending, or
14498 			 * (3) a START_STOP_UNIT callback is pending, or
14499 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14500 			 *	command is pending.
14501 			 *
14502 			 * For all of these conditions, IO processing will
14503 			 * restart after the condition is cleared.
14504 			 */
14505 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14506 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14507 				    "sd_start_cmds: exiting, "
14508 				    "throttle limit reached!\n");
14509 				goto exit;
14510 			}
14511 			if (un->un_retry_bp != NULL) {
14512 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14513 				    "sd_start_cmds: exiting, retry pending!\n");
14514 				goto exit;
14515 			}
14516 			if (un->un_startstop_timeid != NULL) {
14517 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14518 				    "sd_start_cmds: exiting, "
14519 				    "START_STOP pending!\n");
14520 				goto exit;
14521 			}
14522 			if (un->un_direct_priority_timeid != NULL) {
14523 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14524 				    "sd_start_cmds: exiting, "
14525 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14526 				goto exit;
14527 			}
14528 
14529 			/* Dequeue the command */
14530 			un->un_waitq_headp = bp->av_forw;
14531 			if (un->un_waitq_headp == NULL) {
14532 				un->un_waitq_tailp = NULL;
14533 			}
14534 			bp->av_forw = NULL;
14535 			statp = kstat_waitq_to_runq;
14536 			SD_TRACE(SD_LOG_IO_CORE, un,
14537 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14538 
14539 		} else {
14540 			/* No work to do so bail out now */
14541 			SD_TRACE(SD_LOG_IO_CORE, un,
14542 			    "sd_start_cmds: no more work, exiting!\n");
14543 			goto exit;
14544 		}
14545 
14546 		/*
14547 		 * Reset the state to normal. This is the mechanism by which
14548 		 * the state transitions from either SD_STATE_RWAIT or
14549 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14550 		 * If state is SD_STATE_PM_CHANGING then this command is
14551 		 * part of the device power control and the state must
14552 		 * not be put back to normal. Doing so would allow new
14553 		 * commands to proceed when they shouldn't; the device
14554 		 * may be going off.
14555 		 */
14556 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14557 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14558 			New_state(un, SD_STATE_NORMAL);
14559 		}
14560 
14561 		xp = SD_GET_XBUF(bp);
14562 		ASSERT(xp != NULL);
14563 
14564 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14565 		/*
14566 		 * Allocate the scsi_pkt if we need one, or attach DMA
14567 		 * resources if we have a scsi_pkt that needs them. The
14568 		 * latter should only occur for commands that are being
14569 		 * retried.
14570 		 */
14571 		if ((xp->xb_pktp == NULL) ||
14572 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14573 #else
14574 		if (xp->xb_pktp == NULL) {
14575 #endif
14576 			/*
14577 			 * There is no scsi_pkt allocated for this buf. Call
14578 			 * the initpkt function to allocate & init one.
14579 			 *
14580 			 * The scsi_init_pkt runout callback functionality is
14581 			 * implemented as follows:
14582 			 *
14583 			 * 1) The initpkt function always calls
14584 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14585 			 *    callback routine.
14586 			 * 2) A successful packet allocation is initialized and
14587 			 *    the I/O is transported.
14588 			 * 3) The I/O associated with an allocation resource
14589 			 *    failure is left on its queue to be retried via
14590 			 *    runout or the next I/O.
14591 			 * 4) The I/O associated with a DMA error is removed
14592 			 *    from the queue and failed with EIO. Processing of
14593 			 *    the transport queues is also halted to be
14594 			 *    restarted via runout or the next I/O.
14595 			 * 5) The I/O associated with a CDB size or packet
14596 			 *    size error is removed from the queue and failed
14597 			 *    with EIO. Processing of the transport queues is
14598 			 *    continued.
14599 			 *
14600 			 * Note: there is no interface for canceling a runout
14601 			 * callback. To prevent the driver from detaching or
14602 			 * suspending while a runout is pending the driver
14603 			 * state is set to SD_STATE_RWAIT
14604 			 *
14605 			 * Note: using the scsi_init_pkt callback facility can
14606 			 * result in an I/O request persisting at the head of
14607 			 * the list which cannot be satisfied even after
14608 			 * multiple retries. In the future the driver may
14609 			 * implement some kind of maximum runout count before
14610 			 * failing an I/O.
14611 			 *
14612 			 * Note: the use of funcp below may seem superfluous,
14613 			 * but it helps warlock figure out the correct
14614 			 * initpkt function calls (see [s]sd.wlcmd).
14615 			 */
14616 			struct scsi_pkt	*pktp;
14617 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14618 
14619 			ASSERT(bp != un->un_rqs_bp);
14620 
14621 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14622 			switch ((*funcp)(bp, &pktp)) {
14623 			case  SD_PKT_ALLOC_SUCCESS:
14624 				xp->xb_pktp = pktp;
14625 				SD_TRACE(SD_LOG_IO_CORE, un,
14626 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14627 				    pktp);
14628 				goto got_pkt;
14629 
14630 			case SD_PKT_ALLOC_FAILURE:
14631 				/*
14632 				 * Temporary (hopefully) resource depletion.
14633 				 * Since retries and RQS commands always have a
14634 				 * scsi_pkt allocated, these cases should never
14635 				 * get here. So the only cases this needs to
14636 				 * handle are a bp from the waitq (which we put
14637 				 * back onto the waitq for sdrunout), or a bp
14638 				 * sent as an immed_bp (which we just fail).
14639 				 */
14640 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14641 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14642 
14643 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14644 
14645 				if (bp == immed_bp) {
14646 					/*
14647 					 * If SD_XB_DMA_FREED is clear, then
14648 					 * this is a failure to allocate a
14649 					 * scsi_pkt, and we must fail the
14650 					 * command.
14651 					 */
14652 					if ((xp->xb_pkt_flags &
14653 					    SD_XB_DMA_FREED) == 0) {
14654 						break;
14655 					}
14656 
14657 					/*
14658 					 * If this immediate command is NOT our
14659 					 * un_retry_bp, then we must fail it.
14660 					 */
14661 					if (bp != un->un_retry_bp) {
14662 						break;
14663 					}
14664 
14665 					/*
14666 					 * We get here if this cmd is our
14667 					 * un_retry_bp that was DMAFREED, but
14668 					 * scsi_init_pkt() failed to reallocate
14669 					 * DMA resources when we attempted to
14670 					 * retry it. This can happen when an
14671 					 * mpxio failover is in progress, but
14672 					 * we don't want to just fail the
14673 					 * command in this case.
14674 					 *
14675 					 * Use timeout(9F) to restart it after
14676 					 * a 100ms delay.  We don't want to
14677 					 * let sdrunout() restart it, because
14678 					 * sdrunout() is just supposed to start
14679 					 * commands that are sitting on the
14680 					 * wait queue.  The un_retry_bp stays
14681 					 * set until the command completes, but
14682 					 * sdrunout can be called many times
14683 					 * before that happens.  Since sdrunout
14684 					 * cannot tell if the un_retry_bp is
14685 					 * already in the transport, it could
14686 					 * end up calling scsi_transport() for
14687 					 * the un_retry_bp multiple times.
14688 					 *
14689 					 * Also: don't schedule the callback
14690 					 * if some other callback is already
14691 					 * pending.
14692 					 */
14693 					if (un->un_retry_statp == NULL) {
14694 						/*
14695 						 * restore the kstat pointer to
14696 						 * keep kstat counts coherent
14697 						 * when we do retry the command.
14698 						 */
14699 						un->un_retry_statp =
14700 						    saved_statp;
14701 					}
14702 
14703 					if ((un->un_startstop_timeid == NULL) &&
14704 					    (un->un_retry_timeid == NULL) &&
14705 					    (un->un_direct_priority_timeid ==
14706 					    NULL)) {
14707 
14708 						un->un_retry_timeid =
14709 						    timeout(
14710 						    sd_start_retry_command,
14711 						    un, SD_RESTART_TIMEOUT);
14712 					}
14713 					goto exit;
14714 				}
14715 
14716 #else
14717 				if (bp == immed_bp) {
14718 					break;	/* Just fail the command */
14719 				}
14720 #endif
14721 
14722 				/* Add the buf back to the head of the waitq */
14723 				bp->av_forw = un->un_waitq_headp;
14724 				un->un_waitq_headp = bp;
14725 				if (un->un_waitq_tailp == NULL) {
14726 					un->un_waitq_tailp = bp;
14727 				}
14728 				goto exit;
14729 
14730 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14731 				/*
14732 				 * HBA DMA resource failure. Fail the command
14733 				 * and continue processing of the queues.
14734 				 */
14735 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14736 				    "sd_start_cmds: "
14737 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14738 				break;
14739 
14740 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14741 				/*
14742 				 * Note:x86: Partial DMA mapping not supported
14743 				 * for USCSI commands, and all the needed DMA
14744 				 * resources were not allocated.
14745 				 */
14746 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14747 				    "sd_start_cmds: "
14748 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14749 				break;
14750 
14751 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14752 				/*
14753 				 * Note:x86: Request cannot fit into CDB based
14754 				 * on lba and len.
14755 				 */
14756 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14757 				    "sd_start_cmds: "
14758 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14759 				break;
14760 
14761 			default:
14762 				/* Should NEVER get here! */
14763 				panic("scsi_init_pkt error");
14764 				/*NOTREACHED*/
14765 			}
14766 
14767 			/*
14768 			 * Fatal error in allocating a scsi_pkt for this buf.
14769 			 * Update kstats & return the buf with an error code.
14770 			 * We must use sd_return_failed_command_no_restart() to
14771 			 * avoid a recursive call back into sd_start_cmds().
14772 			 * However this also means that we must keep processing
14773 			 * the waitq here in order to avoid stalling.
14774 			 */
14775 			if (statp == kstat_waitq_to_runq) {
14776 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14777 			}
14778 			sd_return_failed_command_no_restart(un, bp, EIO);
14779 			if (bp == immed_bp) {
14780 				/* immed_bp is gone by now, so clear this */
14781 				immed_bp = NULL;
14782 			}
14783 			continue;
14784 		}
14785 got_pkt:
14786 		if (bp == immed_bp) {
14787 			/* goto the head of the class.... */
14788 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14789 		}
14790 
14791 		un->un_ncmds_in_transport++;
14792 		SD_UPDATE_KSTATS(un, statp, bp);
14793 
14794 		/*
14795 		 * Call scsi_transport() to send the command to the target.
14796 		 * According to SCSA architecture, we must drop the mutex here
14797 		 * before calling scsi_transport() in order to avoid deadlock.
14798 		 * Note that the scsi_pkt's completion routine can be executed
14799 		 * (from interrupt context) even before the call to
14800 		 * scsi_transport() returns.
14801 		 */
14802 		SD_TRACE(SD_LOG_IO_CORE, un,
14803 		    "sd_start_cmds: calling scsi_transport()\n");
14804 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14805 
14806 		mutex_exit(SD_MUTEX(un));
14807 		rval = scsi_transport(xp->xb_pktp);
14808 		mutex_enter(SD_MUTEX(un));
14809 
14810 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14811 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14812 
14813 		switch (rval) {
14814 		case TRAN_ACCEPT:
14815 			/* Clear this with every pkt accepted by the HBA */
14816 			un->un_tran_fatal_count = 0;
14817 			break;	/* Success; try the next cmd (if any) */
14818 
14819 		case TRAN_BUSY:
14820 			un->un_ncmds_in_transport--;
14821 			ASSERT(un->un_ncmds_in_transport >= 0);
14822 
14823 			/*
14824 			 * Don't retry request sense, the sense data
14825 			 * is lost when another request is sent.
14826 			 * Free up the rqs buf and retry
14827 			 * the original failed cmd.  Update kstat.
14828 			 */
14829 			if (bp == un->un_rqs_bp) {
14830 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14831 				bp = sd_mark_rqs_idle(un, xp);
14832 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14833 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
14834 					kstat_waitq_enter);
14835 				goto exit;
14836 			}
14837 
14838 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14839 			/*
14840 			 * Free the DMA resources for the scsi_pkt. This will
14841 			 * allow mpxio to select another path the next time
14842 			 * we call scsi_transport() with this scsi_pkt.
14843 			 * See sdintr() for the rationale behind this.
14844 			 */
14845 			if ((un->un_f_is_fibre == TRUE) &&
14846 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14847 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14848 				scsi_dmafree(xp->xb_pktp);
14849 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14850 			}
14851 #endif
14852 
14853 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14854 				/*
14855 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14856 				 * are for error recovery situations. These do
14857 				 * not use the normal command waitq, so if they
14858 				 * get a TRAN_BUSY we cannot put them back onto
14859 				 * the waitq for later retry. One possible
14860 				 * problem is that there could already be some
14861 				 * other command on un_retry_bp that is waiting
14862 				 * for this one to complete, so we would be
14863 				 * deadlocked if we put this command back onto
14864 				 * the waitq for later retry (since un_retry_bp
14865 				 * must complete before the driver gets back to
14866 				 * commands on the waitq).
14867 				 *
14868 				 * To avoid deadlock we must schedule a callback
14869 				 * that will restart this command after a set
14870 				 * interval.  This should keep retrying for as
14871 				 * long as the underlying transport keeps
14872 				 * returning TRAN_BUSY (just like for other
14873 				 * commands).  Use the same timeout interval as
14874 				 * for the ordinary TRAN_BUSY retry.
14875 				 */
14876 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14877 				    "sd_start_cmds: scsi_transport() returned "
14878 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14879 
14880 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14881 				un->un_direct_priority_timeid =
14882 				    timeout(sd_start_direct_priority_command,
14883 				    bp, SD_BSY_TIMEOUT / 500);
14884 
14885 				goto exit;
14886 			}
14887 
14888 			/*
14889 			 * For TRAN_BUSY, we want to reduce the throttle value,
14890 			 * unless we are retrying a command.
14891 			 */
14892 			if (bp != un->un_retry_bp) {
14893 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14894 			}
14895 
14896 			/*
14897 			 * Set up the bp to be tried again 10 ms later.
14898 			 * Note:x86: Is there a timeout value in the sd_lun
14899 			 * for this condition?
14900 			 */
14901 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
14902 				kstat_runq_back_to_waitq);
14903 			goto exit;
14904 
14905 		case TRAN_FATAL_ERROR:
14906 			un->un_tran_fatal_count++;
14907 			/* FALLTHRU */
14908 
14909 		case TRAN_BADPKT:
14910 		default:
14911 			un->un_ncmds_in_transport--;
14912 			ASSERT(un->un_ncmds_in_transport >= 0);
14913 
14914 			/*
14915 			 * If this is our REQUEST SENSE command with a
14916 			 * transport error, we must get back the pointers
14917 			 * to the original buf, and mark the REQUEST
14918 			 * SENSE command as "available".
14919 			 */
14920 			if (bp == un->un_rqs_bp) {
14921 				bp = sd_mark_rqs_idle(un, xp);
14922 				xp = SD_GET_XBUF(bp);
14923 			} else {
14924 				/*
14925 				 * Legacy behavior: do not update transport
14926 				 * error count for request sense commands.
14927 				 */
14928 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14929 			}
14930 
14931 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14932 			sd_print_transport_rejected_message(un, xp, rval);
14933 
14934 			/*
14935 			 * We must use sd_return_failed_command_no_restart() to
14936 			 * avoid a recursive call back into sd_start_cmds().
14937 			 * However this also means that we must keep processing
14938 			 * the waitq here in order to avoid stalling.
14939 			 */
14940 			sd_return_failed_command_no_restart(un, bp, EIO);
14941 
14942 			/*
14943 			 * Notify any threads waiting in sd_ddi_suspend() that
14944 			 * a command completion has occurred.
14945 			 */
14946 			if (un->un_state == SD_STATE_SUSPENDED) {
14947 				cv_broadcast(&un->un_disk_busy_cv);
14948 			}
14949 
14950 			if (bp == immed_bp) {
14951 				/* immed_bp is gone by now, so clear this */
14952 				immed_bp = NULL;
14953 			}
14954 			break;
14955 		}
14956 
14957 	} while (immed_bp == NULL);
14958 
14959 exit:
14960 	ASSERT(mutex_owned(SD_MUTEX(un)));
14961 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14962 }
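
/*
 * Illustrative sketch (editorial addition, not part of the driver): the
 * wait queue manipulated above is a singly-linked list of buf(9S) structs
 * chained through av_forw, with un_waitq_headp and un_waitq_tailp marking
 * the ends.  The head-requeue done for SD_PKT_ALLOC_FAILURE is equivalent
 * to this hypothetical helper (SD_MUTEX held):
 *
 *	static void
 *	sd_waitq_push_head(struct sd_lun *un, struct buf *bp)
 *	{
 *		ASSERT(mutex_owned(SD_MUTEX(un)));
 *		bp->av_forw = un->un_waitq_headp;
 *		un->un_waitq_headp = bp;
 *		if (un->un_waitq_tailp == NULL)
 *			un->un_waitq_tailp = bp;
 *	}
 */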
14963 
14964 
14965 /*
14966  *    Function: sd_return_command
14967  *
14968  * Description: Returns a command to its originator (with or without an
14969  *		error).  Also starts commands waiting to be transported
14970  *		to the target.
14971  *
14972  *     Context: May be called from interrupt, kernel, or timeout context
14973  */
14974 
14975 static void
14976 sd_return_command(struct sd_lun *un, struct buf *bp)
14977 {
14978 	struct sd_xbuf *xp;
14979 #if defined(__i386) || defined(__amd64)
14980 	struct scsi_pkt *pktp;
14981 #endif
14982 
14983 	ASSERT(bp != NULL);
14984 	ASSERT(un != NULL);
14985 	ASSERT(mutex_owned(SD_MUTEX(un)));
14986 	ASSERT(bp != un->un_rqs_bp);
14987 	xp = SD_GET_XBUF(bp);
14988 	ASSERT(xp != NULL);
14989 
14990 #if defined(__i386) || defined(__amd64)
14991 	pktp = SD_GET_PKTP(bp);
14992 #endif
14993 
14994 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14995 
14996 #if defined(__i386) || defined(__amd64)
14997 	/*
14998 	 * Note:x86: check for the "sdrestart failed" case.
14999 	 */
15000 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
15001 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
15002 		(xp->xb_pktp->pkt_resid == 0)) {
15003 
15004 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
15005 			/*
15006 			 * Successfully set up next portion of cmd
15007 			 * transfer, try sending it
15008 			 */
15009 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15010 			    NULL, NULL, 0, (clock_t)0, NULL);
15011 			sd_start_cmds(un, NULL);
15012 			return;	/* Note:x86: need a return here? */
15013 		}
15014 	}
15015 #endif
15016 
15017 	/*
15018 	 * If this is the failfast bp, clear it from un_failfast_bp. This
15019 	 * can happen if, upon being retried, the failfast bp either
15020 	 * succeeded or encountered another error (possibly even a different
15021 	 * error than the one that precipitated the failfast state, but in
15022 	 * that case it would have had to exhaust retries as well). Regardless,
15023 	 * this should never occur while the instance is in the active
15024 	 * failfast state.
15025 	 */
15026 	if (bp == un->un_failfast_bp) {
15027 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15028 		un->un_failfast_bp = NULL;
15029 	}
15030 
15031 	/*
15032 	 * Clear the failfast state upon successful completion of ANY cmd.
15033 	 */
15034 	if (bp->b_error == 0) {
15035 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15036 	}
15037 
15038 	/*
15039 	 * This is used if the command was retried one or more times. Show that
15040 	 * we are done with it, and allow processing of the waitq to resume.
15041 	 */
15042 	if (bp == un->un_retry_bp) {
15043 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15044 		    "sd_return_command: un:0x%p: "
15045 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15046 		un->un_retry_bp = NULL;
15047 		un->un_retry_statp = NULL;
15048 	}
15049 
15050 	SD_UPDATE_RDWR_STATS(un, bp);
15051 	SD_UPDATE_PARTITION_STATS(un, bp);
15052 
15053 	switch (un->un_state) {
15054 	case SD_STATE_SUSPENDED:
15055 		/*
15056 		 * Notify any threads waiting in sd_ddi_suspend() that
15057 		 * a command completion has occurred.
15058 		 */
15059 		cv_broadcast(&un->un_disk_busy_cv);
15060 		break;
15061 	default:
15062 		sd_start_cmds(un, NULL);
15063 		break;
15064 	}
15065 
15066 	/* Return this command up the iodone chain to its originator. */
15067 	mutex_exit(SD_MUTEX(un));
15068 
15069 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15070 	xp->xb_pktp = NULL;
15071 
15072 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15073 
15074 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15075 	mutex_enter(SD_MUTEX(un));
15076 
15077 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
15078 }
15079 
15080 
15081 /*
15082  *    Function: sd_return_failed_command
15083  *
15084  * Description: Command completion when an error occurred.
15085  *
15086  *     Context: May be called from interrupt context
15087  */
15088 
15089 static void
15090 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15091 {
15092 	ASSERT(bp != NULL);
15093 	ASSERT(un != NULL);
15094 	ASSERT(mutex_owned(SD_MUTEX(un)));
15095 
15096 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15097 	    "sd_return_failed_command: entry\n");
15098 
15099 	/*
15100 	 * b_resid could already be nonzero due to a partial data
15101 	 * transfer, so do not change it here.
15102 	 */
15103 	SD_BIOERROR(bp, errcode);
15104 
15105 	sd_return_command(un, bp);
15106 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15107 	    "sd_return_failed_command: exit\n");
15108 }
15109 
15110 
15111 /*
15112  *    Function: sd_return_failed_command_no_restart
15113  *
15114  * Description: Same as sd_return_failed_command, but ensures that no
15115  *		call back into sd_start_cmds will be issued.
15116  *
15117  *     Context: May be called from interrupt context
15118  */
15119 
15120 static void
15121 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
15122 	int errcode)
15123 {
15124 	struct sd_xbuf *xp;
15125 
15126 	ASSERT(bp != NULL);
15127 	ASSERT(un != NULL);
15128 	ASSERT(mutex_owned(SD_MUTEX(un)));
15129 	xp = SD_GET_XBUF(bp);
15130 	ASSERT(xp != NULL);
15131 	ASSERT(errcode != 0);
15132 
15133 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15134 	    "sd_return_failed_command_no_restart: entry\n");
15135 
15136 	/*
15137 	 * b_resid could already be nonzero due to a partial data
15138 	 * transfer, so do not change it here.
15139 	 */
15140 	SD_BIOERROR(bp, errcode);
15141 
15142 	/*
15143 	 * If this is the failfast bp, clear it. This can happen if the
15144 	 * failfast bp encountered a fatal error when we attempted to
15145 	 * retry it (such as a scsi_transport(9F) failure).  However,
15146 	 * we should NOT be in an active failfast state if the failfast
15147 	 * bp is not NULL.
15148 	 */
15149 	if (bp == un->un_failfast_bp) {
15150 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15151 		un->un_failfast_bp = NULL;
15152 	}
15153 
15154 	if (bp == un->un_retry_bp) {
15155 		/*
15156 		 * This command was retried one or more times. Show that we are
15157 		 * done with it, and allow processing of the waitq to resume.
15158 		 */
15159 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15160 		    "sd_return_failed_command_no_restart: "
15161 		    "un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15162 		un->un_retry_bp = NULL;
15163 		un->un_retry_statp = NULL;
15164 	}
15165 
15166 	SD_UPDATE_RDWR_STATS(un, bp);
15167 	SD_UPDATE_PARTITION_STATS(un, bp);
15168 
15169 	mutex_exit(SD_MUTEX(un));
15170 
15171 	if (xp->xb_pktp != NULL) {
15172 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15173 		xp->xb_pktp = NULL;
15174 	}
15175 
15176 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15177 
15178 	mutex_enter(SD_MUTEX(un));
15179 
15180 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15181 	    "sd_return_failed_command_no_restart: exit\n");
15182 }
15183 
15184 
15185 /*
15186  *    Function: sd_retry_command
15187  *
15188  * Description: queue up a command for retry, or (optionally) fail it
15189  *		if retry counts are exhausted.
15190  *
15191  *   Arguments: un - Pointer to the sd_lun struct for the target.
15192  *
15193  *		bp - Pointer to the buf for the command to be retried.
15194  *
15195  *		retry_check_flag - Flag to see which (if any) of the retry
15196  *		   counts should be decremented/checked. If the indicated
15197  *		   retry count is exhausted, then the command will not be
15198  *		   retried; it will be failed instead. This should use a
15199  *		   value equal to one of the following:
15200  *
15201  *			SD_RETRIES_NOCHECK
15202  *			SD_RETRIES_STANDARD
15203  *			SD_RETRIES_VICTIM
15204  *
15205  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15206  *		   if the check should be made to see if FLAG_ISOLATE is set
15207  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15208  *		   not retried, it is simply failed.
15209  *
15210  *		user_funcp - Ptr to function to call before dispatching the
15211  *		   command. May be NULL if no action needs to be performed.
15212  *		   (Primarily intended for printing messages.)
15213  *
15214  *		user_arg - Optional argument to be passed along to
15215  *		   the user_funcp call.
15216  *
15217  *		failure_code - errno return code to set in the bp if the
15218  *		   command is going to be failed.
15219  *
15220  *		retry_delay - Retry delay interval in (clock_t) units. May
15221  *		   be zero, which indicates that the command should be retried
15222  *		   immediately (i.e., without an intervening delay).
15223  *
15224  *		statp - Ptr to kstat function to be updated if the command
15225  *		   is queued for a delayed retry. May be NULL if no kstat
15226  *		   update is desired.
15227  *
15228  *     Context: May be called from interrupt context.
15229  */
15230 
15231 static void
15232 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15233 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15234 	code), void *user_arg, int failure_code,  clock_t retry_delay,
15235 	void (*statp)(kstat_io_t *))
15236 {
15237 	struct sd_xbuf	*xp;
15238 	struct scsi_pkt	*pktp;
15239 
15240 	ASSERT(un != NULL);
15241 	ASSERT(mutex_owned(SD_MUTEX(un)));
15242 	ASSERT(bp != NULL);
15243 	xp = SD_GET_XBUF(bp);
15244 	ASSERT(xp != NULL);
15245 	pktp = SD_GET_PKTP(bp);
15246 	ASSERT(pktp != NULL);
15247 
15248 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15249 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15250 
15251 	/*
15252 	 * If we are syncing or dumping, fail the command to avoid
15253 	 * recursively calling back into scsi_transport().
15254 	 */
15255 	if (ddi_in_panic()) {
15256 		goto fail_command_no_log;
15257 	}
15258 
15259 	/*
15260 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15261 	 * log an error and fail the command.
15262 	 */
15263 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15264 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15265 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15266 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15267 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15268 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15269 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15270 		goto fail_command;
15271 	}
15272 
15273 	/*
15274 	 * If we are suspended, then put the command onto head of the
15275 	 * wait queue since we don't want to start more commands.
15276 	 */
15277 	switch (un->un_state) {
15278 	case SD_STATE_SUSPENDED:
15279 	case SD_STATE_DUMPING:
15280 		bp->av_forw = un->un_waitq_headp;
15281 		un->un_waitq_headp = bp;
15282 		if (un->un_waitq_tailp == NULL) {
15283 			un->un_waitq_tailp = bp;
15284 		}
15285 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15286 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15287 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15288 		return;
15289 	default:
15290 		break;
15291 	}
15292 
15293 	/*
15294 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15295 	 * is set; if it is then we do not want to retry the command.
15296 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15297 	 */
15298 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15299 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15300 			goto fail_command;
15301 		}
15302 	}
15303 
15304 
15305 	/*
15306 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15307 	 * command timeout or a selection timeout has occurred. This means
15308 	 * that we were unable to establish any kind of communication with
15309 	 * the target, and subsequent retries and/or commands are likely
15310 	 * to encounter similar results and take a long time to complete.
15311 	 *
15312 	 * If this is a failfast error condition, we need to update the
15313 	 * failfast state, even if this bp does not have B_FAILFAST set.
15314 	 */
15315 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15316 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15317 			ASSERT(un->un_failfast_bp == NULL);
15318 			/*
15319 			 * If we are already in the active failfast state, and
15320 			 * another failfast error condition has been detected,
15321 			 * then fail this command if it has B_FAILFAST set.
15322 			 * If B_FAILFAST is clear, then maintain the legacy
15323 			 * behavior of retrying heroically, even though this will
15324 			 * take a lot more time to fail the command.
15325 			 */
15326 			if (bp->b_flags & B_FAILFAST) {
15327 				goto fail_command;
15328 			}
15329 		} else {
15330 			/*
15331 			 * We're not in the active failfast state, but we
15332 			 * have a failfast error condition, so we must begin
15333 			 * transition to the next state. We do this regardless
15334 			 * of whether or not this bp has B_FAILFAST set.
15335 			 */
15336 			if (un->un_failfast_bp == NULL) {
15337 				/*
15338 				 * This is the first bp to meet a failfast
15339 				 * condition so save it on un_failfast_bp &
15340 				 * do normal retry processing. Do not enter
15341 				 * active failfast state yet. This marks
15342 				 * entry into the "failfast pending" state.
15343 				 */
15344 				un->un_failfast_bp = bp;
15345 
15346 			} else if (un->un_failfast_bp == bp) {
15347 				/*
15348 				 * This is the second time *this* bp has
15349 				 * encountered a failfast error condition,
15350 				 * so enter active failfast state & flush
15351 				 * queues as appropriate.
15352 				 */
15353 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15354 				un->un_failfast_bp = NULL;
15355 				sd_failfast_flushq(un);
15356 
15357 				/*
15358 				 * Fail this bp now if B_FAILFAST set;
15359 				 * otherwise continue with retries. (It would
15360 				 * be pretty ironic if this bp succeeded on a
15361 				 * subsequent retry after we just flushed all
15362 				 * the queues).
15363 				 */
15364 				if (bp->b_flags & B_FAILFAST) {
15365 					goto fail_command;
15366 				}
15367 
15368 #if !defined(lint) && !defined(__lint)
15369 			} else {
15370 				/*
15371 				 * If neither of the preceding conditionals
15372 				 * was true, it means that there is some
15373 				 * *other* bp that has met an initial failfast
15374 				 * condition and is currently either being
15375 				 * retried or is waiting to be retried. In
15376 				 * that case we should perform normal retry
15377 				 * processing on *this* bp, since there is a
15378 				 * chance that the current failfast condition
15379 				 * is transient and recoverable. If that does
15380 				 * not turn out to be the case, then retries
15381 				 * will be cleared when the wait queue is
15382 				 * flushed anyway.
15383 				 */
15384 #endif
15385 			}
15386 		}
15387 	} else {
15388 		/*
15389 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15390 		 * likely were able to at least establish some level of
15391 		 * communication with the target and subsequent commands
15392 		 * and/or retries are likely to get through to the target.
15393 		 * In this case we want to be aggressive about clearing
15394 		 * the failfast state. Note that this does not affect
15395 		 * the "failfast pending" condition.
15396 		 */
15397 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15398 	}
15399 
15400 
15401 	/*
15402 	 * Check the specified retry count to see if we can still do
15403 	 * any retries with this pkt before we should fail it.
15404 	 */
15405 	switch (retry_check_flag & SD_RETRIES_MASK) {
15406 	case SD_RETRIES_VICTIM:
15407 		/*
15408 		 * Check the victim retry count. If exhausted, then fall
15409 		 * thru & check against the standard retry count.
15410 		 */
15411 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15412 			/* Increment count & proceed with the retry */
15413 			xp->xb_victim_retry_count++;
15414 			break;
15415 		}
15416 		/* Victim retries exhausted, fall back to std. retries... */
15417 		/* FALLTHRU */
15418 
15419 	case SD_RETRIES_STANDARD:
15420 		if (xp->xb_retry_count >= un->un_retry_count) {
15421 			/* Retries exhausted, fail the command */
15422 			SD_TRACE(SD_LOG_IO_CORE, un,
15423 			    "sd_retry_command: retries exhausted!\n");
15424 			/*
15425 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15426 			 * commands with nonzero pkt_resid.
15427 			 */
15428 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15429 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15430 			    (pktp->pkt_resid != 0)) {
15431 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15432 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15433 					SD_UPDATE_B_RESID(bp, pktp);
15434 				}
15435 			}
15436 			goto fail_command;
15437 		}
15438 		xp->xb_retry_count++;
15439 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15440 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15441 		break;
15442 
15443 	case SD_RETRIES_UA:
15444 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15445 			/* Retries exhausted, fail the command */
15446 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15447 			    "Unit Attention retries exhausted. "
15448 			    "Check the target.\n");
15449 			goto fail_command;
15450 		}
15451 		xp->xb_ua_retry_count++;
15452 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15453 		    "sd_retry_command: retry count:%d\n",
15454 			xp->xb_ua_retry_count);
15455 		break;
15456 
15457 	case SD_RETRIES_BUSY:
15458 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15459 			/* Retries exhausted, fail the command */
15460 			SD_TRACE(SD_LOG_IO_CORE, un,
15461 			    "sd_retry_command: retries exhausted!\n");
15462 			goto fail_command;
15463 		}
15464 		xp->xb_retry_count++;
15465 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15466 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15467 		break;
15468 
15469 	case SD_RETRIES_NOCHECK:
15470 	default:
15471 		/* No retry count to check. Just proceed with the retry */
15472 		break;
15473 	}
15474 
15475 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15476 
15477 	/*
15478 	 * If we were given a zero timeout, we must attempt to retry the
15479 	 * command immediately (ie, without a delay).
15480 	 * command immediately (i.e., without a delay).
15481 	if (retry_delay == 0) {
15482 		/*
15483 		 * Check some limiting conditions to see if we can actually
15484 		 * do the immediate retry.  If we cannot, then we must
15485 		 * fall back to queueing up a delayed retry.
15486 		 */
15487 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15488 			/*
15489 			 * We are at the throttle limit for the target,
15490 			 * fall back to delayed retry.
15491 			 */
15492 			retry_delay = SD_BSY_TIMEOUT;
15493 			statp = kstat_waitq_enter;
15494 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15495 			    "sd_retry_command: immed. retry hit "
15496 			    "throttle!\n");
15497 		} else {
15498 			/*
15499 			 * We're clear to proceed with the immediate retry.
15500 			 * First call the user-provided function (if any)
15501 			 */
15502 			if (user_funcp != NULL) {
15503 				(*user_funcp)(un, bp, user_arg,
15504 				    SD_IMMEDIATE_RETRY_ISSUED);
15505 #ifdef __lock_lint
15506 				sd_print_incomplete_msg(un, bp, user_arg,
15507 				    SD_IMMEDIATE_RETRY_ISSUED);
15508 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15509 				    SD_IMMEDIATE_RETRY_ISSUED);
15510 				sd_print_sense_failed_msg(un, bp, user_arg,
15511 				    SD_IMMEDIATE_RETRY_ISSUED);
15512 #endif
15513 			}
15514 
15515 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15516 			    "sd_retry_command: issuing immediate retry\n");
15517 
15518 			/*
15519 			 * Call sd_start_cmds() to transport the command to
15520 			 * the target.
15521 			 */
15522 			sd_start_cmds(un, bp);
15523 
15524 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15525 			    "sd_retry_command exit\n");
15526 			return;
15527 		}
15528 	}
15529 
15530 	/*
15531 	 * Set up to retry the command after a delay.
15532 	 * First call the user-provided function (if any)
15533 	 */
15534 	if (user_funcp != NULL) {
15535 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15536 	}
15537 
15538 	sd_set_retry_bp(un, bp, retry_delay, statp);
15539 
15540 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15541 	return;
15542 
15543 fail_command:
15544 
15545 	if (user_funcp != NULL) {
15546 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15547 	}
15548 
15549 fail_command_no_log:
15550 
15551 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15552 	    "sd_retry_command: returning failed command\n");
15553 
15554 	sd_return_failed_command(un, bp, failure_code);
15555 
15556 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15557 }
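
/*
 * Illustrative usage (editorial addition): two representative invocations
 * of sd_retry_command(), both taken from this file and both made with
 * SD_MUTEX held.  A delayed, counted retry after a TRAN_BUSY on the
 * request sense bp:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
 *	    SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
 *
 * and an immediate, uncounted retry once the next portion of a partial
 * DMA transfer has been set up:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
 *	    (clock_t)0, NULL);
 */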
15558 
15559 
15560 /*
15561  *    Function: sd_set_retry_bp
15562  *
15563  * Description: Set up the given bp for retry.
15564  *
15565  *   Arguments: un - ptr to associated softstate
15566  *		bp - ptr to buf(9S) for the command
15567  *		retry_delay - time interval before issuing retry (may be 0)
15568  *		statp - optional pointer to kstat function
15569  *
15570  *     Context: May be called under interrupt context
15571  */
15572 
15573 static void
15574 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15575 	void (*statp)(kstat_io_t *))
15576 {
15577 	ASSERT(un != NULL);
15578 	ASSERT(mutex_owned(SD_MUTEX(un)));
15579 	ASSERT(bp != NULL);
15580 
15581 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15582 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15583 
15584 	/*
15585 	 * Indicate that the command is being retried. This will not allow any
15586 	 * other commands on the wait queue to be transported to the target
15587 	 * until this command has been completed (success or failure). The
15588 	 * "retry command" is not transported to the target until the given
15589 	 * time delay expires, unless the user specified a 0 retry_delay.
15590 	 *
15591 	 * Note: the timeout(9F) callback routine is what actually calls
15592 	 * sd_start_cmds() to transport the command, with the exception of a
15593 	 * zero retry_delay. The only current implementor of a zero retry delay
15594 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15595 	 */
15596 	if (un->un_retry_bp == NULL) {
15597 		ASSERT(un->un_retry_statp == NULL);
15598 		un->un_retry_bp = bp;
15599 
15600 		/*
15601 		 * If the user has not specified a delay, the command should
15602 		 * be queued and no timeout should be scheduled.
15603 		 */
15604 		if (retry_delay == 0) {
15605 			/*
15606 			 * Save the kstat pointer that will be used in the
15607 			 * call to SD_UPDATE_KSTATS() below, so that
15608 			 * sd_start_cmds() can correctly decrement the waitq
15609 			 * count when it is time to transport this command.
15610 			 */
15611 			un->un_retry_statp = statp;
15612 			goto done;
15613 		}
15614 	}
15615 
15616 	if (un->un_retry_bp == bp) {
15617 		/*
15618 		 * Save the kstat pointer that will be used in the call to
15619 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15620 		 * correctly decrement the waitq count when it is time to
15621 		 * transport this command.
15622 		 */
15623 		un->un_retry_statp = statp;
15624 
15625 		/*
15626 		 * Schedule a timeout if:
15627 		 *   1) The user has specified a delay.
15628 		 *   2) There is not a START_STOP_UNIT callback pending.
15629 		 *
15630 		 * If no delay has been specified, then it is up to the caller
15631 		 * to ensure that IO processing continues without stalling.
15632 		 * Effectively, this means that the caller will issue the
15633 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15634 		 * callback does this after the START STOP UNIT command has
15635 		 * completed. In either of these cases we should not schedule
15636 		 * a timeout callback here.  Also don't schedule the timeout if
15637 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15638 		 */
15639 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15640 		    (un->un_direct_priority_timeid == NULL)) {
15641 			un->un_retry_timeid =
15642 			    timeout(sd_start_retry_command, un, retry_delay);
15643 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15644 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15645 			    " bp:0x%p un_retry_timeid:0x%p\n",
15646 			    un, bp, un->un_retry_timeid);
15647 		}
15648 	} else {
15649 		/*
15650 		 * We only get in here if there is already another command
15651 		 * waiting to be retried.  In this case, we just put the
15652 		 * given command onto the wait queue, so it can be transported
15653 		 * after the current retry command has completed.
15654 		 *
15655 		 * Also we have to make sure that if the command at the head
15656 		 * of the wait queue is the un_failfast_bp, that we do not
15657 		 * put ahead of it any other commands that are to be retried.
15658 		 */
15659 		if ((un->un_failfast_bp != NULL) &&
15660 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15661 			/*
15662 			 * Enqueue this command AFTER the first command on
15663 			 * the wait queue (which is also un_failfast_bp).
15664 			 */
15665 			bp->av_forw = un->un_waitq_headp->av_forw;
15666 			un->un_waitq_headp->av_forw = bp;
15667 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15668 				un->un_waitq_tailp = bp;
15669 			}
15670 		} else {
15671 			/* Enqueue this command at the head of the waitq. */
15672 			bp->av_forw = un->un_waitq_headp;
15673 			un->un_waitq_headp = bp;
15674 			if (un->un_waitq_tailp == NULL) {
15675 				un->un_waitq_tailp = bp;
15676 			}
15677 		}
15678 
15679 		if (statp == NULL) {
15680 			statp = kstat_waitq_enter;
15681 		}
15682 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15683 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15684 	}
15685 
15686 done:
15687 	if (statp != NULL) {
15688 		SD_UPDATE_KSTATS(un, statp, bp);
15689 	}
15690 
15691 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15692 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15693 }
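
/*
 * Editorial sketch: un_retry_timeid holds the timeout(9F) id scheduled
 * above so the callback can be cancelled if the retry becomes moot (for
 * example during detach or suspend).  Assuming no new timeout can be
 * scheduled concurrently, the usual DDI cancellation pattern is:
 *
 *	timeout_id_t tid = un->un_retry_timeid;
 *	un->un_retry_timeid = NULL;
 *	if (tid != NULL) {
 *		mutex_exit(SD_MUTEX(un));
 *		(void) untimeout(tid);
 *		mutex_enter(SD_MUTEX(un));
 *	}
 *
 * The mutex is dropped because untimeout(9F) may wait for a callback that
 * itself acquires SD_MUTEX.
 */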
15694 
15695 
15696 /*
15697  *    Function: sd_start_retry_command
15698  *
15699  * Description: Start the command that has been waiting on the target's
15700  *		retry queue.  Called from timeout(9F) context after the
15701  *		retry delay interval has expired.
15702  *
15703  *   Arguments: arg - pointer to associated softstate for the device.
15704  *
15705  *     Context: timeout(9F) thread context.  May not sleep.
15706  */
15707 
15708 static void
15709 sd_start_retry_command(void *arg)
15710 {
15711 	struct sd_lun *un = arg;
15712 
15713 	ASSERT(un != NULL);
15714 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15715 
15716 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15717 	    "sd_start_retry_command: entry\n");
15718 
15719 	mutex_enter(SD_MUTEX(un));
15720 
15721 	un->un_retry_timeid = NULL;
15722 
15723 	if (un->un_retry_bp != NULL) {
15724 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15725 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15726 		    un, un->un_retry_bp);
15727 		sd_start_cmds(un, un->un_retry_bp);
15728 	}
15729 
15730 	mutex_exit(SD_MUTEX(un));
15731 
15732 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15733 	    "sd_start_retry_command: exit\n");
15734 }
15735 
15736 
15737 /*
15738  *    Function: sd_start_direct_priority_command
15739  *
15740  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15741  *		received TRAN_BUSY when we called scsi_transport() to send it
15742  *		to the underlying HBA. This function is called from timeout(9F)
15743  *		context after the delay interval has expired.
15744  *
15745  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15746  *
15747  *     Context: timeout(9F) thread context.  May not sleep.
15748  */
15749 
15750 static void
15751 sd_start_direct_priority_command(void *arg)
15752 {
15753 	struct buf	*priority_bp = arg;
15754 	struct sd_lun	*un;
15755 
15756 	ASSERT(priority_bp != NULL);
15757 	un = SD_GET_UN(priority_bp);
15758 	ASSERT(un != NULL);
15759 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15760 
15761 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15762 	    "sd_start_direct_priority_command: entry\n");
15763 
15764 	mutex_enter(SD_MUTEX(un));
15765 	un->un_direct_priority_timeid = NULL;
15766 	sd_start_cmds(un, priority_bp);
15767 	mutex_exit(SD_MUTEX(un));
15768 
15769 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15770 	    "sd_start_direct_priority_command: exit\n");
15771 }
15772 
15773 
15774 /*
15775  *    Function: sd_send_request_sense_command
15776  *
15777  * Description: Sends a REQUEST SENSE command to the target
15778  *
15779  *     Context: May be called from interrupt context.
15780  */
15781 
15782 static void
15783 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15784 	struct scsi_pkt *pktp)
15785 {
15786 	ASSERT(bp != NULL);
15787 	ASSERT(un != NULL);
15788 	ASSERT(mutex_owned(SD_MUTEX(un)));
15789 
15790 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15791 	    "entry: buf:0x%p\n", bp);
15792 
15793 	/*
15794 	 * If we are syncing or dumping, then fail the command to avoid a
15795 	 * recursive callback into scsi_transport(). Also fail the command
15796 	 * if we are suspended (legacy behavior).
15797 	 */
15798 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15799 	    (un->un_state == SD_STATE_DUMPING)) {
15800 		sd_return_failed_command(un, bp, EIO);
15801 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15802 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15803 		return;
15804 	}
15805 
15806 	/*
15807 	 * Retry the failed command and don't issue the request sense if:
15808 	 *    1) the sense buf is busy
15809 	 *    2) we have 1 or more outstanding commands on the target
15810 	 *    (the sense data will be cleared or invalidated anyway)
15811 	 *
15812 	 * Note: There could be an issue with not checking a retry limit here,
15813 	 * the problem is determining which retry limit to check.
15814 	 */
15815 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15816 		/* Don't retry if the command is flagged as non-retryable */
15817 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15818 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15819 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15820 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15821 			    "sd_send_request_sense_command: "
15822 			    "at full throttle, retrying exit\n");
15823 		} else {
15824 			sd_return_failed_command(un, bp, EIO);
15825 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15826 			    "sd_send_request_sense_command: "
15827 			    "at full throttle, non-retryable exit\n");
15828 		}
15829 		return;
15830 	}
15831 
15832 	sd_mark_rqs_busy(un, bp);
15833 	sd_start_cmds(un, un->un_rqs_bp);
15834 
15835 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15836 	    "sd_send_request_sense_command: exit\n");
15837 }
15838 
15839 
15840 /*
15841  *    Function: sd_mark_rqs_busy
15842  *
15843  * Description: Indicate that the request sense bp for this instance is
15844  *		in use.
15845  *
15846  *     Context: May be called under interrupt context
15847  */
15848 
15849 static void
15850 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15851 {
15852 	struct sd_xbuf	*sense_xp;
15853 
15854 	ASSERT(un != NULL);
15855 	ASSERT(bp != NULL);
15856 	ASSERT(mutex_owned(SD_MUTEX(un)));
15857 	ASSERT(un->un_sense_isbusy == 0);
15858 
15859 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15860 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15861 
15862 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15863 	ASSERT(sense_xp != NULL);
15864 
15865 	SD_INFO(SD_LOG_IO, un,
15866 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15867 
15868 	ASSERT(sense_xp->xb_pktp != NULL);
15869 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15870 	    == (FLAG_SENSING | FLAG_HEAD));
15871 
15872 	un->un_sense_isbusy = 1;
15873 	un->un_rqs_bp->b_resid = 0;
15874 	sense_xp->xb_pktp->pkt_resid  = 0;
15875 	sense_xp->xb_pktp->pkt_reason = 0;
15876 
15877 	/* So we can get back the bp at interrupt time! */
15878 	sense_xp->xb_sense_bp = bp;
15879 
15880 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15881 
15882 	/*
15883 	 * Mark this buf as awaiting sense data. (This is already set in
15884 	 * the pkt_flags for the RQS packet.)
15885 	 */
15886 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15887 
15888 	sense_xp->xb_retry_count	= 0;
15889 	sense_xp->xb_victim_retry_count = 0;
15890 	sense_xp->xb_ua_retry_count	= 0;
15891 	sense_xp->xb_dma_resid  = 0;
15892 
15893 	/* Clean up the fields for auto-request sense */
15894 	sense_xp->xb_sense_status = 0;
15895 	sense_xp->xb_sense_state  = 0;
15896 	sense_xp->xb_sense_resid  = 0;
15897 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15898 
15899 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15900 }
15901 
15902 
15903 /*
15904  *    Function: sd_mark_rqs_idle
15905  *
15906  * Description: SD_MUTEX must be held continuously through this routine
15907  *		to prevent reuse of the rqs struct before the caller can
15908 	 *		complete its processing.
15909  *
15910  * Return Code: Pointer to the RQS buf
15911  *
15912  *     Context: May be called under interrupt context
15913  */
15914 
15915 static struct buf *
15916 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15917 {
15918 	struct buf *bp;
15919 	ASSERT(un != NULL);
15920 	ASSERT(sense_xp != NULL);
15921 	ASSERT(mutex_owned(SD_MUTEX(un)));
15922 	ASSERT(un->un_sense_isbusy != 0);
15923 
15924 	un->un_sense_isbusy = 0;
15925 	bp = sense_xp->xb_sense_bp;
15926 	sense_xp->xb_sense_bp = NULL;
15927 
15928 	/* This pkt is no longer interested in getting sense data */
15929 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15930 
15931 	return (bp);
15932 }
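
/*
 * Illustrative lifecycle (editorial addition): the per-instance request
 * sense resources are used as a busy/idle pair, always under SD_MUTEX:
 *
 *	sd_mark_rqs_busy(un, bp);		(FLAG_SENSING set on bp)
 *	sd_start_cmds(un, un->un_rqs_bp);	(issue the REQUEST SENSE)
 *	...
 *	bp = sd_mark_rqs_idle(un, xp);		(recover the original bp)
 *
 * sd_send_request_sense_command() performs the first two steps; the idle
 * transition happens in the TRAN_BUSY/transport-error paths above and at
 * REQUEST SENSE completion.
 */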
15933 
15934 
15935 
15936 /*
15937  *    Function: sd_alloc_rqs
15938  *
15939  * Description: Set up the unit to receive auto request sense data
15940  *
15941  * Return Code: DDI_SUCCESS or DDI_FAILURE
15942  *
15943  *     Context: Called under attach(9E) context
15944  */
15945 
15946 static int
15947 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15948 {
15949 	struct sd_xbuf *xp;
15950 
15951 	ASSERT(un != NULL);
15952 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15953 	ASSERT(un->un_rqs_bp == NULL);
15954 	ASSERT(un->un_rqs_pktp == NULL);
15955 
15956 	/*
15957 	 * First allocate the required buf and scsi_pkt structs, then set up
15958 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15959 	 */
15960 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15961 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15962 	if (un->un_rqs_bp == NULL) {
15963 		return (DDI_FAILURE);
15964 	}
15965 
15966 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15967 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15968 
15969 	if (un->un_rqs_pktp == NULL) {
15970 		sd_free_rqs(un);
15971 		return (DDI_FAILURE);
15972 	}
15973 
15974 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15975 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15976 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
15977 
15978 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15979 
15980 	/* Set up the other needed members in the ARQ scsi_pkt. */
15981 	un->un_rqs_pktp->pkt_comp   = sdintr;
15982 	un->un_rqs_pktp->pkt_time   = sd_io_time;
15983 	un->un_rqs_pktp->pkt_flags |=
15984 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15985 
15986 	/*
15987 	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
15988 	 * provide any intpkt, destroypkt routines as we take care of
15989 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15990 	 */
15991 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15992 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15993 	xp->xb_pktp = un->un_rqs_pktp;
15994 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15995 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15996 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15997 
15998 	/*
15999 	 * Save the pointer to the request sense private bp so it can
16000 	 * be retrieved in sdintr.
16001 	 */
16002 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
16003 	ASSERT(un->un_rqs_bp->b_private == xp);
16004 
16005 	/*
16006 	 * See if the HBA supports auto-request sense for the specified
16007 	 * target/lun. If it does, then try to enable it (if not already
16008 	 * enabled).
16009 	 *
16010 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
16011 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
16012 	 * return success.  However, in both of these cases ARQ is always
16013 	 * enabled and scsi_ifgetcap will always return true. The best approach
16014 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
16015 	 *
16016 	 * The 3rd case is an HBA (adp) that always returns enabled on
16017 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
16018 	 * is to issue a scsi_ifsetcap and then a scsi_ifgetcap.
16019 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
16020 	 */
16021 
16022 	if (un->un_f_is_fibre == TRUE) {
16023 		un->un_f_arq_enabled = TRUE;
16024 	} else {
16025 #if defined(__i386) || defined(__amd64)
16026 		/*
16027 		 * Circumvent the Adaptec bug, remove this code when
16028 		 * the bug is fixed
16029 		 */
16030 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
16031 #endif
16032 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
16033 		case 0:
16034 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16035 				"sd_alloc_rqs: HBA supports ARQ\n");
16036 			/*
16037 			 * ARQ is supported by this HBA but currently is not
16038 			 * enabled. Attempt to enable it and if successful then
16039 			 * mark this instance as ARQ enabled.
16040 			 */
16041 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
16042 				== 1) {
16043 				/* Successfully enabled ARQ in the HBA */
16044 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16045 					"sd_alloc_rqs: ARQ enabled\n");
16046 				un->un_f_arq_enabled = TRUE;
16047 			} else {
16048 				/* Could not enable ARQ in the HBA */
16049 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16050 				"sd_alloc_rqs: failed ARQ enable\n");
16051 				un->un_f_arq_enabled = FALSE;
16052 			}
16053 			break;
16054 		case 1:
16055 			/*
16056 			 * ARQ is supported by this HBA and is already enabled.
16057 			 * Just mark ARQ as enabled for this instance.
16058 			 */
16059 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16060 				"sd_alloc_rqs: ARQ already enabled\n");
16061 			un->un_f_arq_enabled = TRUE;
16062 			break;
16063 		default:
16064 			/*
16065 			 * ARQ is not supported by this HBA; disable it for this
16066 			 * instance.
16067 			 */
16068 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16069 				"sd_alloc_rqs: HBA does not support ARQ\n");
16070 			un->un_f_arq_enabled = FALSE;
16071 			break;
16072 		}
16073 	}
16074 
16075 	return (DDI_SUCCESS);
16076 }
16077 
16078 
16079 /*
16080  *    Function: sd_free_rqs
16081  *
16082  * Description: Cleanup for the pre-instance RQS command.
16083  *
16084  *     Context: Kernel thread context
16085  */
16086 
16087 static void
16088 sd_free_rqs(struct sd_lun *un)
16089 {
16090 	ASSERT(un != NULL);
16091 
16092 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16093 
16094 	/*
16095 	 * If consistent memory is bound to a scsi_pkt, the pkt
16096 	 * has to be destroyed *before* freeing the consistent memory.
16097 	 * Don't change the sequence of this operations.
16098 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
16099 	 * after it was freed in scsi_free_consistent_buf().
16100 	 */
16101 	if (un->un_rqs_pktp != NULL) {
16102 		scsi_destroy_pkt(un->un_rqs_pktp);
16103 		un->un_rqs_pktp = NULL;
16104 	}
16105 
16106 	if (un->un_rqs_bp != NULL) {
16107 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
16108 		scsi_free_consistent_buf(un->un_rqs_bp);
16109 		un->un_rqs_bp = NULL;
16110 	}
16111 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16112 }
16113 
16114 
16115 
16116 /*
16117  *    Function: sd_reduce_throttle
16118  *
16119  * Description: Reduces the maximum # of outstanding commands on a
16120  *		target to the current number of outstanding commands.
16121  *		Queues a timeout(9F) callback to restore the limit
16122  *		after a specified interval has elapsed.
16123  *		Typically used when we get a TRAN_BUSY return code
16124  *		back from scsi_transport().
16125  *
16126  *   Arguments: un - ptr to the sd_lun softstate struct
16127  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16128  *
16129  *     Context: May be called from interrupt context
16130  */
16131 
16132 static void
16133 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16134 {
16135 	ASSERT(un != NULL);
16136 	ASSERT(mutex_owned(SD_MUTEX(un)));
16137 	ASSERT(un->un_ncmds_in_transport >= 0);
16138 
16139 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16140 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16141 	    un, un->un_throttle, un->un_ncmds_in_transport);
16142 
16143 	if (un->un_throttle > 1) {
16144 		if (un->un_f_use_adaptive_throttle == TRUE) {
16145 			switch (throttle_type) {
16146 			case SD_THROTTLE_TRAN_BUSY:
16147 				if (un->un_busy_throttle == 0) {
16148 					un->un_busy_throttle = un->un_throttle;
16149 				}
16150 				break;
16151 			case SD_THROTTLE_QFULL:
16152 				un->un_busy_throttle = 0;
16153 				break;
16154 			default:
16155 				ASSERT(FALSE);
16156 			}
16157 
16158 			if (un->un_ncmds_in_transport > 0) {
16159 				un->un_throttle = un->un_ncmds_in_transport;
16160 			}
16161 
16162 		} else {
16163 			if (un->un_ncmds_in_transport == 0) {
16164 				un->un_throttle = 1;
16165 			} else {
16166 				un->un_throttle = un->un_ncmds_in_transport;
16167 			}
16168 		}
16169 	}
16170 
16171 	/* Reschedule the timeout if none is currently active */
16172 	if (un->un_reset_throttle_timeid == NULL) {
16173 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16174 		    un, SD_THROTTLE_RESET_INTERVAL);
16175 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16176 		    "sd_reduce_throttle: timeout scheduled!\n");
16177 	}
16178 
16179 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16180 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16181 }
16182 
16183 
16184 
16185 /*
16186  *    Function: sd_restore_throttle
16187  *
16188  * Description: Callback function for timeout(9F).  Resets the current
16189  *		value of un->un_throttle to its default.
16190  *
16191  *   Arguments: arg - pointer to associated softstate for the device.
16192  *
16193  *     Context: May be called from interrupt context
16194  */
16195 
16196 static void
16197 sd_restore_throttle(void *arg)
16198 {
16199 	struct sd_lun	*un = arg;
16200 
16201 	ASSERT(un != NULL);
16202 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16203 
16204 	mutex_enter(SD_MUTEX(un));
16205 
16206 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16207 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16208 
16209 	un->un_reset_throttle_timeid = NULL;
16210 
16211 	if (un->un_f_use_adaptive_throttle == TRUE) {
16212 		/*
16213 		 * If un_busy_throttle is nonzero, then it contains the
16214 		 * value that un_throttle was when we got a TRAN_BUSY back
16215 		 * from scsi_transport(). We want to revert back to this
16216 		 * value.
16217 		 *
16218 		 * In the QFULL case, the throttle limit will incrementally
16219 		 * increase until it reaches max throttle.
16220 		 */
16221 		if (un->un_busy_throttle > 0) {
16222 			un->un_throttle = un->un_busy_throttle;
16223 			un->un_busy_throttle = 0;
16224 		} else {
16225 			/*
16226 			 * Increase the throttle by 10% to open the gate
16227 			 * slowly; schedule another restore if the saved
16228 			 * throttle has not yet been reached.
16229 			 */
16230 			short throttle;
16231 			if (sd_qfull_throttle_enable) {
16232 				throttle = un->un_throttle +
16233 				    max((un->un_throttle / 10), 1);
16234 				un->un_throttle =
16235 				    (throttle < un->un_saved_throttle) ?
16236 				    throttle : un->un_saved_throttle;
16237 				if (un->un_throttle < un->un_saved_throttle) {
16238 					un->un_reset_throttle_timeid =
16239 					    timeout(sd_restore_throttle, un,
16240 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16241 				}
16242 			}
16243 		}
16244 
16245 		/*
16246 		 * If un_throttle has fallen below the low-water mark, we
16247 		 * restore the maximum value here (and allow it to ratchet
16248 		 * down again if necessary).
16249 		 */
16250 		if (un->un_throttle < un->un_min_throttle) {
16251 			un->un_throttle = un->un_saved_throttle;
16252 		}
16253 	} else {
16254 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16255 		    "restoring limit from 0x%x to 0x%x\n",
16256 		    un->un_throttle, un->un_saved_throttle);
16257 		un->un_throttle = un->un_saved_throttle;
16258 	}
16259 
16260 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16261 	    "sd_restore_throttle: calling sd_start_cmds!\n");
16262 
16263 	sd_start_cmds(un, NULL);
16264 
16265 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16266 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16267 	    un, un->un_throttle);
16268 
16269 	mutex_exit(SD_MUTEX(un));
16270 
16271 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16272 }
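
/*
 * Worked example (editorial addition): with sd_qfull_throttle_enable set,
 * each restore interval raises un_throttle by max(un_throttle / 10, 1),
 * capped at un_saved_throttle.  Starting at un_throttle == 30 with
 * un_saved_throttle == 64, successive timeouts yield:
 *
 *	30 -> 33 -> 36 -> 39 -> 42 -> 46 -> 50 -> 55 -> 60 -> 64 (capped)
 *
 * A TRAN_BUSY recovery, by contrast, snaps back to un_busy_throttle in a
 * single step.
 */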
16273 
16274 /*
16275  *    Function: sdrunout
16276  *
16277  * Description: Callback routine for scsi_init_pkt when a resource allocation
16278  *		fails.
16279  *
16280  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16281  *		soft state instance.
16282  *
16283  * Return Code: The scsi_init_pkt routine allows for the callback function to
16284  *		return a 0 indicating the callback should be rescheduled or a 1
16285  *		indicating not to reschedule. This routine always returns 1
16286  *		because the driver always provides a callback function to
16287  *		scsi_init_pkt. This results in a callback always being scheduled
16288  *		(via the scsi_init_pkt callback implementation) if a resource
16289  *		failure occurs.
16290  *
16291  *     Context: This callback function may not block or call routines that block
16292  *     Context: This callback function may not block or call routines that block.
16293  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16294  *		request persisting at the head of the list which cannot be
16295  *		satisfied even after multiple retries. In the future the driver
16296  *		may implement some type of maximum runout count before failing
16297  *		an I/O.
16298  */
16299 
16300 static int
16301 sdrunout(caddr_t arg)
16302 {
16303 	struct sd_lun	*un = (struct sd_lun *)arg;
16304 
16305 	ASSERT(un != NULL);
16306 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16307 
16308 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16309 
16310 	mutex_enter(SD_MUTEX(un));
16311 	sd_start_cmds(un, NULL);
16312 	mutex_exit(SD_MUTEX(un));
16313 	/*
16314 	 * This callback routine always returns 1 (i.e. do not reschedule)
16315 	 * because we always specify sdrunout as the callback handler for
16316 	 * scsi_init_pkt inside the call to sd_start_cmds.
16317 	 */
16318 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16319 	return (1);
16320 }
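
/*
 * Illustrative sketch (editorial addition): sdrunout() is passed as the
 * resource callback to scsi_init_pkt(9F) when packets are allocated on
 * behalf of sd_start_cmds().  Schematically (the cmdlen/statuslen values
 * here are hypothetical):
 *
 *	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, bp, CDB_GROUP1,
 *	    1, 0, 0, sdrunout, (caddr_t)un);
 *
 * A NULL return with a callback supplied means the request was queued;
 * the framework invokes sdrunout() once resources become available.
 */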
16321 
16322 
16323 /*
16324  *    Function: sdintr
16325  *
16326  * Description: Completion callback routine for scsi_pkt(9S) structs
16327  *		sent to the HBA driver via scsi_transport(9F).
16328  *
16329  *     Context: Interrupt context
16330  */
16331 
16332 static void
16333 sdintr(struct scsi_pkt *pktp)
16334 {
16335 	struct buf	*bp;
16336 	struct sd_xbuf	*xp;
16337 	struct sd_lun	*un;
16338 
16339 	ASSERT(pktp != NULL);
16340 	bp = (struct buf *)pktp->pkt_private;
16341 	ASSERT(bp != NULL);
16342 	xp = SD_GET_XBUF(bp);
16343 	ASSERT(xp != NULL);
16344 	ASSERT(xp->xb_pktp != NULL);
16345 	un = SD_GET_UN(bp);
16346 	ASSERT(un != NULL);
16347 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16348 
16349 #ifdef SD_FAULT_INJECTION
16350 
16351 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16352 	/* SD FaultInjection */
16353 	sd_faultinjection(pktp);
16354 
16355 #endif /* SD_FAULT_INJECTION */
16356 
16357 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16358 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16359 
16360 	mutex_enter(SD_MUTEX(un));
16361 
16362 	/* Reduce the count of the #commands currently in transport */
16363 	un->un_ncmds_in_transport--;
16364 	ASSERT(un->un_ncmds_in_transport >= 0);
16365 
16366 	/* Increment counter to indicate that the callback routine is active */
16367 	un->un_in_callback++;
16368 
16369 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16370 
16371 #ifdef	SDDEBUG
16372 	if (bp == un->un_retry_bp) {
16373 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16374 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16375 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16376 	}
16377 #endif
16378 
16379 	/*
16380 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16381 	 */
16382 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16383 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16384 		    "Device is gone\n");
16385 		sd_return_failed_command(un, bp, EIO);
16386 		goto exit;
16387 	}
16388 
16389 	/*
16390 	 * First see if the pkt has auto-request sense data with it....
16391 	 * Look at the packet state first so we don't take a performance
16392 	 * hit looking at the arq enabled flag unless absolutely necessary.
16393 	 */
16394 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16395 	    (un->un_f_arq_enabled == TRUE)) {
16396 		/*
16397 		 * The HBA did an auto request sense for this command so check
16398 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16399 		 * driver command that should not be retried.
16400 		 */
16401 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16402 			/*
16403 			 * Save the relevant sense info into the xp for the
16404 			 * original cmd.
16405 			 */
16406 			struct scsi_arq_status *asp;
16407 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16408 			xp->xb_sense_status =
16409 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16410 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16411 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16412 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16413 			    min(sizeof (struct scsi_extended_sense),
16414 			    SENSE_LENGTH));
16415 
16416 			/* fail the command */
16417 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16418 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16419 			sd_return_failed_command(un, bp, EIO);
16420 			goto exit;
16421 		}
16422 
16423 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16424 		/*
16425 		 * We want to either retry or fail this command, so free
16426 		 * the DMA resources here.  If we retry the command then
16427 		 * the DMA resources will be reallocated in sd_start_cmds().
16428 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16429 		 * causes the *entire* transfer to start over again from the
16430 		 * beginning of the request, even for PARTIAL chunks that
16431 		 * have already transferred successfully.
16432 		 */
16433 		if ((un->un_f_is_fibre == TRUE) &&
16434 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16435 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16436 			scsi_dmafree(pktp);
16437 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16438 		}
16439 #endif
16440 
16441 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16442 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16443 
16444 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16445 		goto exit;
16446 	}
16447 
16448 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16449 	if (pktp->pkt_flags & FLAG_SENSING)  {
16450 		/* This pktp is from the unit's REQUEST_SENSE command */
16451 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16452 		    "sdintr: sd_handle_request_sense\n");
16453 		sd_handle_request_sense(un, bp, xp, pktp);
16454 		goto exit;
16455 	}
16456 
16457 	/*
16458 	 * Check to see if the command successfully completed as requested;
16459 	 * this is the most common case (and also the hot performance path).
16460 	 *
16461 	 * Requirements for successful completion are:
16462 	 * pkt_reason is CMD_CMPLT and packet status is status good.
16463 	 * In addition:
16464 	 * - A residual of zero indicates successful completion no matter what
16465 	 *   the command is.
16466 	 * - If the residual is not zero and the command is not a read or
16467  *   write, it is still considered to have completed successfully.
16468  * - If the residual is not zero and the command is a read or
16469  *   write, it is considered to have completed successfully only
16470  *   if it is a USCSI command; for an ordinary read or write the
16471  *   residual must be zero for the command to be considered
16472  *   successfully completed.
16473 	 */
16474 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16475 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16476 
16477 		/*
16478 		 * Since this command is returned with a good status, we
16479 		 * can reset the count for Sonoma failover.
16480 		 */
16481 		un->un_sonoma_failure_count = 0;
16482 
16483 		/*
16484 		 * Return all USCSI commands on good status
16485 		 */
16486 		if (pktp->pkt_resid == 0) {
16487 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16488 			    "sdintr: returning command for resid == 0\n");
16489 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16490 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16491 			SD_UPDATE_B_RESID(bp, pktp);
16492 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16493 			    "sdintr: returning command for resid != 0\n");
16494 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16495 			SD_UPDATE_B_RESID(bp, pktp);
16496 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16497 			    "sdintr: returning uscsi command\n");
16498 		} else {
16499 			goto not_successful;
16500 		}
16501 		sd_return_command(un, bp);
16502 
16503 		/*
16504 		 * Decrement counter to indicate that the callback routine
16505 		 * is done.
16506 		 */
16507 		un->un_in_callback--;
16508 		ASSERT(un->un_in_callback >= 0);
16509 		mutex_exit(SD_MUTEX(un));
16510 
16511 		return;
16512 	}
16513 
16514 not_successful:
16515 
16516 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16517 	/*
16518 	 * The following is based upon knowledge of the underlying transport
16519 	 * and its use of DMA resources.  This code should be removed when
16520 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16521 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16522 	 * and sd_start_cmds().
16523 	 *
16524 	 * Free any DMA resources associated with this command if there
16525 	 * is a chance it could be retried or enqueued for later retry.
16526 	 * If we keep the DMA binding then mpxio cannot reissue the
16527 	 * command on another path whenever a path failure occurs.
16528 	 *
16529 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16530 	 * causes the *entire* transfer to start over again from the
16531 	 * beginning of the request, even for PARTIAL chunks that
16532 	 * have already transferred successfully.
16533 	 *
16534 	 * This is only done for non-uscsi commands (and also skipped for the
16535 	 * driver's internal RQS command). Also just do this for Fibre Channel
16536 	 * devices as these are the only ones that support mpxio.
16537 	 */
16538 	if ((un->un_f_is_fibre == TRUE) &&
16539 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16540 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16541 		scsi_dmafree(pktp);
16542 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16543 	}
16544 #endif
16545 
16546 	/*
16547 	 * The command did not successfully complete as requested so check
16548 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16549 	 * driver command that should not be retried so just return. If
16550 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16551 	 */
16552 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16553 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16554 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16555 		/*
16556 		 * Issue a request sense if a check condition caused the error
16557 		 * (we handle the auto request sense case above), otherwise
16558 		 * just fail the command.
16559 		 */
16560 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16561 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16562 			sd_send_request_sense_command(un, bp, pktp);
16563 		} else {
16564 			sd_return_failed_command(un, bp, EIO);
16565 		}
16566 		goto exit;
16567 	}
16568 
16569 	/*
16570 	 * The command did not successfully complete as requested so process
16571 	 * the error, retry, and/or attempt recovery.
16572 	 */
16573 	switch (pktp->pkt_reason) {
16574 	case CMD_CMPLT:
16575 		switch (SD_GET_PKT_STATUS(pktp)) {
16576 		case STATUS_GOOD:
16577 			/*
16578 			 * The command completed successfully with a non-zero
16579 			 * residual
16580 			 */
16581 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16582 			    "sdintr: STATUS_GOOD\n");
16583 			sd_pkt_status_good(un, bp, xp, pktp);
16584 			break;
16585 
16586 		case STATUS_CHECK:
16587 		case STATUS_TERMINATED:
16588 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16589 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16590 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16591 			break;
16592 
16593 		case STATUS_BUSY:
16594 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16595 			    "sdintr: STATUS_BUSY\n");
16596 			sd_pkt_status_busy(un, bp, xp, pktp);
16597 			break;
16598 
16599 		case STATUS_RESERVATION_CONFLICT:
16600 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16601 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16602 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16603 			break;
16604 
16605 		case STATUS_QFULL:
16606 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16607 			    "sdintr: STATUS_QFULL\n");
16608 			sd_pkt_status_qfull(un, bp, xp, pktp);
16609 			break;
16610 
16611 		case STATUS_MET:
16612 		case STATUS_INTERMEDIATE:
16613 		case STATUS_SCSI2:
16614 		case STATUS_INTERMEDIATE_MET:
16615 		case STATUS_ACA_ACTIVE:
16616 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16617 			    "Unexpected SCSI status received: 0x%x\n",
16618 			    SD_GET_PKT_STATUS(pktp));
16619 			sd_return_failed_command(un, bp, EIO);
16620 			break;
16621 
16622 		default:
16623 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16624 			    "Invalid SCSI status received: 0x%x\n",
16625 			    SD_GET_PKT_STATUS(pktp));
16626 			sd_return_failed_command(un, bp, EIO);
16627 			break;
16628 
16629 		}
16630 		break;
16631 
16632 	case CMD_INCOMPLETE:
16633 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16634 		    "sdintr: CMD_INCOMPLETE\n");
16635 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16636 		break;
16637 	case CMD_TRAN_ERR:
16638 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16639 		    "sdintr: CMD_TRAN_ERR\n");
16640 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16641 		break;
16642 	case CMD_RESET:
16643 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16644 		    "sdintr: CMD_RESET\n");
16645 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16646 		break;
16647 	case CMD_ABORTED:
16648 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16649 		    "sdintr: CMD_ABORTED\n");
16650 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16651 		break;
16652 	case CMD_TIMEOUT:
16653 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16654 		    "sdintr: CMD_TIMEOUT\n");
16655 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16656 		break;
16657 	case CMD_UNX_BUS_FREE:
16658 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16659 		    "sdintr: CMD_UNX_BUS_FREE\n");
16660 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16661 		break;
16662 	case CMD_TAG_REJECT:
16663 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16664 		    "sdintr: CMD_TAG_REJECT\n");
16665 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16666 		break;
16667 	default:
16668 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16669 		    "sdintr: default\n");
16670 		sd_pkt_reason_default(un, bp, xp, pktp);
16671 		break;
16672 	}
16673 
16674 exit:
16675 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16676 
16677 	/* Decrement counter to indicate that the callback routine is done. */
16678 	un->un_in_callback--;
16679 	ASSERT(un->un_in_callback >= 0);
16680 
16681 	/*
16682 	 * At this point, the pkt has been dispatched, ie, it is either
16683 	 * being re-tried or has been returned to its caller and should
16684 	 * not be referenced.
16685 	 */
16686 
16687 	mutex_exit(SD_MUTEX(un));
16688 }
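
/*
 * Illustrative sketch only (hypothetical SD_DOC_EXAMPLES guard, never
 * compiled): the pkt_private linkage that sdintr relies upon.  The
 * buf(9S) pointer is stored in pkt_private when the packet is built, so
 * a completion routine can recover the buf, and from it the per-command
 * sd_xbuf and the softstate, without any global lookup.
 */
#ifdef SD_DOC_EXAMPLES
static void
sd_example_completion(struct scsi_pkt *pktp)
{
	struct buf	*bp = (struct buf *)pktp->pkt_private;
	struct sd_xbuf	*xp = SD_GET_XBUF(bp);
	struct sd_lun	*un = SD_GET_UN(bp);

	/* Per-command processing would use bp, xp, and un from here. */
	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_example_completion: bp:0x%p xp:0x%p\n", bp, xp);
}
#endif /* SD_DOC_EXAMPLES */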
16689 
16690 
16691 /*
16692  *    Function: sd_print_incomplete_msg
16693  *
16694  * Description: Prints the error message for a CMD_INCOMPLETE error.
16695  *
16696  *   Arguments: un - ptr to associated softstate for the device.
16697  *		bp - ptr to the buf(9S) for the command.
16698  *		arg - message string ptr
16699  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16700  *			or SD_NO_RETRY_ISSUED.
16701  *
16702  *     Context: May be called under interrupt context
16703  */
16704 
16705 static void
16706 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16707 {
16708 	struct scsi_pkt	*pktp;
16709 	char	*msgp;
16710 	char	*cmdp = arg;
16711 
16712 	ASSERT(un != NULL);
16713 	ASSERT(mutex_owned(SD_MUTEX(un)));
16714 	ASSERT(bp != NULL);
16715 	ASSERT(arg != NULL);
16716 	pktp = SD_GET_PKTP(bp);
16717 	ASSERT(pktp != NULL);
16718 
16719 	switch (code) {
16720 	case SD_DELAYED_RETRY_ISSUED:
16721 	case SD_IMMEDIATE_RETRY_ISSUED:
16722 		msgp = "retrying";
16723 		break;
16724 	case SD_NO_RETRY_ISSUED:
16725 	default:
16726 		msgp = "giving up";
16727 		break;
16728 	}
16729 
16730 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16731 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16732 		    "incomplete %s - %s\n", cmdp, msgp);
16733 	}
16734 }
16735 
16736 
16737 
16738 /*
16739  *    Function: sd_pkt_status_good
16740  *
16741  * Description: Processing for a STATUS_GOOD code in pkt_status.
16742  *
16743  *     Context: May be called under interrupt context
16744  */
16745 
16746 static void
16747 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16748 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16749 {
16750 	char	*cmdp;
16751 
16752 	ASSERT(un != NULL);
16753 	ASSERT(mutex_owned(SD_MUTEX(un)));
16754 	ASSERT(bp != NULL);
16755 	ASSERT(xp != NULL);
16756 	ASSERT(pktp != NULL);
16757 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16758 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16759 	ASSERT(pktp->pkt_resid != 0);
16760 
16761 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16762 
16763 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16764 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16765 	case SCMD_READ:
16766 		cmdp = "read";
16767 		break;
16768 	case SCMD_WRITE:
16769 		cmdp = "write";
16770 		break;
16771 	default:
16772 		SD_UPDATE_B_RESID(bp, pktp);
16773 		sd_return_command(un, bp);
16774 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16775 		return;
16776 	}
16777 
16778 	/*
16779 	 * See if we can retry the read/write, preferably immediately.
16780 	 * If retries are exhausted, then sd_retry_command() will update
16781 	 * the b_resid count.
16782 	 */
16783 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16784 	    cmdp, EIO, (clock_t)0, NULL);
16785 
16786 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16787 }
16788 
16789 
16790 
16791 
16792 
16793 /*
16794  *    Function: sd_handle_request_sense
16795  *
16796  * Description: Processing for non-auto Request Sense command.
16797  *
16798  *   Arguments: un - ptr to associated softstate
16799  *		sense_bp - ptr to buf(9S) for the RQS command
16800  *		sense_xp - ptr to the sd_xbuf for the RQS command
16801  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16802  *
16803  *     Context: May be called under interrupt context
16804  */
16805 
16806 static void
16807 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16808 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16809 {
16810 	struct buf	*cmd_bp;	/* buf for the original command */
16811 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16812 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16813 
16814 	ASSERT(un != NULL);
16815 	ASSERT(mutex_owned(SD_MUTEX(un)));
16816 	ASSERT(sense_bp != NULL);
16817 	ASSERT(sense_xp != NULL);
16818 	ASSERT(sense_pktp != NULL);
16819 
16820 	/*
16821 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16822 	 * RQS command and not the original command.
16823 	 */
16824 	ASSERT(sense_pktp == un->un_rqs_pktp);
16825 	ASSERT(sense_bp   == un->un_rqs_bp);
16826 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16827 	    (FLAG_SENSING | FLAG_HEAD));
16828 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16829 	    FLAG_SENSING) == FLAG_SENSING);
16830 
16831 	/* These are the bp, xp, and pktp for the original command */
16832 	cmd_bp = sense_xp->xb_sense_bp;
16833 	cmd_xp = SD_GET_XBUF(cmd_bp);
16834 	cmd_pktp = SD_GET_PKTP(cmd_bp);
16835 
16836 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16837 		/*
16838 		 * The REQUEST SENSE command failed.  Release the REQUEST
16839 		 * SENSE command for re-use, get back the bp for the original
16840 		 * command, and attempt to re-try the original command if
16841 		 * FLAG_DIAGNOSE is not set in the original packet.
16842 		 */
16843 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16844 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16845 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16846 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16847 			    NULL, NULL, EIO, (clock_t)0, NULL);
16848 			return;
16849 		}
16850 	}
16851 
16852 	/*
16853 	 * Save the relevant sense info into the xp for the original cmd.
16854 	 *
16855 	 * Note: if the request sense failed the state info will be zero
16856 	 * as set in sd_mark_rqs_busy()
16857 	 */
16858 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16859 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
16860 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
16861 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
16862 
16863 	/*
16864 	 *  Free up the RQS command....
16865 	 *  NOTE:
16866 	 *	Must do this BEFORE calling sd_validate_sense_data!
16867 	 *	sd_validate_sense_data may return the original command in
16868 	 *	which case the pkt will be freed and the flags can no
16869 	 *	longer be touched.
16870 	 *	SD_MUTEX is held through this process until the command
16871 	 *	is dispatched based upon the sense data, so there are
16872 	 *	no race conditions.
16873 	 */
16874 	(void) sd_mark_rqs_idle(un, sense_xp);
16875 
16876 	/*
16877 	 * For a retryable command see if we have valid sense data, if so then
16878 	 * turn it over to sd_decode_sense() to figure out the right course of
16879 	 * action. Just fail a non-retryable command.
16880 	 */
16881 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16882 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
16883 		    SD_SENSE_DATA_IS_VALID) {
16884 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16885 		}
16886 	} else {
16887 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16888 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16889 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16890 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16891 		sd_return_failed_command(un, cmd_bp, EIO);
16892 	}
16893 }
16894 
16895 
16896 
16897 
16898 /*
16899  *    Function: sd_handle_auto_request_sense
16900  *
16901  * Description: Processing for auto-request sense information.
16902  *
16903  *   Arguments: un - ptr to associated softstate
16904  *		bp - ptr to buf(9S) for the command
16905  *		xp - ptr to the sd_xbuf for the command
16906  *		pktp - ptr to the scsi_pkt(9S) for the command
16907  *
16908  *     Context: May be called under interrupt context
16909  */
16910 
16911 static void
16912 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16913 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16914 {
16915 	struct scsi_arq_status *asp;
16916 
16917 	ASSERT(un != NULL);
16918 	ASSERT(mutex_owned(SD_MUTEX(un)));
16919 	ASSERT(bp != NULL);
16920 	ASSERT(xp != NULL);
16921 	ASSERT(pktp != NULL);
16922 	ASSERT(pktp != un->un_rqs_pktp);
16923 	ASSERT(bp   != un->un_rqs_bp);
16924 
16925 	/*
16926 	 * For auto-request sense, we get a scsi_arq_status back from
16927 	 * the HBA, with the sense data in the sts_sensedata member.
16928 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16929 	 */
16930 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16931 
16932 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16933 		/*
16934 		 * The auto REQUEST SENSE failed; see if we can re-try
16935 		 * the original command.
16936 		 */
16937 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16938 		    "auto request sense failed (reason=%s)\n",
16939 		    scsi_rname(asp->sts_rqpkt_reason));
16940 
16941 		sd_reset_target(un, pktp);
16942 
16943 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16944 		    NULL, NULL, EIO, (clock_t)0, NULL);
16945 		return;
16946 	}
16947 
16948 	/* Save the relevant sense info into the xp for the original cmd. */
16949 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16950 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16951 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16952 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16953 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16954 
16955 	/*
16956 	 * See if we have valid sense data, if so then turn it over to
16957 	 * sd_decode_sense() to figure out the right course of action.
16958 	 */
16959 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16960 		sd_decode_sense(un, bp, xp, pktp);
16961 	}
16962 }
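
/*
 * Illustrative sketch only (hypothetical SD_DOC_EXAMPLES guard, never
 * compiled): the check that makes it safe to treat pkt_scbp as a
 * scsi_arq_status, as sd_handle_auto_request_sense() assumes.  The
 * pkt_scbp area only holds auto request sense data when the HBA has set
 * STATE_ARQ_DONE in pkt_state; otherwise it is just the SCSI status.
 */
#ifdef SD_DOC_EXAMPLES
static int
sd_example_arq_valid(struct scsi_pkt *pktp)
{
	if (pktp->pkt_state & STATE_ARQ_DONE) {
		struct scsi_arq_status *asp =
		    (struct scsi_arq_status *)(pktp->pkt_scbp);

		/* The sense bytes live in asp->sts_sensedata. */
		return (asp->sts_rqpkt_reason == CMD_CMPLT);
	}
	return (0);
}
#endif /* SD_DOC_EXAMPLES */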
16963 
16964 
16965 /*
16966  *    Function: sd_print_sense_failed_msg
16967  *
16968  * Description: Print log message when RQS has failed.
16969  *
16970  *   Arguments: un - ptr to associated softstate
16971  *		bp - ptr to buf(9S) for the command
16972  *		arg - generic message string ptr
16973  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16974  *			or SD_NO_RETRY_ISSUED
16975  *
16976  *     Context: May be called from interrupt context
16977  */
16978 
16979 static void
16980 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16981 	int code)
16982 {
16983 	char	*msgp = arg;
16984 
16985 	ASSERT(un != NULL);
16986 	ASSERT(mutex_owned(SD_MUTEX(un)));
16987 	ASSERT(bp != NULL);
16988 
16989 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16990 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16991 	}
16992 }
16993 
16994 
16995 /*
16996  *    Function: sd_validate_sense_data
16997  *
16998  * Description: Check the given sense data for validity.
16999  *		If the sense data is not valid, the command will
17000  *		be either failed or retried!
17001  *
17002  * Return Code: SD_SENSE_DATA_IS_INVALID
17003  *		SD_SENSE_DATA_IS_VALID
17004  *
17005  *     Context: May be called from interrupt context
17006  */
17007 
17008 static int
17009 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
17010 {
17011 	struct scsi_extended_sense *esp;
17012 	struct	scsi_pkt *pktp;
17013 	size_t	actual_len;
17014 	char	*msgp = NULL;
17015 
17016 	ASSERT(un != NULL);
17017 	ASSERT(mutex_owned(SD_MUTEX(un)));
17018 	ASSERT(bp != NULL);
17019 	ASSERT(bp != un->un_rqs_bp);
17020 	ASSERT(xp != NULL);
17021 
17022 	pktp = SD_GET_PKTP(bp);
17023 	ASSERT(pktp != NULL);
17024 
17025 	/*
17026 	 * Check the status of the RQS command (auto or manual).
17027 	 */
17028 	switch (xp->xb_sense_status & STATUS_MASK) {
17029 	case STATUS_GOOD:
17030 		break;
17031 
17032 	case STATUS_RESERVATION_CONFLICT:
17033 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
17034 		return (SD_SENSE_DATA_IS_INVALID);
17035 
17036 	case STATUS_BUSY:
17037 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17038 		    "Busy Status on REQUEST SENSE\n");
17039 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
17040 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17041 		return (SD_SENSE_DATA_IS_INVALID);
17042 
17043 	case STATUS_QFULL:
17044 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17045 		    "QFULL Status on REQUEST SENSE\n");
17046 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
17047 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17048 		return (SD_SENSE_DATA_IS_INVALID);
17049 
17050 	case STATUS_CHECK:
17051 	case STATUS_TERMINATED:
17052 		msgp = "Check Condition on REQUEST SENSE\n";
17053 		goto sense_failed;
17054 
17055 	default:
17056 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
17057 		goto sense_failed;
17058 	}
17059 
17060 	/*
17061 	 * See if we got the minimum required amount of sense data.
17062 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
17063 	 * or less.
17064 	 */
17065 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
17066 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
17067 	    (actual_len == 0)) {
17068 		msgp = "Request Sense couldn't get sense data\n";
17069 		goto sense_failed;
17070 	}
17071 
17072 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
17073 		msgp = "Not enough sense information\n";
17074 		goto sense_failed;
17075 	}
17076 
17077 	/*
17078 	 * We require the extended sense data
17079 	 */
17080 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
17081 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
17082 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17083 			static char tmp[8];
17084 			static char buf[148];
17085 			char *p = (char *)(xp->xb_sense_data);
17086 			int i;
17087 
17088 			mutex_enter(&sd_sense_mutex);
17089 			(void) strcpy(buf, "undecodable sense information:");
17090 			for (i = 0; i < actual_len; i++) {
17091 				(void) sprintf(tmp, " 0x%x", *(p++) & 0xff);
17092 				(void) strcpy(&buf[strlen(buf)], tmp);
17093 			}
17094 			i = strlen(buf);
17095 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
17096 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
17097 			mutex_exit(&sd_sense_mutex);
17098 		}
17099 		/* Note: Legacy behavior, fail the command with no retry */
17100 		sd_return_failed_command(un, bp, EIO);
17101 		return (SD_SENSE_DATA_IS_INVALID);
17102 	}
17103 
17104 	/*
17105 	 * Check that es_code is valid (es_class concatenated with es_code
17106 	 * makes up the "response code" field).  es_class will always be 7,
17107 	 * so make sure es_code is 0, 1, 2, 3 or 0xf; es_code indicates the
17108 	 * sense data format.
17109 	 */
17110 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
17111 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
17112 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
17113 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
17114 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
17115 		goto sense_failed;
17116 	}
17117 
17118 	return (SD_SENSE_DATA_IS_VALID);
17119 
17120 sense_failed:
17121 	/*
17122 	 * If the request sense failed (for whatever reason), attempt
17123 	 * to retry the original command.
17124 	 */
17125 #if defined(__i386) || defined(__amd64)
17126 	/*
17127 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
17128 	 * sddef.h for the SPARC platform, while x86 uses one binary
17129 	 * for both SCSI and FC.
17130 	 * The delay value used here needs to be adjusted whenever
17131 	 * SD_RETRY_DELAY changes in sddef.h.
17132 	 */
17133 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17134 	    sd_print_sense_failed_msg, msgp, EIO,
17135 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
17136 #else
17137 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17138 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
17139 #endif
17140 
17141 	return (SD_SENSE_DATA_IS_INVALID);
17142 }
17143 
17144 
17145 
17146 /*
17147  *    Function: sd_decode_sense
17148  *
17149  * Description: Take recovery action(s) when SCSI Sense Data is received.
17150  *
17151  *     Context: Interrupt context.
17152  */
17153 
17154 static void
17155 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17156 	struct scsi_pkt *pktp)
17157 {
17158 	uint8_t sense_key;
17159 
17160 	ASSERT(un != NULL);
17161 	ASSERT(mutex_owned(SD_MUTEX(un)));
17162 	ASSERT(bp != NULL);
17163 	ASSERT(bp != un->un_rqs_bp);
17164 	ASSERT(xp != NULL);
17165 	ASSERT(pktp != NULL);
17166 
17167 	sense_key = scsi_sense_key(xp->xb_sense_data);
17168 
17169 	switch (sense_key) {
17170 	case KEY_NO_SENSE:
17171 		sd_sense_key_no_sense(un, bp, xp, pktp);
17172 		break;
17173 	case KEY_RECOVERABLE_ERROR:
17174 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17175 		    bp, xp, pktp);
17176 		break;
17177 	case KEY_NOT_READY:
17178 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17179 		    bp, xp, pktp);
17180 		break;
17181 	case KEY_MEDIUM_ERROR:
17182 	case KEY_HARDWARE_ERROR:
17183 		sd_sense_key_medium_or_hardware_error(un,
17184 		    xp->xb_sense_data, bp, xp, pktp);
17185 		break;
17186 	case KEY_ILLEGAL_REQUEST:
17187 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17188 		break;
17189 	case KEY_UNIT_ATTENTION:
17190 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17191 		    bp, xp, pktp);
17192 		break;
17193 	case KEY_WRITE_PROTECT:
17194 	case KEY_VOLUME_OVERFLOW:
17195 	case KEY_MISCOMPARE:
17196 		sd_sense_key_fail_command(un, bp, xp, pktp);
17197 		break;
17198 	case KEY_BLANK_CHECK:
17199 		sd_sense_key_blank_check(un, bp, xp, pktp);
17200 		break;
17201 	case KEY_ABORTED_COMMAND:
17202 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17203 		break;
17204 	case KEY_VENDOR_UNIQUE:
17205 	case KEY_COPY_ABORTED:
17206 	case KEY_EQUAL:
17207 	case KEY_RESERVED:
17208 	default:
17209 		sd_sense_key_default(un, xp->xb_sense_data,
17210 		    bp, xp, pktp);
17211 		break;
17212 	}
17213 }
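
/*
 * Illustrative sketch only (hypothetical SD_DOC_EXAMPLES guard, never
 * compiled): the scsi_sense_key(9F)/scsi_sense_asc(9F)/scsi_sense_ascq(9F)
 * accessors used by sd_decode_sense() work on both fixed and descriptor
 * format sense data, which is why the driver extracts the key/ASC/ASCQ
 * through them instead of dereferencing scsi_extended_sense fields.
 * The ASC/ASCQ pair tested here (0x11/0x00, unrecovered read error) is
 * just an example value.
 */
#ifdef SD_DOC_EXAMPLES
static int
sd_example_is_unrecovered_read(uint8_t *sense_datap)
{
	return ((scsi_sense_key(sense_datap) == KEY_MEDIUM_ERROR) &&
	    (scsi_sense_asc(sense_datap) == 0x11) &&
	    (scsi_sense_ascq(sense_datap) == 0x00));
}
#endif /* SD_DOC_EXAMPLES */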
17214 
17215 
17216 /*
17217  *    Function: sd_dump_memory
17218  *
17219  * Description: Debug logging routine to print the contents of a user provided
17220  * Description: Debug logging routine to print the contents of a
17221  *		user-provided buffer. The output is broken up into
17222  *		256-byte segments due to a size constraint of the
17223  *		scsi_log implementation.
17224  *   Arguments: un - ptr to softstate
17225  *		comp - component mask
17226  *		title - "title" string to preceed data when printed
17227  *		title - "title" string to precede data when printed
17228  *		len - size of data block to be printed
17229  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17230  *
17231  *     Context: May be called from interrupt context
17232  */
17233 
17234 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17235 
17236 static char *sd_dump_format_string[] = {
17237 		" 0x%02x",
17238 		" %c"
17239 };
17240 
17241 static void
17242 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17243     int len, int fmt)
17244 {
17245 	int	i, j;
17246 	int	avail_count;
17247 	int	start_offset;
17248 	int	end_offset;
17249 	size_t	entry_len;
17250 	char	*bufp;
17251 	char	*local_buf;
17252 	char	*format_string;
17253 
17254 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17255 
17256 	/*
17257 	 * In the debug version of the driver, this function is called from a
17258 	 * number of places which are NOPs in the release driver.
17259 	 * The debug driver therefore has additional methods of filtering
17260 	 * debug output.
17261 	 */
17262 #ifdef SDDEBUG
17263 	/*
17264 	 * In the debug version of the driver we can reduce the amount of debug
17265 	 * messages by setting sd_error_level to something other than
17266 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17267 	 * sd_component_mask.
17268 	 */
17269 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17270 	    (sd_error_level != SCSI_ERR_ALL)) {
17271 		return;
17272 	}
17273 	if (((sd_component_mask & comp) == 0) ||
17274 	    (sd_error_level != SCSI_ERR_ALL)) {
17275 		return;
17276 	}
17277 #else
17278 	if (sd_error_level != SCSI_ERR_ALL) {
17279 		return;
17280 	}
17281 #endif
17282 
17283 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17284 	bufp = local_buf;
17285 	/*
17286 	 * Available length is the length of local_buf[], minus the
17287 	 * length of the title string, minus one for the ":", minus
17288 	 * one for the newline, minus one for the NULL terminator.
17289 	 * This gives the #bytes available for holding the printed
17290 	 * values from the given data buffer.
17291 	 */
17292 	if (fmt == SD_LOG_HEX) {
17293 		format_string = sd_dump_format_string[0];
17294 	} else /* SD_LOG_CHAR */ {
17295 		format_string = sd_dump_format_string[1];
17296 	}
17297 	/*
17298 	 * Available count is the number of elements from the given
17299 	 * data buffer that we can fit into the available length.
17300 	 * This is based upon the size of the format string used.
17301 	 * Make one entry and find its size.
17302 	 */
17303 	(void) sprintf(bufp, format_string, data[0]);
17304 	entry_len = strlen(bufp);
17305 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
17306 
17307 	j = 0;
17308 	while (j < len) {
17309 		bufp = local_buf;
17310 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17311 		start_offset = j;
17312 
17313 		end_offset = start_offset + avail_count;
17314 
17315 		(void) sprintf(bufp, "%s:", title);
17316 		bufp += strlen(bufp);
17317 		for (i = start_offset; ((i < end_offset) && (j < len));
17318 		    i++, j++) {
17319 			(void) sprintf(bufp, format_string, data[i]);
17320 			bufp += entry_len;
17321 		}
17322 		(void) sprintf(bufp, "\n");
17323 
17324 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17325 	}
17326 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17327 }
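
/*
 * Illustrative sketch only (hypothetical SD_DOC_EXAMPLES guard, never
 * compiled): a typical invocation of sd_dump_memory(), dumping a failed
 * CDB in hex exactly as the error paths above do through the
 * SD_DUMP_MEMORY macro.
 */
#ifdef SD_DOC_EXAMPLES
static void
sd_example_dump_cdb(struct sd_lun *un, struct scsi_pkt *pktp)
{
	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
}
#endif /* SD_DOC_EXAMPLES */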
17328 
17329 /*
17330  *    Function: sd_print_sense_msg
17331  *
17332  * Description: Log a message based upon the given sense data.
17333  *
17334  *   Arguments: un - ptr to associated softstate
17335  *		bp - ptr to buf(9S) for the command
17336  *		arg - ptr to associate sd_sense_info struct
17337  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17338  *			or SD_NO_RETRY_ISSUED
17339  *
17340  *     Context: May be called from interrupt context
17341  */
17342 
17343 static void
17344 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17345 {
17346 	struct sd_xbuf	*xp;
17347 	struct scsi_pkt	*pktp;
17348 	uint8_t *sensep;
17349 	daddr_t request_blkno;
17350 	diskaddr_t err_blkno;
17351 	int severity;
17352 	int pfa_flag;
17353 	extern struct scsi_key_strings scsi_cmds[];
17354 
17355 	ASSERT(un != NULL);
17356 	ASSERT(mutex_owned(SD_MUTEX(un)));
17357 	ASSERT(bp != NULL);
17358 	xp = SD_GET_XBUF(bp);
17359 	ASSERT(xp != NULL);
17360 	pktp = SD_GET_PKTP(bp);
17361 	ASSERT(pktp != NULL);
17362 	ASSERT(arg != NULL);
17363 
17364 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17365 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17366 
17367 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17368 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17369 		severity = SCSI_ERR_RETRYABLE;
17370 	}
17371 
17372 	/* Use absolute block number for the request block number */
17373 	request_blkno = xp->xb_blkno;
17374 
17375 	/*
17376 	 * Now try to get the error block number from the sense data
17377 	 */
17378 	sensep = xp->xb_sense_data;
17379 
17380 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
17381 	    (uint64_t *)&err_blkno)) {
17382 		/*
17383 		 * We retrieved the error block number from the information
17384 		 * portion of the sense data.
17385 		 *
17386 		 * For USCSI commands we are better off using the error
17387 		 * block no. as the requested block no. (This is the best
17388 		 * we can estimate.)
17389 		 */
17390 		if ((SD_IS_BUFIO(xp) == FALSE) &&
17391 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17392 			request_blkno = err_blkno;
17393 		}
17394 	} else {
17395 		/*
17396 		 * Without the es_valid bit set (for fixed format) or an
17397 		 * information descriptor (for descriptor format) we cannot
17398 		 * be certain of the error blkno, so just use the
17399 		 * request_blkno.
17400 		 */
17401 		err_blkno = (diskaddr_t)request_blkno;
17402 	}
17403 
17404 	/*
17405 	 * The following will log the buffer contents for the release driver
17406 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17407 	 * level is set to verbose.
17408 	 */
17409 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17410 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17411 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17412 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17413 
17414 	if (pfa_flag == FALSE) {
17415 		/* This is normally only set for USCSI */
17416 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17417 			return;
17418 		}
17419 
17420 		if ((SD_IS_BUFIO(xp) == TRUE) &&
17421 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17422 		    (severity < sd_error_level))) {
17423 			return;
17424 		}
17425 	}
17426 
17427 	/*
17428 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
17429 	 */
17430 	if ((SD_IS_LSI(un)) &&
17431 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
17432 	    (scsi_sense_asc(sensep) == 0x94) &&
17433 	    (scsi_sense_ascq(sensep) == 0x01)) {
17434 		un->un_sonoma_failure_count++;
17435 		if (un->un_sonoma_failure_count > 1) {
17436 			return;
17437 		}
17438 	}
17439 
17440 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17441 	    request_blkno, err_blkno, scsi_cmds,
17442 	    (struct scsi_extended_sense *)sensep,
17443 	    un->un_additional_codes, NULL);
17444 }
17445 
17446 /*
17447  *    Function: sd_sense_key_no_sense
17448  *
17449  * Description: Recovery action when sense data was not received.
17450  *
17451  *     Context: May be called from interrupt context
17452  */
17453 
17454 static void
17455 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17456 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17457 {
17458 	struct sd_sense_info	si;
17459 
17460 	ASSERT(un != NULL);
17461 	ASSERT(mutex_owned(SD_MUTEX(un)));
17462 	ASSERT(bp != NULL);
17463 	ASSERT(xp != NULL);
17464 	ASSERT(pktp != NULL);
17465 
17466 	si.ssi_severity = SCSI_ERR_FATAL;
17467 	si.ssi_pfa_flag = FALSE;
17468 
17469 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17470 
17471 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17472 	    &si, EIO, (clock_t)0, NULL);
17473 }
17474 
17475 
17476 /*
17477  *    Function: sd_sense_key_recoverable_error
17478  *
17479  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17480  *
17481  *     Context: May be called from interrupt context
17482  */
17483 
17484 static void
17485 sd_sense_key_recoverable_error(struct sd_lun *un,
17486 	uint8_t *sense_datap,
17487 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17488 {
17489 	struct sd_sense_info	si;
17490 	uint8_t asc = scsi_sense_asc(sense_datap);
17491 
17492 	ASSERT(un != NULL);
17493 	ASSERT(mutex_owned(SD_MUTEX(un)));
17494 	ASSERT(bp != NULL);
17495 	ASSERT(xp != NULL);
17496 	ASSERT(pktp != NULL);
17497 
17498 	/*
17499 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17500 	 */
17501 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17502 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17503 		si.ssi_severity = SCSI_ERR_INFO;
17504 		si.ssi_pfa_flag = TRUE;
17505 	} else {
17506 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17507 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17508 		si.ssi_severity = SCSI_ERR_RECOVERED;
17509 		si.ssi_pfa_flag = FALSE;
17510 	}
17511 
17512 	if (pktp->pkt_resid == 0) {
17513 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17514 		sd_return_command(un, bp);
17515 		return;
17516 	}
17517 
17518 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17519 	    &si, EIO, (clock_t)0, NULL);
17520 }
17521 
17522 
17523 
17524 
17525 /*
17526  *    Function: sd_sense_key_not_ready
17527  *
17528  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17529  *
17530  *     Context: May be called from interrupt context
17531  */
17532 
17533 static void
17534 sd_sense_key_not_ready(struct sd_lun *un,
17535 	uint8_t *sense_datap,
17536 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17537 {
17538 	struct sd_sense_info	si;
17539 	uint8_t asc = scsi_sense_asc(sense_datap);
17540 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17541 
17542 	ASSERT(un != NULL);
17543 	ASSERT(mutex_owned(SD_MUTEX(un)));
17544 	ASSERT(bp != NULL);
17545 	ASSERT(xp != NULL);
17546 	ASSERT(pktp != NULL);
17547 
17548 	si.ssi_severity = SCSI_ERR_FATAL;
17549 	si.ssi_pfa_flag = FALSE;
17550 
17551 	/*
17552 	 * Update error stats after first NOT READY error. Disks may have
17553 	 * been powered down and may need to be restarted.  For CDROMs,
17554 	 * report NOT READY errors only if media is present.
17555 	 */
17556 	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17557 	    (xp->xb_retry_count > 0)) {
17558 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17559 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17560 	}
17561 
17562 	/*
17563 	 * Just fail if the "not ready" retry limit has been reached.
17564 	 */
17565 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17566 		/* Special check for error message printing for removables. */
17567 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17568 		    (ascq >= 0x04)) {
17569 			si.ssi_severity = SCSI_ERR_ALL;
17570 		}
17571 		goto fail_command;
17572 	}
17573 
17574 	/*
17575 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17576 	 * what to do.
17577 	 */
17578 	switch (asc) {
17579 	case 0x04:	/* LOGICAL UNIT NOT READY */
17580 		/*
17581 		 * Disk drives that don't spin up result in a very long delay
17582 		 * in format without warning messages. We will log a message
17583 		 * if the error level is set to verbose.
17584 		 */
17585 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17586 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17587 			    "logical unit not ready, resetting disk\n");
17588 		}
17589 
17590 		/*
17591 		 * There are different requirements for CDROMs and disks for
17592 		 * the number of retries.  If a CD-ROM is giving this, it is
17593 		 * probably reading TOC and is in the process of getting
17594 		 * ready, so we should keep on trying for a long time to make
17595 		 * sure that all types of media are taken into account (for
17596 		 * some media the drive takes a long time to read TOC).  For
17597 		 * disks we do not want to retry this too many times as this
17598 		 * can cause a long hang in format when the drive refuses to
17599 		 * spin up (a very common failure).
17600 		 */
17601 		switch (ascq) {
17602 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17603 			/*
17604 			 * Disk drives frequently refuse to spin up which
17605 			 * results in a very long hang in format without
17606 			 * warning messages.
17607 			 *
17608 			 * Note: This code preserves the legacy behavior of
17609 			 * comparing xb_retry_count against zero for fibre
17610 			 * channel targets instead of comparing against the
17611 			 * un_reset_retry_count value.  The reason for this
17612 			 * discrepancy has been so utterly lost beneath the
17613 			 * Sands of Time that even Indiana Jones could not
17614 			 * find it.
17615 			 */
17616 			if (un->un_f_is_fibre == TRUE) {
17617 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17618 				    (xp->xb_retry_count > 0)) &&
17619 				    (un->un_startstop_timeid == NULL)) {
17620 					scsi_log(SD_DEVINFO(un), sd_label,
17621 					    CE_WARN, "logical unit not ready, "
17622 					    "resetting disk\n");
17623 					sd_reset_target(un, pktp);
17624 				}
17625 			} else {
17626 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17627 				    (xp->xb_retry_count >
17628 				    un->un_reset_retry_count)) &&
17629 				    (un->un_startstop_timeid == NULL)) {
17630 					scsi_log(SD_DEVINFO(un), sd_label,
17631 					    CE_WARN, "logical unit not ready, "
17632 					    "resetting disk\n");
17633 					sd_reset_target(un, pktp);
17634 				}
17635 			}
17636 			break;
17637 
17638 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17639 			/*
17640 			 * If the target is in the process of becoming
17641 			 * ready, just proceed with the retry. This can
17642 			 * happen with CD-ROMs that take a long time to
17643 			 * read TOC after a power cycle or reset.
17644 			 */
17645 			goto do_retry;
17646 
17647 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17648 			break;
17649 
17650 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17651 			/*
17652 			 * Retries cannot help here so just fail right away.
17653 			 */
17654 			goto fail_command;
17655 
17656 		case 0x88:
17657 			/*
17658 			 * Vendor-unique code for T3/T4: it indicates a
17659 			 * path problem in a multipathed config, but as far as
17660 			 * the target driver is concerned it equates to a fatal
17661 			 * error, so we should just fail the command right away
17662 			 * (without printing anything to the console). If this
17663 			 * is not a T3/T4, fall thru to the default recovery
17664 			 * action.
17665 			 * T3/T4 is FC only, don't need to check is_fibre
17666 			 * T3/T4 is FC only, so there is no need to check is_fibre.
17667 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17668 				sd_return_failed_command(un, bp, EIO);
17669 				return;
17670 			}
17671 			/* FALLTHRU */
17672 
17673 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17674 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17675 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17676 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17677 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17678 		default:    /* Possible future codes in SCSI spec? */
17679 			/*
17680 			 * For removable-media devices, do not retry if
17681 			 * ASCQ > 2 as these result mostly from USCSI commands
17682 			 * on MMC devices issued to check status of an
17683 			 * operation initiated in immediate mode.  Also for
17684 			 * ASCQ >= 4 do not print console messages as these
17685 			 * mainly represent a user-initiated operation
17686 			 * instead of a system failure.
17687 			 */
17688 			if (un->un_f_has_removable_media) {
17689 				si.ssi_severity = SCSI_ERR_ALL;
17690 				goto fail_command;
17691 			}
17692 			break;
17693 		}
17694 
17695 		/*
17696 		 * As part of our recovery attempt for the NOT READY
17697 		 * condition, we issue a START STOP UNIT command. However
17698 		 * we want to wait for a short delay before attempting this
17699 		 * as there may still be more commands coming back from the
17700 		 * target with the check condition. To do this we use
17701 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17702 		 * the delay interval expires. (sd_start_stop_unit_callback()
17703 		 * dispatches sd_start_stop_unit_task(), which will issue
17704 		 * the actual START STOP UNIT command. The delay interval
17705 		 * the actual START STOP UNIT command.) The delay interval
17706 		 * command that generated the NOT READY condition.
17707 		 *
17708 		 * Note that we could just dispatch sd_start_stop_unit_task()
17709 		 * from here and allow it to sleep for the delay interval,
17710 		 * but then we would be tying up the taskq thread
17711 		 * unnecessarily for the duration of the delay.
17712 		 *
17713 		 * Do not issue the START STOP UNIT if the current command
17714 		 * is already a START STOP UNIT.
17715 		 */
17716 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17717 			break;
17718 		}
17719 
17720 		/*
17721 		 * Do not schedule the timeout if one is already pending.
17722 		 */
17723 		if (un->un_startstop_timeid != NULL) {
17724 			SD_INFO(SD_LOG_ERROR, un,
17725 			    "sd_sense_key_not_ready: restart already issued to"
17726 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17727 			    ddi_get_instance(SD_DEVINFO(un)));
17728 			break;
17729 		}
17730 
17731 		/*
17732 		 * Schedule the START STOP UNIT command, then queue the command
17733 		 * for a retry.
17734 		 *
17735 		 * Note: A timeout is not scheduled for this retry because we
17736 		 * want the retry to be serial with the START_STOP_UNIT. The
17737 		 * retry will be started when the START_STOP_UNIT is completed
17738 		 * in sd_start_stop_unit_task.
17739 		 */
17740 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17741 		    un, SD_BSY_TIMEOUT / 2);
17742 		xp->xb_retry_count++;
17743 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17744 		return;
17745 
17746 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17747 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17748 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17749 			    "unit does not respond to selection\n");
17750 		}
17751 		break;
17752 
17753 	case 0x3A:	/* MEDIUM NOT PRESENT */
17754 		if (sd_error_level >= SCSI_ERR_FATAL) {
17755 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17756 			    "Caddy not inserted in drive\n");
17757 		}
17758 
17759 		sr_ejected(un);
17760 		un->un_mediastate = DKIO_EJECTED;
17761 		/* The state has changed, inform the media watch routines */
17762 		cv_broadcast(&un->un_state_cv);
17763 		/* Just fail if no media is present in the drive. */
17764 		goto fail_command;
17765 
17766 	default:
17767 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17768 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17769 			    "Unit not Ready. Additional sense code 0x%x\n",
17770 			    asc);
17771 		}
17772 		break;
17773 	}
17774 
17775 do_retry:
17776 
17777 	/*
17778 	 * Retry the command, as some targets may report NOT READY for
17779 	 * several seconds after being reset.
17780 	 */
17781 	xp->xb_retry_count++;
17782 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17783 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17784 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
17785 
17786 	return;
17787 
17788 fail_command:
17789 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17790 	sd_return_failed_command(un, bp, EIO);
17791 }
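
/*
 * Illustrative sketch only (hypothetical SD_DOC_EXAMPLES guard, never
 * compiled): the timeout(9F) pattern used by the NOT READY recovery
 * above to defer the START STOP UNIT.  The returned timeout_id_t must
 * be saved (here in un_startstop_timeid) so the callout can later be
 * cancelled with untimeout(9F), e.g. if the device is detached.
 */
#ifdef SD_DOC_EXAMPLES
static void
sd_example_schedule_restart(struct sd_lun *un)
{
	ASSERT(mutex_owned(SD_MUTEX(un)));

	if (un->un_startstop_timeid == NULL) {
		un->un_startstop_timeid = timeout(
		    sd_start_stop_unit_callback, un, SD_BSY_TIMEOUT / 2);
	}
}
#endif /* SD_DOC_EXAMPLES */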
17792 
17793 
17794 
17795 /*
17796  *    Function: sd_sense_key_medium_or_hardware_error
17797  *
17798  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17799  *		sense key.
17800  *
17801  *     Context: May be called from interrupt context
17802  */
17803 
17804 static void
17805 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17806 	uint8_t *sense_datap,
17807 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17808 {
17809 	struct sd_sense_info	si;
17810 	uint8_t sense_key = scsi_sense_key(sense_datap);
17811 	uint8_t asc = scsi_sense_asc(sense_datap);
17812 
17813 	ASSERT(un != NULL);
17814 	ASSERT(mutex_owned(SD_MUTEX(un)));
17815 	ASSERT(bp != NULL);
17816 	ASSERT(xp != NULL);
17817 	ASSERT(pktp != NULL);
17818 
17819 	si.ssi_severity = SCSI_ERR_FATAL;
17820 	si.ssi_pfa_flag = FALSE;
17821 
17822 	if (sense_key == KEY_MEDIUM_ERROR) {
17823 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17824 	}
17825 
17826 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17827 
17828 	if ((un->un_reset_retry_count != 0) &&
17829 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17830 		mutex_exit(SD_MUTEX(un));
17831 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17832 		if (un->un_f_allow_bus_device_reset == TRUE) {
17833 
17834 			boolean_t try_resetting_target = B_TRUE;
17835 
17836 			/*
17837 			 * We need to be able to handle specific ASC when we are
17838 			 * handling a KEY_HARDWARE_ERROR. In particular
17839 			 * taking the default action of resetting the target may
17840 			 * not be the appropriate way to attempt recovery.
17841 			 * Resetting a target because of a single LUN failure
17842 			 * victimizes all LUNs on that target.
17843 			 *
17844 			 * This is true for the LSI arrays: if an LSI
17845 			 * array controller returns an ASC of 0x84 (LUN Dead) we
17846 			 * should trust it.
17847 			 */
17848 
17849 			if (sense_key == KEY_HARDWARE_ERROR) {
17850 				switch (asc) {
17851 				case 0x84:
17852 					if (SD_IS_LSI(un)) {
17853 						try_resetting_target = B_FALSE;
17854 					}
17855 					break;
17856 				default:
17857 					break;
17858 				}
17859 			}
17860 
17861 			if (try_resetting_target == B_TRUE) {
17862 				int reset_retval = 0;
17863 				if (un->un_f_lun_reset_enabled == TRUE) {
17864 					SD_TRACE(SD_LOG_IO_CORE, un,
17865 					    "sd_sense_key_medium_or_hardware_"
17866 					    "error: issuing RESET_LUN\n");
17867 					reset_retval =
17868 					    scsi_reset(SD_ADDRESS(un),
17869 					    RESET_LUN);
17870 				}
17871 				if (reset_retval == 0) {
17872 					SD_TRACE(SD_LOG_IO_CORE, un,
17873 					    "sd_sense_key_medium_or_hardware_"
17874 					    "error: issuing RESET_TARGET\n");
17875 					(void) scsi_reset(SD_ADDRESS(un),
17876 					    RESET_TARGET);
17877 				}
17878 			}
17879 		}
17880 		mutex_enter(SD_MUTEX(un));
17881 	}
17882 
17883 	/*
17884 	 * This really ought to be a fatal error, but we will retry anyway
17885 	 * as some drives report this as a spurious error.
17886 	 */
17887 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17888 	    &si, EIO, (clock_t)0, NULL);
17889 }
17890 
17891 
17892 
17893 /*
17894  *    Function: sd_sense_key_illegal_request
17895  *
17896  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17897  *
17898  *     Context: May be called from interrupt context
17899  */
17900 
17901 static void
17902 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17903 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17904 {
17905 	struct sd_sense_info	si;
17906 
17907 	ASSERT(un != NULL);
17908 	ASSERT(mutex_owned(SD_MUTEX(un)));
17909 	ASSERT(bp != NULL);
17910 	ASSERT(xp != NULL);
17911 	ASSERT(pktp != NULL);
17912 
17913 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17914 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17915 
17916 	si.ssi_severity = SCSI_ERR_INFO;
17917 	si.ssi_pfa_flag = FALSE;
17918 
17919 	/* Pointless to retry if the target thinks it's an illegal request */
17920 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17921 	sd_return_failed_command(un, bp, EIO);
17922 }
17923 
17924 
17925 
17926 
17927 /*
17928  *    Function: sd_sense_key_unit_attention
17929  *
17930  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17931  *
17932  *     Context: May be called from interrupt context
17933  */
17934 
17935 static void
17936 sd_sense_key_unit_attention(struct sd_lun *un,
17937 	uint8_t *sense_datap,
17938 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17939 {
17940 	/*
17941 	 * For UNIT ATTENTION we allow retries for one minute. Devices
17942 	 * like Sonoma can return UNIT ATTENTION close to a minute
17943 	 * under certain conditions.
17944 	 */
17945 	int	retry_check_flag = SD_RETRIES_UA;
17946 	boolean_t	kstat_updated = B_FALSE;
17947 	struct	sd_sense_info		si;
17948 	uint8_t asc = scsi_sense_asc(sense_datap);
17949 
17950 	ASSERT(un != NULL);
17951 	ASSERT(mutex_owned(SD_MUTEX(un)));
17952 	ASSERT(bp != NULL);
17953 	ASSERT(xp != NULL);
17954 	ASSERT(pktp != NULL);
17955 
17956 	si.ssi_severity = SCSI_ERR_INFO;
17957 	si.ssi_pfa_flag = FALSE;
17958 
17959 
17960 	switch (asc) {
17961 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17962 		if (sd_report_pfa != 0) {
17963 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17964 			si.ssi_pfa_flag = TRUE;
17965 			retry_check_flag = SD_RETRIES_STANDARD;
17966 			goto do_retry;
17967 		}
17968 
17969 		break;
17970 
17971 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17972 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17973 			un->un_resvd_status |=
17974 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17975 		}
17976 #ifdef _LP64
17977 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
17978 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
17979 			    un, KM_NOSLEEP) == 0) {
17980 				/*
17981 				 * If we can't dispatch the task we'll just
17982 				 * live without descriptor sense.  We can
17983 				 * try again on the next "unit attention"
17984 				 */
17985 				SD_ERROR(SD_LOG_ERROR, un,
17986 				    "sd_sense_key_unit_attention: "
17987 				    "Could not dispatch "
17988 				    "sd_reenable_dsense_task\n");
17989 			}
17990 		}
17991 #endif /* _LP64 */
17992 		/* FALLTHRU */
17993 
17994 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
17995 		if (!un->un_f_has_removable_media) {
17996 			break;
17997 		}
17998 
17999 		/*
18000 		 * When we get a unit attention from a removable-media device,
18001 		 * it may be in a state that will take a long time to recover
18002 		 * (e.g., from a reset).  Since we are executing in interrupt
18003 		 * context here, we cannot wait around for the device to come
18004 		 * back. So hand this command off to sd_media_change_task()
18005 		 * for deferred processing under taskq thread context. (Note
18006 		 * that the command still may be failed if a problem is
18007 		 * encountered at a later time.)
18008 		 */
18009 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
18010 		    KM_NOSLEEP) == 0) {
18011 			/*
18012 			 * Cannot dispatch the request so fail the command.
18013 			 */
18014 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
18015 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18016 			si.ssi_severity = SCSI_ERR_FATAL;
18017 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18018 			sd_return_failed_command(un, bp, EIO);
18019 		}
18020 
18021 		/*
18022 		 * If we failed to dispatch sd_media_change_task(), the kstat
18023 		 * was already updated above. If the dispatch succeeded, the
18024 		 * kstat will be updated later if an error is encountered. So
18025 		 * set kstat_updated here to avoid a second update below.
18026 		 */
18027 		kstat_updated = B_TRUE;
18028 
18029 		/*
18030 		 * Either the command has been successfully dispatched to a
18031 		 * task Q for retrying, or the dispatch failed. In either case
18032 		 * do NOT retry again by calling sd_retry_command. This sets up
18033 		 * two retries of the same command and when one completes and
18034 		 * frees the resources the other will access freed memory,
18035 		 * a bad thing.
18036 		 */
18037 		return;
18038 
18039 	default:
18040 		break;
18041 	}
18042 
18043 	/*
18044 	 * Update kstat if we haven't done that.
18045 	 */
18046 	if (!kstat_updated) {
18047 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18048 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18049 	}
18050 
18051 do_retry:
18052 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
18053 	    EIO, SD_UA_RETRY_DELAY, NULL);
18054 }
18055 
18056 
18057 
18058 /*
18059  *    Function: sd_sense_key_fail_command
18060  *
18061  * Description: Used to fail a command when we don't like the sense key
18062  *		that was returned.
18063  *
18064  *     Context: May be called from interrupt context
18065  */
18066 
18067 static void
18068 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18069 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18070 {
18071 	struct sd_sense_info	si;
18072 
18073 	ASSERT(un != NULL);
18074 	ASSERT(mutex_owned(SD_MUTEX(un)));
18075 	ASSERT(bp != NULL);
18076 	ASSERT(xp != NULL);
18077 	ASSERT(pktp != NULL);
18078 
18079 	si.ssi_severity = SCSI_ERR_FATAL;
18080 	si.ssi_pfa_flag = FALSE;
18081 
18082 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18083 	sd_return_failed_command(un, bp, EIO);
18084 }
18085 
18086 
18087 
18088 /*
18089  *    Function: sd_sense_key_blank_check
18090  *
18091  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18092  *		Has no monetary connotation.
18093  *
18094  *     Context: May be called from interrupt context
18095  */
18096 
18097 static void
18098 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18099 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18100 {
18101 	struct sd_sense_info	si;
18102 
18103 	ASSERT(un != NULL);
18104 	ASSERT(mutex_owned(SD_MUTEX(un)));
18105 	ASSERT(bp != NULL);
18106 	ASSERT(xp != NULL);
18107 	ASSERT(pktp != NULL);
18108 
18109 	/*
18110 	 * Blank check is not fatal for removable devices, therefore
18111 	 * it does not require a console message.
18112 	 */
18113 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18114 	    SCSI_ERR_FATAL;
18115 	si.ssi_pfa_flag = FALSE;
18116 
18117 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18118 	sd_return_failed_command(un, bp, EIO);
18119 }
18120 
18121 
18122 
18123 
18124 /*
18125  *    Function: sd_sense_key_aborted_command
18126  *
18127  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18128  *
18129  *     Context: May be called from interrupt context
18130  */
18131 
18132 static void
18133 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18134 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18135 {
18136 	struct sd_sense_info	si;
18137 
18138 	ASSERT(un != NULL);
18139 	ASSERT(mutex_owned(SD_MUTEX(un)));
18140 	ASSERT(bp != NULL);
18141 	ASSERT(xp != NULL);
18142 	ASSERT(pktp != NULL);
18143 
18144 	si.ssi_severity = SCSI_ERR_FATAL;
18145 	si.ssi_pfa_flag = FALSE;
18146 
18147 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18148 
18149 	/*
18150 	 * This really ought to be a fatal error, but we will retry anyway
18151 	 * as some drives report this as a spurious error.
18152 	 */
18153 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18154 	    &si, EIO, (clock_t)0, NULL);
18155 }
18156 
18157 
18158 
18159 /*
18160  *    Function: sd_sense_key_default
18161  *
18162  * Description: Default recovery action for several SCSI sense keys (basically
18163  *		attempts a retry).
18164  *
18165  *     Context: May be called from interrupt context
18166  */
18167 
18168 static void
18169 sd_sense_key_default(struct sd_lun *un,
18170 	uint8_t *sense_datap,
18171 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18172 {
18173 	struct sd_sense_info	si;
18174 	uint8_t sense_key = scsi_sense_key(sense_datap);
18175 
18176 	ASSERT(un != NULL);
18177 	ASSERT(mutex_owned(SD_MUTEX(un)));
18178 	ASSERT(bp != NULL);
18179 	ASSERT(xp != NULL);
18180 	ASSERT(pktp != NULL);
18181 
18182 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18183 
18184 	/*
18185 	 * Undecoded sense key.	Attempt retries and hope that will fix
18186 	 * Undecoded sense key.	Attempt retries and hope that they will fix
18187 	 */
18188 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18189 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18190 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18191 	}
18192 
18193 	si.ssi_severity = SCSI_ERR_FATAL;
18194 	si.ssi_pfa_flag = FALSE;
18195 
18196 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18197 	    &si, EIO, (clock_t)0, NULL);
18198 }
18199 
18200 
18201 
18202 /*
18203  *    Function: sd_print_retry_msg
18204  *
18205  * Description: Print a message indicating the retry action being taken.
18206  *
18207  *   Arguments: un - ptr to associated softstate
18208  *		bp - ptr to buf(9S) for the command
18209  *		arg - not used.
18210  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18211  *			or SD_NO_RETRY_ISSUED
18212  *
18213  *     Context: May be called from interrupt context
18214  */
18215 /* ARGSUSED */
18216 static void
18217 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18218 {
18219 	struct sd_xbuf	*xp;
18220 	struct scsi_pkt *pktp;
18221 	char *reasonp;
18222 	char *msgp;
18223 
18224 	ASSERT(un != NULL);
18225 	ASSERT(mutex_owned(SD_MUTEX(un)));
18226 	ASSERT(bp != NULL);
18227 	pktp = SD_GET_PKTP(bp);
18228 	ASSERT(pktp != NULL);
18229 	xp = SD_GET_XBUF(bp);
18230 	ASSERT(xp != NULL);
18231 
18232 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18233 	mutex_enter(&un->un_pm_mutex);
18234 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18235 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18236 	    (pktp->pkt_flags & FLAG_SILENT)) {
18237 		mutex_exit(&un->un_pm_mutex);
18238 		goto update_pkt_reason;
18239 	}
18240 	mutex_exit(&un->un_pm_mutex);
18241 
18242 	/*
18243 	 * Suppress messages if they are all the same pkt_reason; with
18244 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18245 	 * If we are in panic, then suppress the retry messages.
18246 	 */
18247 	switch (flag) {
18248 	case SD_NO_RETRY_ISSUED:
18249 		msgp = "giving up";
18250 		break;
18251 	case SD_IMMEDIATE_RETRY_ISSUED:
18252 	case SD_DELAYED_RETRY_ISSUED:
18253 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18254 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18255 		    (sd_error_level != SCSI_ERR_ALL))) {
18256 			return;
18257 		}
18258 		msgp = "retrying command";
18259 		break;
18260 	default:
18261 		goto update_pkt_reason;
18262 	}
18263 
18264 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18265 	    scsi_rname(pktp->pkt_reason));
18266 
18267 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18268 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18269 
18270 update_pkt_reason:
18271 	/*
18272 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18273 	 * This is to prevent multiple console messages for the same failure
18274 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18275 	 * when the command is retried successfully because there still may be
18276 	 * more commands coming back with the same value of pktp->pkt_reason.
18277 	 */
18278 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18279 		un->un_last_pkt_reason = pktp->pkt_reason;
18280 	}
18281 }
18282 
18283 
18284 /*
18285  *    Function: sd_print_cmd_incomplete_msg
18286  *
18287  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18288  *
18289  *   Arguments: un - ptr to associated softstate
18290  *		bp - ptr to buf(9S) for the command
18291  *		arg - passed to sd_print_retry_msg()
18292  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18293  *			or SD_NO_RETRY_ISSUED
18294  *
18295  *     Context: May be called from interrupt context
18296  */
18297 
18298 static void
18299 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18300 	int code)
18301 {
18302 	dev_info_t	*dip;
18303 
18304 	ASSERT(un != NULL);
18305 	ASSERT(mutex_owned(SD_MUTEX(un)));
18306 	ASSERT(bp != NULL);
18307 
18308 	switch (code) {
18309 	case SD_NO_RETRY_ISSUED:
18310 		/* Command was failed. Someone turned off this target? */
18311 		if (un->un_state != SD_STATE_OFFLINE) {
18312 			/*
18313 			 * Suppress message if we are detaching and
18314 			 * device has been disconnected
18315 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18316 			 * private interface and not part of the DDI
18317 			 */
18318 			dip = un->un_sd->sd_dev;
18319 			if (!(DEVI_IS_DETACHING(dip) &&
18320 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18321 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18322 				    "disk not responding to selection\n");
18323 			}
18324 			New_state(un, SD_STATE_OFFLINE);
18325 		}
18326 		break;
18327 
18328 	case SD_DELAYED_RETRY_ISSUED:
18329 	case SD_IMMEDIATE_RETRY_ISSUED:
18330 	default:
18331 		/* Command was successfully queued for retry */
18332 		sd_print_retry_msg(un, bp, arg, code);
18333 		break;
18334 	}
18335 }
18336 
18337 
18338 /*
18339  *    Function: sd_pkt_reason_cmd_incomplete
18340  *
18341  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18342  *
18343  *     Context: May be called from interrupt context
18344  */
18345 
18346 static void
18347 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18348 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18349 {
18350 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18351 
18352 	ASSERT(un != NULL);
18353 	ASSERT(mutex_owned(SD_MUTEX(un)));
18354 	ASSERT(bp != NULL);
18355 	ASSERT(xp != NULL);
18356 	ASSERT(pktp != NULL);
18357 
18358 	/* Do not do a reset if selection did not complete */
18359 	/* Note: Should this not just check the bit? */
18360 	if (pktp->pkt_state != STATE_GOT_BUS) {
18361 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18362 		sd_reset_target(un, pktp);
18363 	}
18364 
18365 	/*
18366 	 * If the target was not successfully selected, then set
18367 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18368 	 * with the target, and further retries and/or commands are
18369 	 * likely to take a long time.
18370 	 */
18371 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18372 		flag |= SD_RETRIES_FAILFAST;
18373 	}
18374 
18375 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18376 
18377 	sd_retry_command(un, bp, flag,
18378 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18379 }
18380 
18381 
18382 
18383 /*
18384  *    Function: sd_pkt_reason_cmd_tran_err
18385  *
18386  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18387  *
18388  *     Context: May be called from interrupt context
18389  */
18390 
18391 static void
18392 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18393 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18394 {
18395 	ASSERT(un != NULL);
18396 	ASSERT(mutex_owned(SD_MUTEX(un)));
18397 	ASSERT(bp != NULL);
18398 	ASSERT(xp != NULL);
18399 	ASSERT(pktp != NULL);
18400 
18401 	/*
18402 	 * Do not reset if we got a parity error, or if
18403 	 * selection did not complete.
18404 	 */
18405 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18406 	/* Note: Should this not just check the bit for pkt_state? */
18407 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18408 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18409 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18410 		sd_reset_target(un, pktp);
18411 	}
18412 
18413 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18414 
18415 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18416 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18417 }
18418 
18419 
18420 
18421 /*
18422  *    Function: sd_pkt_reason_cmd_reset
18423  *
18424  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18425  *
18426  *     Context: May be called from interrupt context
18427  */
18428 
18429 static void
18430 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18431 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18432 {
18433 	ASSERT(un != NULL);
18434 	ASSERT(mutex_owned(SD_MUTEX(un)));
18435 	ASSERT(bp != NULL);
18436 	ASSERT(xp != NULL);
18437 	ASSERT(pktp != NULL);
18438 
18439 	/* The target may still be running the command, so try to reset. */
18440 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18441 	sd_reset_target(un, pktp);
18442 
18443 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18444 
18445 	/*
18446 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18447 	 * reset because another target on this bus caused it. The target
18448 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18449 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18450 	 */
18451 
18452 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18453 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18454 }
18455 
18456 
18457 
18458 
18459 /*
18460  *    Function: sd_pkt_reason_cmd_aborted
18461  *
18462  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18463  *
18464  *     Context: May be called from interrupt context
18465  */
18466 
18467 static void
18468 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18469 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18470 {
18471 	ASSERT(un != NULL);
18472 	ASSERT(mutex_owned(SD_MUTEX(un)));
18473 	ASSERT(bp != NULL);
18474 	ASSERT(xp != NULL);
18475 	ASSERT(pktp != NULL);
18476 
18477 	/* The target may still be running the command, so try to reset. */
18478 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18479 	sd_reset_target(un, pktp);
18480 
18481 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18482 
18483 	/*
18484 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18485 	 * aborted because another target on this bus caused it. The target
18486 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18487 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18488 	 */
18489 
18490 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18491 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18492 }
18493 
18494 
18495 
18496 /*
18497  *    Function: sd_pkt_reason_cmd_timeout
18498  *
18499  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18500  *
18501  *     Context: May be called from interrupt context
18502  */
18503 
18504 static void
18505 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18506 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18507 {
18508 	ASSERT(un != NULL);
18509 	ASSERT(mutex_owned(SD_MUTEX(un)));
18510 	ASSERT(bp != NULL);
18511 	ASSERT(xp != NULL);
18512 	ASSERT(pktp != NULL);
18513 
18514 
18515 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18516 	sd_reset_target(un, pktp);
18517 
18518 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18519 
18520 	/*
18521 	 * A command timeout indicates that we could not establish
18522 	 * communication with the target, so set SD_RETRIES_FAILFAST
18523 	 * as further retries/commands are likely to take a long time.
18524 	 */
18525 	sd_retry_command(un, bp,
18526 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18527 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18528 }
18529 
18530 
18531 
18532 /*
18533  *    Function: sd_pkt_reason_cmd_unx_bus_free
18534  *
18535  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18536  *
18537  *     Context: May be called from interrupt context
18538  */
18539 
18540 static void
18541 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18542 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18543 {
18544 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18545 
18546 	ASSERT(un != NULL);
18547 	ASSERT(mutex_owned(SD_MUTEX(un)));
18548 	ASSERT(bp != NULL);
18549 	ASSERT(xp != NULL);
18550 	ASSERT(pktp != NULL);
18551 
18552 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18553 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18554 
18555 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18556 	    sd_print_retry_msg : NULL;
18557 
18558 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18559 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18560 }
18561 
18562 
18563 /*
18564  *    Function: sd_pkt_reason_cmd_tag_reject
18565  *
18566  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18567  *
18568  *     Context: May be called from interrupt context
18569  */
18570 
18571 static void
18572 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18573 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18574 {
18575 	ASSERT(un != NULL);
18576 	ASSERT(mutex_owned(SD_MUTEX(un)));
18577 	ASSERT(bp != NULL);
18578 	ASSERT(xp != NULL);
18579 	ASSERT(pktp != NULL);
18580 
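	/*
	 * The target rejected our tagged command, so fall back to untagged
	 * operation: clear the tag flags, drop the throttle (to at most 3
	 * if the target supports queueing, otherwise to 1), and disable
	 * the HBA's tagged-qing capability for this target.
	 */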
18581 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18582 	pktp->pkt_flags = 0;
18583 	un->un_tagflags = 0;
18584 	if (un->un_f_opt_queueing == TRUE) {
18585 		un->un_throttle = min(un->un_throttle, 3);
18586 	} else {
18587 		un->un_throttle = 1;
18588 	}
18589 	mutex_exit(SD_MUTEX(un));
18590 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18591 	mutex_enter(SD_MUTEX(un));
18592 
18593 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18594 
18595 	/* Legacy behavior not to check retry counts here. */
18596 	/* Legacy behavior is not to check retry counts here. */
18597 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18598 }
18599 
18600 
18601 /*
18602  *    Function: sd_pkt_reason_default
18603  *
18604  * Description: Default recovery actions for SCSA pkt_reason values that
18605  *		do not have more explicit recovery actions.
18606  *
18607  *     Context: May be called from interrupt context
18608  */
18609 
18610 static void
18611 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18612 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18613 {
18614 	ASSERT(un != NULL);
18615 	ASSERT(mutex_owned(SD_MUTEX(un)));
18616 	ASSERT(bp != NULL);
18617 	ASSERT(xp != NULL);
18618 	ASSERT(pktp != NULL);
18619 
18620 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18621 	sd_reset_target(un, pktp);
18622 
18623 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18624 
18625 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18626 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18627 }
18628 
18629 
18630 
18631 /*
18632  *    Function: sd_pkt_status_check_condition
18633  *
18634  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18635  *
18636  *     Context: May be called from interrupt context
18637  */
18638 
18639 static void
18640 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18641 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18642 {
18643 	ASSERT(un != NULL);
18644 	ASSERT(mutex_owned(SD_MUTEX(un)));
18645 	ASSERT(bp != NULL);
18646 	ASSERT(xp != NULL);
18647 	ASSERT(pktp != NULL);
18648 
18649 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18650 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18651 
18652 	/*
18653 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18654 	 * command will be retried after the request sense). Otherwise, retry
18655 	 * the command. Note: we are issuing the request sense even though the
18656 	 * retry limit may have been reached for the failed command.
18657 	 */
18658 	if (un->un_f_arq_enabled == FALSE) {
18659 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18660 		    "no ARQ, sending request sense command\n");
18661 		sd_send_request_sense_command(un, bp, pktp);
18662 	} else {
18663 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18664 		    "ARQ, retrying command\n");
18665 #if defined(__i386) || defined(__amd64)
18666 		/*
18667 		 * The hard-coded delay below must be adjusted whenever
18668 		 * SD_RETRY_DELAY changes in sddef.h.
18669 		 */
18670 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18671 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
18672 		    NULL);
18673 #else
18674 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18675 		    EIO, SD_RETRY_DELAY, NULL);
18676 #endif
18677 	}
18678 
18679 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18680 }
18681 
18682 
18683 /*
18684  *    Function: sd_pkt_status_busy
18685  *
18686  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18687  *
18688  *     Context: May be called from interrupt context
18689  */
18690 
18691 static void
18692 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18693 	struct scsi_pkt *pktp)
18694 {
18695 	ASSERT(un != NULL);
18696 	ASSERT(mutex_owned(SD_MUTEX(un)));
18697 	ASSERT(bp != NULL);
18698 	ASSERT(xp != NULL);
18699 	ASSERT(pktp != NULL);
18700 
18701 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18702 	    "sd_pkt_status_busy: entry\n");
18703 
18704 	/* If retries are exhausted, just fail the command. */
18705 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18706 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18707 		    "device busy too long\n");
18708 		sd_return_failed_command(un, bp, EIO);
18709 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18710 		    "sd_pkt_status_busy: exit\n");
18711 		return;
18712 	}
18713 	xp->xb_retry_count++;
18714 
18715 	/*
18716 	 * Try to reset the target. However, we do not want to perform
18717 	 * more than one reset if the device continues to fail. The reset
18718 	 * will be performed when the retry count reaches the reset
18719 	 * threshold.  This threshold should be set such that at least
18720 	 * one retry is issued before the reset is performed.
18721 	 */
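	/*
	 * (The expression below effectively clamps the reset threshold to
	 * a minimum of 2, so at least one plain retry always precedes the
	 * reset attempt.)
	 */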
18722 	if (xp->xb_retry_count ==
18723 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18724 		int rval = 0;
18725 		mutex_exit(SD_MUTEX(un));
18726 		if (un->un_f_allow_bus_device_reset == TRUE) {
18727 			/*
18728 			 * First try to reset the LUN; if we cannot then
18729 			 * try to reset the target.
18730 			 */
18731 			if (un->un_f_lun_reset_enabled == TRUE) {
18732 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18733 				    "sd_pkt_status_busy: RESET_LUN\n");
18734 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18735 			}
18736 			if (rval == 0) {
18737 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18738 				    "sd_pkt_status_busy: RESET_TARGET\n");
18739 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18740 			}
18741 		}
18742 		if (rval == 0) {
18743 			/*
18744 			 * If the RESET_LUN and/or RESET_TARGET failed,
18745 			 * try RESET_ALL
18746 			 */
18747 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18748 			    "sd_pkt_status_busy: RESET_ALL\n");
18749 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18750 		}
18751 		mutex_enter(SD_MUTEX(un));
18752 		if (rval == 0) {
18753 			/*
18754 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18755 			 * At this point we give up & fail the command.
18756 			 */
18757 			sd_return_failed_command(un, bp, EIO);
18758 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18759 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18760 			return;
18761 		}
18762 	}
18763 
18764 	/*
18765 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18766 	 * we have already checked the retry counts above.
18767 	 */
18768 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18769 	    EIO, SD_BSY_TIMEOUT, NULL);
18770 
18771 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18772 	    "sd_pkt_status_busy: exit\n");
18773 }
18774 
18775 
18776 /*
18777  *    Function: sd_pkt_status_reservation_conflict
18778  *
18779  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18780  *		command status.
18781  *
18782  *     Context: May be called from interrupt context
18783  */
18784 
18785 static void
18786 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18787 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18788 {
18789 	ASSERT(un != NULL);
18790 	ASSERT(mutex_owned(SD_MUTEX(un)));
18791 	ASSERT(bp != NULL);
18792 	ASSERT(xp != NULL);
18793 	ASSERT(pktp != NULL);
18794 
18795 	/*
18796 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT], the conflict
18797 	 * could be due to various reasons, such as incorrect keys or being
18798 	 * unregistered or unreserved. So, we return EACCES to the caller.
18799 	 */
18800 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18801 		int cmd = SD_GET_PKT_OPCODE(pktp);
18802 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18803 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18804 			sd_return_failed_command(un, bp, EACCES);
18805 			return;
18806 		}
18807 	}
18808 
18809 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18810 
18811 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18812 		if (sd_failfast_enable != 0) {
18813 			/* By definition, we must panic here.... */
18814 			sd_panic_for_res_conflict(un);
18815 			/*NOTREACHED*/
18816 		}
18817 		SD_ERROR(SD_LOG_IO, un,
18818 		    "sd_handle_resv_conflict: Disk Reserved\n");
18819 		sd_return_failed_command(un, bp, EACCES);
18820 		return;
18821 	}
18822 
18823 	/*
18824 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18825 	 * property is set (default is 1). Retries will not succeed
18826 	 * on a disk reserved by another initiator. HA systems
18827 	 * may reset this via sd.conf to avoid these retries.
18828 	 *
18829 	 * Note: The legacy return code for this failure is EIO, however EACCES
18830 	 * seems more appropriate for a reservation conflict.
18831 	 */
18832 	if (sd_retry_on_reservation_conflict == 0) {
18833 		SD_ERROR(SD_LOG_IO, un,
18834 		    "sd_handle_resv_conflict: Device Reserved\n");
18835 		sd_return_failed_command(un, bp, EIO);
18836 		return;
18837 	}
18838 
18839 	/*
18840 	 * Retry the command if we can.
18841 	 *
18842 	 * Note: The legacy return code for this failure is EIO, however EACCES
18843 	 * seems more appropriate for a reservation conflict.
18844 	 */
18845 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18846 	    (clock_t)2, NULL);
18847 }
18848 
18849 
18850 
18851 /*
18852  *    Function: sd_pkt_status_qfull
18853  *
18854  * Description: Handle a QUEUE FULL condition from the target.  This can
18855  *		occur if the HBA does not handle the queue full condition.
18856  *		(Basically this means third-party HBAs, as Sun HBAs will
18857  *		handle the queue full condition.)  Note that if there are
18858  *		some commands already in the transport, then the queue full
18859  *		has occurred because the queue for this nexus is actually
18860  *		full. If there are no commands in the transport, then the
18861  *		queue full is resulting from some other initiator or lun
18862  *		consuming all the resources at the target.
18863  *
18864  *     Context: May be called from interrupt context
18865  */
18866 
18867 static void
18868 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18869 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18870 {
18871 	ASSERT(un != NULL);
18872 	ASSERT(mutex_owned(SD_MUTEX(un)));
18873 	ASSERT(bp != NULL);
18874 	ASSERT(xp != NULL);
18875 	ASSERT(pktp != NULL);
18876 
18877 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18878 	    "sd_pkt_status_qfull: entry\n");
18879 
18880 	/*
18881 	 * Just lower the QFULL throttle and retry the command.  Note that
18882 	 * we do not limit the number of retries here.
18883 	 */
18884 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18885 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18886 	    SD_RESTART_TIMEOUT, NULL);
18887 
18888 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18889 	    "sd_pkt_status_qfull: exit\n");
18890 }
18891 
18892 
18893 /*
18894  *    Function: sd_reset_target
18895  *
18896  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18897  *		RESET_TARGET, or RESET_ALL.
18898  *
18899  *     Context: May be called under interrupt context.
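 *
 *		The reset is escalated: a LUN reset is attempted first
 *		(when enabled), then a target reset, and finally a bus
 *		reset (RESET_ALL) as a last resort. No reset is issued if
 *		the transport layer already reset the device or bus, as
 *		indicated by pkt_statistics.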
18900  */
18901 
18902 static void
18903 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18904 {
18905 	int rval = 0;
18906 
18907 	ASSERT(un != NULL);
18908 	ASSERT(mutex_owned(SD_MUTEX(un)));
18909 	ASSERT(pktp != NULL);
18910 
18911 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18912 
18913 	/*
18914 	 * No need to reset if the transport layer has already done so.
18915 	 */
18916 	if ((pktp->pkt_statistics &
18917 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18918 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18919 		    "sd_reset_target: no reset\n");
18920 		return;
18921 	}
18922 
18923 	mutex_exit(SD_MUTEX(un));
18924 
18925 	if (un->un_f_allow_bus_device_reset == TRUE) {
18926 		if (un->un_f_lun_reset_enabled == TRUE) {
18927 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18928 			    "sd_reset_target: RESET_LUN\n");
18929 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18930 		}
18931 		if (rval == 0) {
18932 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18933 			    "sd_reset_target: RESET_TARGET\n");
18934 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18935 		}
18936 	}
18937 
18938 	if (rval == 0) {
18939 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18940 		    "sd_reset_target: RESET_ALL\n");
18941 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18942 	}
18943 
18944 	mutex_enter(SD_MUTEX(un));
18945 
18946 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18947 }
18948 
18949 
18950 /*
18951  *    Function: sd_media_change_task
18952  *
18953  * Description: Recovery action for CDROM to become available.
18954  *
18955  *     Context: Executes in a taskq() thread context
18956  */
18957 
18958 static void
18959 sd_media_change_task(void *arg)
18960 {
18961 	struct	scsi_pkt	*pktp = arg;
18962 	struct	sd_lun		*un;
18963 	struct	buf		*bp;
18964 	struct	sd_xbuf		*xp;
18965 	int	err		= 0;
18966 	int	retry_count	= 0;
18967 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18968 	struct	sd_sense_info	si;
18969 
18970 	ASSERT(pktp != NULL);
18971 	bp = (struct buf *)pktp->pkt_private;
18972 	ASSERT(bp != NULL);
18973 	xp = SD_GET_XBUF(bp);
18974 	ASSERT(xp != NULL);
18975 	un = SD_GET_UN(bp);
18976 	ASSERT(un != NULL);
18977 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18978 	ASSERT(un->un_f_monitor_media_state);
18979 
18980 	si.ssi_severity = SCSI_ERR_INFO;
18981 	si.ssi_pfa_flag = FALSE;
18982 
18983 	/*
18984 	 * When a reset is issued on a CDROM, it takes a long time to
18985 	 * recover. First few attempts to read capacity and other things
18986 	 * related to handling unit attention fail (with an ASC of 0x4 and
18987 	 * an ASCQ of 0x1). In that case we want to do enough retries, and
18988 	 * to limit the retries in other cases of genuine failures like
18989 	 * no media in drive.
18990 	 */
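	/*
	 * Start with a short retry budget (SD_UNIT_ATTENTION_RETRY/10
	 * attempts, half a second apart). If the device reports that it
	 * is becoming ready (EAGAIN), expand the budget to the full
	 * SD_UNIT_ATTENTION_RETRY count.
	 */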
18991 	while (retry_count++ < retry_limit) {
18992 		if ((err = sd_handle_mchange(un)) == 0) {
18993 			break;
18994 		}
18995 		if (err == EAGAIN) {
18996 			retry_limit = SD_UNIT_ATTENTION_RETRY;
18997 		}
18998 		/* Sleep for 0.5 sec. & try again */
18999 		delay(drv_usectohz(500000));
19000 	}
19001 
19002 	/*
19003 	 * Dispatch (retry or fail) the original command here,
19004 	 * along with appropriate console messages....
19005 	 *
19006 	 * Must grab the mutex before calling sd_retry_command,
19007 	 * sd_print_sense_msg and sd_return_failed_command.
19008 	 */
19009 	mutex_enter(SD_MUTEX(un));
19010 	if (err != SD_CMD_SUCCESS) {
19011 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
19012 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
19013 		si.ssi_severity = SCSI_ERR_FATAL;
19014 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
19015 		sd_return_failed_command(un, bp, EIO);
19016 	} else {
19017 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
19018 		    &si, EIO, (clock_t)0, NULL);
19019 	}
19020 	mutex_exit(SD_MUTEX(un));
19021 }
19022 
19023 
19024 
19025 /*
19026  *    Function: sd_handle_mchange
19027  *
19028  * Description: Perform geometry validation & other recovery when CDROM
19029  *		has been removed from drive.
19030  *
19031  * Return Code: 0 for success
19032  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19033  *		sd_send_scsi_READ_CAPACITY()
19034  *
19035  *     Context: Executes in a taskq() thread context
19036  */
19037 
19038 static int
19039 sd_handle_mchange(struct sd_lun *un)
19040 {
19041 	uint64_t	capacity;
19042 	uint32_t	lbasize;
19043 	int		rval;
19044 
19045 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19046 	ASSERT(un->un_f_monitor_media_state);
19047 
19048 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
19049 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
19050 		return (rval);
19051 	}
19052 
19053 	mutex_enter(SD_MUTEX(un));
19054 	sd_update_block_info(un, lbasize, capacity);
19055 
19056 	if (un->un_errstats != NULL) {
19057 		struct	sd_errstats *stp =
19058 		    (struct sd_errstats *)un->un_errstats->ks_data;
19059 		stp->sd_capacity.value.ui64 = (uint64_t)
19060 		    ((uint64_t)un->un_blockcount *
19061 		    (uint64_t)un->un_tgt_blocksize);
19062 	}
19063 
19064 	/*
19065 	 * Note: Maybe let the strategy/partitioning chain worry about getting
19066 	 * valid geometry.
19067 	 */
19068 	un->un_f_geometry_is_valid = FALSE;
19069 	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
19070 	if (un->un_f_geometry_is_valid == FALSE) {
19071 		mutex_exit(SD_MUTEX(un));
19072 		return (EIO);
19073 	}
19074 
19075 	mutex_exit(SD_MUTEX(un));
19076 
19077 	/*
19078 	 * Try to lock the door
19079 	 */
19080 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19081 	    SD_PATH_DIRECT_PRIORITY));
19082 }
19083 
19084 
19085 /*
19086  *    Function: sd_send_scsi_DOORLOCK
19087  *
19088  * Description: Issue the scsi DOOR LOCK command
19089  *
19090  *   Arguments: un    - pointer to driver soft state (unit) structure for
19091  *			this target.
19092  *		flag  - SD_REMOVAL_ALLOW
19093  *			SD_REMOVAL_PREVENT
19094  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19095  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19096  *			to use the USCSI "direct" chain and bypass the normal
19097  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19098  *			command is issued as part of an error recovery action.
19099  *
19100  * Return Code: 0   - Success
19101  *		errno return code from sd_send_scsi_cmd()
19102  *
19103  *     Context: Can sleep.
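 *
 *		For example, media-change recovery locks the door with
 *		sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
 *		SD_PATH_DIRECT_PRIORITY); see sd_handle_mchange() above.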
19104  */
19105 
19106 static int
19107 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
19108 {
19109 	union scsi_cdb		cdb;
19110 	struct uscsi_cmd	ucmd_buf;
19111 	struct scsi_extended_sense	sense_buf;
19112 	int			status;
19113 
19114 	ASSERT(un != NULL);
19115 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19116 
19117 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19118 
19119 	/* already determined doorlock is not supported, fake success */
19120 	if (un->un_f_doorlock_supported == FALSE) {
19121 		return (0);
19122 	}
19123 
19124 	bzero(&cdb, sizeof (cdb));
19125 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19126 
19127 	cdb.scc_cmd = SCMD_DOORLOCK;
19128 	cdb.cdb_opaque[4] = (uchar_t)flag;
19129 
19130 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19131 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19132 	ucmd_buf.uscsi_bufaddr	= NULL;
19133 	ucmd_buf.uscsi_buflen	= 0;
19134 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19135 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19136 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19137 	ucmd_buf.uscsi_timeout	= 15;
19138 
19139 	SD_TRACE(SD_LOG_IO, un,
19140 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
19141 
19142 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19143 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19144 
19145 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19146 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19147 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19148 		/* fake success and skip subsequent doorlock commands */
19149 		un->un_f_doorlock_supported = FALSE;
19150 		return (0);
19151 	}
19152 
19153 	return (status);
19154 }
19155 
19156 /*
19157  *    Function: sd_send_scsi_READ_CAPACITY
19158  *
19159  * Description: This routine uses the scsi READ CAPACITY command to determine
19160  *		the device capacity in number of blocks and the device native
19161  *		block size. If this function returns a failure, then the
19162  *		values in *capp and *lbap are undefined.  If the capacity
19163  *		returned is 0xffffffff then the lun is too large for a
19164  *		normal READ CAPACITY command and the results of a
19165  *		READ CAPACITY 16 will be used instead.
19166  *
19167  *   Arguments: un   - ptr to soft state struct for the target
19168  *		capp - ptr to unsigned 64-bit variable to receive the
19169  *			capacity value from the command.
19170  *		lbap - ptr to unsigned 32-bit variable to receive the
19171  *			block size value from the command
19172  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19173  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19174  *			to use the USCSI "direct" chain and bypass the normal
19175  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19176  *			command is issued as part of an error recovery action.
19177  *
19178  * Return Code: 0   - Success
19179  *		EIO - IO error
19180  *		EACCES - Reservation conflict detected
19181  *		EAGAIN - Device is becoming ready
19182  *		errno return code from sd_send_scsi_cmd()
19183  *
19184  *     Context: Can sleep.  Blocks until command completes.
19185  */
19186 
19187 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19188 
19189 static int
19190 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
19191 	int path_flag)
19192 {
19193 	struct	scsi_extended_sense	sense_buf;
19194 	struct	uscsi_cmd	ucmd_buf;
19195 	union	scsi_cdb	cdb;
19196 	uint32_t		*capacity_buf;
19197 	uint64_t		capacity;
19198 	uint32_t		lbasize;
19199 	int			status;
19200 
19201 	ASSERT(un != NULL);
19202 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19203 	ASSERT(capp != NULL);
19204 	ASSERT(lbap != NULL);
19205 
19206 	SD_TRACE(SD_LOG_IO, un,
19207 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19208 
19209 	/*
19210 	 * First send a READ_CAPACITY command to the target.
19211 	 * (This command is mandatory under SCSI-2.)
19212 	 *
19213 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19214 	 * Medium Indicator bit is cleared.  The address field must be
19215 	 * zero if the PMI bit is zero.
19216 	 */
19217 	bzero(&cdb, sizeof (cdb));
19218 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19219 
19220 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19221 
19222 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19223 
19224 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19225 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19226 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19227 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19228 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19229 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19230 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19231 	ucmd_buf.uscsi_timeout	= 60;
19232 
19233 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19234 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19235 
19236 	switch (status) {
19237 	case 0:
19238 		/* Return failure if we did not get valid capacity data. */
19239 		if (ucmd_buf.uscsi_resid != 0) {
19240 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19241 			return (EIO);
19242 		}
19243 
19244 		/*
19245 		 * Read capacity and block size from the READ CAPACITY 10 data.
19246 		 * This data may be adjusted later due to device specific
19247 		 * issues.
19248 		 *
19249 		 * According to the SCSI spec, the READ CAPACITY 10
19250 		 * command returns the following:
19251 		 *
19252 		 *  bytes 0-3: Maximum logical block address available.
19253 		 *		(MSB in byte:0 & LSB in byte:3)
19254 		 *
19255 		 *  bytes 4-7: Block length in bytes
19256 		 *		(MSB in byte:4 & LSB in byte:7)
19257 		 *
19258 		 */
19259 		capacity = BE_32(capacity_buf[0]);
19260 		lbasize = BE_32(capacity_buf[1]);
19261 
19262 		/*
19263 		 * Done with capacity_buf
19264 		 */
19265 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19266 
19267 		/*
19268 		 * if the reported capacity is set to all 0xf's, then
19269 		 * this disk is too large and requires SBC-2 commands.
19270 		 * Reissue the request using READ CAPACITY 16.
19271 		 */
19272 		if (capacity == 0xffffffff) {
19273 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19274 			    &lbasize, path_flag);
19275 			if (status != 0) {
19276 				return (status);
19277 			}
19278 		}
19279 		break;	/* Success! */
19280 	case EIO:
19281 		switch (ucmd_buf.uscsi_status) {
19282 		case STATUS_RESERVATION_CONFLICT:
19283 			status = EACCES;
19284 			break;
19285 		case STATUS_CHECK:
19286 			/*
19287 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19288 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19289 			 */
19290 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19291 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19292 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19293 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19294 				return (EAGAIN);
19295 			}
19296 			break;
19297 		default:
19298 			break;
19299 		}
19300 		/* FALLTHRU */
19301 	default:
19302 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19303 		return (status);
19304 	}
19305 
19306 	/*
19307 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19308 	 * (2352 and 0 are common) so for these devices always force the value
19309 	 * to 2048 as required by the ATAPI specs.
19310 	 */
19311 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19312 		lbasize = 2048;
19313 	}
19314 
19315 	/*
19316 	 * Get the maximum LBA value from the READ CAPACITY data.
19317 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19318 	 * was cleared when issuing the command. This means that the LBA
19319 	 * returned from the device is the LBA of the last logical block
19320 	 * on the logical unit.  The actual logical block count will be
19321 	 * this value plus one.
19322 	 *
19323 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19324 	 * so scale the capacity value to reflect this.
19325 	 */
19326 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
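	/*
	 * For example (illustrative values): a device reporting a maximum
	 * LBA of 0x1FFFFF with a 2048-byte lbasize and a 512-byte
	 * un_sys_blocksize yields (0x1FFFFF + 1) * 4 = 0x800000 system
	 * blocks.
	 */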
19327 
19328 #if defined(__i386) || defined(__amd64)
19329 	/*
19330 	 * Refer to comments related to off-by-1 at the
19331 	 * header of this file.
19332 	 * Treat 1TB disk as (1T - 512)B.
19333 	 */
19334 	if (un->un_f_capacity_adjusted == 1)
19335 		capacity = DK_MAX_BLOCKS;
19336 #endif
19337 
19338 	/*
19339 	 * Copy the values from the READ CAPACITY command into the space
19340 	 * provided by the caller.
19341 	 */
19342 	*capp = capacity;
19343 	*lbap = lbasize;
19344 
19345 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19346 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19347 
19348 	/*
19349 	 * Both the lbasize and capacity from the device must be nonzero,
19350 	 * otherwise we assume that the values are not valid and return
19351 	 * failure to the caller. (4203735)
19352 	 */
19353 	if ((capacity == 0) || (lbasize == 0)) {
19354 		return (EIO);
19355 	}
19356 
19357 	return (0);
19358 }
19359 
19360 /*
19361  *    Function: sd_send_scsi_READ_CAPACITY_16
19362  *
19363  * Description: This routine uses the scsi READ CAPACITY 16 command to
19364  *		determine the device capacity in number of blocks and the
19365  *		device native block size.  If this function returns a failure,
19366  *		then the values in *capp and *lbap are undefined.
19367  *		This routine should always be called by
19368  *		sd_send_scsi_READ_CAPACITY, which will apply any device-
19369  *		specific adjustments to capacity and lbasize.
19370  *
19371  *   Arguments: un   - ptr to soft state struct for the target
19372  *		capp - ptr to unsigned 64-bit variable to receive the
19373  *			capacity value from the command.
19374  *		lbap - ptr to unsigned 32-bit variable to receive the
19375  *			block size value from the command
19376  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19377  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19378  *			to use the USCSI "direct" chain and bypass the normal
19379  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19380  *			this command is issued as part of an error recovery
19381  *			action.
19382  *
19383  * Return Code: 0   - Success
19384  *		EIO - IO error
19385  *		EACCES - Reservation conflict detected
19386  *		EAGAIN - Device is becoming ready
19387  *		errno return code from sd_send_scsi_cmd()
19388  *
19389  *     Context: Can sleep.  Blocks until command completes.
19390  */
19391 
19392 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19393 
19394 static int
19395 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19396 	uint32_t *lbap, int path_flag)
19397 {
19398 	struct	scsi_extended_sense	sense_buf;
19399 	struct	uscsi_cmd	ucmd_buf;
19400 	union	scsi_cdb	cdb;
19401 	uint64_t		*capacity16_buf;
19402 	uint64_t		capacity;
19403 	uint32_t		lbasize;
19404 	int			status;
19405 
19406 	ASSERT(un != NULL);
19407 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19408 	ASSERT(capp != NULL);
19409 	ASSERT(lbap != NULL);
19410 
19411 	SD_TRACE(SD_LOG_IO, un,
19412 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19413 
19414 	/*
19415 	 * First send a READ_CAPACITY_16 command to the target.
19416 	 *
19417 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19418 	 * Medium Indicator bit is cleared.  The address field must be
19419 	 * zero if the PMI bit is zero.
19420 	 */
19421 	bzero(&cdb, sizeof (cdb));
19422 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19423 
19424 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19425 
19426 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19427 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19428 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19429 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19430 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19431 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19432 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19433 	ucmd_buf.uscsi_timeout	= 60;
19434 
19435 	/*
19436 	 * Read Capacity (16) is a Service Action In command.  One
19437 	 * command byte (0x9E) is overloaded for multiple operations,
19438 	 * with the second CDB byte specifying the desired operation
19439 	 */
19440 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19441 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19442 
19443 	/*
19444 	 * Fill in allocation length field
19445 	 */
19446 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19447 
19448 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19449 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19450 
19451 	switch (status) {
19452 	case 0:
19453 		/* Return failure if we did not get valid capacity data. */
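		/*
		 * (The buffer is SD_CAPACITY_16_SIZE bytes; a residual
		 * greater than 20 means we received fewer than the 12
		 * bytes holding the maximum LBA and block length, assuming
		 * struct scsi_capacity_16 is 32 bytes.)
		 */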
19454 		if (ucmd_buf.uscsi_resid > 20) {
19455 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19456 			return (EIO);
19457 		}
19458 
19459 		/*
19460 		 * Read capacity and block size from the READ CAPACITY 16 data.
19461 		 * This data may be adjusted later due to device specific
19462 		 * issues.
19463 		 *
19464 		 * According to the SCSI spec, the READ CAPACITY 16
19465 		 * command returns the following:
19466 		 *
19467 		 *  bytes 0-7: Maximum logical block address available.
19468 		 *		(MSB in byte:0 & LSB in byte:7)
19469 		 *
19470 		 *  bytes 8-11: Block length in bytes
19471 		 *		(MSB in byte:8 & LSB in byte:11)
19472 		 *
19473 		 */
19474 		capacity = BE_64(capacity16_buf[0]);
19475 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19476 
19477 		/*
19478 		 * Done with capacity16_buf
19479 		 */
19480 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19481 
19482 		/*
19483 		 * if the reported capacity is set to all 0xf's, then
19484 		 * this disk is too large.  This could only happen with
19485 		 * a device that supports LBAs larger than 64 bits which
19486 		 * are not defined by any current T10 standards.
19487 		 */
19488 		if (capacity == 0xffffffffffffffff) {
19489 			return (EIO);
19490 		}
19491 		break;	/* Success! */
19492 	case EIO:
19493 		switch (ucmd_buf.uscsi_status) {
19494 		case STATUS_RESERVATION_CONFLICT:
19495 			status = EACCES;
19496 			break;
19497 		case STATUS_CHECK:
19498 			/*
19499 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19500 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19501 			 */
19502 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19503 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19504 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19505 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19506 				return (EAGAIN);
19507 			}
19508 			break;
19509 		default:
19510 			break;
19511 		}
19512 		/* FALLTHRU */
19513 	default:
19514 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19515 		return (status);
19516 	}
19517 
19518 	*capp = capacity;
19519 	*lbap = lbasize;
19520 
19521 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19522 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19523 
19524 	return (0);
19525 }
19526 
19527 
19528 /*
19529  *    Function: sd_send_scsi_START_STOP_UNIT
19530  *
19531  * Description: Issue a scsi START STOP UNIT command to the target.
19532  *
19533  *   Arguments: un    - pointer to driver soft state (unit) structure for
19534  *			this target.
19535  *		flag  - SD_TARGET_START
19536  *			SD_TARGET_STOP
19537  *			SD_TARGET_EJECT
19538  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19539  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19540  *			to use the USCSI "direct" chain and bypass the normal
19541  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19542  *			command is issued as part of an error recovery action.
19543  *
19544  * Return Code: 0   - Success
19545  *		EIO - IO error
19546  *		EACCES - Reservation conflict detected
19547  *		ENXIO  - Not Ready, medium not present
19548  *		errno return code from sd_send_scsi_cmd()
19549  *
19550  *     Context: Can sleep.
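 *
 *		For example, spin-up recovery issues
 *		sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
 *		SD_PATH_DIRECT_PRIORITY); see sd_start_stop_unit_task()
 *		below.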
19551  */
19552 
19553 static int
19554 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19555 {
19556 	struct	scsi_extended_sense	sense_buf;
19557 	union scsi_cdb		cdb;
19558 	struct uscsi_cmd	ucmd_buf;
19559 	int			status;
19560 
19561 	ASSERT(un != NULL);
19562 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19563 
19564 	SD_TRACE(SD_LOG_IO, un,
19565 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19566 
19567 	if (un->un_f_check_start_stop &&
19568 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19569 	    (un->un_f_start_stop_supported != TRUE)) {
19570 		return (0);
19571 	}
19572 
19573 	bzero(&cdb, sizeof (cdb));
19574 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19575 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19576 
19577 	cdb.scc_cmd = SCMD_START_STOP;
19578 	cdb.cdb_opaque[4] = (uchar_t)flag;
19579 
19580 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19581 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19582 	ucmd_buf.uscsi_bufaddr	= NULL;
19583 	ucmd_buf.uscsi_buflen	= 0;
19584 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19585 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19586 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19587 	ucmd_buf.uscsi_timeout	= 200;
19588 
19589 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19590 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19591 
19592 	switch (status) {
19593 	case 0:
19594 		break;	/* Success! */
19595 	case EIO:
19596 		switch (ucmd_buf.uscsi_status) {
19597 		case STATUS_RESERVATION_CONFLICT:
19598 			status = EACCES;
19599 			break;
19600 		case STATUS_CHECK:
19601 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19602 				switch (scsi_sense_key(
19603 				    (uint8_t *)&sense_buf)) {
19604 				case KEY_ILLEGAL_REQUEST:
19605 					status = ENOTSUP;
19606 					break;
19607 				case KEY_NOT_READY:
19608 					if (scsi_sense_asc(
19609 					    (uint8_t *)&sense_buf) ==
19610 					    0x3A) {
19611 						status = ENXIO;
19612 					}
19613 					break;
19614 				default:
19615 					break;
19616 				}
19617 			}
19618 			break;
19619 		default:
19620 			break;
19621 		}
19622 		break;
19623 	default:
19624 		break;
19625 	}
19626 
19627 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19628 
19629 	return (status);
19630 }
19631 
19632 
19633 /*
19634  *    Function: sd_start_stop_unit_callback
19635  *
19636  * Description: timeout(9F) callback to begin recovery process for a
19637  *		device that has spun down.
19638  *
19639  *   Arguments: arg - pointer to associated softstate struct.
19640  *
19641  *     Context: Executes in a timeout(9F) thread context
19642  */
19643 
19644 static void
19645 sd_start_stop_unit_callback(void *arg)
19646 {
19647 	struct sd_lun	*un = arg;
19648 	ASSERT(un != NULL);
19649 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19650 
19651 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19652 
19653 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19654 }
19655 
19656 
19657 /*
19658  *    Function: sd_start_stop_unit_task
19659  *
19660  * Description: Recovery procedure when a drive is spun down.
19661  *
19662  *   Arguments: arg - pointer to associated softstate struct.
19663  *
19664  *     Context: Executes in a taskq() thread context
19665  */
19666 
19667 static void
19668 sd_start_stop_unit_task(void *arg)
19669 {
19670 	struct sd_lun	*un = arg;
19671 
19672 	ASSERT(un != NULL);
19673 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19674 
19675 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19676 
19677 	/*
19678 	 * Some unformatted drives report a not ready error; there is no
19679 	 * need to restart if a format has been initiated.
19680 	 */
19681 	mutex_enter(SD_MUTEX(un));
19682 	if (un->un_f_format_in_progress == TRUE) {
19683 		mutex_exit(SD_MUTEX(un));
19684 		return;
19685 	}
19686 	mutex_exit(SD_MUTEX(un));
19687 
19688 	/*
19689 	 * When a START STOP command is issued from here, it is part of a
19690 	 * failure recovery operation and must be issued before any other
19691 	 * commands, including any pending retries. Thus it must be sent
19692 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
19693 	 * succeeds or not, we will start I/O after the attempt.
19694 	 */
19695 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19696 	    SD_PATH_DIRECT_PRIORITY);
19697 
19698 	/*
19699 	 * The above call blocks until the START_STOP_UNIT command completes.
19700 	 * Now that it has completed, we must re-try the original IO that
19701 	 * received the NOT READY condition in the first place. There are
19702 	 * three possible conditions here:
19703 	 *
19704 	 *  (1) The original IO is on un_retry_bp.
19705 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19706 	 *	is NULL.
19707 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19708 	 *	points to some other, unrelated bp.
19709 	 *
19710 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19711 	 * as the argument. If un_retry_bp is NULL, this will initiate
19712 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19713 	 * then this will process the bp on un_retry_bp. That may or may not
19714 	 * be the original IO, but that does not matter: the important thing
19715 	 * is to keep the IO processing going at this point.
19716 	 *
19717 	 * Note: This is a very specific error recovery sequence associated
19718 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19719 	 * serialize the I/O with completion of the spin-up.
19720 	 */
19721 	mutex_enter(SD_MUTEX(un));
19722 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19723 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19724 	    un, un->un_retry_bp);
19725 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19726 	sd_start_cmds(un, un->un_retry_bp);
19727 	mutex_exit(SD_MUTEX(un));
19728 
19729 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19730 }
19731 
19732 
19733 /*
19734  *    Function: sd_send_scsi_INQUIRY
19735  *
19736  * Description: Issue the scsi INQUIRY command.
19737  *
19738  *   Arguments: un - pointer to the softstate struct for the target.
19739  *		bufaddr - buffer to receive the INQUIRY data.
19740  *		buflen - size of bufaddr (the allocation length).
19741  *		evpd - EVPD bit; set to request a Vital Product Data page.
19742  *		page_code - VPD page code to request when evpd is set.
19743  *		residp - pointer to receive the residual count (may be NULL).
19744  *
19745  * Return Code: 0   - Success
19746  *		errno return code from sd_send_scsi_cmd()
19747  *
19748  *     Context: Can sleep. Does not return until command is completed.
19749  */
19750 
19751 static int
19752 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19753 	uchar_t evpd, uchar_t page_code, size_t *residp)
19754 {
19755 	union scsi_cdb		cdb;
19756 	struct uscsi_cmd	ucmd_buf;
19757 	int			status;
19758 
19759 	ASSERT(un != NULL);
19760 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19761 	ASSERT(bufaddr != NULL);
19762 
19763 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19764 
19765 	bzero(&cdb, sizeof (cdb));
19766 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19767 	bzero(bufaddr, buflen);
19768 
19769 	cdb.scc_cmd = SCMD_INQUIRY;
19770 	cdb.cdb_opaque[1] = evpd;
19771 	cdb.cdb_opaque[2] = page_code;
19772 	FORMG0COUNT(&cdb, buflen);
19773 
19774 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19775 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19776 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19777 	ucmd_buf.uscsi_buflen	= buflen;
19778 	ucmd_buf.uscsi_rqbuf	= NULL;
19779 	ucmd_buf.uscsi_rqlen	= 0;
19780 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19781 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19782 
19783 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19784 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19785 
19786 	if ((status == 0) && (residp != NULL)) {
19787 		*residp = ucmd_buf.uscsi_resid;
19788 	}
19789 
19790 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19791 
19792 	return (status);
19793 }
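
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * a hypothetical caller using sd_send_scsi_INQUIRY() to fetch a Vital
 * Product Data page.  Page 0x83 (device identification) and the 0xff
 * allocation length are example values; SD_EXAMPLE_SKETCHES is never
 * defined, so this is not compiled.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_get_devid_vpd(struct sd_lun *un)
{
	uchar_t	*inqbuf;
	size_t	resid = 0;
	int	rval;

	inqbuf = kmem_zalloc(0xff, KM_SLEEP);

	/* evpd = 1 selects the VPD page named by page_code. */
	rval = sd_send_scsi_INQUIRY(un, inqbuf, 0xff, 0x01, 0x83, &resid);
	if (rval == 0) {
		SD_INFO(SD_LOG_IO, un, "example: got %ld VPD bytes\n",
		    (long)(0xff - resid));
	}

	kmem_free(inqbuf, 0xff);
	return (rval);
}
#endif	/* SD_EXAMPLE_SKETCHES */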
19794 
19795 
19796 /*
19797  *    Function: sd_send_scsi_TEST_UNIT_READY
19798  *
19799  * Description: Issue the scsi TEST UNIT READY command.
19800  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19801  *		prevent retrying failed commands. Use this when the intent
19802  *		is either to check for device readiness, to clear a Unit
19803  *		Attention, or to clear any outstanding sense data.
19804  *		However, under specific conditions the expected behavior
19805  *		is for retries to bring a device ready, so use the flag
19806  *		with caution.
19807  *
19808  *   Arguments: un
19809  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19810  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19811  *			0: don't check for media presence; do retries on cmd.
19812  *
19813  * Return Code: 0   - Success
19814  *		EIO - IO error
19815  *		EACCES - Reservation conflict detected
19816  *		ENXIO  - Not Ready, medium not present
19817  *		errno return code from sd_send_scsi_cmd()
19818  *
19819  *     Context: Can sleep. Does not return until command is completed.
19820  */
19821 
19822 static int
19823 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19824 {
19825 	struct	scsi_extended_sense	sense_buf;
19826 	union scsi_cdb		cdb;
19827 	struct uscsi_cmd	ucmd_buf;
19828 	int			status;
19829 
19830 	ASSERT(un != NULL);
19831 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19832 
19833 	SD_TRACE(SD_LOG_IO, un,
19834 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19835 
19836 	/*
19837 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19838 	 * timeouts when they receive a TUR and the queue is not empty. Check
19839 	 * the configuration flag set during attach (indicating the drive has
19840 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19841 	 * TUR. If there are pending commands, return success; this is a bit
19842 	 * arbitrary, but it is ok for non-removables (i.e. the elite1 disks,
19843 	 * where a command in transport implies the unit is ready) and for
19844 	 * non-clustering configurations.
19845 	 */
19846 	if (un->un_f_cfg_tur_check == TRUE) {
19847 		mutex_enter(SD_MUTEX(un));
19848 		if (un->un_ncmds_in_transport != 0) {
19849 			mutex_exit(SD_MUTEX(un));
19850 			return (0);
19851 		}
19852 		mutex_exit(SD_MUTEX(un));
19853 	}
19854 
19855 	bzero(&cdb, sizeof (cdb));
19856 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19857 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19858 
19859 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19860 
19861 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19862 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19863 	ucmd_buf.uscsi_bufaddr	= NULL;
19864 	ucmd_buf.uscsi_buflen	= 0;
19865 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19866 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19867 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19868 
19869 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19870 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19871 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19872 	}
19873 	ucmd_buf.uscsi_timeout	= 60;
19874 
19875 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19876 	    UIO_SYSSPACE, UIO_SYSSPACE,
19877 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19878 
19879 	switch (status) {
19880 	case 0:
19881 		break;	/* Success! */
19882 	case EIO:
19883 		switch (ucmd_buf.uscsi_status) {
19884 		case STATUS_RESERVATION_CONFLICT:
19885 			status = EACCES;
19886 			break;
19887 		case STATUS_CHECK:
19888 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19889 				break;
19890 			}
19891 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19892 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19893 				KEY_NOT_READY) &&
19894 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
19895 				status = ENXIO;
19896 			}
19897 			break;
19898 		default:
19899 			break;
19900 		}
19901 		break;
19902 	default:
19903 		break;
19904 	}
19905 
19906 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19907 
19908 	return (status);
19909 }
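
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * using the SD_CHECK_FOR_MEDIA flag to test for media presence.  The
 * sd_example_* name and the SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static boolean_t
sd_example_media_present(struct sd_lun *un)
{
	/*
	 * SD_CHECK_FOR_MEDIA maps the "medium not present" CHECK CONDITION
	 * (KEY_NOT_READY, ASC 0x3A) to ENXIO, so a caller can tell an
	 * empty drive apart from other failures.
	 */
	return ((sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA) ==
	    ENXIO) ? B_FALSE : B_TRUE);
}
#endif	/* SD_EXAMPLE_SKETCHES */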
19910 
19911 
19912 /*
19913  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19914  *
19915  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19916  *
19917  *   Arguments: un
19918  *
19919  * Return Code: 0   - Success
19920  *		EACCES
19921  *		ENOTSUP
19922  *		errno return code from sd_send_scsi_cmd()
19923  *
19924  *     Context: Can sleep. Does not return until command is completed.
19925  */
19926 
19927 static int
19928 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19929 	uint16_t data_len, uchar_t *data_bufp)
19930 {
19931 	struct scsi_extended_sense	sense_buf;
19932 	union scsi_cdb		cdb;
19933 	struct uscsi_cmd	ucmd_buf;
19934 	int			status;
19935 	int			no_caller_buf = FALSE;
19936 
19937 	ASSERT(un != NULL);
19938 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19939 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19940 
19941 	SD_TRACE(SD_LOG_IO, un,
19942 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19943 
19944 	bzero(&cdb, sizeof (cdb));
19945 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19946 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19947 	if (data_bufp == NULL) {
19948 		/* Allocate a default buf if the caller did not give one */
19949 		ASSERT(data_len == 0);
19950 		data_len  = MHIOC_RESV_KEY_SIZE;
19951 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19952 		no_caller_buf = TRUE;
19953 	}
19954 
19955 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19956 	cdb.cdb_opaque[1] = usr_cmd;
19957 	FORMG1COUNT(&cdb, data_len);
19958 
19959 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19960 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19961 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19962 	ucmd_buf.uscsi_buflen	= data_len;
19963 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19964 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19965 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19966 	ucmd_buf.uscsi_timeout	= 60;
19967 
19968 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19969 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19970 
19971 	switch (status) {
19972 	case 0:
19973 		break;	/* Success! */
19974 	case EIO:
19975 		switch (ucmd_buf.uscsi_status) {
19976 		case STATUS_RESERVATION_CONFLICT:
19977 			status = EACCES;
19978 			break;
19979 		case STATUS_CHECK:
19980 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19981 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19982 				KEY_ILLEGAL_REQUEST)) {
19983 				status = ENOTSUP;
19984 			}
19985 			break;
19986 		default:
19987 			break;
19988 		}
19989 		break;
19990 	default:
19991 		break;
19992 	}
19993 
19994 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
19995 
19996 	if (no_caller_buf == TRUE) {
19997 		kmem_free(data_bufp, data_len);
19998 	}
19999 
20000 	return (status);
20001 }
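
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * reading the registered reservation keys.  Passing data_bufp == NULL and
 * data_len == 0 would make the routine above allocate a minimal one-key
 * buffer itself; here a caller-supplied buffer is shown.  The
 * sd_example_* name and SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_read_resv_keys(struct sd_lun *un, uchar_t *keybuf, uint16_t keylen)
{
	/*
	 * EACCES indicates a reservation conflict; ENOTSUP indicates the
	 * device does not implement SCSI-3 persistent reservations.
	 */
	return (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
	    keylen, keybuf));
}
#endif	/* SD_EXAMPLE_SKETCHES */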
20002 
20003 
20004 /*
20005  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20006  *
20007  * Description: Handles the multi-host persistent reservation requests
20008  *		(MHIOCGRP_REGISTER, MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT,
20009  *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending the SCSI-3 PROUT
20010  *		commands to the device.
20011  *
20012  *   Arguments: un  -   Pointer to soft state struct for the target.
20013  *		usr_cmd - SCSI-3 reservation facility command (one of
20014  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
20015  *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_REGISTERANDIGNOREKEY)
20016  *		usr_bufp - user-provided pointer to a register, reserve
20017  *			descriptor, preempt-and-abort, or register-and-ignore-key
20018  *			structure (mhioc_register_t, mhioc_resv_desc_t,
 *			mhioc_preemptandabort_t, mhioc_registerandignorekey_t)
20019  *
20020  * Return Code: 0   - Success
20021  *		EACCES
20022  *		ENOTSUP
20023  *		errno return code from sd_send_scsi_cmd()
20024  *
20025  *     Context: Can sleep. Does not return until command is completed.
20026  */
20027 
20028 static int
20029 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
20030 	uchar_t	*usr_bufp)
20031 {
20032 	struct scsi_extended_sense	sense_buf;
20033 	union scsi_cdb		cdb;
20034 	struct uscsi_cmd	ucmd_buf;
20035 	int			status;
20036 	uchar_t			data_len = sizeof (sd_prout_t);
20037 	sd_prout_t		*prp;
20038 
20039 	ASSERT(un != NULL);
20040 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20041 	ASSERT(data_len == 24);	/* required by scsi spec */
20042 
20043 	SD_TRACE(SD_LOG_IO, un,
20044 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20045 
20046 	if (usr_bufp == NULL) {
20047 		return (EINVAL);
20048 	}
20049 
20050 	bzero(&cdb, sizeof (cdb));
20051 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20052 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20053 	prp = kmem_zalloc(data_len, KM_SLEEP);
20054 
20055 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20056 	cdb.cdb_opaque[1] = usr_cmd;
20057 	FORMG1COUNT(&cdb, data_len);
20058 
20059 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20060 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20061 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20062 	ucmd_buf.uscsi_buflen	= data_len;
20063 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20064 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20065 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20066 	ucmd_buf.uscsi_timeout	= 60;
20067 
20068 	switch (usr_cmd) {
20069 	case SD_SCSI3_REGISTER: {
20070 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
20071 
20072 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20073 		bcopy(ptr->newkey.key, prp->service_key,
20074 		    MHIOC_RESV_KEY_SIZE);
20075 		prp->aptpl = ptr->aptpl;
20076 		break;
20077 	}
20078 	case SD_SCSI3_RESERVE:
20079 	case SD_SCSI3_RELEASE: {
20080 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20081 
20082 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20083 		prp->scope_address = BE_32(ptr->scope_specific_addr);
20084 		cdb.cdb_opaque[2] = ptr->type;
20085 		break;
20086 	}
20087 	case SD_SCSI3_PREEMPTANDABORT: {
20088 		mhioc_preemptandabort_t *ptr =
20089 		    (mhioc_preemptandabort_t *)usr_bufp;
20090 
20091 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20092 		bcopy(ptr->victim_key.key, prp->service_key,
20093 		    MHIOC_RESV_KEY_SIZE);
20094 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
20095 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
20096 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
20097 		break;
20098 	}
20099 	case SD_SCSI3_REGISTERANDIGNOREKEY:
20100 	{
20101 		mhioc_registerandignorekey_t *ptr;
20102 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
20103 		bcopy(ptr->newkey.key,
20104 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
20105 		prp->aptpl = ptr->aptpl;
20106 		break;
20107 	}
20108 	default:
20109 		ASSERT(FALSE);
20110 		break;
20111 	}
20112 
20113 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20114 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20115 
20116 	switch (status) {
20117 	case 0:
20118 		break;	/* Success! */
20119 	case EIO:
20120 		switch (ucmd_buf.uscsi_status) {
20121 		case STATUS_RESERVATION_CONFLICT:
20122 			status = EACCES;
20123 			break;
20124 		case STATUS_CHECK:
20125 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20126 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20127 				KEY_ILLEGAL_REQUEST)) {
20128 				status = ENOTSUP;
20129 			}
20130 			break;
20131 		default:
20132 			break;
20133 		}
20134 		break;
20135 	default:
20136 		break;
20137 	}
20138 
20139 	kmem_free(prp, data_len);
20140 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20141 	return (status);
20142 }
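
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * building the mhioc_register_t consumed by the SD_SCSI3_REGISTER case
 * above.  A zeroed oldkey registers a new key; aptpl = 0 means the
 * registration does not persist across power loss.  The sd_example_*
 * name and SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_register_key(struct sd_lun *un, mhioc_resv_key_t *newkey)
{
	mhioc_register_t	reg;

	bzero(&reg, sizeof (reg));
	bcopy(newkey->key, reg.newkey.key, MHIOC_RESV_KEY_SIZE);
	reg.aptpl = 0;

	return (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
	    (uchar_t *)&reg));
}
#endif	/* SD_EXAMPLE_SKETCHES */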
20143 
20144 
20145 /*
20146  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20147  *
20148  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20149  *
20150  *   Arguments: un - pointer to the target's soft state struct
 *		dkc - callback descriptor; if NULL the flush is synchronous
20151  *
20152  * Return Code: 0 - success
20153  *		errno-type error code
20154  *
20155  *     Context: kernel thread context only.
20156  */
20157 
20158 static int
20159 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20160 {
20161 	struct sd_uscsi_info	*uip;
20162 	struct uscsi_cmd	*uscmd;
20163 	union scsi_cdb		*cdb;
20164 	struct buf		*bp;
20165 	int			rval = 0;
20166 
20167 	SD_TRACE(SD_LOG_IO, un,
20168 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20169 
20170 	ASSERT(un != NULL);
20171 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20172 
20173 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20174 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20175 
20176 	/*
20177 	 * First get some memory for the uscsi_cmd struct and cdb
20178 	 * and initialize for SYNCHRONIZE_CACHE cmd.
20179 	 */
20180 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20181 	uscmd->uscsi_cdblen = CDB_GROUP1;
20182 	uscmd->uscsi_cdb = (caddr_t)cdb;
20183 	uscmd->uscsi_bufaddr = NULL;
20184 	uscmd->uscsi_buflen = 0;
20185 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20186 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20187 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20188 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20189 	uscmd->uscsi_timeout = sd_io_time;
20190 
20191 	/*
20192 	 * Allocate an sd_uscsi_info struct and fill it with the info
20193 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20194 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20195 	 * since we allocate the buf here in this function, we do not
20196 	 * need to preserve the prior contents of b_private.
20197 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20198 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy().
20199 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20200 	uip->ui_flags = SD_PATH_DIRECT;
20201 	uip->ui_cmdp  = uscmd;
20202 
20203 	bp = getrbuf(KM_SLEEP);
20204 	bp->b_private = uip;
20205 
20206 	/*
20207 	 * Setup buffer to carry uscsi request.
20208 	 */
20209 	bp->b_flags  = B_BUSY;
20210 	bp->b_bcount = 0;
20211 	bp->b_blkno  = 0;
20212 
20213 	if (dkc != NULL) {
20214 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20215 		uip->ui_dkc = *dkc;
20216 	}
20217 
20218 	bp->b_edev = SD_GET_DEV(un);
20219 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20220 
20221 	(void) sd_uscsi_strategy(bp);
20222 
20223 	/*
20224 	 * If this is a synchronous request, wait for completion.
20225 	 * If it is async, just return and let the b_iodone callback
20226 	 * do the cleanup.
20227 	 * NOTE: On return, un_ncmds_in_driver will be decremented,
20228 	 * but it was also incremented in sd_uscsi_strategy(), so
20229 	 * we should be ok.
20230 	 */
20231 	if (dkc == NULL) {
20232 		(void) biowait(bp);
20233 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20234 	}
20235 
20236 	return (rval);
20237 }
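
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * issuing an asynchronous cache flush.  A non-NULL dk_callback makes
 * sd_send_scsi_SYNCHRONIZE_CACHE() return immediately; the biodone
 * routine below then invokes dkc_callback with the final status.  The
 * sd_example_* names and SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static void
sd_example_flush_done(void *cookie, int error)
{
	struct sd_lun	*un = cookie;

	SD_INFO(SD_LOG_IO, un, "example: cache flush done, err %d\n", error);
}

static void
sd_example_async_flush(struct sd_lun *un)
{
	struct dk_callback	dkc;

	bzero(&dkc, sizeof (dkc));
	dkc.dkc_callback = sd_example_flush_done;
	dkc.dkc_cookie = un;

	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
}
#endif	/* SD_EXAMPLE_SKETCHES */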
20238 
20239 
20240 static int
20241 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20242 {
20243 	struct sd_uscsi_info *uip;
20244 	struct uscsi_cmd *uscmd;
20245 	uint8_t *sense_buf;
20246 	struct sd_lun *un;
20247 	int status;
20248 
20249 	uip = (struct sd_uscsi_info *)(bp->b_private);
20250 	ASSERT(uip != NULL);
20251 
20252 	uscmd = uip->ui_cmdp;
20253 	ASSERT(uscmd != NULL);
20254 
20255 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20256 	ASSERT(sense_buf != NULL);
20257 
20258 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20259 	ASSERT(un != NULL);
20260 
20261 	status = geterror(bp);
20262 	switch (status) {
20263 	case 0:
20264 		break;	/* Success! */
20265 	case EIO:
20266 		switch (uscmd->uscsi_status) {
20267 		case STATUS_RESERVATION_CONFLICT:
20268 			/* Ignore reservation conflict */
20269 			status = 0;
20270 			goto done;
20271 
20272 		case STATUS_CHECK:
20273 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20274 			    (scsi_sense_key(sense_buf) ==
20275 				KEY_ILLEGAL_REQUEST)) {
20276 				/* Ignore Illegal Request error */
20277 				mutex_enter(SD_MUTEX(un));
20278 				un->un_f_sync_cache_supported = FALSE;
20279 				mutex_exit(SD_MUTEX(un));
20280 				status = ENOTSUP;
20281 				goto done;
20282 			}
20283 			break;
20284 		default:
20285 			break;
20286 		}
20287 		/* FALLTHRU */
20288 	default:
20289 		/* Ignore error if the media is not present */
20290 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
20291 			status = 0;
20292 			goto done;
20293 		}
20294 		/* If we reach this, we had an error */
20295 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20296 		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20297 		break;
20298 	}
20299 
20300 done:
20301 	if (uip->ui_dkc.dkc_callback != NULL) {
20302 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20303 	}
20304 
20305 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20306 	freerbuf(bp);
20307 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20308 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20309 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20310 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20311 
20312 	return (status);
20313 }
20314 
20315 
20316 /*
20317  *    Function: sd_send_scsi_GET_CONFIGURATION
20318  *
20319  * Description: Issues the get configuration command to the device.
20320  *		Called from sd_check_for_writable_cd & sd_get_media_info.
20321  *		The caller must ensure that buflen == SD_PROFILE_HEADER_LEN.
20322  *   Arguments: un - pointer to the softstate struct for the target.
20323  *		ucmdbuf - uscsi_cmd struct to be initialized and issued.
20324  *		rqbuf - buffer to receive request sense data.
20325  *		rqbuflen - size of rqbuf.
20326  *		bufaddr - buffer to receive the configuration header.
20327  *		buflen - size of bufaddr (must be SD_PROFILE_HEADER_LEN).
20328  *
20329  * Return Code: 0   - Success
20330  *		errno return code from sd_send_scsi_cmd()
20331  *
20332  *     Context: Can sleep. Does not return until command is completed.
20333  *
20334  */
20335 
20336 static int
20337 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20338 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20339 {
20340 	char	cdb[CDB_GROUP1];
20341 	int	status;
20342 
20343 	ASSERT(un != NULL);
20344 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20345 	ASSERT(bufaddr != NULL);
20346 	ASSERT(ucmdbuf != NULL);
20347 	ASSERT(rqbuf != NULL);
20348 
20349 	SD_TRACE(SD_LOG_IO, un,
20350 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20351 
20352 	bzero(cdb, sizeof (cdb));
20353 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20354 	bzero(rqbuf, rqbuflen);
20355 	bzero(bufaddr, buflen);
20356 
20357 	/*
20358 	 * Set up cdb field for the get configuration command.
20359 	 */
20360 	cdb[0] = SCMD_GET_CONFIGURATION;
20361 	cdb[1] = 0x02;  /* Requested Type */
20362 	cdb[8] = SD_PROFILE_HEADER_LEN;
20363 	ucmdbuf->uscsi_cdb = cdb;
20364 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20365 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20366 	ucmdbuf->uscsi_buflen = buflen;
20367 	ucmdbuf->uscsi_timeout = sd_io_time;
20368 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20369 	ucmdbuf->uscsi_rqlen = rqbuflen;
20370 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20371 
20372 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20373 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20374 
20375 	switch (status) {
20376 	case 0:
20377 		break;  /* Success! */
20378 	case EIO:
20379 		switch (ucmdbuf->uscsi_status) {
20380 		case STATUS_RESERVATION_CONFLICT:
20381 			status = EACCES;
20382 			break;
20383 		default:
20384 			break;
20385 		}
20386 		break;
20387 	default:
20388 		break;
20389 	}
20390 
20391 	if (status == 0) {
20392 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20393 		    "sd_send_scsi_GET_CONFIGURATION: data",
20394 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20395 	}
20396 
20397 	SD_TRACE(SD_LOG_IO, un,
20398 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20399 
20400 	return (status);
20401 }
20402 
20403 /*
20404  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20405  *
20406  * Description: Issues the get configuration command to the device to
20407  *              retrieve a specific feature. Called from
20408  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20409  *   Arguments: un - pointer to the softstate struct for the target.
20410  *              ucmdbuf - uscsi_cmd struct to be initialized and issued.
20411  *              rqbuf - buffer to receive request sense data.
20412  *              rqbuflen - size of rqbuf.
20413  *              bufaddr - buffer to receive the feature data.
20414  *              buflen - size of bufaddr.
20415  *		feature - feature code of the feature to retrieve.
20416  *
20417  * Return Code: 0   - Success
20418  *              errno return code from sd_send_scsi_cmd()
20419  *
20420  *     Context: Can sleep. Does not return until command is completed.
20421  *
20422  */
20423 static int
20424 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20425 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20426 	uchar_t *bufaddr, uint_t buflen, char feature)
20427 {
20428 	char    cdb[CDB_GROUP1];
20429 	int	status;
20430 
20431 	ASSERT(un != NULL);
20432 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20433 	ASSERT(bufaddr != NULL);
20434 	ASSERT(ucmdbuf != NULL);
20435 	ASSERT(rqbuf != NULL);
20436 
20437 	SD_TRACE(SD_LOG_IO, un,
20438 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20439 
20440 	bzero(cdb, sizeof (cdb));
20441 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20442 	bzero(rqbuf, rqbuflen);
20443 	bzero(bufaddr, buflen);
20444 
20445 	/*
20446 	 * Set up cdb field for the get configuration command.
20447 	 */
20448 	cdb[0] = SCMD_GET_CONFIGURATION;
20449 	cdb[1] = 0x02;  /* Requested Type */
20450 	cdb[3] = feature;
20451 	cdb[8] = buflen;
20452 	ucmdbuf->uscsi_cdb = cdb;
20453 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20454 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20455 	ucmdbuf->uscsi_buflen = buflen;
20456 	ucmdbuf->uscsi_timeout = sd_io_time;
20457 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20458 	ucmdbuf->uscsi_rqlen = rqbuflen;
20459 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20460 
20461 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20462 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20463 
20464 	switch (status) {
20465 	case 0:
20466 		break;  /* Success! */
20467 	case EIO:
20468 		switch (ucmdbuf->uscsi_status) {
20469 		case STATUS_RESERVATION_CONFLICT:
20470 			status = EACCES;
20471 			break;
20472 		default:
20473 			break;
20474 		}
20475 		break;
20476 	default:
20477 		break;
20478 	}
20479 
20480 	if (status == 0) {
20481 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20482 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20483 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20484 	}
20485 
20486 	SD_TRACE(SD_LOG_IO, un,
20487 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20488 
20489 	return (status);
20490 }
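
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * probing a single MMC feature with the routine above.  The buffer sizes
 * reuse the driver's SD_PROFILE_HEADER_LEN and SENSE_LENGTH; the
 * sd_example_* name and SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_probe_feature(struct sd_lun *un, char feature)
{
	struct uscsi_cmd	ucmd;
	uchar_t			rqbuf[SENSE_LENGTH];
	uchar_t			out[SD_PROFILE_HEADER_LEN];

	/* A zero return means the feature header was fetched into out[]. */
	return (sd_send_scsi_feature_GET_CONFIGURATION(un, &ucmd, rqbuf,
	    SENSE_LENGTH, out, SD_PROFILE_HEADER_LEN, feature));
}
#endif	/* SD_EXAMPLE_SKETCHES */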
20491 
20492 
20493 /*
20494  *    Function: sd_send_scsi_MODE_SENSE
20495  *
20496  * Description: Utility function for issuing a scsi MODE SENSE command.
20497  *		Note: This routine uses a consistent implementation for Group0,
20498  *		Group1, and Group2 commands across all platforms. ATAPI devices
20499  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
20500  *
20501  *   Arguments: un - pointer to the softstate struct for the target.
20502  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20503  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte) or
20504  *			  CDB_GROUP[1|2] (10 byte)).
20505  *		buflen - size of page to be retrieved.
20506  *		page_code - page code of data to be retrieved from the target.
20507  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20508  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20509  *			to use the USCSI "direct" chain and bypass the normal
20510  *			command waitq.
20511  *
20512  * Return Code: 0   - Success
20513  *		errno return code from sd_send_scsi_cmd()
20514  *
20515  *     Context: Can sleep. Does not return until command is completed.
20516  */
20517 
20518 static int
20519 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20520 	size_t buflen,  uchar_t page_code, int path_flag)
20521 {
20522 	struct	scsi_extended_sense	sense_buf;
20523 	union scsi_cdb		cdb;
20524 	struct uscsi_cmd	ucmd_buf;
20525 	int			status;
20526 	int			headlen;
20527 
20528 	ASSERT(un != NULL);
20529 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20530 	ASSERT(bufaddr != NULL);
20531 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20532 	    (cdbsize == CDB_GROUP2));
20533 
20534 	SD_TRACE(SD_LOG_IO, un,
20535 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20536 
20537 	bzero(&cdb, sizeof (cdb));
20538 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20539 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20540 	bzero(bufaddr, buflen);
20541 
20542 	if (cdbsize == CDB_GROUP0) {
20543 		cdb.scc_cmd = SCMD_MODE_SENSE;
20544 		cdb.cdb_opaque[2] = page_code;
20545 		FORMG0COUNT(&cdb, buflen);
20546 		headlen = MODE_HEADER_LENGTH;
20547 	} else {
20548 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20549 		cdb.cdb_opaque[2] = page_code;
20550 		FORMG1COUNT(&cdb, buflen);
20551 		headlen = MODE_HEADER_LENGTH_GRP2;
20552 	}
20553 
20554 	ASSERT(headlen <= buflen);
20555 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20556 
20557 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20558 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20559 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20560 	ucmd_buf.uscsi_buflen	= buflen;
20561 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20562 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20563 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20564 	ucmd_buf.uscsi_timeout	= 60;
20565 
20566 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20567 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20568 
20569 	switch (status) {
20570 	case 0:
20571 		/*
20572 		 * sr_check_wp() uses the 0x3f page code and checks the mode
20573 		 * page header to determine if the target device is
20574 		 * write-protected. But some USB devices return 0 bytes for
20575 		 * the 0x3f page code. For this case, make sure that at least
20576 		 * the mode page header is returned.
20577 		 */
20578 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
20579 			status = EIO;
20580 		break;	/* Success! */
20581 	case EIO:
20582 		switch (ucmd_buf.uscsi_status) {
20583 		case STATUS_RESERVATION_CONFLICT:
20584 			status = EACCES;
20585 			break;
20586 		default:
20587 			break;
20588 		}
20589 		break;
20590 	default:
20591 		break;
20592 	}
20593 
20594 	if (status == 0) {
20595 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20596 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20597 	}
20598 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20599 
20600 	return (status);
20601 }
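
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * the sr_check_wp()-style usage mentioned above, fetching just the mode
 * header with the 0x3f ("all pages") page code.  On direct-access devices
 * the write-protect state is carried in the header's device-specific
 * byte.  The sd_example_* name and SD_EXAMPLE_SKETCHES guard are
 * hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_sense_header(struct sd_lun *un, uchar_t *hdr)
{
	/* hdr must be at least MODE_HEADER_LENGTH bytes. */
	return (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, hdr,
	    MODE_HEADER_LENGTH, 0x3f, SD_PATH_STANDARD));
}
#endif	/* SD_EXAMPLE_SKETCHES */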
20602 
20603 
20604 /*
20605  *    Function: sd_send_scsi_MODE_SELECT
20606  *
20607  * Description: Utility function for issuing a scsi MODE SELECT command.
20608  *		Note: This routine uses a consistent implementation for Group0,
20609  *		Group1, and Group2 commands across all platforms. ATAPI devices
20610  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
20611  *
20612  *   Arguments: un - pointer to the softstate struct for the target.
20613  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte) or
20614  *			  CDB_GROUP[1|2] (10 byte)).
20615  *		bufaddr - buffer containing the page data to send to the target.
20616  *		buflen - size of the page data to be transferred.
20617  *		save_page - boolean to determine if the SP bit should be set.
20618  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20619  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20620  *			to use the USCSI "direct" chain and bypass the normal
20621  *			command waitq.
20622  *
20623  * Return Code: 0   - Success
20624  *		errno return code from sd_send_scsi_cmd()
20625  *
20626  *     Context: Can sleep. Does not return until command is completed.
20627  */
20628 
20629 static int
20630 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20631 	size_t buflen,  uchar_t save_page, int path_flag)
20632 {
20633 	struct	scsi_extended_sense	sense_buf;
20634 	union scsi_cdb		cdb;
20635 	struct uscsi_cmd	ucmd_buf;
20636 	int			status;
20637 
20638 	ASSERT(un != NULL);
20639 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20640 	ASSERT(bufaddr != NULL);
20641 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20642 	    (cdbsize == CDB_GROUP2));
20643 
20644 	SD_TRACE(SD_LOG_IO, un,
20645 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20646 
20647 	bzero(&cdb, sizeof (cdb));
20648 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20649 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20650 
20651 	/* Set the PF bit for many third party drives */
20652 	cdb.cdb_opaque[1] = 0x10;
20653 
20654 	/* Set the savepage(SP) bit if given */
20655 	if (save_page == SD_SAVE_PAGE) {
20656 		cdb.cdb_opaque[1] |= 0x01;
20657 	}
20658 
20659 	if (cdbsize == CDB_GROUP0) {
20660 		cdb.scc_cmd = SCMD_MODE_SELECT;
20661 		FORMG0COUNT(&cdb, buflen);
20662 	} else {
20663 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20664 		FORMG1COUNT(&cdb, buflen);
20665 	}
20666 
20667 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20668 
20669 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20670 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20671 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20672 	ucmd_buf.uscsi_buflen	= buflen;
20673 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20674 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20675 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20676 	ucmd_buf.uscsi_timeout	= 60;
20677 
20678 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20679 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20680 
20681 	switch (status) {
20682 	case 0:
20683 		break;	/* Success! */
20684 	case EIO:
20685 		switch (ucmd_buf.uscsi_status) {
20686 		case STATUS_RESERVATION_CONFLICT:
20687 			status = EACCES;
20688 			break;
20689 		default:
20690 			break;
20691 		}
20692 		break;
20693 	default:
20694 		break;
20695 	}
20696 
20697 	if (status == 0) {
20698 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20699 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20700 	}
20701 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20702 
20703 	return (status);
20704 }
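
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * the usual MODE SENSE / modify / MODE SELECT round trip.  The
 * hypothetical guard SD_EXAMPLE_SKETCHES is never defined, and the sketch
 * omits the mode-header fixups (e.g. zeroing the mode data length) that a
 * real caller performs before the select.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_update_mode_page(struct sd_lun *un, uchar_t *buf, size_t len,
	uchar_t page_code)
{
	int	rval;

	rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, buf, len, page_code,
	    SD_PATH_STANDARD);
	if (rval != 0) {
		return (rval);
	}

	/* ... the caller would edit the page bytes here ... */

	/* SD_SAVE_PAGE sets the SP bit so the change persists. */
	return (sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, buf, len,
	    SD_SAVE_PAGE, SD_PATH_STANDARD));
}
#endif	/* SD_EXAMPLE_SKETCHES */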
20705 
20706 
20707 /*
20708  *    Function: sd_send_scsi_RDWR
20709  *
20710  * Description: Issue a scsi READ or WRITE command with the given parameters.
20711  *
20712  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20713  *		cmd:	 SCMD_READ or SCMD_WRITE
20714  *		bufaddr: Address of caller's buffer for the RDWR data
20715  *			 (destination for a read, source for a write).
20716  *		buflen:  Length of caller's buffer for the RDWR data.
20717  *		start_block: Block number for the start of the RDWR operation.
20718  *			 (Assumes target-native block size.)
20720  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20721  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20722  *			to use the USCSI "direct" chain and bypass the normal
20723  *			command waitq.
20724  *
20725  * Return Code: 0   - Success
20726  *		errno return code from sd_send_scsi_cmd()
20727  *
20728  *     Context: Can sleep. Does not return until command is completed.
20729  */
20730 
20731 static int
20732 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
20733 	size_t buflen, daddr_t start_block, int path_flag)
20734 {
20735 	struct	scsi_extended_sense	sense_buf;
20736 	union scsi_cdb		cdb;
20737 	struct uscsi_cmd	ucmd_buf;
20738 	uint32_t		block_count;
20739 	int			status;
20740 	int			cdbsize;
20741 	uchar_t			flag;
20742 
20743 	ASSERT(un != NULL);
20744 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20745 	ASSERT(bufaddr != NULL);
20746 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
20747 
20748 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
20749 
20750 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
20751 		return (EINVAL);
20752 	}
20753 
20754 	mutex_enter(SD_MUTEX(un));
20755 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
20756 	mutex_exit(SD_MUTEX(un));
20757 
20758 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
20759 
20760 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
20761 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
20762 	    bufaddr, buflen, start_block, block_count);
20763 
20764 	bzero(&cdb, sizeof (cdb));
20765 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20766 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20767 
20768 	/* Compute CDB size to use */
20769 	if (start_block > 0xffffffff)
20770 		cdbsize = CDB_GROUP4;
20771 	else if ((start_block & 0xFFE00000) ||
20772 	    (un->un_f_cfg_is_atapi == TRUE))
20773 		cdbsize = CDB_GROUP1;
20774 	else
20775 		cdbsize = CDB_GROUP0;
20776 
20777 	switch (cdbsize) {
20778 	case CDB_GROUP0:	/* 6-byte CDBs */
20779 		cdb.scc_cmd = cmd;
20780 		FORMG0ADDR(&cdb, start_block);
20781 		FORMG0COUNT(&cdb, block_count);
20782 		break;
20783 	case CDB_GROUP1:	/* 10-byte CDBs */
20784 		cdb.scc_cmd = cmd | SCMD_GROUP1;
20785 		FORMG1ADDR(&cdb, start_block);
20786 		FORMG1COUNT(&cdb, block_count);
20787 		break;
20788 	case CDB_GROUP4:	/* 16-byte CDBs */
20789 		cdb.scc_cmd = cmd | SCMD_GROUP4;
20790 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
20791 		FORMG4COUNT(&cdb, block_count);
20792 		break;
20793 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
20794 	default:
20795 		/* All others reserved */
20796 		return (EINVAL);
20797 	}
20798 
20799 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
20800 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20801 
20802 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20803 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20804 	ucmd_buf.uscsi_bufaddr	= bufaddr;
20805 	ucmd_buf.uscsi_buflen	= buflen;
20806 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20807 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20808 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
20809 	ucmd_buf.uscsi_timeout	= 60;
20810 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20811 				UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20812 	switch (status) {
20813 	case 0:
20814 		break;	/* Success! */
20815 	case EIO:
20816 		switch (ucmd_buf.uscsi_status) {
20817 		case STATUS_RESERVATION_CONFLICT:
20818 			status = EACCES;
20819 			break;
20820 		default:
20821 			break;
20822 		}
20823 		break;
20824 	default:
20825 		break;
20826 	}
20827 
20828 	if (status == 0) {
20829 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
20830 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20831 	}
20832 
20833 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
20834 
20835 	return (status);
20836 }
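
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * reading a single target block.  un_tgt_blocksize is assumed to hold the
 * target-native block size (the routine requires
 * un_f_tgt_blocksize_is_valid).  The sd_example_* name and
 * SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_read_block(struct sd_lun *un, void *buf, daddr_t blkno)
{
	/*
	 * buflen is in bytes and is converted to target blocks internally;
	 * a blkno needing more than 21 bits forces a Group 1 (10-byte)
	 * CDB, and one beyond 32 bits a Group 4 (16-byte) CDB.
	 */
	return (sd_send_scsi_RDWR(un, SCMD_READ, buf, un->un_tgt_blocksize,
	    blkno, SD_PATH_STANDARD));
}
#endif	/* SD_EXAMPLE_SKETCHES */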
20837 
20838 
20839 /*
20840  *    Function: sd_send_scsi_LOG_SENSE
20841  *
20842  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20843  *
20844  *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *		bufaddr: Buffer to receive the log page data.
 *		buflen:  Length of bufaddr (the allocation length).
 *		page_code: Code of the log page to retrieve.
 *		page_control: Page control field (placed in CDB byte 2).
 *		param_ptr: Parameter pointer for the log page.
 *		path_flag: SD_PATH_* value selecting the command chain.
20845  *
20846  * Return Code: 0   - Success
20847  *		errno return code from sd_send_scsi_cmd()
20848  *
20849  *     Context: Can sleep. Does not return until command is completed.
20850  */
20851 
20852 static int
20853 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
20854 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
20855 	int path_flag)
20856 
20857 {
20858 	struct	scsi_extended_sense	sense_buf;
20859 	union scsi_cdb		cdb;
20860 	struct uscsi_cmd	ucmd_buf;
20861 	int			status;
20862 
20863 	ASSERT(un != NULL);
20864 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20865 
20866 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
20867 
20868 	bzero(&cdb, sizeof (cdb));
20869 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20870 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20871 
20872 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
20873 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
20874 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
20875 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
20876 	FORMG1COUNT(&cdb, buflen);
20877 
20878 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20879 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20880 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20881 	ucmd_buf.uscsi_buflen	= buflen;
20882 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20883 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20884 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20885 	ucmd_buf.uscsi_timeout	= 60;
20886 
20887 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20888 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20889 
20890 	switch (status) {
20891 	case 0:
20892 		break;
20893 	case EIO:
20894 		switch (ucmd_buf.uscsi_status) {
20895 		case STATUS_RESERVATION_CONFLICT:
20896 			status = EACCES;
20897 			break;
20898 		case STATUS_CHECK:
20899 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20900 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20901 				KEY_ILLEGAL_REQUEST) &&
20902 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
20903 				/*
20904 				 * ASC 0x24: INVALID FIELD IN CDB
20905 				 */
20906 				switch (page_code) {
20907 				case START_STOP_CYCLE_PAGE:
20908 					/*
20909 					 * The start stop cycle counter is
20910 					 * implemented as page 0x31 in earlier
20911 					 * generation disks. In new generation
20912 					 * disks the start stop cycle counter is
20913 					 * implemented as page 0xE. To properly
20914 					 * handle this case if an attempt for
20915 					 * log page 0xE is made and fails we
20916 					 * will try again using page 0x31.
20917 					 *
20918 					 * Network storage BU committed to
20919 					 * maintain the page 0x31 for this
20920 					 * purpose and will not have any other
20921 					 * page implemented with page code 0x31
20922 					 * until all disks transition to the
20923 					 * standard page.
20924 					 */
20925 					mutex_enter(SD_MUTEX(un));
20926 					un->un_start_stop_cycle_page =
20927 					    START_STOP_CYCLE_VU_PAGE;
20928 					cdb.cdb_opaque[2] =
20929 					    (char)(page_control << 6) |
20930 					    un->un_start_stop_cycle_page;
20931 					mutex_exit(SD_MUTEX(un));
20932 					status = sd_send_scsi_cmd(
20933 					    SD_GET_DEV(un), &ucmd_buf,
20934 					    UIO_SYSSPACE, UIO_SYSSPACE,
20935 					    UIO_SYSSPACE, path_flag);
20936 
20937 					break;
20938 				case TEMPERATURE_PAGE:
20939 					status = ENOTTY;
20940 					break;
20941 				default:
20942 					break;
20943 				}
20944 			}
20945 			break;
20946 		default:
20947 			break;
20948 		}
20949 		break;
20950 	default:
20951 		break;
20952 	}
20953 
20954 	if (status == 0) {
20955 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
20956 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20957 	}
20958 
20959 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
20960 
20961 	return (status);
20962 }
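
/*
 * Illustrative sketch (editor's example, not part of the original driver):
 * fetching the temperature log page.  Page control 1 requests current
 * cumulative values and param_ptr 0 starts at the first parameter; per
 * the handling above, ENOTTY comes back if the device rejects
 * TEMPERATURE_PAGE.  The sd_example_* name and SD_EXAMPLE_SKETCHES guard
 * are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_get_temp_page(struct sd_lun *un, uchar_t *buf, uint16_t len)
{
	return (sd_send_scsi_LOG_SENSE(un, buf, len, TEMPERATURE_PAGE,
	    1, 0, SD_PATH_DIRECT));
}
#endif	/* SD_EXAMPLE_SKETCHES */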
20963 
20964 
20965 /*
20966  *    Function: sdioctl
20967  *
20968  * Description: Driver's ioctl(9e) entry point function.
20969  *
20970  *   Arguments: dev     - device number
20971  *		cmd     - ioctl operation to be performed
20972  *		arg     - user argument, contains data to be set or reference
20973  *			  parameter for get
20974  *		flag    - bit flag, indicating open settings, 32/64 bit type
20975  *		cred_p  - user credential pointer
20976  *		rval_p  - calling process return value (OPT)
20977  *
20978  * Return Code: EINVAL
20979  *		ENOTTY
20980  *		ENXIO
20981  *		EIO
20982  *		EFAULT
20983  *		ENOTSUP
20984  *		EPERM
20985  *
20986  *     Context: Called from the device switch at normal priority.
20987  */
20988 
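/*
 * Illustrative user-level usage (editor's example, not part of the
 * original driver): polling media state through DKIOCSTATE, which blocks
 * in sd_check_media() until the state changes and then copies the new
 * state back out.  The device path is hypothetical.
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY | O_NDELAY);
 *	enum dkio_state state = DKIO_NONE;
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCSTATE, &state) == 0)
 *		(void) printf("new media state: %d\n", state);
 */
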
20989 static int
20990 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
20991 {
20992 	struct sd_lun	*un = NULL;
20993 	int		geom_validated = FALSE;
20994 	int		err = 0;
20995 	int		i = 0;
20996 	cred_t		*cr;
20997 
20998 	/*
20999 	 * All device accesses go through sdstrategy, where we check the
21000 	 * suspend status.
21001 	 */
21002 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21003 		return (ENXIO);
21004 	}
21005 
21006 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21007 
21008 	/*
21009 	 * Moved this wait from sd_uscsi_strategy to here for
21010 	 * reasons of deadlock prevention. Internal driver commands,
21011 	 * specifically those to change a device's power level, result
21012 	 * in a call to sd_uscsi_strategy.
21013 	 */
21014 	mutex_enter(SD_MUTEX(un));
21015 	while ((un->un_state == SD_STATE_SUSPENDED) ||
21016 	    (un->un_state == SD_STATE_PM_CHANGING)) {
21017 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21018 	}
21019 	/*
21020 	 * Twiddling the counter here protects commands from now
21021 	 * through to the top of sd_uscsi_strategy. Without the
21022 	 * counter increment, a power down, for example, could get in
21023 	 * after the above check for state is made and before
21024 	 * execution gets to the top of sd_uscsi_strategy.
21025 	 * That would cause problems.
21026 	 */
21027 	un->un_ncmds_in_driver++;
21028 
21029 	if ((un->un_f_geometry_is_valid == FALSE) &&
21030 	    (flag & (FNDELAY | FNONBLOCK))) {
21031 		switch (cmd) {
21032 		case CDROMPAUSE:
21033 		case CDROMRESUME:
21034 		case CDROMPLAYMSF:
21035 		case CDROMPLAYTRKIND:
21036 		case CDROMREADTOCHDR:
21037 		case CDROMREADTOCENTRY:
21038 		case CDROMSTOP:
21039 		case CDROMSTART:
21040 		case CDROMVOLCTRL:
21041 		case CDROMSUBCHNL:
21042 		case CDROMREADMODE2:
21043 		case CDROMREADMODE1:
21044 		case CDROMREADOFFSET:
21045 		case CDROMSBLKMODE:
21046 		case CDROMGBLKMODE:
21047 		case CDROMGDRVSPEED:
21048 		case CDROMSDRVSPEED:
21049 		case CDROMCDDA:
21050 		case CDROMCDXA:
21051 		case CDROMSUBCODE:
21052 			if (!ISCD(un)) {
21053 				un->un_ncmds_in_driver--;
21054 				ASSERT(un->un_ncmds_in_driver >= 0);
21055 				mutex_exit(SD_MUTEX(un));
21056 				return (ENOTTY);
21057 			}
21058 			break;
21059 		case FDEJECT:
21060 		case DKIOCEJECT:
21061 		case CDROMEJECT:
21062 			if (!un->un_f_eject_media_supported) {
21063 				un->un_ncmds_in_driver--;
21064 				ASSERT(un->un_ncmds_in_driver >= 0);
21065 				mutex_exit(SD_MUTEX(un));
21066 				return (ENOTTY);
21067 			}
21068 			break;
21069 		case DKIOCSVTOC:
21070 		case DKIOCSETEFI:
21071 		case DKIOCSMBOOT:
21072 		case DKIOCFLUSHWRITECACHE:
21073 			mutex_exit(SD_MUTEX(un));
21074 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
21075 			if (err != 0) {
21076 				mutex_enter(SD_MUTEX(un));
21077 				un->un_ncmds_in_driver--;
21078 				ASSERT(un->un_ncmds_in_driver >= 0);
21079 				mutex_exit(SD_MUTEX(un));
21080 				return (EIO);
21081 			}
21082 			mutex_enter(SD_MUTEX(un));
21083 			/* FALLTHROUGH */
21084 		case DKIOCREMOVABLE:
21085 		case DKIOCHOTPLUGGABLE:
21086 		case DKIOCINFO:
21087 		case DKIOCGMEDIAINFO:
21088 		case MHIOCENFAILFAST:
21089 		case MHIOCSTATUS:
21090 		case MHIOCTKOWN:
21091 		case MHIOCRELEASE:
21092 		case MHIOCGRP_INKEYS:
21093 		case MHIOCGRP_INRESV:
21094 		case MHIOCGRP_REGISTER:
21095 		case MHIOCGRP_RESERVE:
21096 		case MHIOCGRP_PREEMPTANDABORT:
21097 		case MHIOCGRP_REGISTERANDIGNOREKEY:
21098 		case CDROMCLOSETRAY:
21099 		case USCSICMD:
21100 			goto skip_ready_valid;
21101 		default:
21102 			break;
21103 		}
21104 
21105 		mutex_exit(SD_MUTEX(un));
21106 		err = sd_ready_and_valid(un);
21107 		mutex_enter(SD_MUTEX(un));
21108 		if (err == SD_READY_NOT_VALID) {
21109 			switch (cmd) {
21110 			case DKIOCGAPART:
21111 			case DKIOCGGEOM:
21112 			case DKIOCSGEOM:
21113 			case DKIOCGVTOC:
21114 			case DKIOCSVTOC:
21115 			case DKIOCSAPART:
21116 			case DKIOCG_PHYGEOM:
21117 			case DKIOCG_VIRTGEOM:
21118 				err = ENOTSUP;
21119 				un->un_ncmds_in_driver--;
21120 				ASSERT(un->un_ncmds_in_driver >= 0);
21121 				mutex_exit(SD_MUTEX(un));
21122 				return (err);
21123 			}
21124 		}
21125 		if (err != SD_READY_VALID) {
21126 			switch (cmd) {
21127 			case DKIOCSTATE:
21128 			case CDROMGDRVSPEED:
21129 			case CDROMSDRVSPEED:
21130 			case FDEJECT:	/* for eject command */
21131 			case DKIOCEJECT:
21132 			case CDROMEJECT:
21133 			case DKIOCGETEFI:
21134 			case DKIOCSGEOM:
21135 			case DKIOCREMOVABLE:
21136 			case DKIOCHOTPLUGGABLE:
21137 			case DKIOCSAPART:
21138 			case DKIOCSETEFI:
21139 				break;
21140 			default:
21141 				if (un->un_f_has_removable_media) {
21142 					err = ENXIO;
21143 				} else {
21144 					/* Do not map EACCES to EIO */
21145 					if (err != EACCES)
21146 						err = EIO;
21147 				}
21148 				un->un_ncmds_in_driver--;
21149 				ASSERT(un->un_ncmds_in_driver >= 0);
21150 				mutex_exit(SD_MUTEX(un));
21151 				return (err);
21152 			}
21153 		}
21154 		geom_validated = TRUE;
21155 	}
21156 	if ((un->un_f_geometry_is_valid == TRUE) &&
21157 	    (un->un_solaris_size > 0)) {
21158 		/*
21159 		 * the "geometry_is_valid" flag could be true if we
21160 		 * have an fdisk table but no Solaris partition
21161 		 */
21162 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
21163 			/* it is EFI, so return ENOTSUP for these */
21164 			switch (cmd) {
21165 			case DKIOCGAPART:
21166 			case DKIOCGGEOM:
21167 			case DKIOCGVTOC:
21168 			case DKIOCSVTOC:
21169 			case DKIOCSAPART:
21170 				err = ENOTSUP;
21171 				un->un_ncmds_in_driver--;
21172 				ASSERT(un->un_ncmds_in_driver >= 0);
21173 				mutex_exit(SD_MUTEX(un));
21174 				return (err);
21175 			}
21176 		}
21177 	}
21178 
21179 skip_ready_valid:
21180 	mutex_exit(SD_MUTEX(un));
21181 
21182 	switch (cmd) {
21183 	case DKIOCINFO:
21184 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21185 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21186 		break;
21187 
21188 	case DKIOCGMEDIAINFO:
21189 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21190 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21191 		break;
21192 
21193 	case DKIOCGGEOM:
21194 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21195 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21196 		    geom_validated);
21197 		break;
21198 
21199 	case DKIOCSGEOM:
21200 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21201 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21202 		break;
21203 
21204 	case DKIOCGAPART:
21205 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21206 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21207 		    geom_validated);
21208 		break;
21209 
21210 	case DKIOCSAPART:
21211 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21212 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21213 		break;
21214 
21215 	case DKIOCGVTOC:
21216 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21217 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21218 		    geom_validated);
21219 		break;
21220 
21221 	case DKIOCGETEFI:
21222 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21223 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21224 		break;
21225 
21226 	case DKIOCPARTITION:
21227 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21228 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21229 		break;
21230 
21231 	case DKIOCSVTOC:
21232 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21233 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21234 		break;
21235 
21236 	case DKIOCSETEFI:
21237 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21238 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21239 		break;
21240 
21241 	case DKIOCGMBOOT:
21242 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21243 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21244 		break;
21245 
21246 	case DKIOCSMBOOT:
21247 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21248 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21249 		break;
21250 
21251 	case DKIOCLOCK:
21252 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21253 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21254 		    SD_PATH_STANDARD);
21255 		break;
21256 
21257 	case DKIOCUNLOCK:
21258 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21259 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21260 		    SD_PATH_STANDARD);
21261 		break;
21262 
21263 	case DKIOCSTATE: {
21264 		enum dkio_state		state;
21265 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21266 
21267 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21268 			err = EFAULT;
21269 		} else {
21270 			err = sd_check_media(dev, state);
21271 			if (err == 0) {
21272 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21273 				    sizeof (int), flag) != 0)
21274 					err = EFAULT;
21275 			}
21276 		}
21277 		break;
21278 	}
21279 
21280 	case DKIOCREMOVABLE:
21281 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21282 		/*
21283 		 * At present, vold only does automount for removable-media
21284 		 * devices. In order not to break current applications, we
21285 		 * still let hotpluggable devices pretend to be removable-media
21286 		 * devices for vold. In the near future, once vold is EOL'ed,
21287 		 * we should remove this workaround.
21288 		 */
21289 		if (un->un_f_has_removable_media || un->un_f_is_hotpluggable) {
21290 			i = 1;
21291 		} else {
21292 			i = 0;
21293 		}
21294 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21295 			err = EFAULT;
21296 		} else {
21297 			err = 0;
21298 		}
21299 		break;
21300 
21301 	case DKIOCHOTPLUGGABLE:
21302 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21303 		if (un->un_f_is_hotpluggable) {
21304 			i = 1;
21305 		} else {
21306 			i = 0;
21307 		}
21308 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21309 			err = EFAULT;
21310 		} else {
21311 			err = 0;
21312 		}
21313 		break;
21314 
21315 	case DKIOCGTEMPERATURE:
21316 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21317 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21318 		break;
21319 
21320 	case MHIOCENFAILFAST:
21321 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21322 		if ((err = drv_priv(cred_p)) == 0) {
21323 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21324 		}
21325 		break;
21326 
21327 	case MHIOCTKOWN:
21328 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21329 		if ((err = drv_priv(cred_p)) == 0) {
21330 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21331 		}
21332 		break;
21333 
21334 	case MHIOCRELEASE:
21335 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21336 		if ((err = drv_priv(cred_p)) == 0) {
21337 			err = sd_mhdioc_release(dev);
21338 		}
21339 		break;
21340 
21341 	case MHIOCSTATUS:
21342 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21343 		if ((err = drv_priv(cred_p)) == 0) {
21344 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21345 			case 0:
21346 				err = 0;
21347 				break;
21348 			case EACCES:
21349 				*rval_p = 1;
21350 				err = 0;
21351 				break;
21352 			default:
21353 				err = EIO;
21354 				break;
21355 			}
21356 		}
21357 		break;
21358 
21359 	case MHIOCQRESERVE:
21360 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21361 		if ((err = drv_priv(cred_p)) == 0) {
21362 			err = sd_reserve_release(dev, SD_RESERVE);
21363 		}
21364 		break;
21365 
21366 	case MHIOCREREGISTERDEVID:
21367 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21368 		if (drv_priv(cred_p) == EPERM) {
21369 			err = EPERM;
21370 		} else if (!un->un_f_devid_supported) {
21371 			err = ENOTTY;
21372 		} else {
21373 			err = sd_mhdioc_register_devid(dev);
21374 		}
21375 		break;
21376 
21377 	case MHIOCGRP_INKEYS:
21378 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21379 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21380 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21381 				err = ENOTSUP;
21382 			} else {
21383 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21384 				    flag);
21385 			}
21386 		}
21387 		break;
21388 
21389 	case MHIOCGRP_INRESV:
21390 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21391 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21392 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21393 				err = ENOTSUP;
21394 			} else {
21395 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21396 			}
21397 		}
21398 		break;
21399 
21400 	case MHIOCGRP_REGISTER:
21401 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21402 		if ((err = drv_priv(cred_p)) != EPERM) {
21403 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21404 				err = ENOTSUP;
21405 			} else if (arg != NULL) {
21406 				mhioc_register_t reg;
21407 				if (ddi_copyin((void *)arg, &reg,
21408 				    sizeof (mhioc_register_t), flag) != 0) {
21409 					err = EFAULT;
21410 				} else {
21411 					err =
21412 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21413 					    un, SD_SCSI3_REGISTER,
21414 					    (uchar_t *)&reg);
21415 				}
21416 			}
21417 		}
21418 		break;
21419 
21420 	case MHIOCGRP_RESERVE:
21421 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21422 		if ((err = drv_priv(cred_p)) != EPERM) {
21423 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21424 				err = ENOTSUP;
21425 			} else if (arg != NULL) {
21426 				mhioc_resv_desc_t resv_desc;
21427 				if (ddi_copyin((void *)arg, &resv_desc,
21428 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21429 					err = EFAULT;
21430 				} else {
21431 					err =
21432 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21433 					    un, SD_SCSI3_RESERVE,
21434 					    (uchar_t *)&resv_desc);
21435 				}
21436 			}
21437 		}
21438 		break;
21439 
21440 	case MHIOCGRP_PREEMPTANDABORT:
21441 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21442 		if ((err = drv_priv(cred_p)) != EPERM) {
21443 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21444 				err = ENOTSUP;
21445 			} else if (arg != NULL) {
21446 				mhioc_preemptandabort_t preempt_abort;
21447 				if (ddi_copyin((void *)arg, &preempt_abort,
21448 				    sizeof (mhioc_preemptandabort_t),
21449 				    flag) != 0) {
21450 					err = EFAULT;
21451 				} else {
21452 					err =
21453 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21454 					    un, SD_SCSI3_PREEMPTANDABORT,
21455 					    (uchar_t *)&preempt_abort);
21456 				}
21457 			}
21458 		}
21459 		break;
21460 
21461 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21462 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21463 		if ((err = drv_priv(cred_p)) != EPERM) {
21464 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21465 				err = ENOTSUP;
21466 			} else if (arg != NULL) {
21467 				mhioc_registerandignorekey_t r_and_i;
21468 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21469 				    sizeof (mhioc_registerandignorekey_t),
21470 				    flag) != 0) {
21471 					err = EFAULT;
21472 				} else {
21473 					err =
21474 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21475 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21476 					    (uchar_t *)&r_and_i);
21477 				}
21478 			}
21479 		}
21480 		break;
21481 
21482 	case USCSICMD:
21483 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21484 		cr = ddi_get_cred();
21485 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21486 			err = EPERM;
21487 		} else {
21488 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21489 		}
21490 		break;
21491 
21492 	case CDROMPAUSE:
21493 	case CDROMRESUME:
21494 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21495 		if (!ISCD(un)) {
21496 			err = ENOTTY;
21497 		} else {
21498 			err = sr_pause_resume(dev, cmd);
21499 		}
21500 		break;
21501 
21502 	case CDROMPLAYMSF:
21503 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21504 		if (!ISCD(un)) {
21505 			err = ENOTTY;
21506 		} else {
21507 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21508 		}
21509 		break;
21510 
21511 	case CDROMPLAYTRKIND:
21512 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21513 #if defined(__i386) || defined(__amd64)
21514 		/*
21515 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21516 		 */
21517 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21518 #else
21519 		if (!ISCD(un)) {
21520 #endif
21521 			err = ENOTTY;
21522 		} else {
21523 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21524 		}
21525 		break;
21526 
21527 	case CDROMREADTOCHDR:
21528 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21529 		if (!ISCD(un)) {
21530 			err = ENOTTY;
21531 		} else {
21532 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21533 		}
21534 		break;
21535 
21536 	case CDROMREADTOCENTRY:
21537 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21538 		if (!ISCD(un)) {
21539 			err = ENOTTY;
21540 		} else {
21541 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21542 		}
21543 		break;
21544 
21545 	case CDROMSTOP:
21546 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21547 		if (!ISCD(un)) {
21548 			err = ENOTTY;
21549 		} else {
21550 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21551 			    SD_PATH_STANDARD);
21552 		}
21553 		break;
21554 
21555 	case CDROMSTART:
21556 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21557 		if (!ISCD(un)) {
21558 			err = ENOTTY;
21559 		} else {
21560 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21561 			    SD_PATH_STANDARD);
21562 		}
21563 		break;
21564 
21565 	case CDROMCLOSETRAY:
21566 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21567 		if (!ISCD(un)) {
21568 			err = ENOTTY;
21569 		} else {
21570 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21571 			    SD_PATH_STANDARD);
21572 		}
21573 		break;
21574 
21575 	case FDEJECT:	/* for eject command */
21576 	case DKIOCEJECT:
21577 	case CDROMEJECT:
21578 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21579 		if (!un->un_f_eject_media_supported) {
21580 			err = ENOTTY;
21581 		} else {
21582 			err = sr_eject(dev);
21583 		}
21584 		break;
21585 
21586 	case CDROMVOLCTRL:
21587 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21588 		if (!ISCD(un)) {
21589 			err = ENOTTY;
21590 		} else {
21591 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21592 		}
21593 		break;
21594 
21595 	case CDROMSUBCHNL:
21596 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21597 		if (!ISCD(un)) {
21598 			err = ENOTTY;
21599 		} else {
21600 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21601 		}
21602 		break;
21603 
21604 	case CDROMREADMODE2:
21605 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21606 		if (!ISCD(un)) {
21607 			err = ENOTTY;
21608 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21609 			/*
21610 			 * If the drive supports READ CD, use that instead of
21611 			 * switching the LBA size via a MODE SELECT
21612 			 * Block Descriptor
21613 			 */
21614 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21615 		} else {
21616 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21617 		}
21618 		break;
21619 
21620 	case CDROMREADMODE1:
21621 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21622 		if (!ISCD(un)) {
21623 			err = ENOTTY;
21624 		} else {
21625 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21626 		}
21627 		break;
21628 
21629 	case CDROMREADOFFSET:
21630 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21631 		if (!ISCD(un)) {
21632 			err = ENOTTY;
21633 		} else {
21634 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21635 			    flag);
21636 		}
21637 		break;
21638 
21639 	case CDROMSBLKMODE:
21640 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21641 		/*
21642 		 * There is no means of changing the block size on ATAPI
21643 		 * drives, thus return ENOTTY if the drive type is ATAPI
21644 		 */
21645 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21646 			err = ENOTTY;
21647 		} else if (un->un_f_mmc_cap == TRUE) {
21648 
21649 			/*
21650 			 * MMC Devices do not support changing the
21651 			 * logical block size
21652 			 *
21653 			 * Note: EINVAL is being returned instead of ENOTTY to
21654 		 * maintain consistency with the original mmc
21655 			 * driver update.
21656 			 */
21657 			err = EINVAL;
21658 		} else {
21659 			mutex_enter(SD_MUTEX(un));
21660 			if ((!(un->un_exclopen & (1 << SDPART(dev)))) ||
21661 			    (un->un_ncmds_in_transport > 0)) {
21662 				mutex_exit(SD_MUTEX(un));
21663 				err = EINVAL;
21664 			} else {
21665 				mutex_exit(SD_MUTEX(un));
21666 				err = sr_change_blkmode(dev, cmd, arg, flag);
21667 			}
21668 		}
21669 		break;
21670 
21671 	case CDROMGBLKMODE:
21672 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21673 		if (!ISCD(un)) {
21674 			err = ENOTTY;
21675 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21676 		    (un->un_f_blockcount_is_valid != FALSE)) {
21677 			/*
21678 			 * Drive is an ATAPI drive so return target block
21679 			 * size for ATAPI drives since we cannot change the
21680 			 * blocksize on ATAPI drives. Used primarily to detect
21681 			 * if an ATAPI cdrom is present.
21682 			 */
21683 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21684 			    sizeof (int), flag) != 0) {
21685 				err = EFAULT;
21686 			} else {
21687 				err = 0;
21688 			}
21689 
21690 		} else {
21691 			/*
21692 			 * Drive supports changing block sizes via a Mode
21693 			 * Select.
21694 			 */
21695 			err = sr_change_blkmode(dev, cmd, arg, flag);
21696 		}
21697 		break;
21698 
21699 	case CDROMGDRVSPEED:
21700 	case CDROMSDRVSPEED:
21701 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21702 		if (!ISCD(un)) {
21703 			err = ENOTTY;
21704 		} else if (un->un_f_mmc_cap == TRUE) {
21705 			/*
21706 			 * Note: In the future the driver implementation
21707 			 * for getting and setting CD speed
21708 			 * should entail:
21709 			 * 1) If non-mmc try the Toshiba mode page
21710 			 *    (sr_change_speed)
21711 			 * 2) If mmc but no support for Real Time Streaming try
21712 			 *    the SET CD SPEED (0xBB) command
21713 			 *    (sr_atapi_change_speed)
21714 			 * 3) If mmc and support for Real Time Streaming
21715 			 *    try the GET PERFORMANCE and SET STREAMING
21716 			 *    commands (not yet implemented, 4380808)
21717 			 */
21718 			/*
21719 			 * As per recent MMC spec, CD-ROM speed is variable
21720 			 * and changes with LBA. Since there is no such
21721 			 * thing as a single drive speed now, fail this ioctl.
21722 			 *
21723 			 * Note: EINVAL is returned for consistency with the original
21724 			 * implementation which included support for getting
21725 			 * the drive speed of mmc devices but not setting
21726 			 * the drive speed. Thus EINVAL would be returned
21727 			 * if a set request was made for an mmc device.
21728 			 * We no longer support get or set speed for
21729 			 * mmc but need to remain consistent with regard
21730 			 * to the error code returned.
21731 			 */
21732 			err = EINVAL;
21733 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21734 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21735 		} else {
21736 			err = sr_change_speed(dev, cmd, arg, flag);
21737 		}
21738 		break;
21739 
21740 	case CDROMCDDA:
21741 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21742 		if (!ISCD(un)) {
21743 			err = ENOTTY;
21744 		} else {
21745 			err = sr_read_cdda(dev, (void *)arg, flag);
21746 		}
21747 		break;
21748 
21749 	case CDROMCDXA:
21750 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21751 		if (!ISCD(un)) {
21752 			err = ENOTTY;
21753 		} else {
21754 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21755 		}
21756 		break;
21757 
21758 	case CDROMSUBCODE:
21759 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21760 		if (!ISCD(un)) {
21761 			err = ENOTTY;
21762 		} else {
21763 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21764 		}
21765 		break;
21766 
21767 	case DKIOCPARTINFO: {
21768 		/*
21769 		 * Return parameters describing the selected disk slice.
21770 		 * Note: this ioctl is for the intel platform only
21771 		 */
21772 #if defined(__i386) || defined(__amd64)
21773 		int part;
21774 
21775 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21776 		part = SDPART(dev);
21777 
21778 		/* don't check un_solaris_size for pN */
21779 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21780 			err = EIO;
21781 		} else {
21782 			struct part_info p;
21783 
21784 			p.p_start = (daddr_t)un->un_offset[part];
21785 			p.p_length = (int)un->un_map[part].dkl_nblk;
21786 #ifdef _MULTI_DATAMODEL
21787 			switch (ddi_model_convert_from(flag & FMODELS)) {
21788 			case DDI_MODEL_ILP32:
21789 			{
21790 				struct part_info32 p32;
21791 
21792 				p32.p_start = (daddr32_t)p.p_start;
21793 				p32.p_length = p.p_length;
21794 				if (ddi_copyout(&p32, (void *)arg,
21795 				    sizeof (p32), flag))
21796 					err = EFAULT;
21797 				break;
21798 			}
21799 
21800 			case DDI_MODEL_NONE:
21801 			{
21802 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21803 				    flag))
21804 					err = EFAULT;
21805 				break;
21806 			}
21807 			}
21808 #else /* ! _MULTI_DATAMODEL */
21809 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21810 				err = EFAULT;
21811 #endif /* _MULTI_DATAMODEL */
21812 		}
21813 #else
21814 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21815 		err = ENOTTY;
21816 #endif
21817 		break;
21818 	}
21819 
21820 	case DKIOCG_PHYGEOM: {
21821 		/* Return the driver's notion of the media physical geometry */
21822 #if defined(__i386) || defined(__amd64)
21823 		uint64_t	capacity;
21824 		struct dk_geom	disk_geom;
21825 		struct dk_geom	*dkgp = &disk_geom;
21826 
21827 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21828 		mutex_enter(SD_MUTEX(un));
21829 
21830 		if (un->un_g.dkg_nhead != 0 &&
21831 		    un->un_g.dkg_nsect != 0) {
21832 			/*
21833 			 * We succeeded in getting a geometry, but
21834 			 * right now it is being reported as just the
21835 			 * Solaris fdisk partition, just like for
21836 			 * DKIOCGGEOM. We need to change that to be
21837 			 * correct for the entire disk now.
21838 			 */
21839 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21840 			dkgp->dkg_acyl = 0;
21841 			dkgp->dkg_ncyl = un->un_blockcount /
21842 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21843 		} else {
21844 			bzero(dkgp, sizeof (struct dk_geom));
21845 			/*
21846 			 * This disk does not have a Solaris VTOC
21847 			 * so we must present a physical geometry
21848 			 * that will remain consistent regardless
21849 			 * of how the disk is used. This will ensure
21850 			 * that the geometry does not change regardless
21851 			 * of the fdisk partition type (i.e. EFI, FAT32,
21852 			 * Solaris, etc).
21853 			 */
21854 			if (ISCD(un)) {
21855 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21856 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21857 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21858 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21859 			} else {
21860 				/*
21861 				 * Invalid un_blockcount can generate invalid
21862 				 * dk_geom and may result in division by zero
21863 				 * system failure. Should make sure blockcount
21864 				 * is valid before using it here.
21865 				 */
21866 				if (un->un_f_blockcount_is_valid == FALSE) {
21867 					mutex_exit(SD_MUTEX(un));
21868 					err = EIO;
21869 
21870 					break;
21871 				}
21872 
21873 				/*
21874 				 * Refer to comments related to off-by-1 at the
21875 				 * header of this file
21876 				 */
21877 				if (!un->un_f_capacity_adjusted &&
21878 				    !un->un_f_has_removable_media &&
21879 				    !un->un_f_is_hotpluggable &&
21880 				    (un->un_tgt_blocksize ==
21881 				    un->un_sys_blocksize))
21882 					capacity = un->un_blockcount - 1;
21883 				else
21884 					capacity = un->un_blockcount;
21885 
21886 				sd_convert_geometry(capacity, dkgp);
21887 				dkgp->dkg_acyl = 0;
21888 				dkgp->dkg_ncyl = capacity /
21889 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21890 			}
21891 		}
21892 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21893 
21894 		if (ddi_copyout(dkgp, (void *)arg,
21895 		    sizeof (struct dk_geom), flag)) {
21896 			mutex_exit(SD_MUTEX(un));
21897 			err = EFAULT;
21898 		} else {
21899 			mutex_exit(SD_MUTEX(un));
21900 			err = 0;
21901 		}
21902 #else
21903 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21904 		err = ENOTTY;
21905 #endif
21906 		break;
21907 	}
21908 
21909 	case DKIOCG_VIRTGEOM: {
21910 		/* Return the driver's notion of the media's logical geometry */
21911 #if defined(__i386) || defined(__amd64)
21912 		struct dk_geom	disk_geom;
21913 		struct dk_geom	*dkgp = &disk_geom;
21914 
21915 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21916 		mutex_enter(SD_MUTEX(un));
21917 		/*
21918 		 * If there is no HBA geometry available, or
21919 		 * if the HBA returned us something that doesn't
21920 		 * really fit into an Int 13/function 8 geometry
21921 		 * result, just fail the ioctl.  See PSARC 1998/313.
21922 		 */
21923 		if (un->un_lgeom.g_nhead == 0 ||
21924 		    un->un_lgeom.g_nsect == 0 ||
21925 		    un->un_lgeom.g_ncyl > 1024) {
21926 			mutex_exit(SD_MUTEX(un));
21927 			err = EINVAL;
21928 		} else {
21929 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21930 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21931 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21932 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21933 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21934 
21935 			if (ddi_copyout(dkgp, (void *)arg,
21936 			    sizeof (struct dk_geom), flag)) {
21937 				mutex_exit(SD_MUTEX(un));
21938 				err = EFAULT;
21939 			} else {
21940 				mutex_exit(SD_MUTEX(un));
21941 				err = 0;
21942 			}
21943 		}
21944 #else
21945 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21946 		err = ENOTTY;
21947 #endif
21948 		break;
21949 	}
21950 #ifdef SDDEBUG
21951 /* RESET/ABORTS testing ioctls */
21952 	case DKIOCRESET: {
21953 		int	reset_level;
21954 
21955 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21956 			err = EFAULT;
21957 		} else {
21958 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21959 			    "reset_level = 0x%lx\n", reset_level);
21960 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21961 				err = 0;
21962 			} else {
21963 				err = EIO;
21964 			}
21965 		}
21966 		break;
21967 	}
21968 
21969 	case DKIOCABORT:
21970 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21971 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21972 			err = 0;
21973 		} else {
21974 			err = EIO;
21975 		}
21976 		break;
21977 #endif
21978 
21979 #ifdef SD_FAULT_INJECTION
21980 /* SDIOC FaultInjection testing ioctls */
21981 	case SDIOCSTART:
21982 	case SDIOCSTOP:
21983 	case SDIOCINSERTPKT:
21984 	case SDIOCINSERTXB:
21985 	case SDIOCINSERTUN:
21986 	case SDIOCINSERTARQ:
21987 	case SDIOCPUSH:
21988 	case SDIOCRETRIEVE:
21989 	case SDIOCRUN:
21990 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
21991 		    "SDIOC detected cmd:0x%X:\n", cmd);
21992 		/* call error generator */
21993 		sd_faultinjection_ioctl(cmd, arg, un);
21994 		err = 0;
21995 		break;
21996 
21997 #endif /* SD_FAULT_INJECTION */
21998 
21999 	case DKIOCFLUSHWRITECACHE:
22000 		{
22001 			struct dk_callback *dkc = (struct dk_callback *)arg;
22002 
22003 			mutex_enter(SD_MUTEX(un));
22004 			if (!un->un_f_sync_cache_supported ||
22005 			    !un->un_f_write_cache_enabled) {
22006 				err = un->un_f_sync_cache_supported ?
22007 				    0 : ENOTSUP;
22008 				mutex_exit(SD_MUTEX(un));
22009 				if ((flag & FKIOCTL) && dkc != NULL &&
22010 				    dkc->dkc_callback != NULL) {
22011 					(*dkc->dkc_callback)(dkc->dkc_cookie,
22012 					    err);
22013 					/*
22014 					 * Did callback and reported error.
22015 					 * Since we did a callback, ioctl
22016 					 * should return 0.
22017 					 */
22018 					err = 0;
22019 				}
22020 				break;
22021 			}
22022 			mutex_exit(SD_MUTEX(un));
22023 
22024 			if ((flag & FKIOCTL) && dkc != NULL &&
22025 			    dkc->dkc_callback != NULL) {
22026 				/* async SYNC CACHE request */
22027 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22028 			} else {
22029 				/* synchronous SYNC CACHE request */
22030 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22031 			}
22032 		}
22033 		break;
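
	/*
	 * For reference, a user-level caller would request the synchronous
	 * flavor of the flush handled above as follows (fd is hypothetical;
	 * error handling abbreviated):
	 *
	 *	if (ioctl(fd, DKIOCFLUSHWRITECACHE, NULL) != 0)
	 *		perror("DKIOCFLUSHWRITECACHE");
	 *
	 * Only in-kernel callers (FKIOCTL set) may pass a dk_callback to
	 * request the asynchronous form.
	 */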
22034 
22035 	case DKIOCGETWCE: {
22036 
22037 		int wce;
22038 
22039 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
22040 			break;
22041 		}
22042 
22043 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22044 			err = EFAULT;
22045 		}
22046 		break;
22047 	}
22048 
22049 	case DKIOCSETWCE: {
22050 
22051 		int wce, sync_supported;
22052 
22053 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22054 			err = EFAULT;
22055 			break;
22056 		}
22057 
22058 		/*
22059 		 * Synchronize multiple threads trying to enable
22060 		 * or disable the cache via the un_f_wcc_cv
22061 		 * condition variable.
22062 		 */
22063 		mutex_enter(SD_MUTEX(un));
22064 
22065 		/*
22066 		 * Don't allow the cache to be enabled if the
22067 		 * config file has it disabled.
22068 		 */
22069 		if (un->un_f_opt_disable_cache && wce) {
22070 			mutex_exit(SD_MUTEX(un));
22071 			err = EINVAL;
22072 			break;
22073 		}
22074 
22075 		/*
22076 		 * Wait for write cache change in progress
22077 		 * bit to be clear before proceeding.
22078 		 */
22079 		while (un->un_f_wcc_inprog)
22080 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22081 
22082 		un->un_f_wcc_inprog = 1;
22083 
22084 		if (un->un_f_write_cache_enabled && wce == 0) {
22085 			/*
22086 			 * Disable the write cache.  Don't clear
22087 			 * un_f_write_cache_enabled until after
22088 			 * the mode select and flush are complete.
22089 			 */
22090 			sync_supported = un->un_f_sync_cache_supported;
22091 			mutex_exit(SD_MUTEX(un));
22092 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22093 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
22094 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22095 			}
22096 
22097 			mutex_enter(SD_MUTEX(un));
22098 			if (err == 0) {
22099 				un->un_f_write_cache_enabled = 0;
22100 			}
22101 
22102 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22103 			/*
22104 			 * Set un_f_write_cache_enabled first, so there is
22105 			 * no window where the cache is enabled, but the
22106 			 * bit says it isn't.
22107 			 */
22108 			un->un_f_write_cache_enabled = 1;
22109 			mutex_exit(SD_MUTEX(un));
22110 
22111 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22112 			    SD_CACHE_ENABLE);
22113 
22114 			mutex_enter(SD_MUTEX(un));
22115 
22116 			if (err) {
22117 				un->un_f_write_cache_enabled = 0;
22118 			}
22119 		}
22120 
22121 		un->un_f_wcc_inprog = 0;
22122 		cv_broadcast(&un->un_wcc_cv);
22123 		mutex_exit(SD_MUTEX(un));
22124 		break;
22125 	}
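
	/*
	 * For reference, a user-level sketch of the two write cache ioctls
	 * above (fd is hypothetical; error handling omitted):
	 *
	 *	int wce;
	 *
	 *	(void) ioctl(fd, DKIOCGETWCE, &wce);
	 *	wce = 0;
	 *	(void) ioctl(fd, DKIOCSETWCE, &wce);
	 *
	 * As implemented above, disabling via DKIOCSETWCE also flushes the
	 * cache before un_f_write_cache_enabled is cleared.
	 */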
22126 
22127 	default:
22128 		err = ENOTTY;
22129 		break;
22130 	}
22131 	mutex_enter(SD_MUTEX(un));
22132 	un->un_ncmds_in_driver--;
22133 	ASSERT(un->un_ncmds_in_driver >= 0);
22134 	mutex_exit(SD_MUTEX(un));
22135 
22136 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22137 	return (err);
22138 }
22139 
22140 
22141 /*
22142  *    Function: sd_uscsi_ioctl
22143  *
22144  * Description: This routine is the driver entry point for handling USCSI ioctl
22145  *		requests (USCSICMD).
22146  *
22147  *   Arguments: dev	- the device number
22148  *		arg	- user provided scsi command
22149  *		flag	- this argument is a pass through to ddi_copyxxx()
22150  *			  directly from the mode argument of ioctl().
22151  *
22152  * Return Code: code returned by sd_send_scsi_cmd
22153  *		ENXIO
22154  *		EFAULT
22155  *		EAGAIN
22156  */
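
/*
 * For reference, a minimal user-level sketch of issuing a TEST UNIT READY
 * through this entry point (fd is hypothetical; error handling abbreviated;
 * see uscsi(7I)):
 *
 *	#include <sys/scsi/impl/uscsi.h>
 *
 *	struct uscsi_cmd ucmd;
 *	union scsi_cdb cdb;
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	bzero(&cdb, sizeof (cdb));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	if (ioctl(fd, USCSICMD, &ucmd) != 0)
 *		perror("USCSICMD");
 */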
22157 
22158 static int
22159 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
22160 {
22161 #ifdef _MULTI_DATAMODEL
22162 	/*
22163 	 * For use when a 32 bit app makes a call into a
22164 	 * 64 bit ioctl
22165 	 */
22166 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
22167 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
22168 	model_t			model;
22169 #endif /* _MULTI_DATAMODEL */
22170 	struct uscsi_cmd	*scmd = NULL;
22171 	struct sd_lun		*un = NULL;
22172 	enum uio_seg		uioseg;
22173 	char			cdb[CDB_GROUP0];
22174 	int			rval = 0;
22175 
22176 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22177 		return (ENXIO);
22178 	}
22179 
22180 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
22181 
22182 	scmd = (struct uscsi_cmd *)
22183 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
22184 
22185 #ifdef _MULTI_DATAMODEL
22186 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
22187 	case DDI_MODEL_ILP32:
22188 	{
22189 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
22190 			rval = EFAULT;
22191 			goto done;
22192 		}
22193 		/*
22194 		 * Convert the ILP32 uscsi data from the
22195 		 * application to LP64 for internal use.
22196 		 */
22197 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
22198 		break;
22199 	}
22200 	case DDI_MODEL_NONE:
22201 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22202 			rval = EFAULT;
22203 			goto done;
22204 		}
22205 		break;
22206 	}
22207 #else /* ! _MULTI_DATAMODEL */
22208 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22209 		rval = EFAULT;
22210 		goto done;
22211 	}
22212 #endif /* _MULTI_DATAMODEL */
22213 
22214 	scmd->uscsi_flags &= ~USCSI_NOINTR;
22215 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
22216 	if (un->un_f_format_in_progress == TRUE) {
22217 		rval = EAGAIN;
22218 		goto done;
22219 	}
22220 
22221 	/*
22222 	 * We must do the ddi_copyin() on the uscsi_cdb here so that
22223 	 * we will have a valid cdb[0] to test.
22224 	 */
22225 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
22226 	    (cdb[0] == SCMD_FORMAT)) {
22227 		SD_TRACE(SD_LOG_IOCTL, un,
22228 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22229 		mutex_enter(SD_MUTEX(un));
22230 		un->un_f_format_in_progress = TRUE;
22231 		mutex_exit(SD_MUTEX(un));
22232 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22233 		    SD_PATH_STANDARD);
22234 		mutex_enter(SD_MUTEX(un));
22235 		un->un_f_format_in_progress = FALSE;
22236 		mutex_exit(SD_MUTEX(un));
22237 	} else {
22238 		SD_TRACE(SD_LOG_IOCTL, un,
22239 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22240 		/*
22241 		 * It's OK to fall into here even if the ddi_copyin()
22242 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
22243 		 * does this same copyin and will return the EFAULT
22244 		 * if it fails.
22245 		 */
22246 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22247 		    SD_PATH_STANDARD);
22248 	}
22249 #ifdef _MULTI_DATAMODEL
22250 	switch (model) {
22251 	case DDI_MODEL_ILP32:
22252 		/*
22253 		 * Convert back to ILP32 before copyout to the
22254 		 * application
22255 		 */
22256 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
22257 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
22258 			if (rval != 0) {
22259 				rval = EFAULT;
22260 			}
22261 		}
22262 		break;
22263 	case DDI_MODEL_NONE:
22264 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22265 			if (rval != 0) {
22266 				rval = EFAULT;
22267 			}
22268 		}
22269 		break;
22270 	}
22271 #else /* ! _MULTI_DATAMODEL */
22272 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22273 		if (rval != 0) {
22274 			rval = EFAULT;
22275 		}
22276 	}
22277 #endif /* _MULTI_DATAMODEL */
22278 done:
22279 	kmem_free(scmd, sizeof (struct uscsi_cmd));
22280 
22281 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
22282 
22283 	return (rval);
22284 }
22285 
22286 
22287 /*
22288  *    Function: sd_dkio_ctrl_info
22289  *
22290  * Description: This routine is the driver entry point for handling controller
22291  *		information ioctl requests (DKIOCINFO).
22292  *
22293  *   Arguments: dev  - the device number
22294  *		arg  - pointer to user provided dk_cinfo structure
22295  *		       specifying the controller type and attributes.
22296  *		flag - this argument is a pass through to ddi_copyxxx()
22297  *		       directly from the mode argument of ioctl().
22298  *
22299  * Return Code: 0
22300  *		EFAULT
22301  *		ENXIO
22302  */
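
/*
 * For reference, a user-level sketch (fd is hypothetical; error handling
 * omitted; see dkio(7I)):
 *
 *	struct dk_cinfo cinfo;
 *
 *	if (ioctl(fd, DKIOCINFO, &cinfo) == 0)
 *		(void) printf("%s%d on %s%d, partition %d\n",
 *		    cinfo.dki_dname, cinfo.dki_unit,
 *		    cinfo.dki_cname, cinfo.dki_cnum,
 *		    cinfo.dki_partition);
 */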
22303 
22304 static int
22305 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22306 {
22307 	struct sd_lun	*un = NULL;
22308 	struct dk_cinfo	*info;
22309 	dev_info_t	*pdip;
22310 	int		lun, tgt;
22311 
22312 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22313 		return (ENXIO);
22314 	}
22315 
22316 	info = (struct dk_cinfo *)
22317 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22318 
22319 	switch (un->un_ctype) {
22320 	case CTYPE_CDROM:
22321 		info->dki_ctype = DKC_CDROM;
22322 		break;
22323 	default:
22324 		info->dki_ctype = DKC_SCSI_CCS;
22325 		break;
22326 	}
22327 	pdip = ddi_get_parent(SD_DEVINFO(un));
22328 	info->dki_cnum = ddi_get_instance(pdip);
22329 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22330 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22331 	} else {
22332 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22333 		    DK_DEVLEN - 1);
22334 	}
22335 
22336 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22337 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22338 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22339 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22340 
22341 	/* Unit Information */
22342 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22343 	info->dki_slave = ((tgt << 3) | lun);
22344 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22345 	    DK_DEVLEN - 1);
22346 	info->dki_flags = DKI_FMTVOL;
22347 	info->dki_partition = SDPART(dev);
22348 
22349 	/* Max Transfer size of this device in blocks */
22350 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22351 	info->dki_addr = 0;
22352 	info->dki_space = 0;
22353 	info->dki_prio = 0;
22354 	info->dki_vec = 0;
22355 
22356 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22357 		kmem_free(info, sizeof (struct dk_cinfo));
22358 		return (EFAULT);
22359 	} else {
22360 		kmem_free(info, sizeof (struct dk_cinfo));
22361 		return (0);
22362 	}
22363 }
22364 
22365 
22366 /*
22367  *    Function: sd_get_media_info
22368  *
22369  * Description: This routine is the driver entry point for handling ioctl
22370  *		requests for the media type or command set profile used by the
22371  *		drive to operate on the media (DKIOCGMEDIAINFO).
22372  *
22373  *   Arguments: dev	- the device number
22374  *		arg	- pointer to user provided dk_minfo structure
22375  *			  specifying the media type, logical block size and
22376  *			  drive capacity.
22377  *		flag	- this argument is a pass through to ddi_copyxxx()
22378  *			  directly from the mode argument of ioctl().
22379  *
22380  * Return Code: 0
22381  *		EACCES
22382  *		EFAULT
22383  *		ENXIO
22384  *		EIO
22385  */
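
/*
 * For reference, a user-level sketch (fd is hypothetical; error handling
 * omitted; see dkio(7I)):
 *
 *	struct dk_minfo minfo;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0)
 *		(void) printf("type 0x%x, lbsize %u, capacity %llu\n",
 *		    minfo.dki_media_type, minfo.dki_lbsize,
 *		    (u_longlong_t)minfo.dki_capacity);
 */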
22386 
22387 static int
22388 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22389 {
22390 	struct sd_lun		*un = NULL;
22391 	struct uscsi_cmd	com;
22392 	struct scsi_inquiry	*sinq;
22393 	struct dk_minfo		media_info;
22394 	u_longlong_t		media_capacity;
22395 	uint64_t		capacity;
22396 	uint_t			lbasize;
22397 	uchar_t			*out_data;
22398 	uchar_t			*rqbuf;
22399 	int			rval = 0;
22400 	int			rtn;
22401 
22402 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22403 	    (un->un_state == SD_STATE_OFFLINE)) {
22404 		return (ENXIO);
22405 	}
22406 
22407 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22408 
22409 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22410 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22411 
22412 	/* Issue a TUR to determine if the drive is ready with media present */
22413 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
22414 	if (rval == ENXIO) {
22415 		goto done;
22416 	}
22417 
22418 	/* Now get configuration data */
22419 	if (ISCD(un)) {
22420 		media_info.dki_media_type = DK_CDROM;
22421 
22422 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22423 		if (un->un_f_mmc_cap == TRUE) {
22424 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
22425 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
22426 
22427 			if (rtn) {
22428 				/*
22429 				 * Failed for other than an illegal request
22430 				 * or command not supported
22431 				 */
22432 				if ((com.uscsi_status == STATUS_CHECK) &&
22433 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22434 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22435 					    (rqbuf[12] != 0x20)) {
22436 						rval = EIO;
22437 						goto done;
22438 					}
22439 				}
22440 			} else {
22441 				/*
22442 				 * The GET CONFIGURATION command succeeded
22443 				 * so set the media type according to the
22444 				 * returned data
22445 				 */
22446 				media_info.dki_media_type = out_data[6];
22447 				media_info.dki_media_type <<= 8;
22448 				media_info.dki_media_type |= out_data[7];
22449 			}
22450 		}
22451 	} else {
22452 		/*
22453 		 * The profile list is not available, so we attempt to identify
22454 		 * the media type based on the inquiry data
22455 		 */
22456 		sinq = un->un_sd->sd_inq;
22457 		if (sinq->inq_qual == 0) {
22458 			/* This is a direct access device */
22459 			media_info.dki_media_type = DK_FIXED_DISK;
22460 
22461 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22462 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22463 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22464 					media_info.dki_media_type = DK_ZIP;
22465 				} else if (
22466 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22467 					media_info.dki_media_type = DK_JAZ;
22468 				}
22469 			}
22470 		} else {
22471 			/* Not a CD or direct access so return unknown media */
22472 			media_info.dki_media_type = DK_UNKNOWN;
22473 		}
22474 	}
22475 
22476 	/* Now read the capacity so we can provide the lbasize and capacity */
22477 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
22478 	    SD_PATH_DIRECT)) {
22479 	case 0:
22480 		break;
22481 	case EACCES:
22482 		rval = EACCES;
22483 		goto done;
22484 	default:
22485 		rval = EIO;
22486 		goto done;
22487 	}
22488 
22489 	media_info.dki_lbsize = lbasize;
22490 	media_capacity = capacity;
22491 
22492 	/*
22493 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
22494 	 * un->un_sys_blocksize chunks. So we need to convert it into
22495 	 * lbasize chunks.
22496 	 */
22497 	media_capacity *= un->un_sys_blocksize;
22498 	media_capacity /= lbasize;
22499 	media_info.dki_capacity = media_capacity;
22500 
22501 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22502 		rval = EFAULT;
22503 		/* Use a goto in case more code is added below in the future */
22504 		goto done;
22505 	}
22506 done:
22507 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22508 	kmem_free(rqbuf, SENSE_LENGTH);
22509 	return (rval);
22510 }
22511 
22512 
22513 /*
22514  *    Function: sd_dkio_get_geometry
22515  *
22516  * Description: This routine is the driver entry point for handling user
22517  *		requests to get the device geometry (DKIOCGGEOM).
22518  *
22519  *   Arguments: dev  - the device number
22520  *		arg  - pointer to user provided dk_geom structure specifying
22521  *			the controller's notion of the current geometry.
22522  *		flag - this argument is a pass through to ddi_copyxxx()
22523  *		       directly from the mode argument of ioctl().
22524  *		geom_validated - flag indicating if the device geometry has been
22525  *				 previously validated in the sdioctl routine.
22526  *
22527  * Return Code: 0
22528  *		EFAULT
22529  *		ENXIO
22530  *		EIO
22531  */
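
/*
 * For reference, a user-level sketch (fd is hypothetical; error handling
 * omitted; see dkio(7I)):
 *
 *	struct dk_geom geom;
 *
 *	if (ioctl(fd, DKIOCGGEOM, &geom) == 0)
 *		(void) printf("%u cyl (+%u alt), %u head, %u sect\n",
 *		    geom.dkg_ncyl, geom.dkg_acyl,
 *		    geom.dkg_nhead, geom.dkg_nsect);
 */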
22532 
22533 static int
22534 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22535 {
22536 	struct sd_lun	*un = NULL;
22537 	struct dk_geom	*tmp_geom = NULL;
22538 	int		rval = 0;
22539 
22540 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22541 		return (ENXIO);
22542 	}
22543 
22544 	if (geom_validated == FALSE) {
22545 		/*
22546 		 * sd_validate_geometry does not spin a disk up
22547 		 * if it was spun down. We need to make sure it
22548 		 * is ready.
22549 		 */
22550 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22551 			return (rval);
22552 		}
22553 		mutex_enter(SD_MUTEX(un));
22554 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22555 		mutex_exit(SD_MUTEX(un));
22556 	}
22557 	if (rval)
22558 		return (rval);
22559 
22560 	/*
22561 	 * It is possible that un_solaris_size is 0 (uninitialized)
22562 	 * after sd_unit_attach. Reservation conflict may cause the
22563 	 * above situation. Thus, the zero check of un_solaris_size
22564 	 * should occur after the sd_validate_geometry() call.
22565 	 */
22566 #if defined(__i386) || defined(__amd64)
22567 	if (un->un_solaris_size == 0) {
22568 		return (EIO);
22569 	}
22570 #endif
22571 
22572 	/*
22573 	 * Make a local copy of the soft state geometry to avoid some potential
22574 	 * race conditions associated with holding the mutex and updating the
22575 	 * write_reinstruct value
22576 	 */
22577 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22578 	mutex_enter(SD_MUTEX(un));
22579 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22580 	mutex_exit(SD_MUTEX(un));
22581 
22582 	if (tmp_geom->dkg_write_reinstruct == 0) {
22583 		tmp_geom->dkg_write_reinstruct =
22584 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22585 		    sd_rot_delay) / (int)60000);
22586 	}
22587 
22588 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22589 	    flag);
22590 	if (rval != 0) {
22591 		rval = EFAULT;
22592 	}
22593 
22594 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22595 	return (rval);
22596 
22597 }
22598 
22599 
22600 /*
22601  *    Function: sd_dkio_set_geometry
22602  *
22603  * Description: This routine is the driver entry point for handling user
22604  *		requests to set the device geometry (DKIOCSGEOM). The actual
22605  *		device geometry is not updated, just the driver "notion" of it.
22606  *
22607  *   Arguments: dev  - the device number
22608  *		arg  - pointer to user provided dk_geom structure used to set
22609  *			the controller's notion of the current geometry.
22610  *		flag - this argument is a pass through to ddi_copyxxx()
22611  *		       directly from the mode argument of ioctl().
22612  *
22613  * Return Code: 0
22614  *		EFAULT
22615  *		ENXIO
22616  *		EIO
22617  */
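
/*
 * For reference, since only the driver's notion of the geometry changes,
 * a user-level caller would normally read-modify-write it (fd is
 * hypothetical; error handling omitted):
 *
 *	struct dk_geom geom;
 *
 *	(void) ioctl(fd, DKIOCGGEOM, &geom);
 *	geom.dkg_rpm = 7200;
 *	(void) ioctl(fd, DKIOCSGEOM, &geom);
 */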
22618 
22619 static int
22620 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22621 {
22622 	struct sd_lun	*un = NULL;
22623 	struct dk_geom	*tmp_geom;
22624 	struct dk_map	*lp;
22625 	int		rval = 0;
22626 	int		i;
22627 
22628 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22629 		return (ENXIO);
22630 	}
22631 
22632 	/*
22633 	 * Make sure there is no reservation conflict on the lun.
22634 	 */
22635 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22636 		return (EACCES);
22637 	}
22638 
22639 #if defined(__i386) || defined(__amd64)
22640 	if (un->un_solaris_size == 0) {
22641 		return (EIO);
22642 	}
22643 #endif
22644 
22645 	/*
22646 	 * We need to copy the user specified geometry into local
22647 	 * storage and then update the softstate. We don't want to hold
22648 	 * the mutex and copyin directly from the user to the soft state
22649 	 */
22650 	tmp_geom = (struct dk_geom *)
22651 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22652 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22653 	if (rval != 0) {
22654 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22655 		return (EFAULT);
22656 	}
22657 
22658 	mutex_enter(SD_MUTEX(un));
22659 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22660 	for (i = 0; i < NDKMAP; i++) {
22661 		lp  = &un->un_map[i];
22662 		un->un_offset[i] =
22663 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22664 #if defined(__i386) || defined(__amd64)
22665 		un->un_offset[i] += un->un_solaris_offset;
22666 #endif
22667 	}
22668 	un->un_f_geometry_is_valid = FALSE;
22669 	mutex_exit(SD_MUTEX(un));
22670 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22671 
22672 	return (rval);
22673 }
22674 
22675 
22676 /*
22677  *    Function: sd_dkio_get_partition
22678  *
22679  * Description: This routine is the driver entry point for handling user
22680  *		requests to get the partition table (DKIOCGAPART).
22681  *
22682  *   Arguments: dev  - the device number
22683  *		arg  - pointer to user provided dk_allmap structure specifying
22684  *			the controller's notion of the current partition table.
22685  *		flag - this argument is a pass through to ddi_copyxxx()
22686  *		       directly from the mode argument of ioctl().
22687  *		geom_validated - flag indicating if the device geometry has been
22688  *				 previously validated in the sdioctl routine.
22689  *
22690  * Return Code: 0
22691  *		EFAULT
22692  *		ENXIO
22693  *		EIO
22694  */
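
/*
 * For reference, a user-level sketch (fd is hypothetical; error handling
 * omitted; see dkio(7I)):
 *
 *	struct dk_allmap allmap;
 *	int i;
 *
 *	if (ioctl(fd, DKIOCGAPART, &allmap) == 0)
 *		for (i = 0; i < NDKMAP; i++)
 *			(void) printf("%d: cyl %ld, %ld blocks\n", i,
 *			    (long)allmap.dka_map[i].dkl_cylno,
 *			    allmap.dka_map[i].dkl_nblk);
 */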
22695 
22696 static int
22697 sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22698 {
22699 	struct sd_lun	*un = NULL;
22700 	int		rval = 0;
22701 	int		size;
22702 
22703 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22704 		return (ENXIO);
22705 	}
22706 
22707 	/*
22708 	 * Make sure the geometry is valid before getting the partition
22709 	 * information.
22710 	 */
22711 	mutex_enter(SD_MUTEX(un));
22712 	if (geom_validated == FALSE) {
22713 		/*
22714 		 * sd_validate_geometry does not spin a disk up
22715 		 * if it was spun down. We need to make sure it
22716 		 * is ready before validating the geometry.
22717 		 */
22718 		mutex_exit(SD_MUTEX(un));
22719 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22720 			return (rval);
22721 		}
22722 		mutex_enter(SD_MUTEX(un));
22723 
22724 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22725 			mutex_exit(SD_MUTEX(un));
22726 			return (rval);
22727 		}
22728 	}
22729 	mutex_exit(SD_MUTEX(un));
22730 
22731 	/*
22732 	 * It is possible that un_solaris_size is 0 (uninitialized)
22733 	 * after sd_unit_attach. Reservation conflict may cause the
22734 	 * above situation. Thus, the zero check of un_solaris_size
22735 	 * should occur after the sd_validate_geometry() call.
22736 	 */
22737 #if defined(__i386) || defined(__amd64)
22738 	if (un->un_solaris_size == 0) {
22739 		return (EIO);
22740 	}
22741 #endif
22742 
22743 #ifdef _MULTI_DATAMODEL
22744 	switch (ddi_model_convert_from(flag & FMODELS)) {
22745 	case DDI_MODEL_ILP32: {
22746 		struct dk_map32 dk_map32[NDKMAP];
22747 		int		i;
22748 
22749 		for (i = 0; i < NDKMAP; i++) {
22750 			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
22751 			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
22752 		}
22753 		size = NDKMAP * sizeof (struct dk_map32);
22754 		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
22755 		if (rval != 0) {
22756 			rval = EFAULT;
22757 		}
22758 		break;
22759 	}
22760 	case DDI_MODEL_NONE:
22761 		size = NDKMAP * sizeof (struct dk_map);
22762 		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22763 		if (rval != 0) {
22764 			rval = EFAULT;
22765 		}
22766 		break;
22767 	}
22768 #else /* ! _MULTI_DATAMODEL */
22769 	size = NDKMAP * sizeof (struct dk_map);
22770 	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22771 	if (rval != 0) {
22772 		rval = EFAULT;
22773 	}
22774 #endif /* _MULTI_DATAMODEL */
22775 	return (rval);
22776 }
22777 
22778 
22779 /*
22780  *    Function: sd_dkio_set_partition
22781  *
22782  * Description: This routine is the driver entry point for handling user
22783  *		requests to set the partition table (DKIOCSAPART). The actual
22784  *		device partition is not updated.
22785  *
22786  *   Arguments: dev  - the device number
22787  *		arg  - pointer to user provided dk_allmap structure used to set
22788  *			the controller's notion of the partition table.
22789  *		flag - this argument is a pass through to ddi_copyxxx()
22790  *		       directly from the mode argument of ioctl().
22791  *
22792  * Return Code: 0
22793  *		EINVAL
22794  *		EFAULT
22795  *		ENXIO
22796  *		EIO
22797  */
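
/*
 * For reference, the inverse user-level operation fills in the same
 * struct dk_allmap and issues (fd and allmap are hypothetical):
 *
 *	(void) ioctl(fd, DKIOCSAPART, &allmap);
 *
 * As noted above, only the driver's notion of the partition table
 * changes; nothing is written to the media.
 */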
22798 
22799 static int
22800 sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
22801 {
22802 	struct sd_lun	*un = NULL;
22803 	struct dk_map	dk_map[NDKMAP];
22804 	struct dk_map	*lp;
22805 	int		rval = 0;
22806 	int		size;
22807 	int		i;
22808 #if defined(_SUNOS_VTOC_16)
22809 	struct dkl_partition	*vp;
22810 #endif
22811 
22812 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22813 		return (ENXIO);
22814 	}
22815 
22816 	/*
22817 	 * Set the map for all logical partitions.  We hold
22818 	 * the mutex just to make sure an interrupt doesn't
22819 	 * come in while the map is half updated.
22820 	 */
22821 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
22822 	mutex_enter(SD_MUTEX(un));
22823 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22824 		mutex_exit(SD_MUTEX(un));
22825 		return (ENOTSUP);
22826 	}
22827 	mutex_exit(SD_MUTEX(un));
22828 
22829 	/*
22830 	 * Make sure there is no reservation conflict on the lun.
22831 	 */
22832 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22833 		return (EACCES);
22834 	}
22835 
22836 #if defined(__i386) || defined(__amd64)
22837 	if (un->un_solaris_size == 0) {
22838 		return (EIO);
22839 	}
22840 #endif
22841 
22842 #ifdef _MULTI_DATAMODEL
22843 	switch (ddi_model_convert_from(flag & FMODELS)) {
22844 	case DDI_MODEL_ILP32: {
22845 		struct dk_map32 dk_map32[NDKMAP];
22846 
22847 		size = NDKMAP * sizeof (struct dk_map32);
22848 		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
22849 		if (rval != 0) {
22850 			return (EFAULT);
22851 		}
22852 		for (i = 0; i < NDKMAP; i++) {
22853 			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
22854 			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
22855 		}
22856 		break;
22857 	}
22858 	case DDI_MODEL_NONE:
22859 		size = NDKMAP * sizeof (struct dk_map);
22860 		rval = ddi_copyin((void *)arg, dk_map, size, flag);
22861 		if (rval != 0) {
22862 			return (EFAULT);
22863 		}
22864 		break;
22865 	}
22866 #else /* ! _MULTI_DATAMODEL */
22867 	size = NDKMAP * sizeof (struct dk_map);
22868 	rval = ddi_copyin((void *)arg, dk_map, size, flag);
22869 	if (rval != 0) {
22870 		return (EFAULT);
22871 	}
22872 #endif /* _MULTI_DATAMODEL */
22873 
22874 	mutex_enter(SD_MUTEX(un));
22875 	/* Note: The size used in this bcopy is set based upon the data model */
22876 	bcopy(dk_map, un->un_map, size);
22877 #if defined(_SUNOS_VTOC_16)
22878 	vp = (struct dkl_partition *)&(un->un_vtoc);
22879 #endif	/* defined(_SUNOS_VTOC_16) */
22880 	for (i = 0; i < NDKMAP; i++) {
22881 		lp  = &un->un_map[i];
22882 		un->un_offset[i] =
22883 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22884 #if defined(_SUNOS_VTOC_16)
22885 		vp->p_start = un->un_offset[i];
22886 		vp->p_size = lp->dkl_nblk;
22887 		vp++;
22888 #endif	/* defined(_SUNOS_VTOC_16) */
22889 #if defined(__i386) || defined(__amd64)
22890 		un->un_offset[i] += un->un_solaris_offset;
22891 #endif
22892 	}
22893 	mutex_exit(SD_MUTEX(un));
22894 	return (rval);
22895 }
22896 
22897 
22898 /*
22899  *    Function: sd_dkio_get_vtoc
22900  *
22901  * Description: This routine is the driver entry point for handling user
22902  *		requests to get the current volume table of contents
22903  *		(DKIOCGVTOC).
22904  *
22905  *   Arguments: dev  - the device number
22906  *		arg  - pointer to user provided vtoc structure specifying
22907  *			the current vtoc.
22908  *		flag - this argument is a pass through to ddi_copyxxx()
22909  *		       directly from the mode argument of ioctl().
22910  *		geom_validated - flag indicating if the device geometry has been
22911  *				 previously validated in the sdioctl routine.
22912  *
22913  * Return Code: 0
22914  *		EFAULT
22915  *		ENXIO
22916  *		EIO
22917  */
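
/*
 * For reference, a user-level sketch (fd is hypothetical; error handling
 * omitted; see dkio(7I)):
 *
 *	struct vtoc vt;
 *	int i;
 *
 *	if (ioctl(fd, DKIOCGVTOC, &vt) == 0)
 *		for (i = 0; i < (int)vt.v_nparts; i++)
 *			(void) printf("%d: start %ld, size %ld, tag %u\n",
 *			    i, (long)vt.v_part[i].p_start,
 *			    vt.v_part[i].p_size, vt.v_part[i].p_tag);
 */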
22918 
22919 static int
22920 sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
22921 {
22922 	struct sd_lun	*un = NULL;
22923 #if defined(_SUNOS_VTOC_8)
22924 	struct vtoc	user_vtoc;
22925 #endif	/* defined(_SUNOS_VTOC_8) */
22926 	int		rval = 0;
22927 
22928 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22929 		return (ENXIO);
22930 	}
22931 
22932 	mutex_enter(SD_MUTEX(un));
22933 	if (geom_validated == FALSE) {
22934 		/*
22935 		 * sd_validate_geometry does not spin a disk up
22936 		 * if it was spun down. We need to make sure it
22937 		 * is ready.
22938 		 */
22939 		mutex_exit(SD_MUTEX(un));
22940 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22941 			return (rval);
22942 		}
22943 		mutex_enter(SD_MUTEX(un));
22944 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22945 			mutex_exit(SD_MUTEX(un));
22946 			return (rval);
22947 		}
22948 	}
22949 
22950 #if defined(_SUNOS_VTOC_8)
22951 	sd_build_user_vtoc(un, &user_vtoc);
22952 	mutex_exit(SD_MUTEX(un));
22953 
22954 #ifdef _MULTI_DATAMODEL
22955 	switch (ddi_model_convert_from(flag & FMODELS)) {
22956 	case DDI_MODEL_ILP32: {
22957 		struct vtoc32 user_vtoc32;
22958 
22959 		vtoctovtoc32(user_vtoc, user_vtoc32);
22960 		if (ddi_copyout(&user_vtoc32, (void *)arg,
22961 		    sizeof (struct vtoc32), flag)) {
22962 			return (EFAULT);
22963 		}
22964 		break;
22965 	}
22966 
22967 	case DDI_MODEL_NONE:
22968 		if (ddi_copyout(&user_vtoc, (void *)arg,
22969 		    sizeof (struct vtoc), flag)) {
22970 			return (EFAULT);
22971 		}
22972 		break;
22973 	}
22974 #else /* ! _MULTI_DATAMODEL */
22975 	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
22976 		return (EFAULT);
22977 	}
22978 #endif /* _MULTI_DATAMODEL */
22979 
22980 #elif defined(_SUNOS_VTOC_16)
22981 	mutex_exit(SD_MUTEX(un));
22982 
22983 #ifdef _MULTI_DATAMODEL
22984 	/*
22985 	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
22986 	 * 32-bit to maintain compatibility with existing on-disk
22987 	 * structures.  Thus, we need to convert the structure when copying
22988 	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
22989 	 * program.  If the target is a 32-bit program, then no conversion
22990 	 * is necessary.
22991 	 */
22992 	/* LINTED: logical expression always true: op "||" */
22993 	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
22994 	switch (ddi_model_convert_from(flag & FMODELS)) {
22995 	case DDI_MODEL_ILP32:
22996 		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
22997 		    sizeof (un->un_vtoc), flag)) {
22998 			return (EFAULT);
22999 		}
23000 		break;
23001 
23002 	case DDI_MODEL_NONE: {
23003 		struct vtoc user_vtoc;
23004 
23005 		vtoc32tovtoc(un->un_vtoc, user_vtoc);
23006 		if (ddi_copyout(&user_vtoc, (void *)arg,
23007 		    sizeof (struct vtoc), flag)) {
23008 			return (EFAULT);
23009 		}
23010 		break;
23011 	}
23012 	}
23013 #else /* ! _MULTI_DATAMODEL */
23014 	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
23015 	    flag)) {
23016 		return (EFAULT);
23017 	}
23018 #endif /* _MULTI_DATAMODEL */
23019 #else
23020 #error "No VTOC format defined."
23021 #endif
23022 
23023 	return (rval);
23024 }
23025 
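/*
 *    Function: sd_dkio_get_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to read a portion of the on-disk EFI/GPT data
 *		(DKIOCGETEFI). dki_length must be a multiple of the target
 *		block size and no larger than the maximum transfer size.
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided dk_efi structure specifying
 *			the starting LBA, length, and destination buffer.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EINVAL
 *		EFAULT
 *		ENXIO
 *
 * For reference, a user-level sketch of reading the GPT header at LBA 1
 * (fd is hypothetical, a 512-byte block size is assumed; error handling
 * omitted):
 *
 *	dk_efi_t efi;
 *	char buf[512];
 *
 *	efi.dki_lba = 1;
 *	efi.dki_length = sizeof (buf);
 *	efi.dki_data = (void *)buf;
 *	(void) ioctl(fd, DKIOCGETEFI, &efi);
 */
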
23026 static int
23027 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
23028 {
23029 	struct sd_lun	*un = NULL;
23030 	dk_efi_t	user_efi;
23031 	int		rval = 0;
23032 	void		*buffer;
23033 
23034 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23035 		return (ENXIO);
23036 
23037 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23038 		return (EFAULT);
23039 
23040 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23041 
23042 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23043 	    (user_efi.dki_length > un->un_max_xfer_size))
23044 		return (EINVAL);
23045 
23046 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23047 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
23048 	    user_efi.dki_lba, SD_PATH_DIRECT);
23049 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
23050 	    user_efi.dki_length, flag) != 0)
23051 		rval = EFAULT;
23052 
23053 	kmem_free(buffer, user_efi.dki_length);
23054 	return (rval);
23055 }
23056 
23057 /*
23058  *    Function: sd_build_user_vtoc
23059  *
23060  * Description: This routine populates a pass by reference variable with the
23061  *		current volume table of contents.
23062  *
23063  *   Arguments: un - driver soft state (unit) structure
23064  *		user_vtoc - pointer to vtoc structure to be populated
23065  */
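
/*
 * The cylinder-to-sector conversion below multiplies each partition's
 * starting cylinder by the number of blocks per cylinder. For example,
 * with dkg_nhead = 16 and dkg_nsect = 63, nblks is 1008, so a partition
 * whose dkl_cylno is 10 gets p_start = 10 * 1008 = 10080 sectors.
 */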
23066 
23067 static void
23068 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23069 {
23070 	struct dk_map2		*lpart;
23071 	struct dk_map		*lmap;
23072 	struct partition	*vpart;
23073 	int			nblks;
23074 	int			i;
23075 
23076 	ASSERT(mutex_owned(SD_MUTEX(un)));
23077 
23078 	/*
23079 	 * Return vtoc structure fields in the provided VTOC area, addressed
23080 	 * by *vtoc.
23081 	 */
23082 	bzero(user_vtoc, sizeof (struct vtoc));
23083 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
23084 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
23085 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
23086 	user_vtoc->v_sanity	= VTOC_SANE;
23087 	user_vtoc->v_version	= un->un_vtoc.v_version;
23088 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
23089 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
23090 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
23091 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
23092 	    sizeof (un->un_vtoc.v_reserved));
23093 	/*
23094 	 * Convert partitioning information.
23095 	 *
23096 	 * Note the conversion from starting cylinder number
23097 	 * to starting sector number.
23098 	 */
23099 	lmap = un->un_map;
23100 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
23101 	vpart = user_vtoc->v_part;
23102 
23103 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23104 
23105 	for (i = 0; i < V_NUMPAR; i++) {
23106 		vpart->p_tag	= lpart->p_tag;
23107 		vpart->p_flag	= lpart->p_flag;
23108 		vpart->p_start	= lmap->dkl_cylno * nblks;
23109 		vpart->p_size	= lmap->dkl_nblk;
23110 		lmap++;
23111 		lpart++;
23112 		vpart++;
23113 
23114 		/* (4364927) */
23115 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
23116 	}
23117 
23118 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
23119 }
23120 
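/*
 *    Function: sd_dkio_partition
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to look up a single EFI/GPT partition entry by
 *		number (DKIOCPARTITION).
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided partition64 structure; on
 *			entry p_partno selects the entry, on return the type
 *			GUID, starting LBA, and size are filled in.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EFAULT
 *		ENXIO
 *		ESRCH
 *
 * For reference, a user-level sketch (fd is hypothetical; error handling
 * omitted):
 *
 *	struct partition64 p64;
 *
 *	bzero(&p64, sizeof (p64));
 *	p64.p_partno = 0;
 *	if (ioctl(fd, DKIOCPARTITION, &p64) == 0)
 *		(void) printf("start %llu, size %llu\n",
 *		    (u_longlong_t)p64.p_start, (u_longlong_t)p64.p_size);
 */
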
23121 static int
23122 sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
23123 {
23124 	struct sd_lun		*un = NULL;
23125 	struct partition64	p64;
23126 	int			rval = 0;
23127 	uint_t			nparts;
23128 	efi_gpe_t		*partitions;
23129 	efi_gpt_t		*buffer;
23130 	diskaddr_t		gpe_lba;
23131 
23132 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23133 		return (ENXIO);
23134 	}
23135 
23136 	if (ddi_copyin((const void *)arg, &p64,
23137 	    sizeof (struct partition64), flag)) {
23138 		return (EFAULT);
23139 	}
23140 
23141 	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
23142 	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
23143 	    1, SD_PATH_DIRECT);
23144 	if (rval != 0)
23145 		goto done_error;
23146 
23147 	sd_swap_efi_gpt(buffer);
23148 
23149 	if ((rval = sd_validate_efi(buffer)) != 0)
23150 		goto done_error;
23151 
23152 	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
23153 	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
23154 	if (p64.p_partno > nparts) {
23155 		/* couldn't find it */
23156 		rval = ESRCH;
23157 		goto done_error;
23158 	}
23159 	/*
23160 	 * If we're dealing with a partition entry that lies outside the
23161 	 * normal 16K block of entries, adjust the read LBA accordingly.
23162 	 */
23163 	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
23164 	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
23165 	    gpe_lba, SD_PATH_DIRECT);
23166 	if (rval) {
23167 		goto done_error;
23168 	}
23169 	partitions = (efi_gpe_t *)buffer;
23170 
23171 	sd_swap_efi_gpe(nparts, partitions);
23172 
23173 	partitions += p64.p_partno;
23174 	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
23175 	    sizeof (struct uuid));
23176 	p64.p_start = partitions->efi_gpe_StartingLBA;
23177 	p64.p_size = partitions->efi_gpe_EndingLBA -
23178 	    p64.p_start + 1;
23179 
23180 	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
23181 		rval = EFAULT;
23182 
23183 done_error:
23184 	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
23185 	return (rval);
23186 }
23187 
23188 
23189 /*
23190  *    Function: sd_dkio_set_vtoc
23191  *
23192  * Description: This routine is the driver entry point for handling user
23193  *		requests to set the current volume table of contents
23194  *		(DKIOCSVTOC).
23195  *
23196  *   Arguments: dev  - the device number
23197  *		arg  - pointer to user provided vtoc structure used to set the
23198  *			current vtoc.
23199  *		flag - this argument is a pass through to ddi_copyxxx()
23200  *		       directly from the mode argument of ioctl().
23201  *
23202  * Return Code: 0
23203  *		EFAULT
23204  *		ENXIO
23205  *		EINVAL
23206  *		ENOTSUP
23207  */
23208 
23209 static int
23210 sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
23211 {
23212 	struct sd_lun	*un = NULL;
23213 	struct vtoc	user_vtoc;
23214 	int		rval = 0;
23215 
23216 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23217 		return (ENXIO);
23218 	}
23219 
23220 #if defined(__i386) || defined(__amd64)
23221 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
23222 		return (EINVAL);
23223 	}
23224 #endif
23225 
23226 #ifdef _MULTI_DATAMODEL
23227 	switch (ddi_model_convert_from(flag & FMODELS)) {
23228 	case DDI_MODEL_ILP32: {
23229 		struct vtoc32 user_vtoc32;
23230 
23231 		if (ddi_copyin((const void *)arg, &user_vtoc32,
23232 		    sizeof (struct vtoc32), flag)) {
23233 			return (EFAULT);
23234 		}
23235 		vtoc32tovtoc(user_vtoc32, user_vtoc);
23236 		break;
23237 	}
23238 
23239 	case DDI_MODEL_NONE:
23240 		if (ddi_copyin((const void *)arg, &user_vtoc,
23241 		    sizeof (struct vtoc), flag)) {
23242 			return (EFAULT);
23243 		}
23244 		break;
23245 	}
23246 #else /* ! _MULTI_DATAMODEL */
23247 	if (ddi_copyin((const void *)arg, &user_vtoc,
23248 	    sizeof (struct vtoc), flag)) {
23249 		return (EFAULT);
23250 	}
23251 #endif /* _MULTI_DATAMODEL */
23252 
23253 	mutex_enter(SD_MUTEX(un));
23254 	if (un->un_blockcount > DK_MAX_BLOCKS) {
23255 		mutex_exit(SD_MUTEX(un));
23256 		return (ENOTSUP);
23257 	}
23258 	if (un->un_g.dkg_ncyl == 0) {
23259 		mutex_exit(SD_MUTEX(un));
23260 		return (EINVAL);
23261 	}
23262 
23263 	mutex_exit(SD_MUTEX(un));
23264 	sd_clear_efi(un);
23265 	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
23266 	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
23267 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
23268 	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23269 	    un->un_node_type, NULL);
23270 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
23271 	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23272 	    un->un_node_type, NULL);
23273 	mutex_enter(SD_MUTEX(un));
23274 
23275 	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
23276 		if ((rval = sd_write_label(dev)) == 0) {
23277 			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
23278 			    != 0) {
23279 				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
23280 				    "sd_dkio_set_vtoc: "
23281 				    "Failed validate geometry\n");
23282 			}
23283 		}
23284 	}
23285 
23286 	/*
23287 	 * Even if sd_build_label_vtoc() or sd_write_label() failed above,
23288 	 * write the devid anyway; it can't hurt. Also preserve the device id by
23289 	 * writing to the disk acyl for the case where a devid has been
23290 	 * fabricated.
23291 	 */
23292 	if (un->un_f_devid_supported &&
23293 	    (un->un_f_opt_fab_devid == TRUE)) {
23294 		if (un->un_devid == NULL) {
23295 			sd_register_devid(un, SD_DEVINFO(un),
23296 			    SD_TARGET_IS_UNRESERVED);
23297 		} else {
23298 			/*
23299 			 * The device id for this disk has been
23300 			 * fabricated. Fabricated device id's are
23301 			 * managed by storing them in the last 2
23302 			 * available sectors on the drive. The device
23303 			 * id must be preserved by writing it back out
23304 			 * to this location.
23305 			 */
23306 			if (sd_write_deviceid(un) != 0) {
23307 				ddi_devid_free(un->un_devid);
23308 				un->un_devid = NULL;
23309 			}
23310 		}
23311 	}
23312 	mutex_exit(SD_MUTEX(un));
23313 	return (rval);
23314 }
23315 
23316 
23317 /*
23318  *    Function: sd_build_label_vtoc
23319  *
23320  * Description: This routine updates the driver soft state current volume table
23321  *		of contents based on a user specified vtoc.
23322  *
23323  *   Arguments: un - driver soft state (unit) structure
23324  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23325  *			    to update the driver soft state.
23326  *
23327  * Return Code: 0
23328  *		EINVAL
23329  */
23330 
23331 static int
23332 sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23333 {
23334 	struct dk_map		*lmap;
23335 	struct partition	*vpart;
23336 	int			nblks;
23337 #if defined(_SUNOS_VTOC_8)
23338 	int			ncyl;
23339 	struct dk_map2		*lpart;
23340 #endif	/* defined(_SUNOS_VTOC_8) */
23341 	int			i;
23342 
23343 	ASSERT(mutex_owned(SD_MUTEX(un)));
23344 
23345 	/* Sanity-check the vtoc */
23346 	if (user_vtoc->v_sanity != VTOC_SANE ||
23347 	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
23348 	    user_vtoc->v_nparts != V_NUMPAR) {
23349 		return (EINVAL);
23350 	}
23351 
23352 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23353 	if (nblks == 0) {
23354 		return (EINVAL);
23355 	}
23356 
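	/*
	 * nblks is the number of blocks per cylinder. Illustrative
	 * example (hypothetical values): with dkg_nhead = 16 and
	 * dkg_nsect = 63, nblks = 1008; a partition starting at block
	 * 2016 begins on cylinder 2, while one starting at block 2000
	 * fails the VTOC_8 alignment check below, as 2000 % 1008 != 0.
	 */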
23357 #if defined(_SUNOS_VTOC_8)
23358 	vpart = user_vtoc->v_part;
23359 	for (i = 0; i < V_NUMPAR; i++) {
23360 		if ((vpart->p_start % nblks) != 0) {
23361 			return (EINVAL);
23362 		}
23363 		ncyl = vpart->p_start / nblks;
23364 		ncyl += vpart->p_size / nblks;
23365 		if ((vpart->p_size % nblks) != 0) {
23366 			ncyl++;
23367 		}
23368 		if (ncyl > (int)un->un_g.dkg_ncyl) {
23369 			return (EINVAL);
23370 		}
23371 		vpart++;
23372 	}
23373 #endif	/* defined(_SUNOS_VTOC_8) */
23374 
23375 	/* Put appropriate vtoc structure fields into the disk label */
23376 #if defined(_SUNOS_VTOC_16)
	/*
	 * The vtoc is always a 32-bit data structure to maintain the
	 * on-disk format. Convert "in place" instead of bcopying it.
	 */
23381 	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));
23382 
	/*
	 * In the 16-slice vtoc, starting sectors are expressed as
	 * numbers *relative* to the start of the Solaris fdisk partition.
	 */
23387 	lmap = un->un_map;
23388 	vpart = user_vtoc->v_part;
23389 
23390 	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
23391 		lmap->dkl_cylno = vpart->p_start / nblks;
23392 		lmap->dkl_nblk = vpart->p_size;
23393 	}
23394 
23395 #elif defined(_SUNOS_VTOC_8)
23396 
23397 	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
23398 	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
23399 	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];
23400 
23401 	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
23402 	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;
23403 
23404 	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);
23405 
23406 	un->un_vtoc.v_nparts = user_vtoc->v_nparts;
23407 
23408 	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
23409 	    sizeof (un->un_vtoc.v_reserved));
23410 
23411 	/*
23412 	 * Note the conversion from starting sector number
23413 	 * to starting cylinder number.
23414 	 * Return error if division results in a remainder.
23415 	 */
23416 	lmap = un->un_map;
23417 	lpart = un->un_vtoc.v_part;
23418 	vpart = user_vtoc->v_part;
23419 
23420 	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
23421 		lpart->p_tag  = vpart->p_tag;
23422 		lpart->p_flag = vpart->p_flag;
23423 		lmap->dkl_cylno = vpart->p_start / nblks;
23424 		lmap->dkl_nblk = vpart->p_size;
23425 
23426 		lmap++;
23427 		lpart++;
23428 		vpart++;
23429 
23430 		/* (4387723) */
23431 #ifdef _LP64
23432 		if (user_vtoc->timestamp[i] > TIME32_MAX) {
23433 			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
23434 		} else {
23435 			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23436 		}
23437 #else
23438 		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23439 #endif
23440 	}
23441 
23442 	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
23443 #else
23444 #error "No VTOC format defined."
23445 #endif
23446 	return (0);
23447 }
23448 
23449 /*
23450  *    Function: sd_clear_efi
23451  *
23452  * Description: This routine clears all EFI labels.
23453  *
23454  *   Arguments: un - driver soft state (unit) structure
23455  *
23456  * Return Code: void
23457  */
23458 
23459 static void
23460 sd_clear_efi(struct sd_lun *un)
23461 {
23462 	efi_gpt_t	*gpt;
23463 	uint_t		lbasize;
23464 	uint64_t	cap;
23465 	int rval;
23466 
23467 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23468 
23469 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
23470 
23471 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23472 		goto done;
23473 	}
23474 
23475 	sd_swap_efi_gpt(gpt);
23476 	rval = sd_validate_efi(gpt);
23477 	if (rval == 0) {
23478 		/* clear primary */
23479 		bzero(gpt, sizeof (efi_gpt_t));
23480 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
23481 			SD_PATH_DIRECT))) {
23482 			SD_INFO(SD_LOG_IO_PARTITION, un,
23483 				"sd_clear_efi: clear primary label failed\n");
23484 		}
23485 	}
23486 	/* the backup */
23487 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23488 	    SD_PATH_DIRECT);
23489 	if (rval) {
23490 		goto done;
23491 	}
23492 	/*
23493 	 * The MMC standard allows READ CAPACITY to be
23494 	 * inaccurate by a bounded amount (in the interest of
23495 	 * response latency).  As a result, failed READs are
23496 	 * commonplace (due to the reading of metadata and not
23497 	 * data). Depending on the per-Vendor/drive Sense data,
23498 	 * the failed READ can cause many (unnecessary) retries.
23499 	 */
23500 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23501 	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23502 		SD_PATH_DIRECT)) != 0) {
23503 		goto done;
23504 	}
23505 	sd_swap_efi_gpt(gpt);
23506 	rval = sd_validate_efi(gpt);
23507 	if (rval == 0) {
23508 		/* clear backup */
23509 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
23510 			cap-1);
23511 		bzero(gpt, sizeof (efi_gpt_t));
23512 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23513 		    cap-1, SD_PATH_DIRECT))) {
23514 			SD_INFO(SD_LOG_IO_PARTITION, un,
23515 				"sd_clear_efi: clear backup label failed\n");
23516 		}
23517 	} else {
		/*
		 * Refer to the off-by-1 comments at the header of this
		 * file: legacy labels placed the backup GPT one sector
		 * earlier, at capacity - 2.
		 */
23522 		if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23523 		    cap - 2, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23524 			SD_PATH_DIRECT)) != 0) {
23525 			goto done;
23526 		}
23527 		sd_swap_efi_gpt(gpt);
23528 		rval = sd_validate_efi(gpt);
23529 		if (rval == 0) {
23530 			/* clear legacy backup EFI label */
23531 			SD_TRACE(SD_LOG_IOCTL, un,
23532 			    "sd_clear_efi clear backup@%lu\n", cap-2);
23533 			bzero(gpt, sizeof (efi_gpt_t));
23534 			if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23535 			    cap-2, SD_PATH_DIRECT))) {
23536 				SD_INFO(SD_LOG_IO_PARTITION,
23537 				    un, "sd_clear_efi: "
23538 				    " clear legacy backup label failed\n");
23539 			}
23540 		}
23541 	}
23542 
23543 done:
23544 	kmem_free(gpt, sizeof (efi_gpt_t));
23545 }
23546 
/*
 *    Function: sd_set_vtoc
 *
 * Description: This routine writes the disk label (the primary copy plus
 *		the backup copies) to the appropriate locations on the
 *		device.
 *
 *   Arguments: un  - driver soft state (unit) structure
 *		dkl - the label data to be written
 *
 * Return Code: 0 for success, or the error code returned by the
 *		underlying READ/WRITE command.
 */
23557 
23558 static int
23559 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23560 {
23561 	void			*shadow_buf;
23562 	uint_t			label_addr;
23563 	int			sec;
23564 	int			blk;
23565 	int			head;
23566 	int			cyl;
23567 	int			rval;
23568 
23569 #if defined(__i386) || defined(__amd64)
23570 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23571 #else
23572 	/* Write the primary label at block 0 of the solaris partition. */
23573 	label_addr = 0;
23574 #endif
23575 
23576 	if (NOT_DEVBSIZE(un)) {
23577 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23578 		/*
23579 		 * Read the target's first block.
23580 		 */
23581 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23582 		    un->un_tgt_blocksize, label_addr,
23583 		    SD_PATH_STANDARD)) != 0) {
23584 			goto exit;
23585 		}
23586 		/*
23587 		 * Copy the contents of the label into the shadow buffer
23588 		 * which is of the size of target block size.
23589 		 */
23590 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23591 	}
23592 
23593 	/* Write the primary label */
23594 	if (NOT_DEVBSIZE(un)) {
23595 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23596 		    label_addr, SD_PATH_STANDARD);
23597 	} else {
23598 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23599 		    label_addr, SD_PATH_STANDARD);
23600 	}
	if (rval != 0) {
		/* goto exit (rather than return) so shadow_buf is freed */
		goto exit;
	}
23604 
	/*
	 * Calculate where the backup labels go.  They are always on
	 * the last alternate cylinder, but some older drives put them
	 * on head 2 instead of the last head.  They are always on the
	 * first 5 odd sectors of the appropriate track.
	 *
	 * We have no choice at this point but to believe that the
	 * disk label is valid.  Use the geometry of the disk
	 * as described in the label.
	 */
23615 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23616 	head = dkl->dkl_nhead - 1;
23617 
23618 	/*
23619 	 * Write and verify the backup labels. Make sure we don't try to
23620 	 * write past the last cylinder.
23621 	 */
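	/*
	 * The loop below visits sec = 1, 3, 5, 7, 9 (the first five odd
	 * sectors), ending early if the track has fewer sectors than that.
	 */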
23622 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23623 		blk = (daddr_t)(
23624 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23625 		    (head * dkl->dkl_nsect) + sec);
23626 #if defined(__i386) || defined(__amd64)
23627 		blk += un->un_solaris_offset;
23628 #endif
23629 		if (NOT_DEVBSIZE(un)) {
23630 			uint64_t	tblk;
23631 			/*
23632 			 * Need to read the block first for read modify write.
23633 			 */
23634 			tblk = (uint64_t)blk;
23635 			blk = (int)((tblk * un->un_sys_blocksize) /
23636 			    un->un_tgt_blocksize);
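			/*
			 * Illustrative arithmetic (hypothetical sizes):
			 * with a 512-byte system block and a 2048-byte
			 * target block, system block 100 maps to target
			 * block (100 * 512) / 2048 = 25.
			 */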
23637 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23638 			    un->un_tgt_blocksize, blk,
23639 			    SD_PATH_STANDARD)) != 0) {
23640 				goto exit;
23641 			}
23642 			/*
23643 			 * Modify the shadow buffer with the label.
23644 			 */
23645 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23646 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23647 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23648 		} else {
23649 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23650 			    blk, SD_PATH_STANDARD);
23651 			SD_INFO(SD_LOG_IO_PARTITION, un,
			    "sd_set_vtoc: wrote backup label %d\n", blk);
23653 		}
23654 		if (rval != 0) {
23655 			goto exit;
23656 		}
23657 	}
23658 exit:
23659 	if (NOT_DEVBSIZE(un)) {
23660 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23661 	}
23662 	return (rval);
23663 }
23664 
23665 /*
23666  *    Function: sd_clear_vtoc
23667  *
23668  * Description: This routine clears out the VTOC labels.
23669  *
23670  *   Arguments: un - driver soft state (unit) structure
23671  *
23672  * Return: void
23673  */
23674 
23675 static void
23676 sd_clear_vtoc(struct sd_lun *un)
23677 {
23678 	struct dk_label		*dkl;
23679 
23680 	mutex_exit(SD_MUTEX(un));
23681 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23682 	mutex_enter(SD_MUTEX(un));
23683 	/*
23684 	 * sd_set_vtoc uses these fields in order to figure out
23685 	 * where to overwrite the backup labels
23686 	 */
23687 	dkl->dkl_apc    = un->un_g.dkg_apc;
23688 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23689 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23690 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23691 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23692 	mutex_exit(SD_MUTEX(un));
23693 	(void) sd_set_vtoc(un, dkl);
23694 	kmem_free(dkl, sizeof (struct dk_label));
23695 
23696 	mutex_enter(SD_MUTEX(un));
23697 }
23698 
23699 /*
23700  *    Function: sd_write_label
23701  *
23702  * Description: This routine will validate and write the driver soft state vtoc
23703  *		contents to the device.
23704  *
23705  *   Arguments: dev - the device number
23706  *
23707  * Return Code: the code returned by sd_send_scsi_cmd()
23708  *		0
23709  *		EINVAL
23710  *		ENXIO
23711  *		ENOMEM
23712  */
23713 
23714 static int
23715 sd_write_label(dev_t dev)
23716 {
23717 	struct sd_lun		*un;
23718 	struct dk_label		*dkl;
23719 	short			sum;
23720 	short			*sp;
23721 	int			i;
23722 	int			rval;
23723 
23724 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23725 	    (un->un_state == SD_STATE_OFFLINE)) {
23726 		return (ENXIO);
23727 	}
23728 	ASSERT(mutex_owned(SD_MUTEX(un)));
23729 	mutex_exit(SD_MUTEX(un));
23730 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23731 	mutex_enter(SD_MUTEX(un));
23732 
23733 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23734 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23735 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23736 	dkl->dkl_apc	= un->un_g.dkg_apc;
23737 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23738 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23739 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23740 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23741 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23742 
23743 #if defined(_SUNOS_VTOC_8)
23744 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23745 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23746 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23747 	for (i = 0; i < NDKMAP; i++) {
23748 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23749 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23750 	}
23751 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23752 #elif defined(_SUNOS_VTOC_16)
23753 	dkl->dkl_skew	= un->un_dkg_skew;
23754 #else
23755 #error "No VTOC format defined."
23756 #endif
23757 
23758 	dkl->dkl_magic			= DKL_MAGIC;
23759 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23760 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23761 
23762 	/* Construct checksum for the new disk label */
23763 	sum = 0;
23764 	sp = (short *)dkl;
23765 	i = sizeof (struct dk_label) / sizeof (short);
23766 	while (i--) {
23767 		sum ^= *sp++;
23768 	}
23769 	dkl->dkl_cksum = sum;
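	/*
	 * Note that dkl_cksum was still zero during the XOR pass above
	 * (the label was kmem_zalloc'd), so a verifier that XORs every
	 * 16-bit word of the label, including the stored checksum,
	 * obtains zero for a valid label.
	 */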
23770 
23771 	mutex_exit(SD_MUTEX(un));
23772 
23773 	rval = sd_set_vtoc(un, dkl);
23775 	kmem_free(dkl, sizeof (struct dk_label));
23776 	mutex_enter(SD_MUTEX(un));
23777 	return (rval);
23778 }
23779 
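/*
 *    Function: sd_dkio_set_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to set the device EFI label (DKIOCSETEFI).
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided dk_efi structure specifying
 *			the EFI label data to be written.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EINVAL
 *		EFAULT
 *		ENXIO
 */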
23780 static int
23781 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23782 {
23783 	struct sd_lun	*un = NULL;
23784 	dk_efi_t	user_efi;
23785 	int		rval = 0;
23786 	void		*buffer;
23787 
23788 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23789 		return (ENXIO);
23790 
23791 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23792 		return (EFAULT);
23793 
23794 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23795 
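	/*
	 * The user-supplied EFI data must be non-empty, a whole number
	 * of target blocks, and small enough for a single transfer.
	 */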
	if ((user_efi.dki_length == 0) ||
	    (user_efi.dki_length % un->un_tgt_blocksize) ||
	    (user_efi.dki_length > un->un_max_xfer_size))
		return (EINVAL);
23799 
23800 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23801 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23802 		rval = EFAULT;
23803 	} else {
		/*
		 * Clear the on-disk vtoc labels and the in-core
		 * (soft state) vtoc.
		 */
23808 		mutex_enter(SD_MUTEX(un));
23809 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23810 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23811 				"sd_dkio_set_efi: CLEAR VTOC\n");
23812 			sd_clear_vtoc(un);
23813 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23814 			mutex_exit(SD_MUTEX(un));
23815 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23816 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23817 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23818 			    S_IFBLK,
23819 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23820 			    un->un_node_type, NULL);
23821 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23822 			    S_IFCHR,
23823 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23824 			    un->un_node_type, NULL);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
23827 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23828 		    user_efi.dki_lba, SD_PATH_DIRECT);
23829 		if (rval == 0) {
23830 			mutex_enter(SD_MUTEX(un));
23831 			un->un_f_geometry_is_valid = FALSE;
23832 			mutex_exit(SD_MUTEX(un));
23833 		}
23834 	}
23835 	kmem_free(buffer, user_efi.dki_length);
23836 	return (rval);
23837 }
23838 
23839 /*
23840  *    Function: sd_dkio_get_mboot
23841  *
23842  * Description: This routine is the driver entry point for handling user
23843  *		requests to get the current device mboot (DKIOCGMBOOT)
23844  *
23845  *   Arguments: dev  - the device number
23846  *		arg  - pointer to user provided mboot structure specifying
23847  *			the current mboot.
23848  *		flag - this argument is a pass through to ddi_copyxxx()
23849  *		       directly from the mode argument of ioctl().
23850  *
23851  * Return Code: 0
23852  *		EINVAL
23853  *		EFAULT
23854  *		ENXIO
23855  */
23856 
23857 static int
23858 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23859 {
23860 	struct sd_lun	*un;
23861 	struct mboot	*mboot;
23862 	int		rval;
23863 	size_t		buffer_size;
23864 
23865 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23866 	    (un->un_state == SD_STATE_OFFLINE)) {
23867 		return (ENXIO);
23868 	}
23869 
23870 	if (!un->un_f_mboot_supported || arg == NULL) {
23871 		return (EINVAL);
23872 	}
23873 
23874 	/*
23875 	 * Read the mboot block, located at absolute block 0 on the target.
23876 	 */
23877 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23878 
23879 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23880 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23881 
23882 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23883 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23884 	    SD_PATH_STANDARD)) == 0) {
23885 		if (ddi_copyout(mboot, (void *)arg,
23886 		    sizeof (struct mboot), flag) != 0) {
23887 			rval = EFAULT;
23888 		}
23889 	}
23890 	kmem_free(mboot, buffer_size);
23891 	return (rval);
23892 }
23893 
23894 
23895 /*
23896  *    Function: sd_dkio_set_mboot
23897  *
23898  * Description: This routine is the driver entry point for handling user
23899  *		requests to validate and set the device master boot
23900  *		(DKIOCSMBOOT).
23901  *
23902  *   Arguments: dev  - the device number
23903  *		arg  - pointer to user provided mboot structure used to set the
23904  *			master boot.
23905  *		flag - this argument is a pass through to ddi_copyxxx()
23906  *		       directly from the mode argument of ioctl().
23907  *
23908  * Return Code: 0
23909  *		EINVAL
23910  *		EFAULT
23911  *		ENXIO
23912  */
23913 
23914 static int
23915 sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
23916 {
23917 	struct sd_lun	*un = NULL;
23918 	struct mboot	*mboot = NULL;
23919 	int		rval;
23920 	ushort_t	magic;
23921 
23922 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23923 		return (ENXIO);
23924 	}
23925 
23926 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23927 
23928 	if (!un->un_f_mboot_supported) {
23929 		return (EINVAL);
23930 	}
23931 
23932 	if (arg == NULL) {
23933 		return (EINVAL);
23934 	}
23935 
23936 	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
23937 
23938 	if (ddi_copyin((const void *)arg, mboot,
23939 	    sizeof (struct mboot), flag) != 0) {
23940 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23941 		return (EFAULT);
23942 	}
23943 
23944 	/* Is this really a master boot record? */
23945 	magic = LE_16(mboot->signature);
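	/* MBB_MAGIC is the standard 0xAA55 boot-sector signature. */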
23946 	if (magic != MBB_MAGIC) {
23947 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23948 		return (EINVAL);
23949 	}
23950 
23951 	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
23952 	    SD_PATH_STANDARD);
23953 
23954 	mutex_enter(SD_MUTEX(un));
23955 #if defined(__i386) || defined(__amd64)
23956 	if (rval == 0) {
		/*
		 * mboot has been written successfully.
		 * Update the fdisk and vtoc tables in memory.
		 */
23961 		rval = sd_update_fdisk_and_vtoc(un);
23962 		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
23963 			mutex_exit(SD_MUTEX(un));
23964 			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23965 			return (rval);
23966 		}
23967 	}
23968 
23969 	/*
23970 	 * If the mboot write fails, write the devid anyway, what can it hurt?
23971 	 * Also preserve the device id by writing to the disk acyl for the case
23972 	 * where a devid has been fabricated.
23973 	 */
23974 	if (un->un_f_devid_supported && un->un_f_opt_fab_devid) {
23975 		if (un->un_devid == NULL) {
23976 			sd_register_devid(un, SD_DEVINFO(un),
23977 			    SD_TARGET_IS_UNRESERVED);
23978 		} else {
23979 			/*
23980 			 * The device id for this disk has been
23981 			 * fabricated. Fabricated device id's are
23982 			 * managed by storing them in the last 2
23983 			 * available sectors on the drive. The device
23984 			 * id must be preserved by writing it back out
23985 			 * to this location.
23986 			 */
23987 			if (sd_write_deviceid(un) != 0) {
23988 				ddi_devid_free(un->un_devid);
23989 				un->un_devid = NULL;
23990 			}
23991 		}
23992 	}
23993 
23994 #ifdef __lock_lint
23995 	sd_setup_default_geometry(un);
23996 #endif
23997 
23998 #else
23999 	if (rval == 0) {
		/*
		 * mboot has been written successfully.
		 * Set up the default geometry and VTOC.
		 */
24004 		if (un->un_blockcount <= DK_MAX_BLOCKS)
24005 			sd_setup_default_geometry(un);
24006 	}
24007 #endif
24008 	mutex_exit(SD_MUTEX(un));
24009 	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24010 	return (rval);
24011 }
24012 
24013 
24014 /*
24015  *    Function: sd_setup_default_geometry
24016  *
24017  * Description: This local utility routine sets the default geometry as part of
24018  *		setting the device mboot.
24019  *
24020  *   Arguments: un - driver soft state (unit) structure
24021  *
24022  * Note: This may be redundant with sd_build_default_label.
24023  */
24024 
24025 static void
24026 sd_setup_default_geometry(struct sd_lun *un)
24027 {
24028 	/* zero out the soft state geometry and partition table. */
24029 	bzero(&un->un_g, sizeof (struct dk_geom));
24030 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
24031 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
24032 	un->un_asciilabel[0] = '\0';
24033 
	/*
	 * For the rpm, we use the minimum value for the disk.
	 * For the heads, cylinders, and sectors per track:
	 * if the capacity is <= 1GB, use head = 64 and sect = 32;
	 * otherwise use head = 255 and sect = 63.
	 * Note: the capacity should equal the C*H*S product, so
	 * some truncation of the size occurs due to round-off
	 * error. For CD-ROMs this truncation can have adverse
	 * side effects, so we return ncyl and nhead as 1 (nsect
	 * would overflow for most CD-ROMs, as nsect is of type
	 * ushort).
	 */
24046 	if (ISCD(un)) {
24047 		un->un_g.dkg_ncyl = 1;
24048 		un->un_g.dkg_nhead = 1;
24049 		un->un_g.dkg_nsect = un->un_blockcount;
24050 	} else {
24051 		if (un->un_blockcount <= 0x1000) {
24052 			/* Needed for unlabeled SCSI floppies. */
24053 			un->un_g.dkg_nhead = 2;
24054 			un->un_g.dkg_ncyl = 80;
24055 			un->un_g.dkg_pcyl = 80;
24056 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
24057 		} else if (un->un_blockcount <= 0x200000) {
24058 			un->un_g.dkg_nhead = 64;
24059 			un->un_g.dkg_nsect = 32;
24060 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
24061 		} else {
24062 			un->un_g.dkg_nhead = 255;
24063 			un->un_g.dkg_nsect = 63;
24064 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
24065 		}
24066 		un->un_blockcount = un->un_g.dkg_ncyl *
24067 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
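		/*
		 * Worked example (hypothetical capacity): a disk of
		 * 4194304 blocks gets nhead = 255, nsect = 63, and
		 * ncyl = 4194304 / 16065 = 261, so un_blockcount is
		 * truncated to 261 * 255 * 63 = 4192965 blocks.
		 */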
24068 	}
24069 	un->un_g.dkg_acyl = 0;
24070 	un->un_g.dkg_bcyl = 0;
24071 	un->un_g.dkg_intrlv = 1;
24072 	un->un_g.dkg_rpm = 200;
24073 	un->un_g.dkg_read_reinstruct = 0;
24074 	un->un_g.dkg_write_reinstruct = 0;
24075 	if (un->un_g.dkg_pcyl == 0) {
24076 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
24077 	}
24078 
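	/*
	 * In the default map, both slice 'a' (slice 0) and slice 'c'
	 * (slice 2, the conventional whole-disk slice) cover the
	 * entire disk.
	 */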
24079 	un->un_map['a'-'a'].dkl_cylno = 0;
24080 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
24081 	un->un_map['c'-'a'].dkl_cylno = 0;
24082 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
24083 	un->un_f_geometry_is_valid = FALSE;
24084 }
24085 
24086 
24087 #if defined(__i386) || defined(__amd64)
24088 /*
24089  *    Function: sd_update_fdisk_and_vtoc
24090  *
24091  * Description: This local utility routine updates the device fdisk and vtoc
24092  *		as part of setting the device mboot.
24093  *
24094  *   Arguments: un - driver soft state (unit) structure
24095  *
24096  * Return Code: 0 for success or errno-type return code.
24097  *
 *    Note:x86: This looks like a duplicate of sd_validate_geometry(), but
 *		these did exist separately in x86 sd.c.
24100  */
24101 
24102 static int
24103 sd_update_fdisk_and_vtoc(struct sd_lun *un)
24104 {
24105 	static char	labelstring[128];
24106 	static char	buf[256];
24107 	char		*label = 0;
24108 	int		count;
24109 	int		label_rc = 0;
24110 	int		gvalid = un->un_f_geometry_is_valid;
24111 	int		fdisk_rval;
24112 	int		lbasize;
24113 	int		capacity;
24114 
24115 	ASSERT(mutex_owned(SD_MUTEX(un)));
24116 
24117 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
24118 		return (EINVAL);
24119 	}
24120 
24121 	if (un->un_f_blockcount_is_valid == FALSE) {
24122 		return (EINVAL);
24123 	}
24124 
24125 #if defined(_SUNOS_VTOC_16)
24126 	/*
24127 	 * Set up the "whole disk" fdisk partition; this should always
24128 	 * exist, regardless of whether the disk contains an fdisk table
24129 	 * or vtoc.
24130 	 */
24131 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
24132 	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
24133 #endif	/* defined(_SUNOS_VTOC_16) */
24134 
	/*
	 * Copy the lbasize and capacity so that if they are reset
	 * while we are not holding SD_MUTEX(un), we will continue to
	 * use valid values after SD_MUTEX(un) is reacquired.
	 */
24141 	lbasize  = un->un_tgt_blocksize;
24142 	capacity = un->un_blockcount;
24143 
	/*
	 * Refresh the logical and physical geometry caches (data from
	 * the mode sense format/rigid disk geometry pages and from
	 * scsi_ifgetcap("geometry")).
	 */
24149 	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
24150 
	/*
	 * Only DIRECT ACCESS devices will have Sun labels.
	 * CDs supposedly have a Sun label, too.
	 */
24155 	if (un->un_f_vtoc_label_supported) {
24156 		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
24157 		    SD_PATH_DIRECT);
24158 		if (fdisk_rval == SD_CMD_FAILURE) {
24159 			ASSERT(mutex_owned(SD_MUTEX(un)));
24160 			return (EIO);
24161 		}
24162 
24163 		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
24164 			ASSERT(mutex_owned(SD_MUTEX(un)));
24165 			return (EACCES);
24166 		}
24167 
24168 		if (un->un_solaris_size <= DK_LABEL_LOC) {
24169 			/*
24170 			 * Found fdisk table but no Solaris partition entry,
24171 			 * so don't call sd_uselabel() and don't create
24172 			 * a default label.
24173 			 */
24174 			label_rc = 0;
24175 			un->un_f_geometry_is_valid = TRUE;
24176 			goto no_solaris_partition;
24177 		}
24178 
24179 #if defined(_SUNOS_VTOC_8)
24180 		label = (char *)un->un_asciilabel;
24181 #elif defined(_SUNOS_VTOC_16)
24182 		label = (char *)un->un_vtoc.v_asciilabel;
24183 #else
24184 #error "No VTOC format defined."
24185 #endif
24186 	} else if (capacity < 0) {
24187 		ASSERT(mutex_owned(SD_MUTEX(un)));
24188 		return (EINVAL);
24189 	}
24190 
	/*
	 * For removable media we reach here only if a Solaris partition
	 * has been found. If un_f_geometry_is_valid is FALSE, the
	 * Solaris partition has changed from the previous one, so we
	 * set up a default VTOC in this case.
	 */
24198 	if (un->un_f_geometry_is_valid == FALSE) {
24199 		sd_build_default_label(un);
24200 		label_rc = 0;
24201 	}
24202 
no_solaris_partition:
	if ((!un->un_f_has_removable_media ||
	    un->un_mediastate == DKIO_EJECTED) &&
	    (un->un_state == SD_STATE_NORMAL && !gvalid)) {
		/*
		 * Print out a message indicating who and what we are.
		 * We do this only when we actually validate the
		 * geometry; sd_validate_geometry() may be called at
		 * other times (e.g. from ioctl()s such as Get VTOC), in
		 * which case we do not want to print the label.
		 * If the geometry is valid, print the label string;
		 * otherwise print vendor and product info, if available.
		 */
24217 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
24218 			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
24219 		} else {
24220 			mutex_enter(&sd_label_mutex);
24221 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
24222 			    labelstring);
24223 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
24224 			    &labelstring[64]);
24225 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
24226 			    labelstring, &labelstring[64]);
24227 			if (un->un_f_blockcount_is_valid == TRUE) {
24228 				(void) sprintf(&buf[strlen(buf)],
24229 				    ", %" PRIu64 " %u byte blocks\n",
24230 				    un->un_blockcount,
24231 				    un->un_tgt_blocksize);
24232 			} else {
24233 				(void) sprintf(&buf[strlen(buf)],
24234 				    ", (unknown capacity)\n");
24235 			}
24236 			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
24237 			mutex_exit(&sd_label_mutex);
24238 		}
24239 	}
24240 
24241 #if defined(_SUNOS_VTOC_16)
24242 	/*
24243 	 * If we have valid geometry, set up the remaining fdisk partitions.
24244 	 * Note that dkl_cylno is not used for the fdisk map entries, so
24245 	 * we set it to an entirely bogus value.
24246 	 */
24247 	for (count = 0; count < FD_NUMPART; count++) {
24248 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
24249 		un->un_map[FDISK_P1 + count].dkl_nblk =
24250 		    un->un_fmap[count].fmap_nblk;
24251 		un->un_offset[FDISK_P1 + count] =
24252 		    un->un_fmap[count].fmap_start;
24253 	}
24254 #endif
24255 
24256 	for (count = 0; count < NDKMAP; count++) {
24257 #if defined(_SUNOS_VTOC_8)
24258 		struct dk_map *lp  = &un->un_map[count];
24259 		un->un_offset[count] =
24260 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
24261 #elif defined(_SUNOS_VTOC_16)
24262 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
24263 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
24264 #else
24265 #error "No VTOC format defined."
24266 #endif
24267 	}
24268 
24269 	ASSERT(mutex_owned(SD_MUTEX(un)));
24270 	return (label_rc);
24271 }
24272 #endif
24273 
24274 
24275 /*
24276  *    Function: sd_check_media
24277  *
24278  * Description: This utility routine implements the functionality for the
24279  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24280  *		driver state changes from that specified by the user
 *		drive media state changes from that specified by the user
24282  *		DKIO_EJECTED and the current media state is inserted this
24283  *		routine will immediately return DKIO_INSERTED. However, if the
24284  *		current media state is not inserted the user thread will be
24285  *		blocked until the drive state changes. If DKIO_NONE is specified
24286  *		the user thread will block until a drive state change occurs.
24287  *
24288  *   Arguments: dev  - the device number
24289  *		state  - user pointer to a dkio_state, updated with the current
24290  *			drive state at return.
24291  *
24292  * Return Code: ENXIO
24293  *		EIO
24294  *		EAGAIN
24295  *		EINTR
24296  */
24297 
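/*
 * Illustrative userland usage (a sketch, not part of this driver):
 *
 *	enum dkio_state state = DKIO_NONE;
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		(process the new media state in "state")
 *	}
 *
 * Each DKIOCSTATE call blocks until the media state differs from the
 * value passed in, as described above.
 */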
24298 static int
24299 sd_check_media(dev_t dev, enum dkio_state state)
24300 {
24301 	struct sd_lun		*un = NULL;
24302 	enum dkio_state		prev_state;
24303 	opaque_t		token = NULL;
24304 	int			rval = 0;
24305 
24306 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24307 		return (ENXIO);
24308 	}
24309 
24310 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24311 
24312 	mutex_enter(SD_MUTEX(un));
24313 
24314 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24315 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24316 
24317 	prev_state = un->un_mediastate;
24318 
24319 	/* is there anything to do? */
24320 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24321 		/*
24322 		 * submit the request to the scsi_watch service;
24323 		 * scsi_media_watch_cb() does the real work
24324 		 */
24325 		mutex_exit(SD_MUTEX(un));
24326 
24327 		/*
24328 		 * This change handles the case where a scsi watch request is
24329 		 * added to a device that is powered down. To accomplish this
24330 		 * we power up the device before adding the scsi watch request,
24331 		 * since the scsi watch sends a TUR directly to the device
24332 		 * which the device cannot handle if it is powered down.
24333 		 */
24334 		if (sd_pm_entry(un) != DDI_SUCCESS) {
24335 			mutex_enter(SD_MUTEX(un));
24336 			goto done;
24337 		}
24338 
24339 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
24340 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24341 		    (caddr_t)dev);
24342 
24343 		sd_pm_exit(un);
24344 
24345 		mutex_enter(SD_MUTEX(un));
24346 		if (token == NULL) {
24347 			rval = EAGAIN;
24348 			goto done;
24349 		}
24350 
24351 		/*
24352 		 * This is a special case IOCTL that doesn't return
24353 		 * until the media state changes. Routine sdpower
24354 		 * knows about and handles this so don't count it
24355 		 * as an active cmd in the driver, which would
24356 		 * keep the device busy to the pm framework.
24357 		 * If the count isn't decremented the device can't
24358 		 * be powered down.
24359 		 */
24360 		un->un_ncmds_in_driver--;
24361 		ASSERT(un->un_ncmds_in_driver >= 0);
24362 
24363 		/*
24364 		 * if a prior request had been made, this will be the same
24365 		 * token, as scsi_watch was designed that way.
24366 		 */
24367 		un->un_swr_token = token;
24368 		un->un_specified_mediastate = state;
24369 
		/*
		 * Now wait for the media state to change. We will only
		 * be signalled when mediastate differs from the
		 * specified state, but it is still better to re-test
		 * the condition, since there is a 2 sec cv_broadcast
		 * delay when mediastate == DKIO_INSERTED.
		 */
24376 		SD_TRACE(SD_LOG_COMMON, un,
24377 		    "sd_check_media: waiting for media state change\n");
24378 		while (un->un_mediastate == state) {
24379 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24380 				SD_TRACE(SD_LOG_COMMON, un,
24381 				    "sd_check_media: waiting for media state "
24382 				    "was interrupted\n");
24383 				un->un_ncmds_in_driver++;
24384 				rval = EINTR;
24385 				goto done;
24386 			}
24387 			SD_TRACE(SD_LOG_COMMON, un,
24388 			    "sd_check_media: received signal, state=%x\n",
24389 			    un->un_mediastate);
24390 		}
24391 		/*
24392 		 * Inc the counter to indicate the device once again
24393 		 * has an active outstanding cmd.
24394 		 */
24395 		un->un_ncmds_in_driver++;
24396 	}
24397 
24398 	/* invalidate geometry */
24399 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24400 		sr_ejected(un);
24401 	}
24402 
24403 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24404 		uint64_t	capacity;
24405 		uint_t		lbasize;
24406 
24407 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24408 		mutex_exit(SD_MUTEX(un));
		/*
		 * Since the following routines use SD_PATH_DIRECT, we must
		 * call PM directly before the upcoming disk accesses. This
		 * may cause the disk to be powered up and spun up.
		 */
24414 
24415 		if (sd_pm_entry(un) == DDI_SUCCESS) {
24416 			rval = sd_send_scsi_READ_CAPACITY(un,
24417 			    &capacity,
24418 			    &lbasize, SD_PATH_DIRECT);
24419 			if (rval != 0) {
24420 				sd_pm_exit(un);
24421 				mutex_enter(SD_MUTEX(un));
24422 				goto done;
24423 			}
24424 		} else {
24425 			rval = EIO;
24426 			mutex_enter(SD_MUTEX(un));
24427 			goto done;
24428 		}
24429 		mutex_enter(SD_MUTEX(un));
24430 
24431 		sd_update_block_info(un, lbasize, capacity);
24432 
24433 		un->un_f_geometry_is_valid	= FALSE;
24434 		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24435 
24436 		mutex_exit(SD_MUTEX(un));
24437 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24438 		    SD_PATH_DIRECT);
24439 		sd_pm_exit(un);
24440 
24441 		mutex_enter(SD_MUTEX(un));
24442 	}
24443 done:
24444 	un->un_f_watcht_stopped = FALSE;
24445 	if (un->un_swr_token) {
24446 		/*
24447 		 * Use of this local token and the mutex ensures that we avoid
24448 		 * some race conditions associated with terminating the
24449 		 * scsi watch.
24450 		 */
24451 		token = un->un_swr_token;
24452 		un->un_swr_token = (opaque_t)NULL;
24453 		mutex_exit(SD_MUTEX(un));
24454 		(void) scsi_watch_request_terminate(token,
24455 		    SCSI_WATCH_TERMINATE_WAIT);
24456 		mutex_enter(SD_MUTEX(un));
24457 	}
24458 
	/*
	 * Update the capacity kstat value if there was no media
	 * previously (the capacity kstat is 0) and media has now been
	 * inserted (un_f_blockcount_is_valid == TRUE).
	 */
24464 	if (un->un_errstats) {
24465 		struct sd_errstats	*stp = NULL;
24466 
24467 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24468 		if ((stp->sd_capacity.value.ui64 == 0) &&
24469 		    (un->un_f_blockcount_is_valid == TRUE)) {
24470 			stp->sd_capacity.value.ui64 =
24471 			    (uint64_t)((uint64_t)un->un_blockcount *
24472 			    un->un_sys_blocksize);
24473 		}
24474 	}
24475 	mutex_exit(SD_MUTEX(un));
24476 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24477 	return (rval);
24478 }
24479 
24480 
24481 /*
24482  *    Function: sd_delayed_cv_broadcast
24483  *
24484  * Description: Delayed cv_broadcast to allow for target to recover from media
24485  *		insertion.
24486  *
24487  *   Arguments: arg - driver soft state (unit) structure
24488  */
24489 
24490 static void
24491 sd_delayed_cv_broadcast(void *arg)
24492 {
24493 	struct sd_lun *un = arg;
24494 
24495 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24496 
24497 	mutex_enter(SD_MUTEX(un));
24498 	un->un_dcvb_timeid = NULL;
24499 	cv_broadcast(&un->un_state_cv);
24500 	mutex_exit(SD_MUTEX(un));
24501 }
24502 
24503 
24504 /*
24505  *    Function: sd_media_watch_cb
24506  *
24507  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24508  *		routine processes the TUR sense data and updates the driver
24509  *		state if a transition has occurred. The user thread
24510  *		(sd_check_media) is then signalled.
24511  *
24512  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24513  *			among multiple watches that share this callback function
24514  *		resultp - scsi watch facility result packet containing scsi
24515  *			  packet, status byte and sense data
24516  *
24517  * Return Code: 0 for success, -1 for failure
24518  */
24519 
24520 static int
24521 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24522 {
24523 	struct sd_lun			*un;
24524 	struct scsi_status		*statusp = resultp->statusp;
24525 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
24526 	enum dkio_state			state = DKIO_NONE;
24527 	dev_t				dev = (dev_t)arg;
24528 	uchar_t				actual_sense_length;
24529 	uint8_t				skey, asc, ascq;
24530 
24531 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24532 		return (-1);
24533 	}
24534 	actual_sense_length = resultp->actual_sense_length;
24535 
24536 	mutex_enter(SD_MUTEX(un));
24537 	SD_TRACE(SD_LOG_COMMON, un,
24538 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24539 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24540 
24541 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24542 		un->un_mediastate = DKIO_DEV_GONE;
24543 		cv_broadcast(&un->un_state_cv);
24544 		mutex_exit(SD_MUTEX(un));
24545 
24546 		return (0);
24547 	}
24548 
	/*
	 * If there was a check condition, then sensep points to valid
	 * sense data. If the status was not a check condition but a
	 * reservation or busy status, then the new state is DKIO_NONE.
	 */
24554 	if (sensep != NULL) {
24555 		skey = scsi_sense_key(sensep);
24556 		asc = scsi_sense_asc(sensep);
24557 		ascq = scsi_sense_ascq(sensep);
24558 
24559 		SD_INFO(SD_LOG_COMMON, un,
24560 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24561 		    skey, asc, ascq);
24562 		/* This routine only uses up to 13 bytes of sense data. */
24563 		if (actual_sense_length >= 13) {
24564 			if (skey == KEY_UNIT_ATTENTION) {
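				/*
				 * ASC 0x28: NOT READY TO READY CHANGE,
				 * MEDIUM MAY HAVE CHANGED.
				 */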
24565 				if (asc == 0x28) {
24566 					state = DKIO_INSERTED;
24567 				}
24568 			} else {
				/*
				 * A sense key/ASC/ASCQ of 02/04/02 means
				 * the host should send a START command.
				 * Explicitly leave the media state as is
				 * (inserted), since the media is present
				 * and the host has stopped the device for
				 * PM reasons. The next real read/write to
				 * the media will bring the device back to
				 * the proper state for media access.
				 */
24580 				if ((skey == KEY_NOT_READY) &&
24581 				    (asc == 0x3a)) {
24582 					state = DKIO_EJECTED;
24583 				}
24584 
				/*
				 * If the drive is busy with an operation
				 * or a long write, keep the media in an
				 * inserted state.
				 */
24590 
24591 				if ((skey == KEY_NOT_READY) &&
24592 				    (asc == 0x04) &&
24593 				    ((ascq == 0x02) ||
24594 				    (ascq == 0x07) ||
24595 				    (ascq == 0x08))) {
24596 					state = DKIO_INSERTED;
24597 				}
24598 			}
24599 		}
24600 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24601 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24602 		state = DKIO_INSERTED;
24603 	}
24604 
24605 	SD_TRACE(SD_LOG_COMMON, un,
24606 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24607 	    state, un->un_specified_mediastate);
24608 
24609 	/*
24610 	 * now signal the waiting thread if this is *not* the specified state;
24611 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24612 	 * to recover
24613 	 */
24614 	if (state != un->un_specified_mediastate) {
24615 		un->un_mediastate = state;
24616 		if (state == DKIO_INSERTED) {
24617 			/*
24618 			 * delay the signal to give the drive a chance
24619 			 * to do what it apparently needs to do
24620 			 */
24621 			SD_TRACE(SD_LOG_COMMON, un,
24622 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24623 			if (un->un_dcvb_timeid == NULL) {
24624 				un->un_dcvb_timeid =
24625 				    timeout(sd_delayed_cv_broadcast, un,
24626 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24627 			}
24628 		} else {
24629 			SD_TRACE(SD_LOG_COMMON, un,
24630 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24631 			cv_broadcast(&un->un_state_cv);
24632 		}
24633 	}
24634 	mutex_exit(SD_MUTEX(un));
24635 	return (0);
24636 }
24637 
24638 
24639 /*
24640  *    Function: sd_dkio_get_temp
24641  *
24642  * Description: This routine is the driver entry point for handling ioctl
24643  *		requests to get the disk temperature.
24644  *
24645  *   Arguments: dev  - the device number
24646  *		arg  - pointer to user provided dk_temperature structure.
24647  *		flag - this argument is a pass through to ddi_copyxxx()
24648  *		       directly from the mode argument of ioctl().
24649  *
24650  * Return Code: 0
24651  *		EFAULT
24652  *		ENXIO
24653  *		EAGAIN
24654  */
24655 
24656 static int
24657 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24658 {
24659 	struct sd_lun		*un = NULL;
24660 	struct dk_temperature	*dktemp = NULL;
24661 	uchar_t			*temperature_page;
24662 	int			rval = 0;
24663 	int			path_flag = SD_PATH_STANDARD;
24664 
24665 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24666 		return (ENXIO);
24667 	}
24668 
24669 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24670 
24671 	/* copyin the disk temp argument to get the user flags */
24672 	if (ddi_copyin((void *)arg, dktemp,
24673 	    sizeof (struct dk_temperature), flag) != 0) {
24674 		rval = EFAULT;
24675 		goto done;
24676 	}
24677 
24678 	/* Initialize the temperature to invalid. */
24679 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24680 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24681 
24682 	/*
24683 	 * Note: Investigate removing the "bypass pm" semantic.
24684 	 * Can we just bypass PM always?
24685 	 */
24686 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24687 		path_flag = SD_PATH_DIRECT;
24688 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24689 		mutex_enter(&un->un_pm_mutex);
24690 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * If DKT_BYPASS_PM is set and the drive happens
			 * to be in low power mode, we cannot wake it up;
			 * return EAGAIN.
			 */
24696 			mutex_exit(&un->un_pm_mutex);
24697 			rval = EAGAIN;
24698 			goto done;
24699 		} else {
24700 			/*
24701 			 * Indicate to PM the device is busy. This is required
24702 			 * to avoid a race - i.e. the ioctl is issuing a
24703 			 * command and the pm framework brings down the device
24704 			 * to low power mode (possible power cut-off on some
24705 			 * platforms).
24706 			 */
24707 			mutex_exit(&un->un_pm_mutex);
24708 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24709 				rval = EAGAIN;
24710 				goto done;
24711 			}
24712 		}
24713 	}
24714 
24715 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24716 
24717 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
24718 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
24719 		goto done2;
24720 	}
24721 
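	/*
	 * Layout of the temperature log page: a 4-byte page header is
	 * followed by log parameters, each consisting of a 2-byte
	 * parameter code, a control byte, a length byte, and the
	 * parameter bytes themselves. The fixed offsets tested below
	 * follow from that layout; a byte value of 0xFF means the
	 * temperature is not available.
	 */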
24722 	/*
24723 	 * For the current temperature verify that the parameter length is 0x02
24724 	 * and the parameter code is 0x00
24725 	 */
24726 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24727 	    (temperature_page[5] == 0x00)) {
24728 		if (temperature_page[9] == 0xFF) {
24729 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24730 		} else {
24731 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24732 		}
24733 	}
24734 
24735 	/*
24736 	 * For the reference temperature verify that the parameter
24737 	 * length is 0x02 and the parameter code is 0x01
24738 	 */
24739 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24740 	    (temperature_page[11] == 0x01)) {
24741 		if (temperature_page[15] == 0xFF) {
24742 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24743 		} else {
24744 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24745 		}
24746 	}
24747 
24748 	/* Do the copyout regardless of the temperature commands status. */
24749 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24750 	    flag) != 0) {
24751 		rval = EFAULT;
24752 	}
24753 
24754 done2:
24755 	if (path_flag == SD_PATH_DIRECT) {
24756 		sd_pm_exit(un);
24757 	}
24758 
24759 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24760 done:
24761 	if (dktemp != NULL) {
24762 		kmem_free(dktemp, sizeof (struct dk_temperature));
24763 	}
24764 
24765 	return (rval);
24766 }
24767 
24768 
24769 /*
24770  *    Function: sd_log_page_supported
24771  *
24772  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24773  *		supported log pages.
24774  *
 *   Arguments: un - driver soft state (unit) structure
 *		log_page - the log page code to search for
24777  *
24778  * Return Code: -1 - on error (log sense is optional and may not be supported).
24779  *		0  - log page not found.
24780  *  		1  - log page found.
24781  */
24782 
24783 static int
24784 sd_log_page_supported(struct sd_lun *un, int log_page)
24785 {
24786 	uchar_t *log_page_data;
24787 	int	i;
24788 	int	match = 0;
24789 	int	log_size;
24790 
24791 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24792 
24793 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24794 	    SD_PATH_DIRECT) != 0) {
24795 		SD_ERROR(SD_LOG_COMMON, un,
24796 		    "sd_log_page_supported: failed log page retrieval\n");
24797 		kmem_free(log_page_data, 0xFF);
24798 		return (-1);
24799 	}
24800 	log_size = log_page_data[3];
24801 
	/*
	 * The list of supported log pages starts at byte 4, after the
	 * 4-byte page header. Check until we run out of log pages or a
	 * match is found.
	 */
24806 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24807 		if (log_page_data[i] == log_page) {
24808 			match++;
24809 		}
24810 	}
24811 	kmem_free(log_page_data, 0xFF);
24812 	return (match);
24813 }
24814 
24815 
24816 /*
24817  *    Function: sd_mhdioc_failfast
24818  *
24819  * Description: This routine is the driver entry point for handling ioctl
24820  *		requests to enable/disable the multihost failfast option.
24821  *		(MHIOCENFAILFAST)
24822  *
24823  *   Arguments: dev	- the device number
24824  *		arg	- user specified probing interval.
24825  *		flag	- this argument is a pass through to ddi_copyxxx()
24826  *			  directly from the mode argument of ioctl().
24827  *
24828  * Return Code: 0
24829  *		EFAULT
24830  *		ENXIO
24831  */
24832 
24833 static int
24834 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24835 {
24836 	struct sd_lun	*un = NULL;
24837 	int		mh_time;
24838 	int		rval = 0;
24839 
24840 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24841 		return (ENXIO);
24842 	}
24843 
24844 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24845 		return (EFAULT);
24846 
24847 	if (mh_time) {
24848 		mutex_enter(SD_MUTEX(un));
24849 		un->un_resvd_status |= SD_FAILFAST;
24850 		mutex_exit(SD_MUTEX(un));
		/*
		 * If mh_time is INT_MAX, then this ioctl is being used for
		 * SCSI-3 PGR purposes, and we don't need to spawn a watch
		 * thread.
		 */
24855 		if (mh_time != INT_MAX) {
24856 			rval = sd_check_mhd(dev, mh_time);
24857 		}
24858 	} else {
24859 		(void) sd_check_mhd(dev, 0);
24860 		mutex_enter(SD_MUTEX(un));
24861 		un->un_resvd_status &= ~SD_FAILFAST;
24862 		mutex_exit(SD_MUTEX(un));
24863 	}
24864 	return (rval);
24865 }
24866 
24867 
24868 /*
24869  *    Function: sd_mhdioc_takeown
24870  *
24871  * Description: This routine is the driver entry point for handling ioctl
24872  *		requests to forcefully acquire exclusive access rights to the
24873  *		multihost disk (MHIOCTKOWN).
24874  *
24875  *   Arguments: dev	- the device number
24876  *		arg	- user provided structure specifying the delay
24877  *			  parameters in milliseconds
24878  *		flag	- this argument is a pass through to ddi_copyxxx()
24879  *			  directly from the mode argument of ioctl().
24880  *
24881  * Return Code: 0
24882  *		EFAULT
24883  *		ENXIO
24884  */
24885 
24886 static int
24887 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24888 {
24889 	struct sd_lun		*un = NULL;
24890 	struct mhioctkown	*tkown = NULL;
24891 	int			rval = 0;
24892 
24893 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24894 		return (ENXIO);
24895 	}
24896 
24897 	if (arg != NULL) {
24898 		tkown = (struct mhioctkown *)
24899 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24900 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24901 		if (rval != 0) {
24902 			rval = EFAULT;
24903 			goto error;
24904 		}
24905 	}
24906 
24907 	rval = sd_take_ownership(dev, tkown);
24908 	mutex_enter(SD_MUTEX(un));
24909 	if (rval == 0) {
24910 		un->un_resvd_status |= SD_RESERVE;
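		/*
		 * reinstate_resv_delay is supplied in milliseconds (see
		 * the function header); the driver-wide value is kept
		 * in microseconds.
		 */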
24911 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24912 			sd_reinstate_resv_delay =
24913 			    tkown->reinstate_resv_delay * 1000;
24914 		} else {
24915 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24916 		}
24917 		/*
24918 		 * Give the scsi_watch routine interval set by
24919 		 * the MHIOCENFAILFAST ioctl precedence here.
24920 		 */
24921 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24922 			mutex_exit(SD_MUTEX(un));
24923 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
24924 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24925 			    "sd_mhdioc_takeown : %d\n",
24926 			    sd_reinstate_resv_delay);
24927 		} else {
24928 			mutex_exit(SD_MUTEX(un));
24929 		}
24930 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24931 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24932 	} else {
24933 		un->un_resvd_status &= ~SD_RESERVE;
24934 		mutex_exit(SD_MUTEX(un));
24935 	}
24936 
24937 error:
24938 	if (tkown != NULL) {
24939 		kmem_free(tkown, sizeof (struct mhioctkown));
24940 	}
24941 	return (rval);
24942 }
24943 
24944 
24945 /*
24946  *    Function: sd_mhdioc_release
24947  *
24948  * Description: This routine is the driver entry point for handling ioctl
24949  *		requests to release exclusive access rights to the multihost
24950  *		disk (MHIOCRELEASE).
24951  *
24952  *   Arguments: dev	- the device number
24953  *
24954  * Return Code: 0
24955  *		ENXIO
24956  */
24957 
24958 static int
24959 sd_mhdioc_release(dev_t dev)
24960 {
24961 	struct sd_lun		*un = NULL;
24962 	timeout_id_t		resvd_timeid_save;
24963 	int			resvd_status_save;
24964 	int			rval = 0;
24965 
24966 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24967 		return (ENXIO);
24968 	}
24969 
24970 	mutex_enter(SD_MUTEX(un));
24971 	resvd_status_save = un->un_resvd_status;
24972 	un->un_resvd_status &=
24973 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24974 	if (un->un_resvd_timeid) {
24975 		resvd_timeid_save = un->un_resvd_timeid;
24976 		un->un_resvd_timeid = NULL;
24977 		mutex_exit(SD_MUTEX(un));
24978 		(void) untimeout(resvd_timeid_save);
24979 	} else {
24980 		mutex_exit(SD_MUTEX(un));
24981 	}
24982 
24983 	/*
24984 	 * destroy any pending timeout thread that may be attempting to
24985 	 * reinstate reservation on this device.
24986 	 */
24987 	sd_rmv_resv_reclaim_req(dev);
24988 
24989 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24990 		mutex_enter(SD_MUTEX(un));
24991 		if ((un->un_mhd_token) &&
24992 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24993 			mutex_exit(SD_MUTEX(un));
24994 			(void) sd_check_mhd(dev, 0);
24995 		} else {
24996 			mutex_exit(SD_MUTEX(un));
24997 		}
24998 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24999 		    sd_mhd_reset_notify_cb, (caddr_t)un);
25000 	} else {
25001 		/*
25002 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
25003 		 */
25004 		mutex_enter(SD_MUTEX(un));
25005 		un->un_resvd_status = resvd_status_save;
25006 		mutex_exit(SD_MUTEX(un));
25007 	}
25008 	return (rval);
25009 }
25010 
25011 
25012 /*
25013  *    Function: sd_mhdioc_register_devid
25014  *
25015  * Description: This routine is the driver entry point for handling ioctl
25016  *		requests to register the device id (MHIOCREREGISTERDEVID).
25017  *
25018  *		Note: The implementation for this ioctl has been updated to
25019  *		be consistent with the original PSARC case (1999/357)
25020  *		(4375899, 4241671, 4220005)
25021  *
25022  *   Arguments: dev	- the device number
25023  *
25024  * Return Code: 0
25025  *		ENXIO
25026  */
25027 
25028 static int
25029 sd_mhdioc_register_devid(dev_t dev)
25030 {
25031 	struct sd_lun	*un = NULL;
25032 	int		rval = 0;
25033 
25034 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25035 		return (ENXIO);
25036 	}
25037 
25038 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25039 
25040 	mutex_enter(SD_MUTEX(un));
25041 
25042 	/* If a devid already exists, de-register it */
25043 	if (un->un_devid != NULL) {
25044 		ddi_devid_unregister(SD_DEVINFO(un));
		/*
		 * After unregistering the devid, free the devid memory.
		 */
25048 		ddi_devid_free(un->un_devid);
25049 		un->un_devid = NULL;
25050 	}
25051 
25052 	/* Check for reservation conflict */
25053 	mutex_exit(SD_MUTEX(un));
25054 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
25055 	mutex_enter(SD_MUTEX(un));
25056 
25057 	switch (rval) {
25058 	case 0:
25059 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
25060 		break;
25061 	case EACCES:
25062 		break;
25063 	default:
25064 		rval = EIO;
25065 	}
25066 
25067 	mutex_exit(SD_MUTEX(un));
25068 	return (rval);
25069 }
25070 
25071 
25072 /*
25073  *    Function: sd_mhdioc_inkeys
25074  *
25075  * Description: This routine is the driver entry point for handling ioctl
25076  *		requests to issue the SCSI-3 Persistent In Read Keys command
25077  *		to the device (MHIOCGRP_INKEYS).
25078  *
25079  *   Arguments: dev	- the device number
25080  *		arg	- user provided in_keys structure
25081  *		flag	- this argument is a pass through to ddi_copyxxx()
25082  *			  directly from the mode argument of ioctl().
25083  *
25084  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
25085  *		ENXIO
25086  *		EFAULT
25087  */
25088 
25089 static int
25090 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
25091 {
25092 	struct sd_lun		*un;
25093 	mhioc_inkeys_t		inkeys;
25094 	int			rval = 0;
25095 
25096 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25097 		return (ENXIO);
25098 	}
25099 
25100 #ifdef _MULTI_DATAMODEL
25101 	switch (ddi_model_convert_from(flag & FMODELS)) {
25102 	case DDI_MODEL_ILP32: {
25103 		struct mhioc_inkeys32	inkeys32;
25104 
25105 		if (ddi_copyin(arg, &inkeys32,
25106 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
25107 			return (EFAULT);
25108 		}
25109 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
25110 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25111 		    &inkeys, flag)) != 0) {
25112 			return (rval);
25113 		}
25114 		inkeys32.generation = inkeys.generation;
25115 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
25116 		    flag) != 0) {
25117 			return (EFAULT);
25118 		}
25119 		break;
25120 	}
25121 	case DDI_MODEL_NONE:
25122 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
25123 		    flag) != 0) {
25124 			return (EFAULT);
25125 		}
25126 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25127 		    &inkeys, flag)) != 0) {
25128 			return (rval);
25129 		}
25130 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
25131 		    flag) != 0) {
25132 			return (EFAULT);
25133 		}
25134 		break;
25135 	}
25136 
25137 #else /* ! _MULTI_DATAMODEL */
25138 
25139 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
25140 		return (EFAULT);
25141 	}
25142 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
25143 	if (rval != 0) {
25144 		return (rval);
25145 	}
25146 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
25147 		return (EFAULT);
25148 	}
25149 
25150 #endif /* _MULTI_DATAMODEL */
25151 
25152 	return (rval);
25153 }
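
/*
 * Illustrative userland sketch (not part of the driver): one way to call
 * MHIOCGRP_INKEYS using the mhioc structures from <sys/mhd.h>.  The
 * two-pass pattern below (size the list first, then fetch it) follows
 * from the listsize/listlen fields; the device path, helper name, and
 * error handling are hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/mhd.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <stropts.h>
 *
 *	int
 *	dump_keys(const char *rdsk)
 *	{
 *		mhioc_inkeys_t ik;
 *		mhioc_key_list_t kl;
 *		int fd;
 *
 *		if ((fd = open(rdsk, O_RDWR)) < 0)
 *			return (-1);
 *
 *		First pass: offer no space; the driver reports the key
 *		count in kl.listlen.
 *		kl.listsize = 0;
 *		kl.list = NULL;
 *		ik.li = &kl;
 *		if (ioctl(fd, MHIOCGRP_INKEYS, &ik) != 0)
 *			return (-1);
 *
 *		Second pass: allocate that many keys and fetch them.
 *		kl.listsize = kl.listlen;
 *		kl.list = calloc(kl.listsize, sizeof (mhioc_resv_key_t));
 *		if (kl.list == NULL ||
 *		    ioctl(fd, MHIOCGRP_INKEYS, &ik) != 0)
 *			return (-1);
 *		return (0);
 *	}
 */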
25154 
25155 
25156 /*
25157  *    Function: sd_mhdioc_inresv
25158  *
25159  * Description: This routine is the driver entry point for handling ioctl
25160  *		requests to issue the SCSI-3 Persistent In Read Reservations
25161  *		command to the device (MHIOCGRP_INRESV).
25162  *
25163  *   Arguments: dev	- the device number
25164  *		arg	- user provided in_resv structure
25165  *		flag	- this argument is a pass through to ddi_copyxxx()
25166  *			  directly from the mode argument of ioctl().
25167  *
25168  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25169  *		ENXIO
25170  *		EFAULT
25171  */
25172 
25173 static int
25174 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25175 {
25176 	struct sd_lun		*un;
25177 	mhioc_inresvs_t		inresvs;
25178 	int			rval = 0;
25179 
25180 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25181 		return (ENXIO);
25182 	}
25183 
25184 #ifdef _MULTI_DATAMODEL
25185 
25186 	switch (ddi_model_convert_from(flag & FMODELS)) {
25187 	case DDI_MODEL_ILP32: {
25188 		struct mhioc_inresvs32	inresvs32;
25189 
25190 		if (ddi_copyin(arg, &inresvs32,
25191 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25192 			return (EFAULT);
25193 		}
25194 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25195 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25196 		    &inresvs, flag)) != 0) {
25197 			return (rval);
25198 		}
25199 		inresvs32.generation = inresvs.generation;
25200 		if (ddi_copyout(&inresvs32, arg,
25201 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25202 			return (EFAULT);
25203 		}
25204 		break;
25205 	}
25206 	case DDI_MODEL_NONE:
25207 		if (ddi_copyin(arg, &inresvs,
25208 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25209 			return (EFAULT);
25210 		}
25211 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25212 		    &inresvs, flag)) != 0) {
25213 			return (rval);
25214 		}
25215 		if (ddi_copyout(&inresvs, arg,
25216 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25217 			return (EFAULT);
25218 		}
25219 		break;
25220 	}
25221 
25222 #else /* ! _MULTI_DATAMODEL */
25223 
25224 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25225 		return (EFAULT);
25226 	}
25227 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25228 	if (rval != 0) {
25229 		return (rval);
25230 	}
25231 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag) != 0) {
25232 		return (EFAULT);
25233 	}
25234 
25235 #endif /* _MULTI_DATAMODEL */
25236 
25237 	return (rval);
25238 }
25239 
25240 
25241 /*
25242  * The following routines support the clustering functionality described below
25243  * and implement lost reservation reclaim functionality.
25244  *
25245  * Clustering
25246  * ----------
25247  * The clustering code uses two different, independent forms of SCSI
25248  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25249  * Persistent Group Reservations. For any particular disk, it will use either
25250  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25251  *
25252  * SCSI-2
25253  * The cluster software takes ownership of a multi-hosted disk by issuing the
25254  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
25255  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl -- a
25256  * cluster, just after taking ownership with MHIOCTKOWN, then issues the
25257  * MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
25258  * meaning of failfast is that if the driver (on this host) ever encounters the
25259  * scsi error return code RESERVATION_CONFLICT from the device, it should
25260  * immediately panic the host. The motivation for this ioctl is that if this
25261  * host does encounter reservation conflict, the underlying cause is that some
25262  * other host of the cluster has decided that this host is no longer in the
25263  * cluster and has seized control of the disks for itself. Since this host is no
25264  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25265  * does two things:
25266  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25267  *      error to panic the host
25268  *      (b) it sets up a periodic timer to test whether this host still has
25269  *      "access" (in that no other host has reserved the device):  if the
25270  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25271  *      purpose of that periodic timer is to handle scenarios where the host is
25272  *      otherwise temporarily quiescent, temporarily doing no real i/o.
25273  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25274  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25275  * the device itself.
25276  *
25277  * SCSI-3 PGR
25278  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25279  * facility is supported through the shared multihost disk ioctls
25280  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25281  * MHIOCGRP_PREEMPTANDABORT)
25282  *
25283  * Reservation Reclaim:
25284  * --------------------
25285  * To support the lost reservation reclaim operations this driver creates a
25286  * single thread to handle reinstating reservations on all devices that have
25287  * lost reservations. sd_resv_reclaim_requests are logged for all devices
25288  * that have lost reservations when the scsi watch facility calls back
25289  * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
25290  * requests to regain the lost reservations.
25291  */
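
/*
 * Illustrative userland sketch (not part of the driver) of the SCSI-2
 * sequence described above, roughly as a cluster node would issue it.
 * The device path is elided and the probe interval is an example value;
 * MHIOCENFAILFAST takes a pointer to its polling interval in
 * milliseconds, and passing a NULL arg to MHIOCTKOWN to accept the
 * default ownership delays is an assumption.
 *
 *	#include <sys/mhd.h>
 *	#include <fcntl.h>
 *	#include <stropts.h>
 *
 *	int fd = open("/dev/rdsk/...", O_RDWR);
 *	int ff_interval = 2000;
 *
 *	Take (and if necessary break) the reservation, then enable
 *	failfast so a later RESERVATION_CONFLICT panics this node:
 *	if (ioctl(fd, MHIOCTKOWN, NULL) != 0)
 *		...
 *	if (ioctl(fd, MHIOCENFAILFAST, &ff_interval) != 0)
 *		...
 *
 *	Release ownership when this node is done with the disk:
 *	if (ioctl(fd, MHIOCRELEASE, NULL) != 0)
 *		...
 */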
25292 
25293 /*
25294  *    Function: sd_check_mhd()
25295  *
25296  * Description: This function sets up and submits a scsi watch request or
25297  *		terminates an existing watch request. This routine is used in
25298  *		support of reservation reclaim.
25299  *
25300  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25301  *			 among multiple watches that share the callback function
25302  *		interval - the number of milliseconds specifying the watch
25303  *			   interval for issuing TEST UNIT READY commands. If
25304  *			   set to 0 the watch should be terminated. If the
25305  *			   interval is set to 0 and if the device is required
25306  *			   to hold reservation while disabling failfast, the
25307  *			   watch is restarted with an interval of
25308  *			   reinstate_resv_delay.
25309  *
25310  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25311  *		ENXIO      - Indicates an invalid device was specified
25312  *		EAGAIN     - Unable to submit the scsi watch request
25313  */
25314 
25315 static int
25316 sd_check_mhd(dev_t dev, int interval)
25317 {
25318 	struct sd_lun	*un;
25319 	opaque_t	token;
25320 
25321 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25322 		return (ENXIO);
25323 	}
25324 
25325 	/* is this a watch termination request? */
25326 	if (interval == 0) {
25327 		mutex_enter(SD_MUTEX(un));
25328 		/* if there is an existing watch task then terminate it */
25329 		if (un->un_mhd_token) {
25330 			token = un->un_mhd_token;
25331 			un->un_mhd_token = NULL;
25332 			mutex_exit(SD_MUTEX(un));
25333 			(void) scsi_watch_request_terminate(token,
25334 			    SCSI_WATCH_TERMINATE_WAIT);
25335 			mutex_enter(SD_MUTEX(un));
25336 		} else {
25337 			mutex_exit(SD_MUTEX(un));
25338 			/*
25339 			 * Note: If we return here we don't check for the
25340 			 * failfast case. This is the original legacy
25341 			 * implementation but perhaps we should be checking
25342 			 * the failfast case.
25343 			 */
25344 			return (0);
25345 		}
25346 		/*
25347 		 * If the device is required to hold reservation while
25348 		 * disabling failfast, we need to restart the scsi_watch
25349 		 * routine with an interval of reinstate_resv_delay.
25350 		 */
25351 		if (un->un_resvd_status & SD_RESERVE) {
25352 			interval = sd_reinstate_resv_delay/1000;
25353 		} else {
25354 			/* no failfast so bail */
25355 			mutex_exit(SD_MUTEX(un));
25356 			return (0);
25357 		}
25358 		mutex_exit(SD_MUTEX(un));
25359 	}
25360 
25361 	/*
25362 	 * adjust minimum time interval to 1 second,
25363 	 * and convert from msecs to usecs
25364 	 */
25365 	if (interval > 0 && interval < 1000) {
25366 		interval = 1000;
25367 	}
25368 	interval *= 1000;
25369 
25370 	/*
25371 	 * submit the request to the scsi_watch service
25372 	 */
25373 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25374 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25375 	if (token == NULL) {
25376 		return (EAGAIN);
25377 	}
25378 
25379 	/*
25380 	 * save token for termination later on
25381 	 */
25382 	mutex_enter(SD_MUTEX(un));
25383 	un->un_mhd_token = token;
25384 	mutex_exit(SD_MUTEX(un));
25385 	return (0);
25386 }
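
/*
 * Worked example of the interval handling above (illustrative): a
 * caller passing interval = 500 (msec) is clamped to 1000 msec and
 * submitted to scsi_watch as 1000000 usec, while interval = 2000 goes
 * through as 2000000 usec.  A terminate request (interval = 0) against
 * a device that must keep its reservation is resubmitted with an
 * interval of sd_reinstate_resv_delay / 1000 msec instead of being
 * torn down for good.
 */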
25387 
25388 
25389 /*
25390  *    Function: sd_mhd_watch_cb()
25391  *
25392  * Description: This function is the call back function used by the scsi watch
25393  *		facility. The scsi watch facility sends the "Test Unit Ready"
25394  *		and processes the status. If applicable (i.e. a "Unit Attention"
25395  *		status and automatic "Request Sense" not used) the scsi watch
25396  *		facility will send a "Request Sense" and retrieve the sense data
25397  *		to be passed to this callback function. In either case, whether
25398  *		the "Request Sense" was automatic or submitted by the facility,
25399  *		this callback is passed the status and sense data.
25400  *
25401  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25402  *			among multiple watches that share this callback function
25403  *		resultp - scsi watch facility result packet containing scsi
25404  *			  packet, status byte and sense data
25405  *
25406  * Return Code: 0 - continue the watch task
25407  *		non-zero - terminate the watch task
25408  */
25409 
25410 static int
25411 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
25412 {
25413 	struct sd_lun			*un;
25414 	struct scsi_status		*statusp;
25415 	uint8_t				*sensep;
25416 	struct scsi_pkt			*pkt;
25417 	uchar_t				actual_sense_length;
25418 	dev_t  				dev = (dev_t)arg;
25419 
25420 	ASSERT(resultp != NULL);
25421 	statusp			= resultp->statusp;
25422 	sensep			= (uint8_t *)resultp->sensep;
25423 	pkt			= resultp->pkt;
25424 	actual_sense_length	= resultp->actual_sense_length;
25425 
25426 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25427 		return (ENXIO);
25428 	}
25429 
25430 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25431 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25432 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25433 
25434 	/* Begin processing of the status and/or sense data */
25435 	if (pkt->pkt_reason != CMD_CMPLT) {
25436 		/* Handle the incomplete packet */
25437 		sd_mhd_watch_incomplete(un, pkt);
25438 		return (0);
25439 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25440 		if (*((unsigned char *)statusp)
25441 		    == STATUS_RESERVATION_CONFLICT) {
25442 			/*
25443 			 * Handle a reservation conflict by panicking if
25444 			 * configured for failfast or by logging the conflict
25445 			 * and updating the reservation status
25446 			 */
25447 			mutex_enter(SD_MUTEX(un));
25448 			if ((un->un_resvd_status & SD_FAILFAST) &&
25449 			    (sd_failfast_enable)) {
25450 				sd_panic_for_res_conflict(un);
25451 				/*NOTREACHED*/
25452 			}
25453 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25454 			    "sd_mhd_watch_cb: Reservation Conflict\n");
25455 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25456 			mutex_exit(SD_MUTEX(un));
25457 		}
25458 	}
25459 
25460 	if (sensep != NULL) {
25461 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25462 			mutex_enter(SD_MUTEX(un));
25463 			if ((scsi_sense_asc(sensep) ==
25464 			    SD_SCSI_RESET_SENSE_CODE) &&
25465 			    (un->un_resvd_status & SD_RESERVE)) {
25466 				/*
25467 				 * The additional sense code indicates a power
25468 				 * on or bus device reset has occurred; update
25469 				 * the reservation status.
25470 				 */
25471 				un->un_resvd_status |=
25472 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25473 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25474 				    "sd_mhd_watch_cb: Lost Reservation\n");
25475 			}
25476 		} else {
25477 			return (0);
25478 		}
25479 	} else {
25480 		mutex_enter(SD_MUTEX(un));
25481 	}
25482 
25483 	if ((un->un_resvd_status & SD_RESERVE) &&
25484 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25485 		if (un->un_resvd_status & SD_WANT_RESERVE) {
25486 			/*
25487 			 * A reset occurred in between the last probe and this
25488 			 * one so if a timeout is pending cancel it.
25489 			 */
25490 			if (un->un_resvd_timeid) {
25491 				timeout_id_t temp_id = un->un_resvd_timeid;
25492 				un->un_resvd_timeid = NULL;
25493 				mutex_exit(SD_MUTEX(un));
25494 				(void) untimeout(temp_id);
25495 				mutex_enter(SD_MUTEX(un));
25496 			}
25497 			un->un_resvd_status &= ~SD_WANT_RESERVE;
25498 		}
25499 		if (un->un_resvd_timeid == 0) {
25500 			/* Schedule a timeout to handle the lost reservation */
25501 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25502 			    (void *)dev,
25503 			    drv_usectohz(sd_reinstate_resv_delay));
25504 		}
25505 	}
25506 	mutex_exit(SD_MUTEX(un));
25507 	return (0);
25508 }
25509 
25510 
25511 /*
25512  *    Function: sd_mhd_watch_incomplete()
25513  *
25514  * Description: This function is used to find out why a scsi pkt sent by the
25515  *		scsi watch facility was not completed. In some scenarios this
25516  *		routine simply returns. Otherwise it issues a lun, target, or
25517  *		bus reset to see if the drive is still online.
25518  *
25519  *   Arguments: un  - driver soft state (unit) structure
25520  *		pkt - incomplete scsi pkt
25521  */
25522 
25523 static void
25524 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25525 {
25526 	int	be_chatty;
25527 	int	perr;
25528 
25529 	ASSERT(pkt != NULL);
25530 	ASSERT(un != NULL);
25531 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25532 	perr		= (pkt->pkt_statistics & STAT_PERR);
25533 
25534 	mutex_enter(SD_MUTEX(un));
25535 	if (un->un_state == SD_STATE_DUMPING) {
25536 		mutex_exit(SD_MUTEX(un));
25537 		return;
25538 	}
25539 
25540 	switch (pkt->pkt_reason) {
25541 	case CMD_UNX_BUS_FREE:
25542 		/*
25543 		 * If we had a parity error that caused the target to drop BSY*,
25544 		 * don't be chatty about it.
25545 		 */
25546 		if (perr && be_chatty) {
25547 			be_chatty = 0;
25548 		}
25549 		break;
25550 	case CMD_TAG_REJECT:
25551 		/*
25552 		 * The SCSI-2 spec states that a tag reject will be sent by the
25553 		 * target if tagged queuing is not supported. A tag reject may
25554 		 * also be sent during certain initialization periods or to
25555 		 * control internal resources. For the latter case the target
25556 		 * may also return Queue Full.
25557 		 *
25558 		 * If this driver receives a tag reject from a target that is
25559 		 * going through an init period or controlling internal
25560  *		resources, tagged queuing will be disabled. This is less than
25561  *		optimal behavior, but the driver is unable to determine the
25562  *		target state and assumes tagged queueing is not supported.
25563 		 */
25564 		pkt->pkt_flags = 0;
25565 		un->un_tagflags = 0;
25566 
25567 		if (un->un_f_opt_queueing == TRUE) {
25568 			un->un_throttle = min(un->un_throttle, 3);
25569 		} else {
25570 			un->un_throttle = 1;
25571 		}
25572 		mutex_exit(SD_MUTEX(un));
25573 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25574 		mutex_enter(SD_MUTEX(un));
25575 		break;
25576 	case CMD_INCOMPLETE:
25577 		/*
25578 		 * The transport stopped with an abnormal state, fallthrough and
25579 		 * reset the target and/or bus unless selection did not complete
25580 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
25581 		 * go through a target/bus reset
25582 		 */
25583 		if (pkt->pkt_state == STATE_GOT_BUS) {
25584 			break;
25585 		}
25586 		/*FALLTHROUGH*/
25587 
25588 	case CMD_TIMEOUT:
25589 	default:
25590 		/*
25591 		 * The lun may still be running the command, so a lun reset
25592 		 * should be attempted. If the lun reset fails or cannot be
25593 		 * issued, then try a target reset. Lastly try a bus reset.
25594 		 */
25595 		if ((pkt->pkt_statistics &
25596 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25597 			int reset_retval = 0;
25598 			mutex_exit(SD_MUTEX(un));
25599 			if (un->un_f_allow_bus_device_reset == TRUE) {
25600 				if (un->un_f_lun_reset_enabled == TRUE) {
25601 					reset_retval =
25602 					    scsi_reset(SD_ADDRESS(un),
25603 					    RESET_LUN);
25604 				}
25605 				if (reset_retval == 0) {
25606 					reset_retval =
25607 					    scsi_reset(SD_ADDRESS(un),
25608 					    RESET_TARGET);
25609 				}
25610 			}
25611 			if (reset_retval == 0) {
25612 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25613 			}
25614 			mutex_enter(SD_MUTEX(un));
25615 		}
25616 		break;
25617 	}
25618 
25619 	/* A device/bus reset has occurred; update the reservation status. */
25620 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25621 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25622 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25623 			un->un_resvd_status |=
25624 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25625 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25626 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25627 		}
25628 	}
25629 
25630 	/*
25631 	 * The disk has been turned off; Update the device state.
25632 	 *
25633 	 * Note: Should we be offlining the disk here?
25634 	 */
25635 	if (pkt->pkt_state == STATE_GOT_BUS) {
25636 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25637 		    "Disk not responding to selection\n");
25638 		if (un->un_state != SD_STATE_OFFLINE) {
25639 			New_state(un, SD_STATE_OFFLINE);
25640 		}
25641 	} else if (be_chatty) {
25642 		/*
25643 		 * suppress messages if they are all the same pkt reason;
25644 		 * with TQ, many (up to 256) are returned with the same
25645 		 * pkt_reason
25646 		 */
25647 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25648 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25649 			    "sd_mhd_watch_incomplete: "
25650 			    "SCSI transport failed: reason '%s'\n",
25651 			    scsi_rname(pkt->pkt_reason));
25652 		}
25653 	}
25654 	un->un_last_pkt_reason = pkt->pkt_reason;
25655 	mutex_exit(SD_MUTEX(un));
25656 }
25657 
25658 
25659 /*
25660  *    Function: sd_sname()
25661  *
25662  * Description: This is a simple little routine to return a string containing
25663  *		a printable description of command status byte for use in
25664  *		logging.
25665  *
25666  *   Arguments: status - pointer to a status byte
25667  *
25668  * Return Code: char * - string containing status description.
25669  */
25670 
25671 static char *
25672 sd_sname(uchar_t status)
25673 {
25674 	switch (status & STATUS_MASK) {
25675 	case STATUS_GOOD:
25676 		return ("good status");
25677 	case STATUS_CHECK:
25678 		return ("check condition");
25679 	case STATUS_MET:
25680 		return ("condition met");
25681 	case STATUS_BUSY:
25682 		return ("busy");
25683 	case STATUS_INTERMEDIATE:
25684 		return ("intermediate");
25685 	case STATUS_INTERMEDIATE_MET:
25686 		return ("intermediate - condition met");
25687 	case STATUS_RESERVATION_CONFLICT:
25688 		return ("reservation_conflict");
25689 	case STATUS_TERMINATED:
25690 		return ("command terminated");
25691 	case STATUS_QFULL:
25692 		return ("queue full");
25693 	default:
25694 		return ("<unknown status>");
25695 	}
25696 }
25697 
25698 
25699 /*
25700  *    Function: sd_mhd_resvd_recover()
25701  *
25702  * Description: This function adds a reservation entry to the
25703  *		sd_resv_reclaim_request list and signals the reservation
25704  *		reclaim thread that there is work pending. If the reservation
25705  *		reclaim thread has not been previously created this function
25706  *		will kick it off.
25707  *
25708  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25709  *			among multiple watches that share this callback function
25710  *
25711  *     Context: This routine is called by timeout() and is run in interrupt
25712  *		context. It must not sleep or call other functions which may
25713  *		sleep.
25714  */
25715 
25716 static void
25717 sd_mhd_resvd_recover(void *arg)
25718 {
25719 	dev_t			dev = (dev_t)arg;
25720 	struct sd_lun		*un;
25721 	struct sd_thr_request	*sd_treq = NULL;
25722 	struct sd_thr_request	*sd_cur = NULL;
25723 	struct sd_thr_request	*sd_prev = NULL;
25724 	int			already_there = 0;
25725 
25726 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25727 		return;
25728 	}
25729 
25730 	mutex_enter(SD_MUTEX(un));
25731 	un->un_resvd_timeid = NULL;
25732 	if (un->un_resvd_status & SD_WANT_RESERVE) {
25733 		/*
25734 		 * There was a reset so don't issue the reserve, allow the
25735 		 * sd_mhd_watch_cb callback function to notice this and
25736 		 * reschedule the timeout for reservation.
25737 		 */
25738 		mutex_exit(SD_MUTEX(un));
25739 		return;
25740 	}
25741 	mutex_exit(SD_MUTEX(un));
25742 
25743 	/*
25744 	 * Add this device to the sd_resv_reclaim_request list and the
25745 	 * sd_resv_reclaim_thread should take care of the rest.
25746 	 *
25747 	 * Note: We can't sleep in this context so if the memory allocation
25748 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
25749 	 * reschedule the timeout for reservation.  (4378460)
25750 	 */
25751 	sd_treq = (struct sd_thr_request *)
25752 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25753 	if (sd_treq == NULL) {
25754 		return;
25755 	}
25756 
25757 	sd_treq->sd_thr_req_next = NULL;
25758 	sd_treq->dev = dev;
25759 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25760 	if (sd_tr.srq_thr_req_head == NULL) {
25761 		sd_tr.srq_thr_req_head = sd_treq;
25762 	} else {
25763 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25764 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25765 			if (sd_cur->dev == dev) {
25766 				/*
25767 				 * already in Queue so don't log
25768 				 * another request for the device
25769 				 */
25770 				already_there = 1;
25771 				break;
25772 			}
25773 			sd_prev = sd_cur;
25774 		}
25775 		if (!already_there) {
25776 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25777 			    "logging request for %lx\n", dev);
25778 			sd_prev->sd_thr_req_next = sd_treq;
25779 		} else {
25780 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25781 		}
25782 	}
25783 
25784 	/*
25785 	 * Create a kernel thread to do the reservation reclaim and free up this
25786 	 * thread. We cannot block this thread while we go away to do the
25787 	 * reservation reclaim
25788 	 */
25789 	if (sd_tr.srq_resv_reclaim_thread == NULL)
25790 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25791 		    sd_resv_reclaim_thread, NULL,
25792 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
25793 
25794 	/* Tell the reservation reclaim thread that it has work to do */
25795 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25796 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25797 }
25798 
25799 /*
25800  *    Function: sd_resv_reclaim_thread()
25801  *
25802  * Description: This function implements the reservation reclaim operations
25803  *
25804  *   Arguments: none. The thread services the global sd_tr request
25805  *		   queue rather than taking a per-device argument.
25806  */
25807 
25808 static void
25809 sd_resv_reclaim_thread()
25810 {
25811 	struct sd_lun		*un;
25812 	struct sd_thr_request	*sd_mhreq;
25813 
25814 	/* Wait for work */
25815 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25816 	if (sd_tr.srq_thr_req_head == NULL) {
25817 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25818 		    &sd_tr.srq_resv_reclaim_mutex);
25819 	}
25820 
25821 	/* Loop while we have work */
25822 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25823 		un = ddi_get_soft_state(sd_state,
25824 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25825 		if (un == NULL) {
25826 			/*
25827 			 * softstate structure is NULL so just
25828 			 * dequeue the request and continue
25829 			 */
25830 			sd_tr.srq_thr_req_head =
25831 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25832 			kmem_free(sd_tr.srq_thr_cur_req,
25833 			    sizeof (struct sd_thr_request));
25834 			continue;
25835 		}
25836 
25837 		/* dequeue the request */
25838 		sd_mhreq = sd_tr.srq_thr_cur_req;
25839 		sd_tr.srq_thr_req_head =
25840 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25841 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25842 
25843 		/*
25844 		 * Reclaim reservation only if SD_RESERVE is still set. There
25845 		 * may have been a call to MHIOCRELEASE before we got here.
25846 		 */
25847 		mutex_enter(SD_MUTEX(un));
25848 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25849 			/*
25850 			 * Note: The SD_LOST_RESERVE flag is cleared before
25851 			 * reclaiming the reservation. If this is done after the
25852 			 * call to sd_reserve_release a reservation loss in the
25853 			 * window between pkt completion of reserve cmd and
25854 			 * mutex_enter below may not be recognized
25855 			 */
25856 			un->un_resvd_status &= ~SD_LOST_RESERVE;
25857 			mutex_exit(SD_MUTEX(un));
25858 
25859 			if (sd_reserve_release(sd_mhreq->dev,
25860 			    SD_RESERVE) == 0) {
25861 				mutex_enter(SD_MUTEX(un));
25862 				un->un_resvd_status |= SD_RESERVE;
25863 				mutex_exit(SD_MUTEX(un));
25864 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25865 				    "sd_resv_reclaim_thread: "
25866 				    "Reservation Recovered\n");
25867 			} else {
25868 				mutex_enter(SD_MUTEX(un));
25869 				un->un_resvd_status |= SD_LOST_RESERVE;
25870 				mutex_exit(SD_MUTEX(un));
25871 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25872 				    "sd_resv_reclaim_thread: Failed "
25873 				    "Reservation Recovery\n");
25874 			}
25875 		} else {
25876 			mutex_exit(SD_MUTEX(un));
25877 		}
25878 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25879 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25880 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25881 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25882 		/*
25883 		 * wakeup the destroy thread if anyone is waiting on
25884 		 * us to complete.
25885 		 */
25886 		cv_signal(&sd_tr.srq_inprocess_cv);
25887 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25888 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
25889 	}
25890 
25891 	/*
25892 	 * cleanup the sd_tr structure now that this thread will not exist
25893 	 */
25894 	ASSERT(sd_tr.srq_thr_req_head == NULL);
25895 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25896 	sd_tr.srq_resv_reclaim_thread = NULL;
25897 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25898 	thread_exit();
25899 }
25900 
25901 
25902 /*
25903  *    Function: sd_rmv_resv_reclaim_req()
25904  *
25905  * Description: This function removes any pending reservation reclaim requests
25906  *		for the specified device.
25907  *
25908  *   Arguments: dev - the device 'dev_t'
25909  */
25910 
25911 static void
25912 sd_rmv_resv_reclaim_req(dev_t dev)
25913 {
25914 	struct sd_thr_request *sd_mhreq;
25915 	struct sd_thr_request *sd_prev;
25916 
25917 	/* Remove a reservation reclaim request from the list */
25918 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25919 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25920 		/*
25921 		 * We are attempting to reinstate reservation for
25922 		 * this device. We wait for sd_reserve_release()
25923 		 * to return before we return.
25924 		 */
25925 		cv_wait(&sd_tr.srq_inprocess_cv,
25926 		    &sd_tr.srq_resv_reclaim_mutex);
25927 	} else {
25928 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25929 		if (sd_mhreq && sd_mhreq->dev == dev) {
25930 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25931 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25932 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25933 			return;
25934 		}
25935 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25936 			if (sd_mhreq && sd_mhreq->dev == dev) {
25937 				break;
25938 			}
25939 			sd_prev = sd_mhreq;
25940 		}
25941 		if (sd_mhreq != NULL) {
25942 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25943 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25944 		}
25945 	}
25946 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25947 }
25948 
25949 
25950 /*
25951  *    Function: sd_mhd_reset_notify_cb()
25952  *
25953  * Description: This is a call back function for scsi_reset_notify. This
25954  *		function updates the softstate reserved status and logs the
25955  *		reset. The driver scsi watch facility callback function
25956  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25957  *		will reclaim the reservation.
25958  *
25959  *   Arguments: arg  - driver soft state (unit) structure
25960  */
25961 
25962 static void
25963 sd_mhd_reset_notify_cb(caddr_t arg)
25964 {
25965 	struct sd_lun *un = (struct sd_lun *)arg;
25966 
25967 	mutex_enter(SD_MUTEX(un));
25968 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25969 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25970 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25971 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25972 	}
25973 	mutex_exit(SD_MUTEX(un));
25974 }
25975 
25976 
25977 /*
25978  *    Function: sd_take_ownership()
25979  *
25980  * Description: This routine implements an algorithm to achieve a stable
25981  *		reservation on disks which don't implement priority reserve,
25982  *		and makes sure that other hosts lose re-reservation attempts.
25983  *		This algorithm consists of a loop that keeps issuing the RESERVE
25984  *		for some period of time (min_ownership_delay, default 6 seconds).
25985  *		During that loop, it looks to see if there has been a bus device
25986  *		reset or bus reset (both of which cause an existing reservation
25987  *		to be lost). If the reservation is lost issue RESERVE until a
25988  *		period of min_ownership_delay with no resets has gone by, or
25989  *		until max_ownership_delay has expired. This loop ensures that
25990  *		the host really did manage to reserve the device, in spite of
25991  *		resets. The looping for min_ownership_delay (default six
25992  *		seconds) is important to early generation clustering products,
25993  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25994  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25995  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25996  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25997  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25998  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25999  *		no longer "owns" the disk and will have panicked itself.  Thus,
26000  *		the host issuing the MHIOCTKOWN is assured (with timing
26001  *		dependencies) that by the time it actually starts to use the
26002  *		disk for real work, the old owner is no longer accessing it.
26003  *
26004  *		min_ownership_delay is the minimum amount of time for which the
26005  *		disk must be held reserved, free of resets, before the
26006  *		MHIOCTKOWN ioctl will return success.
26007  *
26008  *		max_ownership_delay indicates the amount of time by which the
26009  *		take-ownership attempt must succeed or time out with an error.
26010  *
26011  *   Arguments: dev - the device 'dev_t'
26012  *		*p  - struct containing timing info.
26013  *
26014  * Return Code: 0 for success or error code
26015  */
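
/*
 * Worked example of the loop timing below (illustrative, using the
 * defaults): each iteration delays 500 msec, so with
 * min_ownership_delay = 6 sec a clean run issues roughly a dozen
 * reserves and returns 0 shortly after the 6-second mark, once at
 * least 4 consecutive iterations saw neither a reset nor a lost
 * reservation.  Any reset pushes ownership_time another 6 seconds out
 * and zeroes reservation_count; if that keeps happening past the
 * 30-second max_ownership_delay, the routine gives up with EACCES.
 */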
26016 
26017 static int
26018 sd_take_ownership(dev_t dev, struct mhioctkown *p)
26019 {
26020 	struct sd_lun	*un;
26021 	int		rval;
26022 	int		err;
26023 	int		reservation_count   = 0;
26024 	int		min_ownership_delay =  6000000; /* in usec */
26025 	int		max_ownership_delay = 30000000; /* in usec */
26026 	clock_t		start_time;	/* starting time of this algorithm */
26027 	clock_t		end_time;	/* time limit for giving up */
26028 	clock_t		ownership_time;	/* time limit for stable ownership */
26029 	clock_t		current_time;
26030 	clock_t		previous_current_time;
26031 
26032 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26033 		return (ENXIO);
26034 	}
26035 
26036 	/*
26037 	 * Attempt a device reservation. A priority reservation is requested.
26038 	 */
26039 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
26040 	    != SD_SUCCESS) {
26041 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26042 		    "sd_take_ownership: return(1)=%d\n", rval);
26043 		return (rval);
26044 	}
26045 
26046 	/* Update the softstate reserved status to indicate the reservation */
26047 	mutex_enter(SD_MUTEX(un));
26048 	un->un_resvd_status |= SD_RESERVE;
26049 	un->un_resvd_status &=
26050 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
26051 	mutex_exit(SD_MUTEX(un));
26052 
26053 	if (p != NULL) {
26054 		if (p->min_ownership_delay != 0) {
26055 			min_ownership_delay = p->min_ownership_delay * 1000;
26056 		}
26057 		if (p->max_ownership_delay != 0) {
26058 			max_ownership_delay = p->max_ownership_delay * 1000;
26059 		}
26060 	}
26061 	SD_INFO(SD_LOG_IOCTL_MHD, un,
26062 	    "sd_take_ownership: min, max delays: %d, %d\n",
26063 	    min_ownership_delay, max_ownership_delay);
26064 
26065 	start_time = ddi_get_lbolt();
26066 	current_time	= start_time;
26067 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
26068 	end_time	= start_time + drv_usectohz(max_ownership_delay);
26069 
26070 	while (current_time - end_time < 0) {
26071 		delay(drv_usectohz(500000));
26072 
26073 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
26074 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
26075 				mutex_enter(SD_MUTEX(un));
26076 				rval = (un->un_resvd_status &
26077 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
26078 				mutex_exit(SD_MUTEX(un));
26079 				break;
26080 			}
26081 		}
26082 		previous_current_time = current_time;
26083 		current_time = ddi_get_lbolt();
26084 		mutex_enter(SD_MUTEX(un));
26085 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
26086 			ownership_time = ddi_get_lbolt() +
26087 			    drv_usectohz(min_ownership_delay);
26088 			reservation_count = 0;
26089 		} else {
26090 			reservation_count++;
26091 		}
26092 		un->un_resvd_status |= SD_RESERVE;
26093 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
26094 		mutex_exit(SD_MUTEX(un));
26095 
26096 		SD_INFO(SD_LOG_IOCTL_MHD, un,
26097 		    "sd_take_ownership: ticks for loop iteration=%ld, "
26098 		    "reservation=%s\n", (current_time - previous_current_time),
26099 		    reservation_count ? "ok" : "reclaimed");
26100 
26101 		if (current_time - ownership_time >= 0 &&
26102 		    reservation_count >= 4) {
26103 			rval = 0; /* Achieved a stable ownership */
26104 			break;
26105 		}
26106 		if (current_time - end_time >= 0) {
26107 			rval = EACCES; /* No ownership in max possible time */
26108 			break;
26109 		}
26110 	}
26111 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
26112 	    "sd_take_ownership: return(2)=%d\n", rval);
26113 	return (rval);
26114 }
26115 
26116 
26117 /*
26118  *    Function: sd_reserve_release()
26119  *
26120  * Description: This function builds and sends scsi RESERVE, RELEASE, and
26121  *		PRIORITY RESERVE commands based on a user specified command type
26122  *
26123  *   Arguments: dev - the device 'dev_t'
26124  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
26125  *		      SD_RESERVE, SD_RELEASE
26126  *
26127  * Return Code: 0 or Error Code
26128  */
26129 
26130 static int
26131 sd_reserve_release(dev_t dev, int cmd)
26132 {
26133 	struct uscsi_cmd	*com = NULL;
26134 	struct sd_lun		*un = NULL;
26135 	char			cdb[CDB_GROUP0];
26136 	int			rval;
26137 
26138 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
26139 	    (cmd == SD_PRIORITY_RESERVE));
26140 
26141 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26142 		return (ENXIO);
26143 	}
26144 
26145 	/* instantiate and initialize the command and cdb */
26146 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26147 	bzero(cdb, CDB_GROUP0);
26148 	com->uscsi_flags   = USCSI_SILENT;
26149 	com->uscsi_timeout = un->un_reserve_release_time;
26150 	com->uscsi_cdblen  = CDB_GROUP0;
26151 	com->uscsi_cdb	   = cdb;
26152 	if (cmd == SD_RELEASE) {
26153 		cdb[0] = SCMD_RELEASE;
26154 	} else {
26155 		cdb[0] = SCMD_RESERVE;
26156 	}
26157 
26158 	/* Send the command. */
26159 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26160 	    UIO_SYSSPACE, SD_PATH_STANDARD);
26161 
26162 	/*
26163 	 * "break" a reservation that is held by another host, by issuing a
26164 	 * reset if priority reserve is desired, and we could not get the
26165 	 * device.
26166 	 */
26167 	if ((cmd == SD_PRIORITY_RESERVE) &&
26168 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26169 		/*
26170 		 * First try to reset the LUN. If we cannot, then try a target
26171 		 * reset, followed by a bus reset if the target reset fails.
26172 		 */
26173 		int reset_retval = 0;
26174 		if (un->un_f_lun_reset_enabled == TRUE) {
26175 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
26176 		}
26177 		if (reset_retval == 0) {
26178 			/* The LUN reset either failed or was not issued */
26179 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26180 		}
26181 		if ((reset_retval == 0) &&
26182 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
26183 			rval = EIO;
26184 			kmem_free(com, sizeof (*com));
26185 			return (rval);
26186 		}
26187 
26188 		bzero(com, sizeof (struct uscsi_cmd));
26189 		com->uscsi_flags   = USCSI_SILENT;
26190 		com->uscsi_cdb	   = cdb;
26191 		com->uscsi_cdblen  = CDB_GROUP0;
26192 		com->uscsi_timeout = 5;
26193 
26194 		/*
26195 		 * Reissue the last reserve command, this time without request
26196 		 * sense.  Assume that it is just a regular reserve command.
26197 		 */
26198 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26199 		    UIO_SYSSPACE, SD_PATH_STANDARD);
26200 	}
26201 
26202 	/* Return an error if still getting a reservation conflict. */
26203 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26204 		rval = EACCES;
26205 	}
26206 
26207 	kmem_free(com, sizeof (*com));
26208 	return (rval);
26209 }
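
/*
 * Illustrative userland sketch (not part of the driver): the same
 * group-0 RESERVE(6) command built above can be issued through the
 * USCSICMD ioctl.  SCMD_RESERVE is 0x16 and SCMD_RELEASE is 0x17, each
 * a 6-byte (CDB_GROUP0) CDB; the device path, timeout, and error
 * handling here are hypothetical.
 *
 *	#include <sys/scsi/impl/uscsi.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <stropts.h>
 *
 *	struct uscsi_cmd ucmd;
 *	char cdb[6] = { 0x16, 0, 0, 0, 0, 0 };
 *	int fd = open("/dev/rdsk/...", O_RDWR);
 *
 *	(void) memset(&ucmd, 0, sizeof (ucmd));
 *	ucmd.uscsi_cdb = cdb;
 *	ucmd.uscsi_cdblen = sizeof (cdb);
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	ucmd.uscsi_timeout = 60;
 *	if (ioctl(fd, USCSICMD, &ucmd) != 0)
 *		...
 */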
26210 
26211 
26212 #define	SD_NDUMP_RETRIES	12
26213 /*
26214  *	System Crash Dump routine
26215  */
26216 
26217 static int
26218 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
26219 {
26220 	int		instance;
26221 	int		partition;
26222 	int		i;
26223 	int		err;
26224 	struct sd_lun	*un;
26225 	struct dk_map	*lp;
26226 	struct scsi_pkt *wr_pktp;
26227 	struct buf	*wr_bp;
26228 	struct buf	wr_buf;
26229 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
26230 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
26231 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
26232 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
26233 	size_t		io_start_offset;
26234 	int		doing_rmw = FALSE;
26235 	int		rval;
26236 #if defined(__i386) || defined(__amd64)
26237 	ssize_t dma_resid;
26238 	daddr_t oblkno;
26239 #endif
26240 
26241 	instance = SDUNIT(dev);
26242 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
26243 	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
26244 		return (ENXIO);
26245 	}
26246 
26247 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
26248 
26249 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
26250 
26251 	partition = SDPART(dev);
26252 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
26253 
26254 	/* Validate the blocks to dump against the partition size. */
26255 	lp = &un->un_map[partition];
26256 	if ((blkno + nblk) > lp->dkl_nblk) {
26257 		SD_TRACE(SD_LOG_DUMP, un,
26258 		    "sddump: dump range larger than partition: "
26259 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26260 		    blkno, nblk, lp->dkl_nblk);
26261 		return (EINVAL);
26262 	}
26263 
26264 	mutex_enter(&un->un_pm_mutex);
26265 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
26266 		struct scsi_pkt *start_pktp;
26267 
26268 		mutex_exit(&un->un_pm_mutex);
26269 
26270 		/*
26271 		 * Use the pm framework to power on the HBA first.
26272 		 */
26273 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
26274 
26275 		/*
26276 		 * Dump no longer uses sdpower to power on a device; it is
26277 		 * done in-line here so it can be run in polled mode.
26278 		 */
26279 
26280 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
26281 
26282 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
26283 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
26284 
26285 		if (start_pktp == NULL) {
26286 			/* We were not given a SCSI packet, fail. */
26287 			return (EIO);
26288 		}
26289 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
26290 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
26291 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
26292 		start_pktp->pkt_flags = FLAG_NOINTR;
26293 
26294 		mutex_enter(SD_MUTEX(un));
26295 		SD_FILL_SCSI1_LUN(un, start_pktp);
26296 		mutex_exit(SD_MUTEX(un));
26297 		/*
26298 		 * Scsi_poll returns 0 (success) if the command completes and
26299 		 * the status block is STATUS_GOOD.
26300 		 */
26301 		if (sd_scsi_poll(un, start_pktp) != 0) {
26302 			scsi_destroy_pkt(start_pktp);
26303 			return (EIO);
26304 		}
26305 		scsi_destroy_pkt(start_pktp);
26306 		(void) sd_ddi_pm_resume(un);
26307 	} else {
26308 		mutex_exit(&un->un_pm_mutex);
26309 	}
26310 
26311 	mutex_enter(SD_MUTEX(un));
26312 	un->un_throttle = 0;
26313 
26314 	/*
26315 	 * The first time through, reset the specific target device.
26316 	 * However, when cpr calls sddump we know that sd is in a
26317 	 * good state so no bus reset is required.
26318 	 * Clear sense data via Request Sense cmd.
26319 	 * In sddump we don't care about allow_bus_device_reset anymore
26320 	 */
26321 
26322 	if ((un->un_state != SD_STATE_SUSPENDED) &&
26323 	    (un->un_state != SD_STATE_DUMPING)) {
26324 
26325 		New_state(un, SD_STATE_DUMPING);
26326 
26327 		if (un->un_f_is_fibre == FALSE) {
26328 			mutex_exit(SD_MUTEX(un));
26329 			/*
26330 			 * Attempt a bus reset for parallel scsi.
26331 			 *
26332 			 * Note: A bus reset is required because on some host
26333 			 * systems (i.e. E420R) a bus device reset is
26334 			 * insufficient to reset the state of the target.
26335 			 *
26336 			 * Note: Don't issue the reset for fibre-channel,
26337 			 * because this tends to hang the bus (loop) for
26338 			 * too long while everyone is logging out and in
26339 			 * and the deadman timer for dumping will fire
26340 			 * before the dump is complete.
26341 			 */
26342 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
26343 				mutex_enter(SD_MUTEX(un));
26344 				Restore_state(un);
26345 				mutex_exit(SD_MUTEX(un));
26346 				return (EIO);
26347 			}
26348 
26349 			/* Delay to give the device some recovery time. */
26350 			drv_usecwait(10000);
26351 
26352 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
26353 				SD_INFO(SD_LOG_DUMP, un,
26354 					"sddump: sd_send_polled_RQS failed\n");
26355 			}
26356 			mutex_enter(SD_MUTEX(un));
26357 		}
26358 	}
26359 
26360 	/*
26361 	 * Convert the partition-relative block number to a
26362 	 * disk physical block number.
26363 	 */
26364 	blkno += un->un_offset[partition];
26365 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
26366 
26367 
26368 	/*
26369 	 * Check if the device has a non-512 block size.
26370 	 */
26371 	wr_bp = NULL;
26372 	if (NOT_DEVBSIZE(un)) {
26373 		tgt_byte_offset = blkno * un->un_sys_blocksize;
26374 		tgt_byte_count = nblk * un->un_sys_blocksize;
26375 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
26376 		    (tgt_byte_count % un->un_tgt_blocksize)) {
26377 			doing_rmw = TRUE;
26378 			/*
26379 			 * Calculate the block number and number of blocks
26380 			 * in terms of the media block size.
26381 			 */
26382 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26383 			tgt_nblk =
26384 			    ((tgt_byte_offset + tgt_byte_count +
26385 				(un->un_tgt_blocksize - 1)) /
26386 				un->un_tgt_blocksize) - tgt_blkno;
26387 
26388 			/*
26389 			 * Invoke the routine which is going to do read part
26390 			 * of read-modify-write.
26391 			 * Note that this routine returns a pointer to
26392 			 * a valid bp in wr_bp.
26393 			 */
26394 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
26395 			    &wr_bp);
26396 			if (err) {
26397 				mutex_exit(SD_MUTEX(un));
26398 				return (err);
26399 			}
26400 			/*
26401 			 * The offset is calculated as:
26402 			 *   (original block # * system block size) -
26403 			 *   (new block # * target block size)
26404 			 */
26405 			io_start_offset =
26406 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
26407 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
26408 
26409 			ASSERT((io_start_offset >= 0) &&
26410 			    (io_start_offset < un->un_tgt_blocksize));
26411 			/*
26412 			 * Do the modify portion of read modify write.
26413 			 */
26414 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
26415 			    (size_t)nblk * un->un_sys_blocksize);
26416 		} else {
26417 			doing_rmw = FALSE;
26418 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26419 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
26420 		}
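
		/*
		 * Worked example of the math above (illustrative): with
		 * un_sys_blocksize = 512, un_tgt_blocksize = 2048,
		 * blkno = 5, and nblk = 2, tgt_byte_offset is 2560 and
		 * tgt_byte_count is 1024.  2560 is not a multiple of
		 * 2048, so this is the RMW case: tgt_blkno = 1,
		 * tgt_nblk = 1, and io_start_offset = 2560 - 2048 = 512.
		 * Target block 1 is read, the 1024 dump bytes are copied
		 * in at offset 512, and the whole 2048-byte block is
		 * written back.
		 */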
26421 
26422 		/* Convert blkno and nblk to target blocks */
26423 		blkno = tgt_blkno;
26424 		nblk = tgt_nblk;
26425 	} else {
26426 		wr_bp = &wr_buf;
26427 		bzero(wr_bp, sizeof (struct buf));
26428 		wr_bp->b_flags		= B_BUSY;
26429 		wr_bp->b_un.b_addr	= addr;
26430 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
26431 		wr_bp->b_resid		= 0;
26432 	}
26433 
26434 	mutex_exit(SD_MUTEX(un));
26435 
26436 	/*
26437 	 * Obtain a SCSI packet for the write command.
26438 	 * It should be safe to call the allocator here without
26439 	 * worrying about being locked for DVMA mapping because
26440 	 * the address we're passed is already a DVMA mapping
26441 	 *
26442 	 * We are also not going to worry about semaphore ownership
26443 	 * in the dump buffer. Dumping is single threaded at present.
26444 	 */
26445 
26446 	wr_pktp = NULL;
26447 
26448 #if defined(__i386) || defined(__amd64)
26449 	dma_resid = wr_bp->b_bcount;
26450 	oblkno = blkno;
26451 	while (dma_resid != 0) {
26452 #endif
26453 
26454 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26455 		wr_bp->b_flags &= ~B_ERROR;
26456 
26457 #if defined(__i386) || defined(__amd64)
26458 		blkno = oblkno +
26459 			((wr_bp->b_bcount - dma_resid) /
26460 			    un->un_tgt_blocksize);
26461 		nblk = dma_resid / un->un_tgt_blocksize;
26462 
26463 		if (wr_pktp) {
26464 			/* Partial DMA transfers after initial transfer */
26465 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26466 			    blkno, nblk);
26467 		} else {
26468 			/* Initial transfer */
26469 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26470 			    un->un_pkt_flags, NULL_FUNC, NULL,
26471 			    blkno, nblk);
26472 		}
26473 #else
26474 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26475 		    0, NULL_FUNC, NULL, blkno, nblk);
26476 #endif
26477 
26478 		if (rval == 0) {
26479 			/* We were given a SCSI packet, continue. */
26480 			break;
26481 		}
26482 
26483 		if (i == 0) {
26484 			if (wr_bp->b_flags & B_ERROR) {
26485 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26486 				    "no resources for dumping; "
26487 				    "error code: 0x%x, retrying",
26488 				    geterror(wr_bp));
26489 			} else {
26490 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26491 				    "no resources for dumping; retrying");
26492 			}
26493 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26494 			if (wr_bp->b_flags & B_ERROR) {
26495 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26496 				    "no resources for dumping; error code: "
26497 				    "0x%x, retrying\n", geterror(wr_bp));
26498 			}
26499 		} else {
26500 			if (wr_bp->b_flags & B_ERROR) {
26501 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26502 				    "no resources for dumping; "
26503 				    "error code: 0x%x, retries failed, "
26504 				    "giving up.\n", geterror(wr_bp));
26505 			} else {
26506 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26507 				    "no resources for dumping; "
26508 				    "retries failed, giving up.\n");
26509 			}
26510 			mutex_enter(SD_MUTEX(un));
26511 			Restore_state(un);
26512 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26513 				mutex_exit(SD_MUTEX(un));
26514 				scsi_free_consistent_buf(wr_bp);
26515 			} else {
26516 				mutex_exit(SD_MUTEX(un));
26517 			}
26518 			return (EIO);
26519 		}
26520 		drv_usecwait(10000);
26521 	}
26522 
26523 #if defined(__i386) || defined(__amd64)
26524 	/*
26525 	 * save the resid from PARTIAL_DMA
26526 	 */
26527 	dma_resid = wr_pktp->pkt_resid;
26528 	if (dma_resid != 0)
26529 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26530 	wr_pktp->pkt_resid = 0;
26531 #endif
26532 
26533 	/* SunBug 1222170 */
26534 	wr_pktp->pkt_flags = FLAG_NOINTR;
26535 
26536 	err = EIO;
26537 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26538 
26539 		/*
26540 		 * Scsi_poll returns 0 (success) if the command completes and
26541 		 * the status block is STATUS_GOOD.  We should only check
26542 		 * errors if this condition is not true.  Even then we should
26543 		 * send our own request sense packet only if we have a check
26544 		 * condition and auto request sense has not been performed by
26545 		 * the hba.
26546 		 */
26547 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26548 
26549 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26550 		    (wr_pktp->pkt_resid == 0)) {
26551 			err = SD_SUCCESS;
26552 			break;
26553 		}
26554 
26555 		/*
26556 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
26557 		 */
26558 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26559 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26560 			    "Device is gone\n");
26561 			break;
26562 		}
26563 
26564 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26565 			SD_INFO(SD_LOG_DUMP, un,
26566 			    "sddump: write failed with CHECK, try # %d\n", i);
26567 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26568 				(void) sd_send_polled_RQS(un);
26569 			}
26570 
26571 			continue;
26572 		}
26573 
26574 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26575 			int reset_retval = 0;
26576 
26577 			SD_INFO(SD_LOG_DUMP, un,
26578 			    "sddump: write failed with BUSY, try # %d\n", i);
26579 
26580 			if (un->un_f_lun_reset_enabled == TRUE) {
26581 				reset_retval = scsi_reset(SD_ADDRESS(un),
26582 				    RESET_LUN);
26583 			}
26584 			if (reset_retval == 0) {
26585 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26586 			}
26587 			(void) sd_send_polled_RQS(un);
26588 
26589 		} else {
26590 			SD_INFO(SD_LOG_DUMP, un,
26591 			    "sddump: write failed with 0x%x, try # %d\n",
26592 			    SD_GET_PKT_STATUS(wr_pktp), i);
26593 			mutex_enter(SD_MUTEX(un));
26594 			sd_reset_target(un, wr_pktp);
26595 			mutex_exit(SD_MUTEX(un));
26596 		}
26597 
26598 		/*
26599 		 * If we are not getting anywhere with lun/target resets,
26600 		 * let's reset the bus.
26601 		 */
26602 		if (i == SD_NDUMP_RETRIES/2) {
26603 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26604 			(void) sd_send_polled_RQS(un);
26605 		}
26606 
26607 	}
26608 #if defined(__i386) || defined(__amd64)
26609 	}	/* dma_resid */
26610 #endif
26611 
26612 	scsi_destroy_pkt(wr_pktp);
26613 	mutex_enter(SD_MUTEX(un));
26614 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26615 		mutex_exit(SD_MUTEX(un));
26616 		scsi_free_consistent_buf(wr_bp);
26617 	} else {
26618 		mutex_exit(SD_MUTEX(un));
26619 	}
26620 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26621 	return (err);
26622 }
26623 
26624 /*
26625  *    Function: sd_scsi_poll()
26626  *
26627  * Description: This is a wrapper for the scsi_poll call.
26628  *
26629  *   Arguments: sd_lun - The unit structure
26630  *              scsi_pkt - The scsi packet being sent to the device.
26631  *
26632  * Return Code: 0 - Command completed successfully with good status
26633  *             -1 - Command failed.  This could indicate a check condition
26634  *                  or other status value requiring recovery action.
26635  *
26636  */
26637 
26638 static int
26639 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26640 {
26641 	int status;
26642 
26643 	ASSERT(un != NULL);
26644 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26645 	ASSERT(pktp != NULL);
26646 
26647 	status = SD_SUCCESS;
26648 
26649 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26650 		pktp->pkt_flags |= un->un_tagflags;
26651 		pktp->pkt_flags &= ~FLAG_NODISCON;
26652 	}
26653 
26654 	status = sd_ddi_scsi_poll(pktp);
26655 	/*
26656 	 * Scsi_poll returns 0 (success) if the command completes and the
26657 	 * status block is STATUS_GOOD.  We should only check errors if this
26658 	 * condition is not true.  Even then we should send our own request
26659 	 * sense packet only if we have a check condition and auto
26660 	 * request sense has not been performed by the hba.
26661 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26662 	 */
26663 	if ((status != SD_SUCCESS) &&
26664 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26665 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26666 	    (pktp->pkt_reason != CMD_DEV_GONE))
26667 		(void) sd_send_polled_RQS(un);
26668 
26669 	return (status);
26670 }
26671 
26672 /*
26673  *    Function: sd_send_polled_RQS()
26674  *
26675  * Description: This sends the request sense command to a device.
26676  *
26677  *   Arguments: sd_lun - The unit structure
26678  *
26679  * Return Code: 0 - Command completed successfully with good status
26680  *             -1 - Command failed.
26681  *
26682  */
26683 
26684 static int
26685 sd_send_polled_RQS(struct sd_lun *un)
26686 {
26687 	int	ret_val;
26688 	struct	scsi_pkt	*rqs_pktp;
26689 	struct	buf		*rqs_bp;
26690 
26691 	ASSERT(un != NULL);
26692 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26693 
26694 	ret_val = SD_SUCCESS;
26695 
26696 	rqs_pktp = un->un_rqs_pktp;
26697 	rqs_bp	 = un->un_rqs_bp;
26698 
26699 	mutex_enter(SD_MUTEX(un));
26700 
26701 	if (un->un_sense_isbusy) {
26702 		ret_val = SD_FAILURE;
26703 		mutex_exit(SD_MUTEX(un));
26704 		return (ret_val);
26705 	}
26706 
26707 	/*
26708 	 * If the request sense buffer (and packet) is not in use,
26709 	 * let's set the un_sense_isbusy and send our packet
26710 	 */
26711 	un->un_sense_isbusy 	= 1;
26712 	rqs_pktp->pkt_resid  	= 0;
26713 	rqs_pktp->pkt_reason 	= 0;
26714 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26715 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26716 
26717 	mutex_exit(SD_MUTEX(un));
26718 
26719 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26720 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26721 
26722 	/*
26723 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26724 	 * axle - it has a call into us!
26725 	 */
26726 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26727 		SD_INFO(SD_LOG_COMMON, un,
26728 		    "sd_send_polled_RQS: RQS failed\n");
26729 	}
26730 
26731 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26732 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26733 
26734 	mutex_enter(SD_MUTEX(un));
26735 	un->un_sense_isbusy = 0;
26736 	mutex_exit(SD_MUTEX(un));
26737 
26738 	return (ret_val);
26739 }
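
/*
 * Illustrative sketch (not part of the driver): the un_sense_isbusy
 * handshake above is a mutex-guarded try-acquire of the single
 * pre-allocated request sense packet, equivalent in shape to the
 * hypothetical helper below.
 */
#if 0	/* example only; excluded from the build */
static int
sd_example_try_acquire(kmutex_t *mp, int *busyp)
{
	mutex_enter(mp);
	if (*busyp) {
		mutex_exit(mp);
		return (SD_FAILURE);	/* in use; caller must retry */
	}
	*busyp = 1;			/* claim it, then drop the lock */
	mutex_exit(mp);
	return (SD_SUCCESS);
}
#endif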
26740 
26741 /*
26742  * Defines needed for localized version of the scsi_poll routine.
26743  */
26744 #define	SD_CSEC		10000			/* usecs */
26745 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
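
/*
 * Unit arithmetic (illustrative, not driver code): SD_CSEC is one
 * polling slot expressed in microseconds (10000 usec == 10 msec), so
 * SD_SEC_TO_CSEC is 1000000/10000 == 100 slots per second.  The
 * hypothetical helper below shows the budget computed by
 * sd_ddi_scsi_poll(): a 60 second pkt_time yields 6000 10-msec slots.
 */
#if 0	/* example only; excluded from the build */
static int
sd_example_poll_slots(int pkt_time_secs)
{
	return (pkt_time_secs * SD_SEC_TO_CSEC);	/* 60 -> 6000 */
}
#endif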
26746 
26747 
26748 /*
26749  *    Function: sd_ddi_scsi_poll()
26750  *
26751  * Description: Localized version of the scsi_poll routine.  The purpose is to
26752  *		send a scsi_pkt to a device as a polled command.  This version
26753  *		is to ensure more robust handling of transport errors.
26754  *		Specifically this routine cures not ready, coming ready
26755  *		Specifically, this routine handles the not-ready to ready
26756  *		transition seen during power-up and reset of Sonoma devices.
26757  *		This can take up to 45 seconds for power-on and 20 seconds
26758  *		for reset of a Sonoma LUN.
26759  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26760  *
26761  * Return Code: 0 - Command completed successfully with good status
26762  *             -1 - Command failed.
26763  *
26764  */
26765 
26766 static int
26767 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26768 {
26769 	int busy_count;
26770 	int timeout;
26771 	int rval = SD_FAILURE;
26772 	int savef;
26773 	uint8_t *sensep;
26774 	long savet;
26775 	void (*savec)();
26776 	/*
26777 	 * The following is defined in machdep.c and is used in determining if
26778 	 * the scsi transport system will do polled I/O instead of interrupt
26779 	 * I/O when called from xx_dump().
26780 	 */
26781 	extern int do_polled_io;
26782 
26783 	/*
26784 	 * save old flags in pkt, to restore at end
26785 	 */
26786 	savef = pkt->pkt_flags;
26787 	savec = pkt->pkt_comp;
26788 	savet = pkt->pkt_time;
26789 
26790 	pkt->pkt_flags |= FLAG_NOINTR;
26791 
26792 	/*
26793 	 * XXX there is nothing in the SCSA spec that states that we should not
26794 	 * do a callback for polled cmds; however, removing this will break sd
26795 	 * and probably other target drivers
26796 	 */
26797 	pkt->pkt_comp = NULL;
26798 
26799 	/*
26800 	 * we don't like a polled command without timeout.
26801 	 * 60 seconds seems long enough.
26802 	 */
26803 	if (pkt->pkt_time == 0) {
26804 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26805 	}
26806 
26807 	/*
26808 	 * Send polled cmd.
26809 	 *
26810 	 * We do some error recovery for various errors.  TRAN_BUSY,
26811 	 * queue full, and non-dispatched commands are retried every 10 msec,
26812 	 * as they are typically transient failures.  Busy status and Not
26813 	 * Ready are retried every second, as these conditions take a while
26814 	 * to change.  Unit Attention is retried up to pkt_time (default 60)
26815 	 * times with no delay.
26816 	 */
26817 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
26818 
26819 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26820 		int rc;
26821 		int poll_delay;
26822 
26823 		/*
26824 		 * Initialize pkt status variables.
26825 		 */
26826 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26827 
26828 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26829 			if (rc != TRAN_BUSY) {
26830 				/* Transport failed - give up. */
26831 				break;
26832 			} else {
26833 				/* Transport busy - try again. */
26834 				poll_delay = 1 * SD_CSEC; /* 10 msec */
26835 			}
26836 		} else {
26837 			/*
26838 			 * Transport accepted - check pkt status.
26839 			 */
26840 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26841 			if (pkt->pkt_reason == CMD_CMPLT &&
26842 			    rc == STATUS_CHECK &&
26843 			    pkt->pkt_state & STATE_ARQ_DONE) {
26844 				struct scsi_arq_status *arqstat =
26845 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26846 
26847 				sensep = (uint8_t *)&arqstat->sts_sensedata;
26848 			} else {
26849 				sensep = NULL;
26850 			}
26851 
26852 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26853 			    (rc == STATUS_GOOD)) {
26854 				/* No error - we're done */
26855 				rval = SD_SUCCESS;
26856 				break;
26857 
26858 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26859 				/* Lost connection - give up */
26860 				break;
26861 
26862 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26863 			    (pkt->pkt_state == 0)) {
26864 				/* Pkt not dispatched - try again. */
26865 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26866 
26867 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26868 			    (rc == STATUS_QFULL)) {
26869 				/* Queue full - try again. */
26870 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26871 
26872 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26873 			    (rc == STATUS_BUSY)) {
26874 				/* Busy - try again. */
26875 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26876 				busy_count += (SD_SEC_TO_CSEC - 1);
26877 
26878 			} else if ((sensep != NULL) &&
26879 			    (scsi_sense_key(sensep) ==
26880 				KEY_UNIT_ATTENTION)) {
26881 				/* Unit Attention - try again */
26882 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 sec. */
26883 				continue;
26884 
26885 			} else if ((sensep != NULL) &&
26886 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
26887 			    (scsi_sense_asc(sensep) == 0x04) &&
26888 			    (scsi_sense_ascq(sensep) == 0x01)) {
26889 				/* Not ready -> ready - try again. */
26890 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26891 				busy_count += (SD_SEC_TO_CSEC - 1);
26892 
26893 			} else {
26894 				/* BAD status - give up. */
26895 				break;
26896 			}
26897 		}
26898 
26899 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
26900 		    !do_polled_io) {
26901 			delay(drv_usectohz(poll_delay));
26902 		} else {
26903 			/* we busy wait during cpr_dump or interrupt threads */
26904 			drv_usecwait(poll_delay);
26905 		}
26906 	}
26907 
26908 	pkt->pkt_flags = savef;
26909 	pkt->pkt_comp = savec;
26910 	pkt->pkt_time = savet;
26911 	return (rval);
26912 }
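
/*
 * Worked example (not driver code): the busy_count accounting above.
 * A Busy or not-ready retry delays 100 * SD_CSEC (1 sec.) and adds
 * SD_SEC_TO_CSEC - 1 to busy_count; together with the loop's own
 * increment each such retry consumes exactly SD_SEC_TO_CSEC slots, so
 * a 60 second pkt_time still permits 60 one-second retries.
 */
#if 0	/* example only; excluded from the build */
static int
sd_example_busy_retries(int pkt_time_secs)
{
	int slots_per_retry = (SD_SEC_TO_CSEC - 1) + 1;	/* 100 */

	return ((pkt_time_secs * SD_SEC_TO_CSEC) / slots_per_retry);
}
#endif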
26913 
26914 
26915 /*
26916  *    Function: sd_persistent_reservation_in_read_keys
26917  *
26918  * Description: This routine is the driver entry point for handling CD-ROM
26919  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26920  *		by sending the SCSI-3 PRIN commands to the device.
26921  *		Processes the read keys command response by copying the
26922  *		reservation key information into the user provided buffer.
26923  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26924  *
26925  *   Arguments: un   -  Pointer to soft state struct for the target.
26926  *		usrp -	user provided pointer to multihost Persistent In Read
26927  *			Keys structure (mhioc_inkeys_t)
26928  *		flag -	this argument is a pass through to ddi_copyxxx()
26929  *			directly from the mode argument of ioctl().
26930  *
26931  * Return Code: 0   - Success
26932  *		EACCES
26933  *		ENOTSUP
26934  *		errno return code from sd_send_scsi_cmd()
26935  *
26936  *     Context: Can sleep. Does not return until command is completed.
26937  */
26938 
26939 static int
26940 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26941     mhioc_inkeys_t *usrp, int flag)
26942 {
26943 #ifdef _MULTI_DATAMODEL
26944 	struct mhioc_key_list32	li32;
26945 #endif
26946 	sd_prin_readkeys_t	*in;
26947 	mhioc_inkeys_t		*ptr;
26948 	mhioc_key_list_t	li;
26949 	uchar_t			*data_bufp;
26950 	int 			data_len;
26951 	int			rval;
26952 	size_t			copysz;
26953 
26954 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26955 		return (EINVAL);
26956 	}
26957 	bzero(&li, sizeof (mhioc_key_list_t));
26958 
26959 	/*
26960 	 * Get the listsize from user
26961 	 */
26962 #ifdef _MULTI_DATAMODEL
26963 
26964 	switch (ddi_model_convert_from(flag & FMODELS)) {
26965 	case DDI_MODEL_ILP32:
26966 		copysz = sizeof (struct mhioc_key_list32);
26967 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26968 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26969 			    "sd_persistent_reservation_in_read_keys: "
26970 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26971 			rval = EFAULT;
26972 			goto done;
26973 		}
26974 		li.listsize = li32.listsize;
26975 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26976 		break;
26977 
26978 	case DDI_MODEL_NONE:
26979 		copysz = sizeof (mhioc_key_list_t);
26980 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26981 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26982 			    "sd_persistent_reservation_in_read_keys: "
26983 			    "failed ddi_copyin: mhioc_key_list_t\n");
26984 			rval = EFAULT;
26985 			goto done;
26986 		}
26987 		break;
26988 	}
26989 
26990 #else /* ! _MULTI_DATAMODEL */
26991 	copysz = sizeof (mhioc_key_list_t);
26992 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26993 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26994 		    "sd_persistent_reservation_in_read_keys: "
26995 		    "failed ddi_copyin: mhioc_key_list_t\n");
26996 		rval = EFAULT;
26997 		goto done;
26998 	}
26999 #endif
27000 
27001 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
27002 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
27003 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27004 
27005 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
27006 	    data_len, data_bufp)) != 0) {
27007 		goto done;
27008 	}
27009 	in = (sd_prin_readkeys_t *)data_bufp;
27010 	ptr->generation = BE_32(in->generation);
27011 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
27012 
27013 	/*
27014 	 * Return the min(listsize, listlen) keys
27015 	 */
27016 #ifdef _MULTI_DATAMODEL
27017 
27018 	switch (ddi_model_convert_from(flag & FMODELS)) {
27019 	case DDI_MODEL_ILP32:
27020 		li32.listlen = li.listlen;
27021 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
27022 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27023 			    "sd_persistent_reservation_in_read_keys: "
27024 			    "failed ddi_copyout: mhioc_key_list32_t\n");
27025 			rval = EFAULT;
27026 			goto done;
27027 		}
27028 		break;
27029 
27030 	case DDI_MODEL_NONE:
27031 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27032 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27033 			    "sd_persistent_reservation_in_read_keys: "
27034 			    "failed ddi_copyout: mhioc_key_list_t\n");
27035 			rval = EFAULT;
27036 			goto done;
27037 		}
27038 		break;
27039 	}
27040 
27041 #else /* ! _MULTI_DATAMODEL */
27042 
27043 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27044 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27045 		    "sd_persistent_reservation_in_read_keys: "
27046 		    "failed ddi_copyout: mhioc_key_list_t\n");
27047 		rval = EFAULT;
27048 		goto done;
27049 	}
27050 
27051 #endif /* _MULTI_DATAMODEL */
27052 
27053 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
27054 	    li.listsize * MHIOC_RESV_KEY_SIZE);
27055 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
27056 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27057 		    "sd_persistent_reservation_in_read_keys: "
27058 		    "failed ddi_copyout: keylist\n");
27059 		rval = EFAULT;
27060 	}
27061 done:
27062 	kmem_free(data_bufp, data_len);
27063 	return (rval);
27064 }
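
/*
 * Worked example (not driver code): the PRIN READ KEYS response is an
 * 8-byte header (4-byte generation, 4-byte additional length) followed
 * by one MHIOC_RESV_KEY_SIZE (8-byte) key per registrant.  The data_len
 * computed above is therefore the header portion of sd_prin_readkeys_t
 * (its size minus the keylist placeholder member) plus the key area.
 */
#if 0	/* example only; excluded from the build */
static int
sd_example_prin_keys_len(int listsize)
{
	/* listsize of 4 keys: 8 + (4 * 8) == 40 bytes */
	return ((int)(sizeof (sd_prin_readkeys_t) - sizeof (caddr_t)) +
	    (listsize * MHIOC_RESV_KEY_SIZE));
}
#endif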
27065 
27066 
27067 /*
27068  *    Function: sd_persistent_reservation_in_read_resv
27069  *
27070  * Description: This routine is the driver entry point for handling CD-ROM
27071  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
27072  *		by sending the SCSI-3 PRIN commands to the device.
27073  *		Process the read persistent reservations command response by
27074  *		copying the reservation information into the user provided
27075  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
27076  *
27077  *   Arguments: un   -  Pointer to soft state struct for the target.
27078  *		usrp -	user provided pointer to multihost Persistent In Read
27079  *			Reservations structure (mhioc_inresvs_t)
27080  *		flag -	this argument is a pass through to ddi_copyxxx()
27081  *			directly from the mode argument of ioctl().
27082  *
27083  * Return Code: 0   - Success
27084  *		EACCES
27085  *		ENOTSUP
27086  *		errno return code from sd_send_scsi_cmd()
27087  *
27088  *     Context: Can sleep. Does not return until command is completed.
27089  */
27090 
27091 static int
27092 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
27093     mhioc_inresvs_t *usrp, int flag)
27094 {
27095 #ifdef _MULTI_DATAMODEL
27096 	struct mhioc_resv_desc_list32 resvlist32;
27097 #endif
27098 	sd_prin_readresv_t	*in;
27099 	mhioc_inresvs_t		*ptr;
27100 	sd_readresv_desc_t	*readresv_ptr;
27101 	mhioc_resv_desc_list_t	resvlist;
27102 	mhioc_resv_desc_t 	resvdesc;
27103 	uchar_t			*data_bufp;
27104 	int 			data_len;
27105 	int			rval;
27106 	int			i;
27107 	size_t			copysz;
27108 	mhioc_resv_desc_t	*bufp;
27109 
27110 	if ((ptr = usrp) == NULL) {
27111 		return (EINVAL);
27112 	}
27113 
27114 	/*
27115 	 * Get the listsize from user
27116 	 */
27117 #ifdef _MULTI_DATAMODEL
27118 	switch (ddi_model_convert_from(flag & FMODELS)) {
27119 	case DDI_MODEL_ILP32:
27120 		copysz = sizeof (struct mhioc_resv_desc_list32);
27121 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
27122 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27123 			    "sd_persistent_reservation_in_read_resv: "
27124 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27125 			rval = EFAULT;
27126 			goto done;
27127 		}
27128 		resvlist.listsize = resvlist32.listsize;
27129 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
27130 		break;
27131 
27132 	case DDI_MODEL_NONE:
27133 		copysz = sizeof (mhioc_resv_desc_list_t);
27134 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27135 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27136 			    "sd_persistent_reservation_in_read_resv: "
27137 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27138 			rval = EFAULT;
27139 			goto done;
27140 		}
27141 		break;
27142 	}
27143 #else /* ! _MULTI_DATAMODEL */
27144 	copysz = sizeof (mhioc_resv_desc_list_t);
27145 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27146 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27147 		    "sd_persistent_reservation_in_read_resv: "
27148 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27149 		rval = EFAULT;
27150 		goto done;
27151 	}
27152 #endif /* ! _MULTI_DATAMODEL */
27153 
27154 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
27155 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
27156 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27157 
27158 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
27159 	    data_len, data_bufp)) != 0) {
27160 		goto done;
27161 	}
27162 	in = (sd_prin_readresv_t *)data_bufp;
27163 	ptr->generation = BE_32(in->generation);
27164 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
27165 
27166 	/*
27167 	 * Return the min(listsize, listlen) reservation descriptors
27168 	 */
27169 #ifdef _MULTI_DATAMODEL
27170 
27171 	switch (ddi_model_convert_from(flag & FMODELS)) {
27172 	case DDI_MODEL_ILP32:
27173 		resvlist32.listlen = resvlist.listlen;
27174 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27175 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27176 			    "sd_persistent_reservation_in_read_resv: "
27177 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27178 			rval = EFAULT;
27179 			goto done;
27180 		}
27181 		break;
27182 
27183 	case DDI_MODEL_NONE:
27184 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27185 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27186 			    "sd_persistent_reservation_in_read_resv: "
27187 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27188 			rval = EFAULT;
27189 			goto done;
27190 		}
27191 		break;
27192 	}
27193 
27194 #else /* ! _MULTI_DATAMODEL */
27195 
27196 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27197 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27198 		    "sd_persistent_reservation_in_read_resv: "
27199 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27200 		rval = EFAULT;
27201 		goto done;
27202 	}
27203 
27204 #endif /* ! _MULTI_DATAMODEL */
27205 
27206 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27207 	bufp = resvlist.list;
27208 	copysz = sizeof (mhioc_resv_desc_t);
27209 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27210 	    i++, readresv_ptr++, bufp++) {
27211 
27212 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27213 		    MHIOC_RESV_KEY_SIZE);
27214 		resvdesc.type  = readresv_ptr->type;
27215 		resvdesc.scope = readresv_ptr->scope;
27216 		resvdesc.scope_specific_addr =
27217 		    BE_32(readresv_ptr->scope_specific_addr);
27218 
27219 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27220 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27221 			    "sd_persistent_reservation_in_read_resv: "
27222 			    "failed ddi_copyout: resvlist\n");
27223 			rval = EFAULT;
27224 			goto done;
27225 		}
27226 	}
27227 done:
27228 	kmem_free(data_bufp, data_len);
27229 	return (rval);
27230 }
27231 
27232 
27233 /*
27234  *    Function: sr_change_blkmode()
27235  *
27236  * Description: This routine is the driver entry point for handling CD-ROM
27237  *		block mode ioctl requests. Support for returning and changing
27238  *		the current block size in use by the device is implemented. The
27239  *		LBA size is changed via a MODE SELECT Block Descriptor.
27240  *
27241  *		This routine issues a mode sense with an allocation length of
27242  *		12 bytes for the mode page header and a single block descriptor.
27243  *
27244  *   Arguments: dev - the device 'dev_t'
27245  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27246  *		      CDROMSBLKMODE (set)
27247  *		data - current block size or requested block size
27248  *		flag - this argument is a pass through to ddi_copyxxx() directly
27249  *		       from the mode argument of ioctl().
27250  *
27251  * Return Code: the code returned by sd_send_scsi_cmd()
27252  *		EINVAL if invalid arguments are provided
27253  *		EFAULT if ddi_copyxxx() fails
27254  *		ENXIO if fail ddi_get_soft_state
27255  *		EIO if invalid mode sense block descriptor length
27256  *
27257  */
27258 
27259 static int
27260 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
27261 {
27262 	struct sd_lun			*un = NULL;
27263 	struct mode_header		*sense_mhp, *select_mhp;
27264 	struct block_descriptor		*sense_desc, *select_desc;
27265 	int				current_bsize;
27266 	int				rval = EINVAL;
27267 	uchar_t				*sense = NULL;
27268 	uchar_t				*select = NULL;
27269 
27270 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
27271 
27272 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27273 		return (ENXIO);
27274 	}
27275 
27276 	/*
27277 	 * The block length is changed via the Mode Select block descriptor, the
27278 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
27279 	 * required as part of this routine. Therefore the mode sense allocation
27280 	 * length is specified to be the length of a mode page header and a
27281 	 * block descriptor.
27282 	 */
27283 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27284 
27285 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27286 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
27287 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27288 		    "sr_change_blkmode: Mode Sense Failed\n");
27289 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27290 		return (rval);
27291 	}
27292 
27293 	/* Check the block descriptor len to handle only 1 block descriptor */
27294 	sense_mhp = (struct mode_header *)sense;
27295 	if ((sense_mhp->bdesc_length == 0) ||
27296 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
27297 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27298 		    "sr_change_blkmode: Mode Sense returned invalid block"
27299 		    " descriptor length\n");
27300 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27301 		return (EIO);
27302 	}
27303 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
27304 	current_bsize = ((sense_desc->blksize_hi << 16) |
27305 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
27306 
27307 	/* Process command */
27308 	switch (cmd) {
27309 	case CDROMGBLKMODE:
27310 		/* Return the block size obtained during the mode sense */
27311 		if (ddi_copyout(&current_bsize, (void *)data,
27312 		    sizeof (int), flag) != 0)
27313 			rval = EFAULT;
27314 		break;
27315 	case CDROMSBLKMODE:
27316 		/* Validate the requested block size */
27317 		switch (data) {
27318 		case CDROM_BLK_512:
27319 		case CDROM_BLK_1024:
27320 		case CDROM_BLK_2048:
27321 		case CDROM_BLK_2056:
27322 		case CDROM_BLK_2336:
27323 		case CDROM_BLK_2340:
27324 		case CDROM_BLK_2352:
27325 		case CDROM_BLK_2368:
27326 		case CDROM_BLK_2448:
27327 		case CDROM_BLK_2646:
27328 		case CDROM_BLK_2647:
27329 			break;
27330 		default:
27331 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27332 			    "sr_change_blkmode: "
27333 			    "Block Size '%ld' Not Supported\n", data);
27334 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27335 			return (EINVAL);
27336 		}
27337 
27338 		/*
27339 		 * The current block size matches the requested block size so
27340 		 * there is no need to send the mode select to change the size
27341 		 */
27342 		if (current_bsize == data) {
27343 			break;
27344 		}
27345 
27346 		/* Build the select data for the requested block size */
27347 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27348 		select_mhp = (struct mode_header *)select;
27349 		select_desc =
27350 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
27351 		/*
27352 		 * The LBA size is changed via the block descriptor, so the
27353 		 * descriptor is built according to the user data
27354 		 */
27355 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
27356 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
27357 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
27358 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
27359 
27360 		/* Send the mode select for the requested block size */
27361 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27362 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27363 		    SD_PATH_STANDARD)) != 0) {
27364 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27365 			    "sr_change_blkmode: Mode Select Failed\n");
27366 			/*
27367 			 * The mode select failed for the requested block size,
27368 			 * so reset the data for the original block size and
27369 			 * send it to the target. The error is indicated by the
27370 			 * return value for the failed mode select.
27371 			 */
27372 			select_desc->blksize_hi  = sense_desc->blksize_hi;
27373 			select_desc->blksize_mid = sense_desc->blksize_mid;
27374 			select_desc->blksize_lo  = sense_desc->blksize_lo;
27375 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27376 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27377 			    SD_PATH_STANDARD);
27378 		} else {
27379 			ASSERT(!mutex_owned(SD_MUTEX(un)));
27380 			mutex_enter(SD_MUTEX(un));
27381 			sd_update_block_info(un, (uint32_t)data, 0);
27382 
27383 			mutex_exit(SD_MUTEX(un));
27384 		}
27385 		break;
27386 	default:
27387 		/* should not reach here, but check anyway */
27388 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27389 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
27390 		rval = EINVAL;
27391 		break;
27392 	}
27393 
27394 	if (select) {
27395 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
27396 	}
27397 	if (sense) {
27398 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27399 	}
27400 	return (rval);
27401 }
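
/*
 * Illustrative sketch (not driver code): the mode block descriptor
 * carries the LBA size as a 24-bit big-endian value split across three
 * bytes, assembled and split exactly as sr_change_blkmode() does above.
 */
#if 0	/* example only; excluded from the build */
static int
sd_example_blksize_roundtrip(void)
{
	/* Encoding 2048 (0x000800): hi=0x00, mid=0x08, lo=0x00 */
	uchar_t hi  = (uchar_t)((2048 & 0x00ff0000) >> 16);
	uchar_t mid = (uchar_t)((2048 & 0x0000ff00) >> 8);
	uchar_t lo  = (uchar_t)(2048 & 0x000000ff);

	return ((hi << 16) | (mid << 8) | lo);		/* 2048 again */
}
#endif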
27402 
27403 
27404 /*
27405  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27406  * implement driver support for getting and setting the CD speed. The command
27407  * set used will be based on the device type. If the device has not been
27408  * identified as MMC the Toshiba vendor specific mode page will be used. If
27409  * the device is MMC but does not support the Real Time Streaming feature
27410  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27411  * be used to read the speed.
27412  */
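
/*
 * A minimal sketch of that selection (hypothetical helper, not driver
 * code), assuming the un_f_mmc_cap flag used elsewhere in this file is
 * the MMC indicator the note above refers to.
 */
#if 0	/* example only; excluded from the build */
static int
sd_example_speed_dispatch(dev_t dev, struct sd_lun *un, int cmd,
    intptr_t data, int flag)
{
	if (un->un_f_mmc_cap == TRUE)
		return (sr_atapi_change_speed(dev, cmd, data, flag));
	return (sr_change_speed(dev, cmd, data, flag));
}
#endif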
27413 
27414 /*
27415  *    Function: sr_change_speed()
27416  *
27417  * Description: This routine is the driver entry point for handling CD-ROM
27418  *		drive speed ioctl requests for devices supporting the Toshiba
27419  *		vendor specific drive speed mode page. Support for returning
27420  *		and changing the current drive speed in use by the device is
27421  *		implemented.
27422  *
27423  *   Arguments: dev - the device 'dev_t'
27424  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27425  *		      CDROMSDRVSPEED (set)
27426  *		data - current drive speed or requested drive speed
27427  *		flag - this argument is a pass through to ddi_copyxxx() directly
27428  *		       from the mode argument of ioctl().
27429  *
27430  * Return Code: the code returned by sd_send_scsi_cmd()
27431  *		EINVAL if invalid arguments are provided
27432  *		EFAULT if ddi_copyxxx() fails
27433  *		ENXIO if fail ddi_get_soft_state
27434  *		EIO if invalid mode sense block descriptor length
27435  */
27436 
27437 static int
27438 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27439 {
27440 	struct sd_lun			*un = NULL;
27441 	struct mode_header		*sense_mhp, *select_mhp;
27442 	struct mode_speed		*sense_page, *select_page;
27443 	int				current_speed;
27444 	int				rval = EINVAL;
27445 	int				bd_len;
27446 	uchar_t				*sense = NULL;
27447 	uchar_t				*select = NULL;
27448 
27449 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27450 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27451 		return (ENXIO);
27452 	}
27453 
27454 	/*
27455 	 * Note: The drive speed is being modified here according to a Toshiba
27456 	 * vendor specific mode page (0x31).
27457 	 */
27458 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27459 
27460 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27461 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27462 	    SD_PATH_STANDARD)) != 0) {
27463 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27464 		    "sr_change_speed: Mode Sense Failed\n");
27465 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27466 		return (rval);
27467 	}
27468 	sense_mhp  = (struct mode_header *)sense;
27469 
27470 	/* Check the block descriptor len to handle only 1 block descriptor */
27471 	bd_len = sense_mhp->bdesc_length;
27472 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27473 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27474 		    "sr_change_speed: Mode Sense returned invalid block "
27475 		    "descriptor length\n");
27476 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27477 		return (EIO);
27478 	}
27479 
27480 	sense_page = (struct mode_speed *)
27481 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27482 	current_speed = sense_page->speed;
27483 
27484 	/* Process command */
27485 	switch (cmd) {
27486 	case CDROMGDRVSPEED:
27487 		/* Return the drive speed obtained during the mode sense */
27488 		if (current_speed == 0x2) {
27489 			current_speed = CDROM_TWELVE_SPEED;
27490 		}
27491 		if (ddi_copyout(&current_speed, (void *)data,
27492 		    sizeof (int), flag) != 0) {
27493 			rval = EFAULT;
27494 		}
27495 		break;
27496 	case CDROMSDRVSPEED:
27497 		/* Validate the requested drive speed */
27498 		switch ((uchar_t)data) {
27499 		case CDROM_TWELVE_SPEED:
27500 			data = 0x2;
27501 			/*FALLTHROUGH*/
27502 		case CDROM_NORMAL_SPEED:
27503 		case CDROM_DOUBLE_SPEED:
27504 		case CDROM_QUAD_SPEED:
27505 		case CDROM_MAXIMUM_SPEED:
27506 			break;
27507 		default:
27508 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27509 			    "sr_change_speed: "
27510 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27511 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27512 			return (EINVAL);
27513 		}
27514 
27515 		/*
27516 		 * The current drive speed matches the requested drive speed so
27517 		 * there is no need to send the mode select to change the speed
27518 		 */
27519 		if (current_speed == data) {
27520 			break;
27521 		}
27522 
27523 		/* Build the select data for the requested drive speed */
27524 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27525 		select_mhp = (struct mode_header *)select;
27526 		select_mhp->bdesc_length = 0;
27527 		select_page =
27528 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27531 		select_page->mode_page.code = CDROM_MODE_SPEED;
27532 		select_page->mode_page.length = 2;
27533 		select_page->speed = (uchar_t)data;
27534 
27535 		/* Send the mode select for the requested drive speed */
27536 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27537 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27538 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27539 			/*
27540 			 * The mode select failed for the requested drive speed,
27541 			 * so reset the data for the original drive speed and
27542 			 * send it to the target. The error is indicated by the
27543 			 * return value for the failed mode select.
27544 			 */
27545 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27546 			    "sr_change_speed: Mode Select Failed\n");
27547 			select_page->speed = sense_page->speed;
27548 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27549 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27550 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27551 		}
27552 		break;
27553 	default:
27554 		/* should not reach here, but check anyway */
27555 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27556 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27557 		rval = EINVAL;
27558 		break;
27559 	}
27560 
27561 	if (select) {
27562 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27563 	}
27564 	if (sense) {
27565 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27566 	}
27567 
27568 	return (rval);
27569 }
27570 
27571 
27572 /*
27573  *    Function: sr_atapi_change_speed()
27574  *
27575  * Description: This routine is the driver entry point for handling CD-ROM
27576  *		drive speed ioctl requests for MMC devices that do not support
27577  *		the Real Time Streaming feature (0x107).
27578  *
27579  *		Note: This routine will use the SET SPEED command which may not
27580  *		be supported by all devices.
27581  *
27582  *   Arguments: dev- the device 'dev_t'
27583  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27584  *		     CDROMSDRVSPEED (set)
27585  *		data- current drive speed or requested drive speed
27586  *		flag- this argument is a pass through to ddi_copyxxx() directly
27587  *		      from the mode argument of ioctl().
27588  *
27589  * Return Code: the code returned by sd_send_scsi_cmd()
27590  *		EINVAL if invalid arguments are provided
27591  *		EFAULT if ddi_copyxxx() fails
27592  *		ENXIO if fail ddi_get_soft_state
27593  *		EIO if invalid mode sense block descriptor length
27594  */
27595 
27596 static int
27597 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27598 {
27599 	struct sd_lun			*un;
27600 	struct uscsi_cmd		*com = NULL;
27601 	struct mode_header_grp2		*sense_mhp;
27602 	uchar_t				*sense_page;
27603 	uchar_t				*sense = NULL;
27604 	char				cdb[CDB_GROUP5];
27605 	int				bd_len;
27606 	int				current_speed = 0;
27607 	int				max_speed = 0;
27608 	int				rval;
27609 
27610 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27611 
27612 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27613 		return (ENXIO);
27614 	}
27615 
27616 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27617 
27618 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
27619 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27620 	    SD_PATH_STANDARD)) != 0) {
27621 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27622 		    "sr_atapi_change_speed: Mode Sense Failed\n");
27623 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27624 		return (rval);
27625 	}
27626 
27627 	/* Check the block descriptor len to handle only 1 block descriptor */
27628 	sense_mhp = (struct mode_header_grp2 *)sense;
27629 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27630 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27631 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27632 		    "sr_atapi_change_speed: Mode Sense returned invalid "
27633 		    "block descriptor length\n");
27634 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27635 		return (EIO);
27636 	}
27637 
27638 	/* Calculate the current and maximum drive speeds */
27639 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27640 	current_speed = (sense_page[14] << 8) | sense_page[15];
27641 	max_speed = (sense_page[8] << 8) | sense_page[9];
27642 
27643 	/* Process the command */
27644 	switch (cmd) {
27645 	case CDROMGDRVSPEED:
27646 		current_speed /= SD_SPEED_1X;
27647 		if (ddi_copyout(&current_speed, (void *)data,
27648 		    sizeof (int), flag) != 0)
27649 			rval = EFAULT;
27650 		break;
27651 	case CDROMSDRVSPEED:
27652 		/* Convert the speed code to KB/sec */
27653 		switch ((uchar_t)data) {
27654 		case CDROM_NORMAL_SPEED:
27655 			current_speed = SD_SPEED_1X;
27656 			break;
27657 		case CDROM_DOUBLE_SPEED:
27658 			current_speed = 2 * SD_SPEED_1X;
27659 			break;
27660 		case CDROM_QUAD_SPEED:
27661 			current_speed = 4 * SD_SPEED_1X;
27662 			break;
27663 		case CDROM_TWELVE_SPEED:
27664 			current_speed = 12 * SD_SPEED_1X;
27665 			break;
27666 		case CDROM_MAXIMUM_SPEED:
27667 			current_speed = 0xffff;
27668 			break;
27669 		default:
27670 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27671 			    "sr_atapi_change_speed: invalid drive speed %d\n",
27672 			    (uchar_t)data);
27673 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27674 			return (EINVAL);
27675 		}
27676 
27677 		/* Check the request against the drive's max speed. */
27678 		if (current_speed != 0xffff) {
27679 			if (current_speed > max_speed) {
27680 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27681 				return (EINVAL);
27682 			}
27683 		}
27684 
27685 		/*
27686 		 * Build and send the SET SPEED command
27687 		 *
27688 		 * Note: The SET SPEED (0xBB) command used in this routine is
27689 		 * obsolete per the SCSI MMC spec but still supported in the
27690 		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI,
27691 		 * therefore the command is still implemented in this routine.
27692 		 */
27693 		bzero(cdb, sizeof (cdb));
27694 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
27695 		cdb[2] = (uchar_t)(current_speed >> 8);
27696 		cdb[3] = (uchar_t)current_speed;
27697 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27698 		com->uscsi_cdb	   = (caddr_t)cdb;
27699 		com->uscsi_cdblen  = CDB_GROUP5;
27700 		com->uscsi_bufaddr = NULL;
27701 		com->uscsi_buflen  = 0;
27702 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
27703 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
27704 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27705 		break;
27706 	default:
27707 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27708 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27709 		rval = EINVAL;
27710 	}
27711 
27712 	if (sense) {
27713 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27714 	}
27715 	if (com) {
27716 		kmem_free(com, sizeof (*com));
27717 	}
27718 	return (rval);
27719 }
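
/*
 * Worked example (not driver code), assuming SD_SPEED_1X is the 1x
 * CD-ROM rate in kB/sec: the mode page 0x2A speed fields are expressed
 * in kB/sec, so the conversions above are simple multiples of that
 * rate, and CDROMGDRVSPEED divides back by SD_SPEED_1X.
 */
#if 0	/* example only; excluded from the build */
static int
sd_example_x_rating_to_kbps(int x_rating)
{
	return (x_rating * SD_SPEED_1X);	/* e.g. 4x -> 4 * 1x rate */
}
#endif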
27720 
27721 
27722 /*
27723  *    Function: sr_pause_resume()
27724  *
27725  * Description: This routine is the driver entry point for handling CD-ROM
27726  *		pause/resume ioctl requests. This only affects the audio play
27727  *		operation.
27728  *
27729  *   Arguments: dev - the device 'dev_t'
27730  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27731  *		      for setting the resume bit of the cdb.
27732  *
27733  * Return Code: the code returned by sd_send_scsi_cmd()
27734  *		EINVAL if invalid mode specified
27735  *
27736  */
27737 
27738 static int
27739 sr_pause_resume(dev_t dev, int cmd)
27740 {
27741 	struct sd_lun		*un;
27742 	struct uscsi_cmd	*com;
27743 	char			cdb[CDB_GROUP1];
27744 	int			rval;
27745 
27746 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27747 		return (ENXIO);
27748 	}
27749 
27750 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27751 	bzero(cdb, CDB_GROUP1);
27752 	cdb[0] = SCMD_PAUSE_RESUME;
27753 	switch (cmd) {
27754 	case CDROMRESUME:
27755 		cdb[8] = 1;
27756 		break;
27757 	case CDROMPAUSE:
27758 		cdb[8] = 0;
27759 		break;
27760 	default:
27761 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27762 		    " Command '%x' Not Supported\n", cmd);
27763 		rval = EINVAL;
27764 		goto done;
27765 	}
27766 
27767 	com->uscsi_cdb    = cdb;
27768 	com->uscsi_cdblen = CDB_GROUP1;
27769 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27770 
27771 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27772 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27773 
27774 done:
27775 	kmem_free(com, sizeof (*com));
27776 	return (rval);
27777 }
27778 
27779 
27780 /*
27781  *    Function: sr_play_msf()
27782  *
27783  * Description: This routine is the driver entry point for handling CD-ROM
27784  *		ioctl requests to output the audio signals at the specified
27785  *		starting address and continue the audio play until the specified
27786  *		ending address (CDROMPLAYMSF). The address is in Minute Second
27787  *		Frame (MSF) format.
27788  *
27789  *   Arguments: dev	- the device 'dev_t'
27790  *		data	- pointer to user provided audio msf structure,
27791  *		          specifying start/end addresses.
27792  *		flag	- this argument is a pass through to ddi_copyxxx()
27793  *		          directly from the mode argument of ioctl().
27794  *
27795  * Return Code: the code returned by sd_send_scsi_cmd()
27796  *		EFAULT if ddi_copyxxx() fails
27797  *		ENXIO if fail ddi_get_soft_state
27798  *		EINVAL if data pointer is NULL
27799  */
27800 
27801 static int
27802 sr_play_msf(dev_t dev, caddr_t data, int flag)
27803 {
27804 	struct sd_lun		*un;
27805 	struct uscsi_cmd	*com;
27806 	struct cdrom_msf	msf_struct;
27807 	struct cdrom_msf	*msf = &msf_struct;
27808 	char			cdb[CDB_GROUP1];
27809 	int			rval;
27810 
27811 	if (data == NULL) {
27812 		return (EINVAL);
27813 	}
27814 
27815 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27816 		return (ENXIO);
27817 	}
27818 
27819 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27820 		return (EFAULT);
27821 	}
27822 
27823 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27824 	bzero(cdb, CDB_GROUP1);
27825 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27826 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27827 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27828 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27829 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27830 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27831 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27832 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27833 	} else {
27834 		cdb[3] = msf->cdmsf_min0;
27835 		cdb[4] = msf->cdmsf_sec0;
27836 		cdb[5] = msf->cdmsf_frame0;
27837 		cdb[6] = msf->cdmsf_min1;
27838 		cdb[7] = msf->cdmsf_sec1;
27839 		cdb[8] = msf->cdmsf_frame1;
27840 	}
27841 	com->uscsi_cdb    = cdb;
27842 	com->uscsi_cdblen = CDB_GROUP1;
27843 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27844 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27845 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27846 	kmem_free(com, sizeof (*com));
27847 	return (rval);
27848 }
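
/*
 * Illustrative sketch (not driver code, with hypothetical EX_ macros
 * standing in for the real BYTE_TO_BCD/BCD_TO_BYTE): drives flagged
 * with un_f_cfg_playmsf_bcd expect MSF fields in binary-coded decimal,
 * one decimal digit per nibble.
 */
#if 0	/* example only; excluded from the build */
#define	EX_BYTE_TO_BCD(x)	((((x) / 10) << 4) | ((x) % 10))
#define	EX_BCD_TO_BYTE(x)	(((((x) >> 4) & 0xf) * 10) + ((x) & 0xf))

static uchar_t
sd_example_minute_to_bcd(uchar_t minute)
{
	/* 12 minutes encodes as 0x12; EX_BCD_TO_BYTE(0x12) gives 12 */
	return (EX_BYTE_TO_BCD(minute));
}
#endif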
27849 
27850 
27851 /*
27852  *    Function: sr_play_trkind()
27853  *
27854  * Description: This routine is the driver entry point for handling CD-ROM
27855  *		ioctl requests to output the audio signals at the specified
27856  *		starting address and continue the audio play until the specified
27857  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27858  *		format.
27859  *
27860  *   Arguments: dev	- the device 'dev_t'
27861  *		data	- pointer to user provided audio track/index structure,
27862  *		          specifying start/end addresses.
27863  *		flag	- this argument is a pass through to ddi_copyxxx()
27864  *		          directly from the mode argument of ioctl().
27865  *
27866  * Return Code: the code returned by sd_send_scsi_cmd()
27867  *		EFAULT if ddi_copyxxx() fails
27868  *		ENXIO if fail ddi_get_soft_state
27869  *		EINVAL if data pointer is NULL
27870  */
27871 
27872 static int
27873 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27874 {
27875 	struct cdrom_ti		ti_struct;
27876 	struct cdrom_ti		*ti = &ti_struct;
27877 	struct uscsi_cmd	*com = NULL;
27878 	char			cdb[CDB_GROUP1];
27879 	int			rval;
27880 
27881 	if (data == NULL) {
27882 		return (EINVAL);
27883 	}
27884 
27885 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27886 		return (EFAULT);
27887 	}
27888 
27889 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27890 	bzero(cdb, CDB_GROUP1);
27891 	cdb[0] = SCMD_PLAYAUDIO_TI;
27892 	cdb[4] = ti->cdti_trk0;
27893 	cdb[5] = ti->cdti_ind0;
27894 	cdb[7] = ti->cdti_trk1;
27895 	cdb[8] = ti->cdti_ind1;
27896 	com->uscsi_cdb    = cdb;
27897 	com->uscsi_cdblen = CDB_GROUP1;
27898 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27899 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27900 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27901 	kmem_free(com, sizeof (*com));
27902 	return (rval);
27903 }
27904 
27905 
27906 /*
27907  *    Function: sr_read_all_subcodes()
27908  *
27909  * Description: This routine is the driver entry point for handling CD-ROM
27910  *		ioctl requests to return raw subcode data while the target is
27911  *		playing audio (CDROMSUBCODE).
27912  *
27913  *   Arguments: dev	- the device 'dev_t'
27914  *		data	- pointer to user provided cdrom subcode structure,
27915  *		          specifying the transfer length and address.
27916  *		flag	- this argument is a pass through to ddi_copyxxx()
27917  *		          directly from the mode argument of ioctl().
27918  *
27919  * Return Code: the code returned by sd_send_scsi_cmd()
27920  *		EFAULT if ddi_copyxxx() fails
27921  *		ENXIO if fail ddi_get_soft_state
27922  *		EINVAL if data pointer is NULL
27923  */
27924 
27925 static int
27926 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27927 {
27928 	struct sd_lun		*un = NULL;
27929 	struct uscsi_cmd	*com = NULL;
27930 	struct cdrom_subcode	*subcode = NULL;
27931 	int			rval;
27932 	size_t			buflen;
27933 	char			cdb[CDB_GROUP5];
27934 
27935 #ifdef _MULTI_DATAMODEL
27936 	/* To support ILP32 applications in an LP64 world */
27937 	struct cdrom_subcode32		cdrom_subcode32;
27938 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27939 #endif
27940 	if (data == NULL) {
27941 		return (EINVAL);
27942 	}
27943 
27944 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27945 		return (ENXIO);
27946 	}
27947 
27948 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27949 
27950 #ifdef _MULTI_DATAMODEL
27951 	switch (ddi_model_convert_from(flag & FMODELS)) {
27952 	case DDI_MODEL_ILP32:
27953 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27954 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27955 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27956 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27957 			return (EFAULT);
27958 		}
27959 		/* Convert the ILP32 uscsi data from the application to LP64 */
27960 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27961 		break;
27962 	case DDI_MODEL_NONE:
27963 		if (ddi_copyin(data, subcode,
27964 		    sizeof (struct cdrom_subcode), flag)) {
27965 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27966 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27967 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27968 			return (EFAULT);
27969 		}
27970 		break;
27971 	}
27972 #else /* ! _MULTI_DATAMODEL */
27973 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27974 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27975 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27976 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27977 		return (EFAULT);
27978 	}
27979 #endif /* _MULTI_DATAMODEL */
27980 
27981 	/*
27982 	 * MMC-2 allows at most 3 bytes for the transfer length, so reject
27983 	 * any request whose length does not fit in 3 bytes.
27984 	 */
27985 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27986 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27987 		    "sr_read_all_subcodes: "
27988 		    "cdrom transfer length too large: %d (limit %d)\n",
27989 		    subcode->cdsc_length, 0xFFFFFF);
27990 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27991 		return (EINVAL);
27992 	}
27993 
27994 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
27995 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27996 	bzero(cdb, CDB_GROUP5);
27997 
27998 	if (un->un_f_mmc_cap == TRUE) {
27999 		cdb[0] = (char)SCMD_READ_CD;
28000 		cdb[2] = (char)0xff;
28001 		cdb[3] = (char)0xff;
28002 		cdb[4] = (char)0xff;
28003 		cdb[5] = (char)0xff;
28004 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28005 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28006 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
28007 		cdb[10] = 1;
28008 	} else {
28009 		/*
28010 		 * Note: A vendor specific command (0xDF) is being used here to
28011 		 * request a read of all subcodes.
28012 		 */
28013 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
28014 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
28015 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28016 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28017 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
28018 	}
28019 	com->uscsi_cdb	   = cdb;
28020 	com->uscsi_cdblen  = CDB_GROUP5;
28021 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
28022 	com->uscsi_buflen  = buflen;
28023 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28024 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28025 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28026 	kmem_free(subcode, sizeof (struct cdrom_subcode));
28027 	kmem_free(com, sizeof (*com));
28028 	return (rval);
28029 }
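
/*
 * Worked example (not driver code): the MMC READ CD CDB carries the
 * transfer length in three bytes, so the mask test above simply
 * rejects any cdsc_length with bits set above bit 23.
 */
#if 0	/* example only; excluded from the build */
static int
sd_example_len_fits_3_bytes(uint32_t cdsc_length)
{
	/* 0x01000000 (bit 24 set) is rejected; 0x00FFFFFF passes */
	return ((cdsc_length & 0xFF000000) == 0);
}
#endif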
28030 
28031 
28032 /*
28033  *    Function: sr_read_subchannel()
28034  *
28035  * Description: This routine is the driver entry point for handling CD-ROM
28036  *		ioctl requests to return the Q sub-channel data of the CD
28037  *		current position block. (CDROMSUBCHNL) The data includes the
28038  *		current position block (CDROMSUBCHNL). The data includes the
28039  *		track number, index number, absolute CD-ROM address (LBA or MSF
28040  *		format per the user), track relative CD-ROM address (LBA or MSF
28041  *
28042  *   Arguments: dev	- the device 'dev_t'
28043  *		data	- pointer to user provided cdrom sub-channel structure
28044  *		flag	- this argument is a pass through to ddi_copyxxx()
28045  *		          directly from the mode argument of ioctl().
28046  *
28047  * Return Code: the code returned by sd_send_scsi_cmd()
28048  *		EFAULT if ddi_copyxxx() fails
28049  *		ENXIO if fail ddi_get_soft_state
28050  *		EINVAL if data pointer is NULL
28051  */
28052 
28053 static int
28054 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
28055 {
28056 	struct sd_lun		*un;
28057 	struct uscsi_cmd	*com;
28058 	struct cdrom_subchnl	subchannel;
28059 	struct cdrom_subchnl	*subchnl = &subchannel;
28060 	char			cdb[CDB_GROUP1];
28061 	caddr_t			buffer;
28062 	int			rval;
28063 
28064 	if (data == NULL) {
28065 		return (EINVAL);
28066 	}
28067 
28068 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28069 	    (un->un_state == SD_STATE_OFFLINE)) {
28070 		return (ENXIO);
28071 	}
28072 
28073 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
28074 		return (EFAULT);
28075 	}
28076 
28077 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
28078 	bzero(cdb, CDB_GROUP1);
28079 	cdb[0] = SCMD_READ_SUBCHANNEL;
28080 	/* Set the MSF bit based on the user requested address format */
28081 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
28082 	/*
28083 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data
28084 	 * should be returned.
28085 	 */
28086 	cdb[2] = 0x40;
28087 	/*
28088 	 * Set byte 3 to specify the return data format. A value of 0x01
28089 	 * indicates that the CD-ROM current position should be returned.
28090 	 */
28091 	cdb[3] = 0x01;
28092 	cdb[8] = 0x10;
28093 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28094 	com->uscsi_cdb	   = cdb;
28095 	com->uscsi_cdblen  = CDB_GROUP1;
28096 	com->uscsi_bufaddr = buffer;
28097 	com->uscsi_buflen  = 16;
28098 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28099 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28100 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28101 	if (rval != 0) {
28102 		kmem_free(buffer, 16);
28103 		kmem_free(com, sizeof (*com));
28104 		return (rval);
28105 	}
28106 
28107 	/* Process the returned Q sub-channel data */
28108 	subchnl->cdsc_audiostatus = buffer[1];
28109 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
28110 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
28111 	subchnl->cdsc_trk	= buffer[6];
28112 	subchnl->cdsc_ind	= buffer[7];
28113 	if (subchnl->cdsc_format & CDROM_LBA) {
28114 		subchnl->cdsc_absaddr.lba =
28115 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28116 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28117 		subchnl->cdsc_reladdr.lba =
28118 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
28119 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
28120 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
28121 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
28122 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
28123 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
28124 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
28125 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
28126 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
28127 	} else {
28128 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
28129 		subchnl->cdsc_absaddr.msf.second = buffer[10];
28130 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
28131 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
28132 		subchnl->cdsc_reladdr.msf.second = buffer[14];
28133 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
28134 	}
28135 	kmem_free(buffer, 16);
28136 	kmem_free(com, sizeof (*com));
28137 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
28138 	    != 0) {
28139 		return (EFAULT);
28140 	}
28141 	return (rval);
28142 }
28143 
28144 
28145 /*
28146  *    Function: sr_read_tocentry()
28147  *
28148  * Description: This routine is the driver entry point for handling CD-ROM
28149  *		ioctl requests to read from the Table of Contents (TOC)
28150  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
28151  *		fields, the starting address (LBA or MSF format per the user)
28152  *		and the data mode if the user specified track is a data track.
28153  *
28154  *		Note: The READ HEADER (0x44) command used in this routine is
28155  *		obsolete per the SCSI MMC spec but still supported in the
28156  *		MT FUJI vendor spec. Most equipment adheres to MT FUJI,
28157  *		therefore the command is still implemented in this routine.
28158  *
28159  *   Arguments: dev	- the device 'dev_t'
28160  *		data	- pointer to user provided toc entry structure,
28161  *			  specifying the track # and the address format
28162  *			  (LBA or MSF).
28163  *		flag	- this argument is a pass through to ddi_copyxxx()
28164  *		          directly from the mode argument of ioctl().
28165  *
28166  * Return Code: the code returned by sd_send_scsi_cmd()
28167  *		EFAULT if ddi_copyxxx() fails
28168  *		ENXIO if fail ddi_get_soft_state
28169  *		EINVAL if data pointer is NULL
28170  */
28171 
28172 static int
28173 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28174 {
28175 	struct sd_lun		*un = NULL;
28176 	struct uscsi_cmd	*com;
28177 	struct cdrom_tocentry	toc_entry;
28178 	struct cdrom_tocentry	*entry = &toc_entry;
28179 	caddr_t			buffer;
28180 	int			rval;
28181 	char			cdb[CDB_GROUP1];
28182 
28183 	if (data == NULL) {
28184 		return (EINVAL);
28185 	}
28186 
28187 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28188 	    (un->un_state == SD_STATE_OFFLINE)) {
28189 		return (ENXIO);
28190 	}
28191 
28192 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28193 		return (EFAULT);
28194 	}
28195 
28196 	/* Validate the requested track and address format */
28197 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28198 		return (EINVAL);
28199 	}
28200 
28201 	if (entry->cdte_track == 0) {
28202 		return (EINVAL);
28203 	}
28204 
28205 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28206 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28207 	bzero(cdb, CDB_GROUP1);
28208 
28209 	cdb[0] = SCMD_READ_TOC;
28210 	/* Set the MSF bit based on the user requested address format  */
28211 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28212 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28213 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28214 	} else {
28215 		cdb[6] = entry->cdte_track;
28216 	}
28217 
28218 	/*
28219 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28220 	 * (4 byte TOC response header + 8 byte track descriptor)
28221 	 */
28222 	cdb[8] = 12;
28223 	com->uscsi_cdb	   = cdb;
28224 	com->uscsi_cdblen  = CDB_GROUP1;
28225 	com->uscsi_bufaddr = buffer;
28226 	com->uscsi_buflen  = 0x0C;
28227 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28228 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28229 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28230 	if (rval != 0) {
28231 		kmem_free(buffer, 12);
28232 		kmem_free(com, sizeof (*com));
28233 		return (rval);
28234 	}
28235 
28236 	/* Process the toc entry */
28237 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28238 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28239 	if (entry->cdte_format & CDROM_LBA) {
28240 		entry->cdte_addr.lba =
28241 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28242 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28243 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28244 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28245 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28246 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28247 		/*
28248 		 * Send a READ TOC command using the LBA address format to get
28249 		 * the LBA for the track requested so it can be used in the
28250 		 * READ HEADER request
28251 		 *
28252 		 * Note: The MSF bit of the READ HEADER command specifies the
28253 		 * output format. The block address specified in that command
28254 		 * must be in LBA format.
28255 		 */
28256 		cdb[1] = 0;
28257 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28258 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28259 		if (rval != 0) {
28260 			kmem_free(buffer, 12);
28261 			kmem_free(com, sizeof (*com));
28262 			return (rval);
28263 		}
28264 	} else {
28265 		entry->cdte_addr.msf.minute	= buffer[9];
28266 		entry->cdte_addr.msf.second	= buffer[10];
28267 		entry->cdte_addr.msf.frame	= buffer[11];
28268 		/*
28269 		 * Send a READ TOC command using the LBA address format to get
28270 		 * the LBA for the track requested so it can be used in the
28271 		 * READ HEADER request
28272 		 *
28273 		 * Note: The MSF bit of the READ HEADER command specifies the
28274 		 * output format. The block address specified in that command
28275 		 * must be in LBA format.
28276 		 */
28277 		cdb[1] = 0;
28278 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28279 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28280 		if (rval != 0) {
28281 			kmem_free(buffer, 12);
28282 			kmem_free(com, sizeof (*com));
28283 			return (rval);
28284 		}
28285 	}
28286 
28287 	/*
28288 	 * Build and send the READ HEADER command to determine the data mode of
28289 	 * the user specified track.
28290 	 */
28291 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28292 	    (entry->cdte_track != CDROM_LEADOUT)) {
28293 		bzero(cdb, CDB_GROUP1);
28294 		cdb[0] = SCMD_READ_HEADER;
28295 		cdb[2] = buffer[8];
28296 		cdb[3] = buffer[9];
28297 		cdb[4] = buffer[10];
28298 		cdb[5] = buffer[11];
28299 		cdb[8] = 0x08;
28300 		com->uscsi_buflen = 0x08;
28301 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28302 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28303 		if (rval == 0) {
28304 			entry->cdte_datamode = buffer[0];
28305 		} else {
28306 			/*
28307 			 * The READ HEADER command failed.  Since it is
28308 			 * obsolete in one spec, it's better to return
28309 			 * -1 for an invalid track so that we can still
28310 			 * receive the rest of the TOC data.
28311 			 */
28312 			entry->cdte_datamode = (uchar_t)-1;
28313 		}
28314 	} else {
28315 		entry->cdte_datamode = (uchar_t)-1;
28316 	}
28317 
28318 	kmem_free(buffer, 12);
28319 	kmem_free(com, sizeof (*com));
28320 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28321 		return (EFAULT);
28322 
28323 	return (rval);
28324 }
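
/*
 * Editor's note: a minimal sketch of the BCD-to-binary conversion that
 * BCD_TO_BYTE performs on the MSF fields above, assuming the usual
 * packed-BCD encoding (one decimal digit per nibble). The definition
 * shown here is illustrative, not quoted from the driver's headers.
 */
#if 0
#define	BCD_TO_BYTE_SKETCH(x)	(((((x) >> 4) & 0xF) * 10) + ((x) & 0xF))
/* Example: a BCD minute byte of 0x59 decodes to decimal 59. */
#endif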
28325 
28326 
28327 /*
28328  *    Function: sr_read_tochdr()
28329  *
28330  * Description: This routine is the driver entry point for handling CD-ROM
28331  * 		ioctl requests to read the Table of Contents (TOC) header
28332  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28333  *		and ending track numbers.
28334  *
28335  *   Arguments: dev	- the device 'dev_t'
28336  *		data	- pointer to user provided toc header structure,
28337  *			  specifying the starting and ending track numbers.
28338  *		flag	- this argument is a pass through to ddi_copyxxx()
28339  *			  directly from the mode argument of ioctl().
28340  *
28341  * Return Code: the code returned by sd_send_scsi_cmd()
28342  *		EFAULT if ddi_copyxxx() fails
28343  *		ENXIO if ddi_get_soft_state fails
28344  *		EINVAL if data pointer is NULL
28345  */
28346 
28347 static int
28348 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28349 {
28350 	struct sd_lun		*un;
28351 	struct uscsi_cmd	*com;
28352 	struct cdrom_tochdr	toc_header;
28353 	struct cdrom_tochdr	*hdr = &toc_header;
28354 	char			cdb[CDB_GROUP1];
28355 	int			rval;
28356 	caddr_t			buffer;
28357 
28358 	if (data == NULL) {
28359 		return (EINVAL);
28360 	}
28361 
28362 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28363 	    (un->un_state == SD_STATE_OFFLINE)) {
28364 		return (ENXIO);
28365 	}
28366 
28367 	buffer = kmem_zalloc(4, KM_SLEEP);
28368 	bzero(cdb, CDB_GROUP1);
28369 	cdb[0] = SCMD_READ_TOC;
28370 	/*
28371 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28372 	 * that the TOC header should be returned
28373 	 */
28374 	cdb[6] = 0x00;
28375 	/*
28376 	 * Bytes 7 & 8 hold the allocation length: 4 bytes for the TOC header
28377 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #).
28378 	 */
28379 	cdb[8] = 0x04;
28380 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28381 	com->uscsi_cdb	   = cdb;
28382 	com->uscsi_cdblen  = CDB_GROUP1;
28383 	com->uscsi_bufaddr = buffer;
28384 	com->uscsi_buflen  = 0x04;
28385 	com->uscsi_timeout = 300;
28386 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28387 
28388 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28389 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28390 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28391 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28392 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28393 	} else {
28394 		hdr->cdth_trk0 = buffer[2];
28395 		hdr->cdth_trk1 = buffer[3];
28396 	}
28397 	kmem_free(buffer, 4);
28398 	kmem_free(com, sizeof (*com));
28399 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28400 		return (EFAULT);
28401 	}
28402 	return (rval);
28403 }
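
/*
 * Editor's note: for reference, the 4 byte READ TOC response parsed
 * above is laid out as sketched below (per the SCSI MMC TOC header
 * format); the struct is hypothetical and not part of this driver.
 */
#if 0
struct toc_header_sketch {
	uchar_t	th_data_len_msb;	/* buffer[0]: TOC data length, MSB */
	uchar_t	th_data_len_lsb;	/* buffer[1]: TOC data length, LSB */
	uchar_t	th_first_track;		/* buffer[2]: starting track number */
	uchar_t	th_last_track;		/* buffer[3]: ending track number */
};
#endif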
28404 
28405 
28406 /*
28407  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28408  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28409  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28410  * digital audio and extended architecture digital audio. These modes are
28411  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28412  * MMC specs.
28413  *
28414  * In addition to support for the various data formats these routines also
28415  * include support for devices that implement only the direct access READ
28416  * commands (0x08, 0x28), devices that implement the READ_CD commands
28417  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28418  * READ CDXA commands (0xD8, 0xDB)
28419  */
28420 
28421 /*
28422  *    Function: sr_read_mode1()
28423  *
28424  * Description: This routine is the driver entry point for handling CD-ROM
28425  *		ioctl read mode1 requests (CDROMREADMODE1).
28426  *
28427  *   Arguments: dev	- the device 'dev_t'
28428  *		data	- pointer to user provided cd read structure specifying
28429  *			  the lba buffer address and length.
28430  *		flag	- this argument is a pass through to ddi_copyxxx()
28431  *			  directly from the mode argument of ioctl().
28432  *
28433  * Return Code: the code returned by sd_send_scsi_cmd()
28434  *		EFAULT if ddi_copyxxx() fails
28435  *		ENXIO if ddi_get_soft_state fails
28436  *		EINVAL if data pointer is NULL
28437  */
28438 
28439 static int
28440 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28441 {
28442 	struct sd_lun		*un;
28443 	struct cdrom_read	mode1_struct;
28444 	struct cdrom_read	*mode1 = &mode1_struct;
28445 	int			rval;
28446 #ifdef _MULTI_DATAMODEL
28447 	/* To support ILP32 applications in an LP64 world */
28448 	struct cdrom_read32	cdrom_read32;
28449 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28450 #endif /* _MULTI_DATAMODEL */
28451 
28452 	if (data == NULL) {
28453 		return (EINVAL);
28454 	}
28455 
28456 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28457 	    (un->un_state == SD_STATE_OFFLINE)) {
28458 		return (ENXIO);
28459 	}
28460 
28461 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28462 	    "sd_read_mode1: entry: un:0x%p\n", un);
28463 
28464 #ifdef _MULTI_DATAMODEL
28465 	switch (ddi_model_convert_from(flag & FMODELS)) {
28466 	case DDI_MODEL_ILP32:
28467 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28468 			return (EFAULT);
28469 		}
28470 		/* Convert the ILP32 uscsi data from the application to LP64 */
28471 		cdrom_read32tocdrom_read(cdrd32, mode1);
28472 		break;
28473 	case DDI_MODEL_NONE:
28474 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28475 			return (EFAULT);
28476 		}
28477 	}
28478 #else /* ! _MULTI_DATAMODEL */
28479 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28480 		return (EFAULT);
28481 	}
28482 #endif /* _MULTI_DATAMODEL */
28483 
28484 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28485 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28486 
28487 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28488 	    "sd_read_mode1: exit: un:0x%p\n", un);
28489 
28490 	return (rval);
28491 }
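
/*
 * Editor's note: a hedged sketch of what the cdrom_read32tocdrom_read()
 * conversion used above typically amounts to: widening the ILP32
 * application's view of the structure into the native LP64 form. The
 * 32-bit field types shown are assumptions, not quoted from the headers.
 */
#if 0
static void
cdrom_read32_to_native_sketch(const struct cdrom_read32 *cdrd32,
    struct cdrom_read *cdrd)
{
	cdrd->cdread_lba     = (int)cdrd32->cdread_lba;
	cdrd->cdread_bufaddr = (caddr_t)(uintptr_t)cdrd32->cdread_bufaddr;
	cdrd->cdread_buflen  = (int)cdrd32->cdread_buflen;
}
#endif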
28492 
28493 
28494 /*
28495  *    Function: sr_read_cd_mode2()
28496  *
28497  * Description: This routine is the driver entry point for handling CD-ROM
28498  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28499  *		support the READ CD (0xBE) command or the 1st generation
28500  *		READ CD (0xD4) command.
28501  *
28502  *   Arguments: dev	- the device 'dev_t'
28503  *		data	- pointer to user provided cd read structure specifying
28504  *			  the lba buffer address and length.
28505  *		flag	- this argument is a pass through to ddi_copyxxx()
28506  *			  directly from the mode argument of ioctl().
28507  *
28508  * Return Code: the code returned by sd_send_scsi_cmd()
28509  *		EFAULT if ddi_copyxxx() fails
28510  *		ENXIO if ddi_get_soft_state fails
28511  *		EINVAL if data pointer is NULL
28512  */
28513 
28514 static int
28515 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28516 {
28517 	struct sd_lun		*un;
28518 	struct uscsi_cmd	*com;
28519 	struct cdrom_read	mode2_struct;
28520 	struct cdrom_read	*mode2 = &mode2_struct;
28521 	uchar_t			cdb[CDB_GROUP5];
28522 	int			nblocks;
28523 	int			rval;
28524 #ifdef _MULTI_DATAMODEL
28525 	/*  To support ILP32 applications in an LP64 world */
28526 	struct cdrom_read32	cdrom_read32;
28527 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28528 #endif /* _MULTI_DATAMODEL */
28529 
28530 	if (data == NULL) {
28531 		return (EINVAL);
28532 	}
28533 
28534 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28535 	    (un->un_state == SD_STATE_OFFLINE)) {
28536 		return (ENXIO);
28537 	}
28538 
28539 #ifdef _MULTI_DATAMODEL
28540 	switch (ddi_model_convert_from(flag & FMODELS)) {
28541 	case DDI_MODEL_ILP32:
28542 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28543 			return (EFAULT);
28544 		}
28545 		/* Convert the ILP32 uscsi data from the application to LP64 */
28546 		cdrom_read32tocdrom_read(cdrd32, mode2);
28547 		break;
28548 	case DDI_MODEL_NONE:
28549 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28550 			return (EFAULT);
28551 		}
28552 		break;
28553 	}
28554 
28555 #else /* ! _MULTI_DATAMODEL */
28556 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28557 		return (EFAULT);
28558 	}
28559 #endif /* _MULTI_DATAMODEL */
28560 
28561 	bzero(cdb, sizeof (cdb));
28562 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28563 		/* Read command supported by 1st generation atapi drives */
28564 		cdb[0] = SCMD_READ_CDD4;
28565 	} else {
28566 		/* Universal CD Access Command */
28567 		cdb[0] = SCMD_READ_CD;
28568 	}
28569 
28570 	/*
28571 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28572 	 */
28573 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28574 
28575 	/* set the start address */
28576 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28577 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28578 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28579 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28580 
28581 	/* set the transfer length */
28582 	nblocks = mode2->cdread_buflen / 2336;
28583 	cdb[6] = (uchar_t)(nblocks >> 16);
28584 	cdb[7] = (uchar_t)(nblocks >> 8);
28585 	cdb[8] = (uchar_t)nblocks;
28586 
28587 	/* set the filter bits */
28588 	cdb[9] = CDROM_READ_CD_USERDATA;
28589 
28590 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28591 	com->uscsi_cdb = (caddr_t)cdb;
28592 	com->uscsi_cdblen = sizeof (cdb);
28593 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28594 	com->uscsi_buflen = mode2->cdread_buflen;
28595 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28596 
28597 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28598 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28599 	kmem_free(com, sizeof (*com));
28600 	return (rval);
28601 }
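
/*
 * Editor's note: worked example of the transfer length math above, under
 * the routine's 2336 byte Mode 2 sector size. A 23360 byte user buffer
 * yields nblocks = 23360 / 2336 = 10, packed big-endian into CDB bytes
 * 6-8 as 0x00 0x00 0x0A.
 */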
28602 
28603 
28604 /*
28605  *    Function: sr_read_mode2()
28606  *
28607  * Description: This routine is the driver entry point for handling CD-ROM
28608  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28609  *		do not support the READ CD (0xBE) command.
28610  *
28611  *   Arguments: dev	- the device 'dev_t'
28612  *		data	- pointer to user provided cd read structure specifying
28613  *			  the lba buffer address and length.
28614  *		flag	- this argument is a pass through to ddi_copyxxx()
28615  *			  directly from the mode argument of ioctl().
28616  *
28617  * Return Code: the code returned by sd_send_scsi_cmd()
28618  *		EFAULT if ddi_copyxxx() fails
28619  *		ENXIO if ddi_get_soft_state fails
28620  *		EINVAL if data pointer is NULL
28621  *		EIO if the block size cannot be reset
28622  *		EAGAIN if commands are in progress in the driver
28623  */
28624 
28625 static int
28626 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28627 {
28628 	struct sd_lun		*un;
28629 	struct cdrom_read	mode2_struct;
28630 	struct cdrom_read	*mode2 = &mode2_struct;
28631 	int			rval;
28632 	uint32_t		restore_blksize;
28633 	struct uscsi_cmd	*com;
28634 	uchar_t			cdb[CDB_GROUP0];
28635 	int			nblocks;
28636 
28637 #ifdef _MULTI_DATAMODEL
28638 	/* To support ILP32 applications in an LP64 world */
28639 	struct cdrom_read32	cdrom_read32;
28640 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28641 #endif /* _MULTI_DATAMODEL */
28642 
28643 	if (data == NULL) {
28644 		return (EINVAL);
28645 	}
28646 
28647 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28648 	    (un->un_state == SD_STATE_OFFLINE)) {
28649 		return (ENXIO);
28650 	}
28651 
28652 	/*
28653 	 * Because this routine will update the device and driver block size
28654 	 * being used, we want to make sure there are no commands in progress.
28655 	 * If commands are in progress the user will have to try again.
28656 	 *
28657 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28658 	 * in sdioctl to protect commands from sdioctl through to the top of
28659 	 * sd_uscsi_strategy. See sdioctl for details.
28660 	 */
28661 	mutex_enter(SD_MUTEX(un));
28662 	if (un->un_ncmds_in_driver != 1) {
28663 		mutex_exit(SD_MUTEX(un));
28664 		return (EAGAIN);
28665 	}
28666 	mutex_exit(SD_MUTEX(un));
28667 
28668 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28669 	    "sd_read_mode2: entry: un:0x%p\n", un);
28670 
28671 #ifdef _MULTI_DATAMODEL
28672 	switch (ddi_model_convert_from(flag & FMODELS)) {
28673 	case DDI_MODEL_ILP32:
28674 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28675 			return (EFAULT);
28676 		}
28677 		/* Convert the ILP32 uscsi data from the application to LP64 */
28678 		cdrom_read32tocdrom_read(cdrd32, mode2);
28679 		break;
28680 	case DDI_MODEL_NONE:
28681 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28682 			return (EFAULT);
28683 		}
28684 		break;
28685 	}
28686 #else /* ! _MULTI_DATAMODEL */
28687 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28688 		return (EFAULT);
28689 	}
28690 #endif /* _MULTI_DATAMODEL */
28691 
28692 	/* Store the current target block size for restoration later */
28693 	restore_blksize = un->un_tgt_blocksize;
28694 
28695 	/* Change the device and soft state target block size to 2336 */
28696 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28697 		rval = EIO;
28698 		goto done;
28699 	}
28700 
28701 
28702 	bzero(cdb, sizeof (cdb));
28703 
28704 	/* set READ operation */
28705 	cdb[0] = SCMD_READ;
28706 
28707 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28708 	mode2->cdread_lba >>= 2;
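	/*
	 * Editor's note: e.g. a user LBA of 4000 in 512 byte units becomes
	 * 4000 >> 2 = 1000 in 2048 byte units.
	 */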
28709 
28710 	/* set the start address */
28711 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
28712 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28713 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28714 
28715 	/* set the transfer length */
28716 	nblocks = mode2->cdread_buflen / 2336;
28717 	cdb[4] = (uchar_t)nblocks & 0xFF;
28718 
28719 	/* build command */
28720 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28721 	com->uscsi_cdb = (caddr_t)cdb;
28722 	com->uscsi_cdblen = sizeof (cdb);
28723 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28724 	com->uscsi_buflen = mode2->cdread_buflen;
28725 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28726 
28727 	/*
28728 	 * Issue SCSI command with user space address for read buffer.
28729 	 *
28730 	 * This sends the command through main channel in the driver.
28731 	 *
28732 	 * Since this is accessed via an IOCTL call, we go through the
28733 	 * standard path, so that if the device was powered down, then
28734 	 * it would be 'awakened' to handle the command.
28735 	 */
28736 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28737 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28738 
28739 	kmem_free(com, sizeof (*com));
28740 
28741 	/* Restore the device and soft state target block size */
28742 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28743 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28744 		    "can't do switch back to mode 1\n");
28745 		/*
28746 		 * If sd_send_scsi_READ succeeded we still need to report
28747 		 * an error because we failed to reset the block size
28748 		 */
28749 		if (rval == 0) {
28750 			rval = EIO;
28751 		}
28752 	}
28753 
28754 done:
28755 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28756 	    "sd_read_mode2: exit: un:0x%p\n", un);
28757 
28758 	return (rval);
28759 }
28760 
28761 
28762 /*
28763  *    Function: sr_sector_mode()
28764  *
28765  * Description: This utility function is used by sr_read_mode2 to set the
28766  *		target block size based on the user specified size. This is
28767  *		a legacy implementation based upon a vendor specific mode
28768  *		page.
28769  *
28770  *   Arguments: dev	- the device 'dev_t'
28771  *		blksize	- flag indicating if the block size is being set to
28772  *			  2336 or 512.
28773  *
28774  * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
28775  *		sd_send_scsi_MODE_SELECT()
28776  *		ENXIO if ddi_get_soft_state fails
28777  */
28778 
28779 static int
28780 sr_sector_mode(dev_t dev, uint32_t blksize)
28781 {
28782 	struct sd_lun	*un;
28783 	uchar_t		*sense;
28784 	uchar_t		*select;
28785 	int		rval;
28786 
28787 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28788 	    (un->un_state == SD_STATE_OFFLINE)) {
28789 		return (ENXIO);
28790 	}
28791 
28792 	sense = kmem_zalloc(20, KM_SLEEP);
28793 
28794 	/* Note: This is a vendor specific mode page (0x81) */
28795 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28796 	    SD_PATH_STANDARD)) != 0) {
28797 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28798 		    "sr_sector_mode: Mode Sense failed\n");
28799 		kmem_free(sense, 20);
28800 		return (rval);
28801 	}
28802 	select = kmem_zalloc(20, KM_SLEEP);
28803 	select[3] = 0x08;
28804 	select[10] = ((blksize >> 8) & 0xff);
28805 	select[11] = (blksize & 0xff);
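	/*
	 * Editor's note: e.g. blksize 2336 (0x0920) is stored big-endian
	 * as select[10] = 0x09 and select[11] = 0x20.
	 */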
28806 	select[12] = 0x01;
28807 	select[13] = 0x06;
28808 	select[14] = sense[14];
28809 	select[15] = sense[15];
28810 	if (blksize == SD_MODE2_BLKSIZE) {
28811 		select[14] |= 0x01;
28812 	}
28813 
28814 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28815 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28816 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28817 		    "sr_sector_mode: Mode Select failed\n");
28818 	} else {
28819 		/*
28820 		 * Only update the softstate block size if we successfully
28821 		 * changed the device block mode.
28822 		 */
28823 		mutex_enter(SD_MUTEX(un));
28824 		sd_update_block_info(un, blksize, 0);
28825 		mutex_exit(SD_MUTEX(un));
28826 	}
28827 	kmem_free(sense, 20);
28828 	kmem_free(select, 20);
28829 	return (rval);
28830 }
28831 
28832 
28833 /*
28834  *    Function: sr_read_cdda()
28835  *
28836  * Description: This routine is the driver entry point for handling CD-ROM
28837  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
28838  *		the target supports CDDA these requests are handled via a vendor
28839  *		specific command (0xD8). If the target does not support CDDA
28840  *		these requests are handled via the READ CD command (0xBE).
28841  *
28842  *   Arguments: dev	- the device 'dev_t'
28843  *		data	- pointer to user provided CD-DA structure specifying
28844  *			  the track starting address, transfer length, and
28845  *			  subcode options.
28846  *		flag	- this argument is a pass through to ddi_copyxxx()
28847  *			  directly from the mode argument of ioctl().
28848  *
28849  * Return Code: the code returned by sd_send_scsi_cmd()
28850  *		EFAULT if ddi_copyxxx() fails
28851  *		ENXIO if ddi_get_soft_state fails
28852  *		EINVAL if invalid arguments are provided
28853  *		ENOTTY
28854  */
28855 
28856 static int
28857 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28858 {
28859 	struct sd_lun			*un;
28860 	struct uscsi_cmd		*com;
28861 	struct cdrom_cdda		*cdda;
28862 	int				rval;
28863 	size_t				buflen;
28864 	char				cdb[CDB_GROUP5];
28865 
28866 #ifdef _MULTI_DATAMODEL
28867 	/* To support ILP32 applications in an LP64 world */
28868 	struct cdrom_cdda32	cdrom_cdda32;
28869 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28870 #endif /* _MULTI_DATAMODEL */
28871 
28872 	if (data == NULL) {
28873 		return (EINVAL);
28874 	}
28875 
28876 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28877 		return (ENXIO);
28878 	}
28879 
28880 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28881 
28882 #ifdef _MULTI_DATAMODEL
28883 	switch (ddi_model_convert_from(flag & FMODELS)) {
28884 	case DDI_MODEL_ILP32:
28885 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28886 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28887 			    "sr_read_cdda: ddi_copyin Failed\n");
28888 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28889 			return (EFAULT);
28890 		}
28891 		/* Convert the ILP32 uscsi data from the application to LP64 */
28892 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28893 		break;
28894 	case DDI_MODEL_NONE:
28895 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28896 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28897 			    "sr_read_cdda: ddi_copyin Failed\n");
28898 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28899 			return (EFAULT);
28900 		}
28901 		break;
28902 	}
28903 #else /* ! _MULTI_DATAMODEL */
28904 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28905 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28906 		    "sr_read_cdda: ddi_copyin Failed\n");
28907 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28908 		return (EFAULT);
28909 	}
28910 #endif /* _MULTI_DATAMODEL */
28911 
28912 	/*
28913 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
28914 	 * check that the length input fits in 3 bytes.
28915 	 */
28916 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28917 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28918 		    "cdrom transfer length too large: %d (limit %d)\n",
28919 		    cdda->cdda_length, 0xFFFFFF);
28920 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28921 		return (EINVAL);
28922 	}
28923 
28924 	switch (cdda->cdda_subcode) {
28925 	case CDROM_DA_NO_SUBCODE:
28926 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28927 		break;
28928 	case CDROM_DA_SUBQ:
28929 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28930 		break;
28931 	case CDROM_DA_ALL_SUBCODE:
28932 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28933 		break;
28934 	case CDROM_DA_SUBCODE_ONLY:
28935 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28936 		break;
28937 	default:
28938 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28939 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28940 		    cdda->cdda_subcode);
28941 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28942 		return (EINVAL);
28943 	}
28944 
28945 	/* Build and send the command */
28946 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28947 	bzero(cdb, CDB_GROUP5);
28948 
28949 	if (un->un_f_cfg_cdda == TRUE) {
28950 		cdb[0] = (char)SCMD_READ_CD;
28951 		cdb[1] = 0x04;
28952 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28953 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28954 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28955 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28956 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28957 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28958 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28959 		cdb[9] = 0x10;
28960 		switch (cdda->cdda_subcode) {
28961 		case CDROM_DA_NO_SUBCODE :
28962 			cdb[10] = 0x0;
28963 			break;
28964 		case CDROM_DA_SUBQ :
28965 			cdb[10] = 0x2;
28966 			break;
28967 		case CDROM_DA_ALL_SUBCODE :
28968 			cdb[10] = 0x1;
28969 			break;
28970 		case CDROM_DA_SUBCODE_ONLY :
28971 			/* FALLTHROUGH */
28972 		default :
28973 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28974 			kmem_free(com, sizeof (*com));
28975 			return (ENOTTY);
28976 		}
28977 	} else {
28978 		cdb[0] = (char)SCMD_READ_CDDA;
28979 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28980 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28981 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28982 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28983 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28984 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28985 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28986 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28987 		cdb[10] = cdda->cdda_subcode;
28988 	}
28989 
28990 	com->uscsi_cdb = cdb;
28991 	com->uscsi_cdblen = CDB_GROUP5;
28992 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28993 	com->uscsi_buflen = buflen;
28994 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28995 
28996 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28997 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28998 
28999 	kmem_free(cdda, sizeof (struct cdrom_cdda));
29000 	kmem_free(com, sizeof (*com));
29001 	return (rval);
29002 }
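
/*
 * Editor's note: worked example of the buffer sizing above. For a 10
 * frame CDROM_DA_SUBQ request, each 2352 byte raw audio frame carries
 * an extra 16 bytes of Q subcode, so buflen = 2368 * 10 = 23680 bytes.
 */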
29003 
29004 
29005 /*
29006  *    Function: sr_read_cdxa()
29007  *
29008  * Description: This routine is the driver entry point for handling CD-ROM
29009  *		ioctl requests to return CD-XA (Extended Architecture) data.
29010  *		(CDROMCDXA).
29011  *
29012  *   Arguments: dev	- the device 'dev_t'
29013  *		data	- pointer to user provided CD-XA structure specifying
29014  *			  the data starting address, transfer length, and format
29015  *		flag	- this argument is a pass through to ddi_copyxxx()
29016  *			  directly from the mode argument of ioctl().
29017  *
29018  * Return Code: the code returned by sd_send_scsi_cmd()
29019  *		EFAULT if ddi_copyxxx() fails
29020  *		ENXIO if ddi_get_soft_state fails
29021  *		EINVAL if data pointer is NULL
29022  */
29023 
29024 static int
29025 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
29026 {
29027 	struct sd_lun		*un;
29028 	struct uscsi_cmd	*com;
29029 	struct cdrom_cdxa	*cdxa;
29030 	int			rval;
29031 	size_t			buflen;
29032 	char			cdb[CDB_GROUP5];
29033 	uchar_t			read_flags;
29034 
29035 #ifdef _MULTI_DATAMODEL
29036 	/* To support ILP32 applications in an LP64 world */
29037 	struct cdrom_cdxa32		cdrom_cdxa32;
29038 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
29039 #endif /* _MULTI_DATAMODEL */
29040 
29041 	if (data == NULL) {
29042 		return (EINVAL);
29043 	}
29044 
29045 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29046 		return (ENXIO);
29047 	}
29048 
29049 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
29050 
29051 #ifdef _MULTI_DATAMODEL
29052 	switch (ddi_model_convert_from(flag & FMODELS)) {
29053 	case DDI_MODEL_ILP32:
29054 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
29055 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29056 			return (EFAULT);
29057 		}
29058 		/*
29059 		 * Convert the ILP32 uscsi data from the
29060 		 * application to LP64 for internal use.
29061 		 */
29062 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
29063 		break;
29064 	case DDI_MODEL_NONE:
29065 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29066 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29067 			return (EFAULT);
29068 		}
29069 		break;
29070 	}
29071 #else /* ! _MULTI_DATAMODEL */
29072 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29073 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29074 		return (EFAULT);
29075 	}
29076 #endif /* _MULTI_DATAMODEL */
29077 
29078 	/*
29079 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
29080 	 * check that the length input fits in 3 bytes.
29081 	 */
29082 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
29083 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
29084 		    "cdrom transfer length too large: %d (limit %d)\n",
29085 		    cdxa->cdxa_length, 0xFFFFFF);
29086 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29087 		return (EINVAL);
29088 	}
29089 
29090 	switch (cdxa->cdxa_format) {
29091 	case CDROM_XA_DATA:
29092 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
29093 		read_flags = 0x10;
29094 		break;
29095 	case CDROM_XA_SECTOR_DATA:
29096 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
29097 		read_flags = 0xf8;
29098 		break;
29099 	case CDROM_XA_DATA_W_ERROR:
29100 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
29101 		read_flags = 0xfc;
29102 		break;
29103 	default:
29104 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29105 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
29106 		    cdxa->cdxa_format);
29107 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29108 		return (EINVAL);
29109 	}
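
	/*
	 * Editor's note: the read_flags chosen above select (per MMC) which
	 * sector fields a READ CD returns, which is why the per-block size
	 * grows with the format: 0x10 returns user data only (2048 bytes),
	 * 0xf8 the full 2352 byte raw sector, and 0xfc additionally the
	 * C2 error bits (2352 + 294 = 2646 bytes).
	 */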
29110 
29111 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29112 	bzero(cdb, CDB_GROUP5);
29113 	if (un->un_f_mmc_cap == TRUE) {
29114 		cdb[0] = (char)SCMD_READ_CD;
29115 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29116 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29117 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29118 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29119 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29120 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29121 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
29122 		cdb[9] = (char)read_flags;
29123 	} else {
29124 		/*
29125 		 * Note: A vendor specific command (0xDB) is being used here to
29126 		 * request a read of all subcodes.
29127 		 */
29128 		cdb[0] = (char)SCMD_READ_CDXA;
29129 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29130 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29131 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29132 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29133 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
29134 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29135 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29136 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
29137 		cdb[10] = cdxa->cdxa_format;
29138 	}
29139 	com->uscsi_cdb	   = cdb;
29140 	com->uscsi_cdblen  = CDB_GROUP5;
29141 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
29142 	com->uscsi_buflen  = buflen;
29143 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29144 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
29145 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29146 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29147 	kmem_free(com, sizeof (*com));
29148 	return (rval);
29149 }
29150 
29151 
29152 /*
29153  *    Function: sr_eject()
29154  *
29155  * Description: This routine is the driver entry point for handling CD-ROM
29156  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
29157  *
29158  *   Arguments: dev	- the device 'dev_t'
29159  *
29160  * Return Code: the code returned by sd_send_scsi_cmd()
29161  */
29162 
29163 static int
29164 sr_eject(dev_t dev)
29165 {
29166 	struct sd_lun	*un;
29167 	int		rval;
29168 
29169 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29170 	    (un->un_state == SD_STATE_OFFLINE)) {
29171 		return (ENXIO);
29172 	}
29173 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
29174 	    SD_PATH_STANDARD)) != 0) {
29175 		return (rval);
29176 	}
29177 
29178 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
29179 	    SD_PATH_STANDARD);
29180 
29181 	if (rval == 0) {
29182 		mutex_enter(SD_MUTEX(un));
29183 		sr_ejected(un);
29184 		un->un_mediastate = DKIO_EJECTED;
29185 		cv_broadcast(&un->un_state_cv);
29186 		mutex_exit(SD_MUTEX(un));
29187 	}
29188 	return (rval);
29189 }
29190 
29191 
29192 /*
29193  *    Function: sr_ejected()
29194  *
29195  * Description: This routine updates the soft state structure to invalidate the
29196  *		geometry information after the media has been ejected or a
29197  *		media eject has been detected.
29198  *
29199  *   Arguments: un - driver soft state (unit) structure
29200  */
29201 
29202 static void
29203 sr_ejected(struct sd_lun *un)
29204 {
29205 	struct sd_errstats *stp;
29206 
29207 	ASSERT(un != NULL);
29208 	ASSERT(mutex_owned(SD_MUTEX(un)));
29209 
29210 	un->un_f_blockcount_is_valid	= FALSE;
29211 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29212 	un->un_f_geometry_is_valid	= FALSE;
29213 
29214 	if (un->un_errstats != NULL) {
29215 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29216 		stp->sd_capacity.value.ui64 = 0;
29217 	}
29218 }
29219 
29220 
29221 /*
29222  *    Function: sr_check_wp()
29223  *
29224  * Description: This routine checks the write protection of a removable
29225  *      media disk and hotpluggable devices via the write protect bit of
29226  *      the Mode Page Header device specific field. Some devices choke
29227  *      on an unsupported mode page. To work around this issue, this
29228  *      routine uses the 0x3f mode page (request for all pages) for
29229  *      all device types.
29230  *
29231  *   Arguments: dev		- the device 'dev_t'
29232  *
29233  * Return Code: int indicating if the device is write protected (1) or not (0)
29234  *
29235  *     Context: Kernel thread.
29236  *
29237  */
29238 
29239 static int
29240 sr_check_wp(dev_t dev)
29241 {
29242 	struct sd_lun	*un;
29243 	uchar_t		device_specific;
29244 	uchar_t		*sense;
29245 	int		hdrlen;
29246 	int		rval = FALSE;
29247 
29248 	/*
29249 	 * Note: The return codes for this routine should be reworked to
29250 	 * properly handle the case of a NULL softstate.
29251 	 */
29252 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29253 		return (FALSE);
29254 	}
29255 
29256 	if (un->un_f_cfg_is_atapi == TRUE) {
29257 		/*
29258 		 * The mode page contents are not required; set the allocation
29259 		 * length for the mode page header only
29260 		 */
29261 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29262 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29263 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
29264 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29265 			goto err_exit;
29266 		device_specific =
29267 		    ((struct mode_header_grp2 *)sense)->device_specific;
29268 	} else {
29269 		hdrlen = MODE_HEADER_LENGTH;
29270 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29271 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
29272 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29273 			goto err_exit;
29274 		device_specific =
29275 		    ((struct mode_header *)sense)->device_specific;
29276 	}
29277 
29278 	/*
29279 	 * Report the media as write protected if the WP bit is set in the
29280 	 * device specific field. (If the mode sense above failed we return
29281 	 * FALSE via err_exit; not all disks understand this query.)
29282 	 */
29283 	if (device_specific & WRITE_PROTECT) {
29284 		rval = TRUE;
29285 	}
29286 
29287 err_exit:
29288 	kmem_free(sense, hdrlen);
29289 	return (rval);
29290 }
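
/*
 * Editor's note: a minimal usage sketch, assuming a caller that wants to
 * reject write opens of protected media; the surrounding logic here is
 * hypothetical and is not quoted from this driver's open path.
 */
#if 0
	if ((flag & FWRITE) && sr_check_wp(dev)) {
		rval = EROFS;	/* media is write protected */
		goto open_failed;
	}
#endif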
29291 
29292 /*
29293  *    Function: sr_volume_ctrl()
29294  *
29295  * Description: This routine is the driver entry point for handling CD-ROM
29296  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29297  *
29298  *   Arguments: dev	- the device 'dev_t'
29299  *		data	- pointer to user audio volume control structure
29300  *		flag	- this argument is a pass through to ddi_copyxxx()
29301  *			  directly from the mode argument of ioctl().
29302  *
29303  * Return Code: the code returned by sd_send_scsi_cmd()
29304  *		EFAULT if ddi_copyxxx() fails
29305  *		ENXIO if ddi_get_soft_state fails
29306  *		EINVAL if data pointer is NULL
29307  *
29308  */
29309 
29310 static int
29311 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
29312 {
29313 	struct sd_lun		*un;
29314 	struct cdrom_volctrl    volume;
29315 	struct cdrom_volctrl    *vol = &volume;
29316 	uchar_t			*sense_page;
29317 	uchar_t			*select_page;
29318 	uchar_t			*sense;
29319 	uchar_t			*select;
29320 	int			sense_buflen;
29321 	int			select_buflen;
29322 	int			rval;
29323 
29324 	if (data == NULL) {
29325 		return (EINVAL);
29326 	}
29327 
29328 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29329 	    (un->un_state == SD_STATE_OFFLINE)) {
29330 		return (ENXIO);
29331 	}
29332 
29333 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
29334 		return (EFAULT);
29335 	}
29336 
29337 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29338 		struct mode_header_grp2		*sense_mhp;
29339 		struct mode_header_grp2		*select_mhp;
29340 		int				bd_len;
29341 
29342 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
29343 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
29344 		    MODEPAGE_AUDIO_CTRL_LEN;
29345 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29346 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29347 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
29348 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29349 		    SD_PATH_STANDARD)) != 0) {
29350 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29351 			    "sr_volume_ctrl: Mode Sense Failed\n");
29352 			kmem_free(sense, sense_buflen);
29353 			kmem_free(select, select_buflen);
29354 			return (rval);
29355 		}
29356 		sense_mhp = (struct mode_header_grp2 *)sense;
29357 		select_mhp = (struct mode_header_grp2 *)select;
29358 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
29359 		    sense_mhp->bdesc_length_lo;
29360 		if (bd_len > MODE_BLK_DESC_LENGTH) {
29361 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29362 			    "sr_volume_ctrl: Mode Sense returned invalid "
29363 			    "block descriptor length\n");
29364 			kmem_free(sense, sense_buflen);
29365 			kmem_free(select, select_buflen);
29366 			return (EIO);
29367 		}
29368 		sense_page = (uchar_t *)
29369 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
29370 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
29371 		select_mhp->length_msb = 0;
29372 		select_mhp->length_lsb = 0;
29373 		select_mhp->bdesc_length_hi = 0;
29374 		select_mhp->bdesc_length_lo = 0;
29375 	} else {
29376 		struct mode_header		*sense_mhp, *select_mhp;
29377 
29378 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29379 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29380 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29381 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29382 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
29383 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29384 		    SD_PATH_STANDARD)) != 0) {
29385 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29386 			    "sr_volume_ctrl: Mode Sense Failed\n");
29387 			kmem_free(sense, sense_buflen);
29388 			kmem_free(select, select_buflen);
29389 			return (rval);
29390 		}
29391 		sense_mhp  = (struct mode_header *)sense;
29392 		select_mhp = (struct mode_header *)select;
29393 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29394 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29395 			    "sr_volume_ctrl: Mode Sense returned invalid "
29396 			    "block descriptor length\n");
29397 			kmem_free(sense, sense_buflen);
29398 			kmem_free(select, select_buflen);
29399 			return (EIO);
29400 		}
29401 		sense_page = (uchar_t *)
29402 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29403 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29404 		select_mhp->length = 0;
29405 		select_mhp->bdesc_length = 0;
29406 	}
29407 	/*
29408 	 * Note: An audio control data structure could be created and overlaid
29409 	 * on the following in place of the array indexing method implemented.
29410 	 */
29411 
29412 	/* Build the select data for the user volume data */
29413 	select_page[0] = MODEPAGE_AUDIO_CTRL;
29414 	select_page[1] = 0xE;
29415 	/* Set the immediate bit */
29416 	select_page[2] = 0x04;
29417 	/* Zero out reserved fields */
29418 	select_page[3] = 0x00;
29419 	select_page[4] = 0x00;
29420 	/* Return sense data for fields not to be modified */
29421 	select_page[5] = sense_page[5];
29422 	select_page[6] = sense_page[6];
29423 	select_page[7] = sense_page[7];
29424 	/* Set the user specified volume levels for channel 0 and 1 */
29425 	select_page[8] = 0x01;
29426 	select_page[9] = vol->channel0;
29427 	select_page[10] = 0x02;
29428 	select_page[11] = vol->channel1;
29429 	/* Channels 2 and 3 are currently unsupported, so return the sense data */
29430 	select_page[12] = sense_page[12];
29431 	select_page[13] = sense_page[13];
29432 	select_page[14] = sense_page[14];
29433 	select_page[15] = sense_page[15];
29434 
29435 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29436 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
29437 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29438 	} else {
29439 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
29440 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29441 	}
29442 
29443 	kmem_free(sense, sense_buflen);
29444 	kmem_free(select, select_buflen);
29445 	return (rval);
29446 }
29447 
29448 
29449 /*
29450  *    Function: sr_read_sony_session_offset()
29451  *
29452  * Description: This routine is the driver entry point for handling CD-ROM
29453  *		ioctl requests for session offset information (CDROMREADOFFSET).
29454  *		The address of the first track in the last session of a
29455  *		multi-session CD-ROM is returned.
29456  *
29457  *		Note: This routine uses a vendor specific key value in the
29458  *		command control field without implementing any vendor check here
29459  *		or in the ioctl routine.
29460  *
29461  *   Arguments: dev	- the device 'dev_t'
29462  *		data	- pointer to an int to hold the requested address
29463  *		flag	- this argument is a pass through to ddi_copyxxx()
29464  *			  directly from the mode argument of ioctl().
29465  *
29466  * Return Code: the code returned by sd_send_scsi_cmd()
29467  *		EFAULT if ddi_copyxxx() fails
29468  *		ENXIO if ddi_get_soft_state fails
29469  *		EINVAL if data pointer is NULL
29470  */
29471 
29472 static int
29473 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29474 {
29475 	struct sd_lun		*un;
29476 	struct uscsi_cmd	*com;
29477 	caddr_t			buffer;
29478 	char			cdb[CDB_GROUP1];
29479 	int			session_offset = 0;
29480 	int			rval;
29481 
29482 	if (data == NULL) {
29483 		return (EINVAL);
29484 	}
29485 
29486 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29487 	    (un->un_state == SD_STATE_OFFLINE)) {
29488 		return (ENXIO);
29489 	}
29490 
29491 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29492 	bzero(cdb, CDB_GROUP1);
29493 	cdb[0] = SCMD_READ_TOC;
29494 	/*
29495 	 * Bytes 7 & 8 hold the allocation length: 12 bytes for a single
29496 	 * entry (4 byte TOC response header + 8 bytes of response data).
29497 	 */
29498 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29499 	/* Byte 9 is the control byte. A vendor specific value is used */
29500 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29501 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29502 	com->uscsi_cdb = cdb;
29503 	com->uscsi_cdblen = CDB_GROUP1;
29504 	com->uscsi_bufaddr = buffer;
29505 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29506 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29507 
29508 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29509 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29510 	if (rval != 0) {
29511 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29512 		kmem_free(com, sizeof (*com));
29513 		return (rval);
29514 	}
29515 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29516 		session_offset =
29517 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29518 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29519 		/*
29520 		 * The returned offset is in units of the current lbasize
29521 		 * blocks. Convert it to 2K blocks before returning to the user.
29522 		 */
29523 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29524 			session_offset >>= 2;
29525 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29526 			session_offset >>= 1;
29527 		}
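		/*
		 * Editor's note: e.g. an offset of 1000 reported in 512 byte
		 * blocks becomes 1000 >> 2 = 250 in 2K blocks.
		 */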
29528 	}
29529 
29530 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29531 		rval = EFAULT;
29532 	}
29533 
29534 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29535 	kmem_free(com, sizeof (*com));
29536 	return (rval);
29537 }
29538 
29539 
29540 /*
29541  *    Function: sd_wm_cache_constructor()
29542  *
29543  * Description: Cache Constructor for the wmap cache for the read/modify/write
29544  * 		devices.
29545  *
29546  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29547  *		un	- sd_lun structure for the device.
29548  *		flags	- the km flags passed to the constructor
29549  *
29550  * Return Code: 0 on success.
29551  *		-1 on failure.
29552  */
29553 
29554 /*ARGSUSED*/
29555 static int
29556 sd_wm_cache_constructor(void *wm, void *un, int flags)
29557 {
29558 	bzero(wm, sizeof (struct sd_w_map));
29559 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29560 	return (0);
29561 }
29562 
29563 
29564 /*
29565  *    Function: sd_wm_cache_destructor()
29566  *
29567  * Description: Cache destructor for the wmap cache for the read/modify/write
29568  * 		devices.
29569  *
29570  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
29571  *		un	- sd_lun structure for the device.
29572  */
29573 /*ARGSUSED*/
29574 static void
29575 sd_wm_cache_destructor(void *wm, void *un)
29576 {
29577 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29578 }
29579 
29580 
29581 /*
29582  *    Function: sd_range_lock()
29583  *
29584  * Description: Lock the specified range of blocks to ensure that a
29585  *		read-modify-write is atomic and that no other I/O writes
29586  *		to the same location. The range is specified in terms
29587  *		of start and end blocks. Block numbers are the actual
29588  *		media block numbers, not system block numbers.
29589  *
29590  *   Arguments: un	- sd_lun structure for the device.
29591  *		startb - The starting block number
29592  *		endb - The end block number
29593  *		typ - type of i/o - simple/read_modify_write
29594  *
29595  * Return Code: wm  - pointer to the wmap structure.
29596  *
29597  *     Context: This routine can sleep.
29598  */
29599 
29600 static struct sd_w_map *
29601 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29602 {
29603 	struct sd_w_map *wmp = NULL;
29604 	struct sd_w_map *sl_wmp = NULL;
29605 	struct sd_w_map *tmp_wmp;
29606 	wm_state state = SD_WM_CHK_LIST;
29607 
29608 
29609 	ASSERT(un != NULL);
29610 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29611 
29612 	mutex_enter(SD_MUTEX(un));
29613 
29614 	while (state != SD_WM_DONE) {
29615 
29616 		switch (state) {
29617 		case SD_WM_CHK_LIST:
29618 			/*
29619 			 * This is the starting state. Check the wmap list
29620 			 * to see if the range is currently available.
29621 			 */
29622 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29623 				/*
29624 				 * If this is a simple write and no rmw
29625 				 * i/o is pending then try to lock the
29626 				 * range as the range should be available.
29627 				 */
29628 				state = SD_WM_LOCK_RANGE;
29629 			} else {
29630 				tmp_wmp = sd_get_range(un, startb, endb);
29631 				if (tmp_wmp != NULL) {
29632 					if ((wmp != NULL) && ONLIST(un, wmp)) {
29633 						/*
29634 						 * Should not keep on-list wmaps
29635 						 * while waiting; this macro
29636 						 * also does wmp = NULL;
29637 						 */
29638 						FREE_ONLIST_WMAP(un, wmp);
29639 					}
29640 					/*
29641 					 * sl_wmp is the wmap on which the wait
29642 					 * is done. Since tmp_wmp points to the
29643 					 * in-use wmap, set sl_wmp to tmp_wmp
29644 					 * and change the state to wait.
29645 					 */
29646 					sl_wmp = tmp_wmp;
29647 					state = SD_WM_WAIT_MAP;
29648 				} else {
29649 					state = SD_WM_LOCK_RANGE;
29650 				}
29651 
29652 			}
29653 			break;
29654 
29655 		case SD_WM_LOCK_RANGE:
29656 			ASSERT(un->un_wm_cache);
29657 			/*
29658 			 * The range needs to be locked, so try to get a wmap.
29659 			 * First attempt it with KM_NOSLEEP; we want to avoid
29660 			 * sleeping if possible, as we would have to release the
29661 			 * sd mutex in order to sleep.
29662 			 */
29663 			if (wmp == NULL)
29664 				wmp = kmem_cache_alloc(un->un_wm_cache,
29665 				    KM_NOSLEEP);
29666 			if (wmp == NULL) {
29667 				mutex_exit(SD_MUTEX(un));
29668 				_NOTE(DATA_READABLE_WITHOUT_LOCK
29669 				    (sd_lun::un_wm_cache))
29670 				wmp = kmem_cache_alloc(un->un_wm_cache,
29671 				    KM_SLEEP);
29672 				mutex_enter(SD_MUTEX(un));
29673 				/*
29674 				 * We released the mutex, so recheck by going
29675 				 * back to the check-list state.
29676 				 */
29677 				state = SD_WM_CHK_LIST;
29678 			} else {
29679 				/*
29680 				 * We exit the state machine since we have
29681 				 * the wmap. Do the housekeeping first:
29682 				 * place the wmap on the wmap list if it is not
29683 				 * on it already, then set the state to done.
29684 				 */
29685 				wmp->wm_start = startb;
29686 				wmp->wm_end = endb;
29687 				wmp->wm_flags = typ | SD_WM_BUSY;
29688 				if (typ & SD_WTYPE_RMW) {
29689 					un->un_rmw_count++;
29690 				}
29691 				/*
29692 				 * If not already on the list then link
29693 				 */
29694 				if (!ONLIST(un, wmp)) {
29695 					wmp->wm_next = un->un_wm;
29696 					wmp->wm_prev = NULL;
29697 					if (wmp->wm_next)
29698 						wmp->wm_next->wm_prev = wmp;
29699 					un->un_wm = wmp;
29700 				}
29701 				state = SD_WM_DONE;
29702 			}
29703 			break;
29704 
29705 		case SD_WM_WAIT_MAP:
29706 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29707 			/*
29708 			 * Wait is done on sl_wmp, which is set in the
29709 			 * check_list state.
29710 			 */
29711 			sl_wmp->wm_wanted_count++;
29712 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29713 			sl_wmp->wm_wanted_count--;
29714 			/*
29715 			 * We can reuse the memory from the completed sl_wmp
29716 			 * lock range for our new lock, but only if no one is
29717 			 * waiting for it.
29718 			 */
29719 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
29720 			if (sl_wmp->wm_wanted_count == 0) {
29721 				if (wmp != NULL)
29722 					CHK_N_FREEWMP(un, wmp);
29723 				wmp = sl_wmp;
29724 			}
29725 			sl_wmp = NULL;
29726 			/*
29727 			 * After waking up, need to recheck for availability of
29728 			 * range.
29729 			 */
29730 			state = SD_WM_CHK_LIST;
29731 			break;
29732 
29733 		default:
29734 			panic("sd_range_lock: "
29735 			    "Unknown state %d in sd_range_lock", state);
29736 			/*NOTREACHED*/
29737 		} /* switch(state) */
29738 
29739 	} /* while(state != SD_WM_DONE) */
29740 
29741 	mutex_exit(SD_MUTEX(un));
29742 
29743 	ASSERT(wmp != NULL);
29744 
29745 	return (wmp);
29746 }
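
/*
 * Editor's note: a minimal sketch of how the range lock pair is meant to
 * bracket a read-modify-write cycle. The caller body is hypothetical;
 * the real callers live in the mapblocksize layer of this driver.
 */
#if 0
static void
sd_rmw_sketch(struct sd_lun *un, daddr_t startb, daddr_t endb)
{
	struct sd_w_map	*wm;

	/* startb/endb are media block numbers, not system block numbers */
	wm = sd_range_lock(un, startb, endb, SD_WTYPE_RMW);

	/* ... read the target blocks, merge in the new data, write ... */

	/* wakes any waiters, or frees the map if nobody is waiting */
	sd_range_unlock(un, wm);
}
#endif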
29747 
29748 
29749 /*
29750  *    Function: sd_get_range()
29751  *
29752  * Description: Determine whether any I/O overlaps this one.
29753  *		Returns the write map of the 1st such I/O, NULL otherwise.
29754  *
29755  *   Arguments: un	- sd_lun structure for the device.
29756  *		startb - The starting block number
29757  *		endb - The end block number
29758  *
29759  * Return Code: wm  - pointer to the wmap structure.
29760  */
29761 
29762 static struct sd_w_map *
29763 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29764 {
29765 	struct sd_w_map *wmp;
29766 
29767 	ASSERT(un != NULL);
29768 
29769 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29770 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29771 			continue;
29772 		}
29773 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29774 			break;
29775 		}
29776 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29777 			break;
29778 		}
29779 	}
29780 
29781 	return (wmp);
29782 }
29783 
29784 
29785 /*
29786  *    Function: sd_free_inlist_wmap()
29787  *
29788  * Description: Unlink and free a write map struct.
29789  *
29790  *   Arguments: un      - sd_lun structure for the device.
29791  *		wmp	- sd_w_map which needs to be unlinked.
29792  */
29793 
29794 static void
29795 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29796 {
29797 	ASSERT(un != NULL);
29798 
29799 	if (un->un_wm == wmp) {
29800 		un->un_wm = wmp->wm_next;
29801 	} else {
29802 		wmp->wm_prev->wm_next = wmp->wm_next;
29803 	}
29804 
29805 	if (wmp->wm_next) {
29806 		wmp->wm_next->wm_prev = wmp->wm_prev;
29807 	}
29808 
29809 	wmp->wm_next = wmp->wm_prev = NULL;
29810 
29811 	kmem_cache_free(un->un_wm_cache, wmp);
29812 }
29813 
29814 
29815 /*
29816  *    Function: sd_range_unlock()
29817  *
29818  * Description: Unlock the range locked by wm.
29819  *		Free write map if nobody else is waiting on it.
29820  *
29821  *   Arguments: un      - sd_lun structure for the device.
29822  *              wm      - sd_w_map whose range needs to be unlocked.
29823  */
29824 
29825 static void
29826 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29827 {
29828 	ASSERT(un != NULL);
29829 	ASSERT(wm != NULL);
29830 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29831 
29832 	mutex_enter(SD_MUTEX(un));
29833 
29834 	if (wm->wm_flags & SD_WTYPE_RMW) {
29835 		un->un_rmw_count--;
29836 	}
29837 
29838 	if (wm->wm_wanted_count) {
29839 		wm->wm_flags = 0;
29840 		/*
29841 		 * Broadcast that the wmap is available now.
29842 		 */
29843 		cv_broadcast(&wm->wm_avail);
29844 	} else {
29845 		/*
29846 		 * If no one is waiting on the map, it should be freed.
29847 		 */
29848 		sd_free_inlist_wmap(un, wm);
29849 	}
29850 
29851 	mutex_exit(SD_MUTEX(un));
29852 }
29853 
29854 
29855 /*
29856  *    Function: sd_read_modify_write_task
29857  *
29858  * Description: Called from a taskq thread to initiate the write phase of
29859  *		a read-modify-write request.  This is used for targets where
29860  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29861  *
29862  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29863  *
29864  *     Context: Called under taskq thread context.
29865  */
29866 
29867 static void
29868 sd_read_modify_write_task(void *arg)
29869 {
29870 	struct sd_mapblocksize_info	*bsp;
29871 	struct buf	*bp;
29872 	struct sd_xbuf	*xp;
29873 	struct sd_lun	*un;
29874 
29875 	bp = arg;	/* The bp is given in arg */
29876 	ASSERT(bp != NULL);
29877 
29878 	/* Get the pointer to the layer-private data struct */
29879 	xp = SD_GET_XBUF(bp);
29880 	ASSERT(xp != NULL);
29881 	bsp = xp->xb_private;
29882 	ASSERT(bsp != NULL);
29883 
29884 	un = SD_GET_UN(bp);
29885 	ASSERT(un != NULL);
29886 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29887 
29888 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29889 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29890 
29891 	/*
29892 	 * This is the write phase of a read-modify-write request, called
29893 	 * under the context of a taskq thread, in response to the read
29894 	 * portion of the rmw request completing under interrupt
29895 	 * context. The write request must be sent from here down the iostart
29896 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29897 	 * we use the layer index saved in the layer-private data area.
29898 	 */
29899 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29900 
29901 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29902 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29903 }
29904 
29905 
29906 /*
29907  *    Function: sddump_do_read_of_rmw()
29908  *
29909  * Description: This routine will be called from sddump. If sddump is called
29910  *		with an I/O that is not aligned on a device blocksize boundary
29911  *		then the write has to be converted to a read-modify-write.
29912  *		Do the read part here in order to keep sddump simple.
29913  *		Note that the sd_mutex is held across the call to this
29914  *		routine.
29915  *
29916  *   Arguments: un	- sd_lun
29917  *		blkno	- block number in terms of media block size.
29918  *		nblk	- number of blocks.
29919  *		bpp	- pointer to pointer to the buf structure. On return
29920  *			from this function, *bpp points to the valid buffer
29921  *			to which the write has to be done.
29922  *
29923  * Return Code: 0 for success or errno-type return code
29924  */
29925 
29926 static int
29927 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29928 	struct buf **bpp)
29929 {
29930 	int err;
29931 	int i;
29932 	int rval;
29933 	struct buf *bp;
29934 	struct scsi_pkt *pkt = NULL;
29935 	uint32_t target_blocksize;
29936 
29937 	ASSERT(un != NULL);
29938 	ASSERT(mutex_owned(SD_MUTEX(un)));
29939 
29940 	target_blocksize = un->un_tgt_blocksize;
29941 
29942 	mutex_exit(SD_MUTEX(un));
29943 
29944 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29945 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29946 	if (bp == NULL) {
29947 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29948 		    "no resources for dumping; giving up");
29949 		err = ENOMEM;
29950 		goto done;
29951 	}
29952 
29953 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29954 	    blkno, nblk);
29955 	if (rval != 0) {
29956 		scsi_free_consistent_buf(bp);
29957 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29958 		    "no resources for dumping; giving up");
29959 		err = ENOMEM;
29960 		goto done;
29961 	}
29962 
29963 	pkt->pkt_flags |= FLAG_NOINTR;
29964 
29965 	err = EIO;
29966 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29967 
29968 		/*
29969 		 * Scsi_poll returns 0 (success) if the command completes and
29970 		 * the status block is STATUS_GOOD.  We should only check
29971 		 * errors if this condition is not true.  Even then we should
29972 		 * send our own request sense packet only if we have a check
29973 		 * condition and auto request sense has not been performed by
29974 		 * the hba.
29975 		 */
29976 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29977 
29978 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29979 			err = 0;
29980 			break;
29981 		}
29982 
29983 		/*
29984 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
29985 		 * no need to read RQS data.
29986 		 */
29987 		if (pkt->pkt_reason == CMD_DEV_GONE) {
29988 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29989 			    "Device is gone\n");
29990 			break;
29991 		}
29992 
29993 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29994 			SD_INFO(SD_LOG_DUMP, un,
29995 			    "sddump: read failed with CHECK, try # %d\n", i);
29996 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29997 				(void) sd_send_polled_RQS(un);
29998 			}
29999 
30000 			continue;
30001 		}
30002 
30003 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
30004 			int reset_retval = 0;
30005 
30006 			SD_INFO(SD_LOG_DUMP, un,
30007 			    "sddump: read failed with BUSY, try # %d\n", i);
30008 
30009 			if (un->un_f_lun_reset_enabled == TRUE) {
30010 				reset_retval = scsi_reset(SD_ADDRESS(un),
30011 				    RESET_LUN);
30012 			}
30013 			if (reset_retval == 0) {
30014 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
30015 			}
30016 			(void) sd_send_polled_RQS(un);
30017 
30018 		} else {
30019 			SD_INFO(SD_LOG_DUMP, un,
30020 			    "sddump: read failed with 0x%x, try # %d\n",
30021 			    SD_GET_PKT_STATUS(pkt), i);
30022 			mutex_enter(SD_MUTEX(un));
30023 			sd_reset_target(un, pkt);
30024 			mutex_exit(SD_MUTEX(un));
30025 		}
30026 
30027 		/*
30028 		 * If we are not getting anywhere with lun/target resets,
30029 		 * let's reset the bus.
30030 		 */
30031 		if (i > SD_NDUMP_RETRIES/2) {
30032 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
30033 			(void) sd_send_polled_RQS(un);
30034 		}
30035 
30036 	}
30037 	scsi_destroy_pkt(pkt);
30038 
30039 	if (err != 0) {
30040 		scsi_free_consistent_buf(bp);
30041 		*bpp = NULL;
30042 	} else {
30043 		*bpp = bp;
30044 	}
30045 
30046 done:
30047 	mutex_enter(SD_MUTEX(un));
30048 	return (err);
30049 }
30050 
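
/*
 * Illustrative sketch (not part of the driver): how a caller such as
 * sddump() might use sddump_do_read_of_rmw() to turn an unaligned dump
 * write into a read-modify-write.  The variable names below are
 * hypothetical and the snippet is compiled out.
 */
#if 0
	struct buf	*wr_bp;
	caddr_t		addr;		/* source data for the dump write */
	size_t		buflen;		/* length of the unaligned data */
	uint64_t	tgt_blkno;	/* aligned start, in target blocks */
	uint64_t	tgt_nblk;	/* whole blocks spanning the I/O */
	uint64_t	start_offset;	/* byte offset of the data in wr_bp */
	int		err;

	/* Read the full target blocks covering the unaligned range. */
	err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk, &wr_bp);
	if (err == 0) {
		/* Overlay the new data onto the freshly read blocks ... */
		bcopy(addr, &wr_bp->b_un.b_addr[start_offset], buflen);
		/* ... and then write all tgt_nblk blocks back from wr_bp. */
	}
#endif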
30051 
30052 /*
30053  *    Function: sd_failfast_flushq
30054  *
30055  * Description: Take all bp's on the wait queue that have B_FAILFAST set
30056  *		in b_flags and move them onto the failfast queue, then return
30057  *		all bp's on the failfast queue to their owners with an
30058  *		error set.
30059  *
30060  *   Arguments: un - pointer to the soft state struct for the instance.
30061  *
30062  *     Context: may execute in interrupt context.
30063  */
30064 
30065 static void
30066 sd_failfast_flushq(struct sd_lun *un)
30067 {
30068 	struct buf *bp;
30069 	struct buf *next_waitq_bp;
30070 	struct buf *prev_waitq_bp = NULL;
30071 
30072 	ASSERT(un != NULL);
30073 	ASSERT(mutex_owned(SD_MUTEX(un)));
30074 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
30075 	ASSERT(un->un_failfast_bp == NULL);
30076 
30077 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30078 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
30079 
30080 	/*
30081 	 * Check if we should flush all bufs when entering failfast state, or
30082 	 * just those with B_FAILFAST set.
30083 	 */
30084 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
30085 		/*
30086 		 * Move *all* bp's on the wait queue to the failfast flush
30087 		 * queue, including those that do NOT have B_FAILFAST set.
30088 		 */
30089 		if (un->un_failfast_headp == NULL) {
30090 			ASSERT(un->un_failfast_tailp == NULL);
30091 			un->un_failfast_headp = un->un_waitq_headp;
30092 		} else {
30093 			ASSERT(un->un_failfast_tailp != NULL);
30094 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
30095 		}
30096 
30097 		un->un_failfast_tailp = un->un_waitq_tailp;
30098 
30099 		/* update kstat for each bp moved out of the waitq */
30100 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
30101 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30102 		}
30103 
30104 		/* empty the waitq */
30105 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
30106 
30107 	} else {
30108 		/*
30109 		 * Go through the wait queue, pick off all entries with
30110 		 * B_FAILFAST set, and move them onto the failfast queue.
30111 		 */
30112 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
30113 			/*
30114 			 * Save the pointer to the next bp on the wait queue,
30115 			 * so we get to it on the next iteration of this loop.
30116 			 */
30117 			next_waitq_bp = bp->av_forw;
30118 
30119 			/*
30120 			 * If this bp from the wait queue does NOT have
30121 			 * B_FAILFAST set, just move on to the next element
30122 			 * in the wait queue. Note, this is the only place
30123 			 * where it is correct to set prev_waitq_bp.
30124 			 */
30125 			if ((bp->b_flags & B_FAILFAST) == 0) {
30126 				prev_waitq_bp = bp;
30127 				continue;
30128 			}
30129 
30130 			/*
30131 			 * Remove the bp from the wait queue.
30132 			 */
30133 			if (bp == un->un_waitq_headp) {
30134 				/* The bp is the first element of the waitq. */
30135 				un->un_waitq_headp = next_waitq_bp;
30136 				if (un->un_waitq_headp == NULL) {
30137 					/* The wait queue is now empty */
30138 					un->un_waitq_tailp = NULL;
30139 				}
30140 			} else {
30141 				/*
30142 				 * The bp is either somewhere in the middle
30143 				 * or at the end of the wait queue.
30144 				 */
30145 				ASSERT(un->un_waitq_headp != NULL);
30146 				ASSERT(prev_waitq_bp != NULL);
30147 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
30148 				    == 0);
30149 				if (bp == un->un_waitq_tailp) {
30150 					/* bp is the last entry on the waitq. */
30151 					ASSERT(next_waitq_bp == NULL);
30152 					un->un_waitq_tailp = prev_waitq_bp;
30153 				}
30154 				prev_waitq_bp->av_forw = next_waitq_bp;
30155 			}
30156 			bp->av_forw = NULL;
30157 
30158 			/*
30159 			 * update kstat since the bp is moved out of
30160 			 * the waitq
30161 			 */
30162 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30163 
30164 			/*
30165 			 * Now put the bp onto the failfast queue.
30166 			 */
30167 			if (un->un_failfast_headp == NULL) {
30168 				/* failfast queue is currently empty */
30169 				ASSERT(un->un_failfast_tailp == NULL);
30170 				un->un_failfast_headp =
30171 				    un->un_failfast_tailp = bp;
30172 			} else {
30173 				/* Add the bp to the end of the failfast q */
30174 				ASSERT(un->un_failfast_tailp != NULL);
30175 				ASSERT(un->un_failfast_tailp->b_flags &
30176 				    B_FAILFAST);
30177 				un->un_failfast_tailp->av_forw = bp;
30178 				un->un_failfast_tailp = bp;
30179 			}
30180 		}
30181 	}
30182 
30183 	/*
30184 	 * Now return all bp's on the failfast queue to their owners.
30185 	 */
30186 	while ((bp = un->un_failfast_headp) != NULL) {
30187 
30188 		un->un_failfast_headp = bp->av_forw;
30189 		if (un->un_failfast_headp == NULL) {
30190 			un->un_failfast_tailp = NULL;
30191 		}
30192 
30193 		/*
30194 		 * We want to return the bp with a failure error code, but
30195 		 * we do not want a call to sd_start_cmds() to occur here,
30196 		 * so use sd_return_failed_command_no_restart() instead of
30197 		 * sd_return_failed_command().
30198 		 */
30199 		sd_return_failed_command_no_restart(un, bp, EIO);
30200 	}
30201 
30202 	/* Flush the xbuf queues if required. */
30203 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
30204 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
30205 	}
30206 
30207 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30208 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
30209 }
30210 
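/*
 * Illustrative note (not from the source): sd_failfast_flushctl is a
 * module-global variable, so the flush policy above can in principle be
 * tuned from /etc/system before the module loads, e.g.:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * The value shown is an assumed example that would set both the
 * SD_FAILFAST_FLUSH_ALL_BUFS and SD_FAILFAST_FLUSH_ALL_QUEUES bits,
 * assuming those flags occupy the low-order bits as defined in sddef.h.
 */
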
30211 
30212 /*
30213  *    Function: sd_failfast_flushq_callback
30214  *
30215  * Description: Return TRUE if the given bp meets the criteria for failfast
30216  *		flushing. Used with ddi_xbuf_flushq(9F).
30217  *
30218  *   Arguments: bp - ptr to buf struct to be examined.
30219  *
30220  *     Context: Any
30221  */
30222 
30223 static int
30224 sd_failfast_flushq_callback(struct buf *bp)
30225 {
30226 	/*
30227 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30228 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30229 	 */
30230 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30231 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30232 }
30233 
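/*
 * Illustrative sketch (not part of the driver): the general shape of a
 * callback-driven queue flush such as ddi_xbuf_flushq(9F), which applies
 * a predicate like sd_failfast_flushq_callback() to each buf on a queue
 * and fails back the ones for which it returns TRUE.  This is only a
 * sketch of the pattern, assuming singly-linked lists via av_forw; the
 * real implementation lives in sd_xbuf.
 */
#if 0
static void
example_flushq(struct buf **headp, int (*cb)(struct buf *))
{
	struct buf *bp = *headp;
	struct buf *prev = NULL;

	while (bp != NULL) {
		struct buf *next = bp->av_forw;

		if (cb(bp)) {
			/* Unlink bp and return it to its owner with EIO. */
			if (prev == NULL)
				*headp = next;
			else
				prev->av_forw = next;
			bp->av_forw = NULL;
			bioerror(bp, EIO);
			biodone(bp);
		} else {
			prev = bp;
		}
		bp = next;
	}
}
#endif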
30234 
30235 
30236 #if defined(__i386) || defined(__amd64)
30237 /*
30238  * Function: sd_setup_next_xfer
30239  *
30240  * Description: Prepare next I/O operation using DMA_PARTIAL
30241  *
30242  */
30243 
30244 static int
30245 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30246     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30247 {
30248 	ssize_t	num_blks_not_xfered;
30249 	daddr_t	strt_blk_num;
30250 	ssize_t	bytes_not_xfered;
30251 	int	rval;
30252 
30253 	ASSERT(pkt->pkt_resid == 0);
30254 
30255 	/*
30256 	 * Calculate next block number and amount to be transferred.
30257 	 *
30258 	 * How much data has NOT been transferred to the HBA yet.
30259 	 */
30260 	bytes_not_xfered = xp->xb_dma_resid;
30261 
30262 	/*
30263 	 * Figure out how many blocks have NOT been transferred to the HBA yet.
30264 	 */
30265 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30266 
30267 	/*
30268 	 * Set the starting block number to the end of what WAS transferred.
30269 	 */
30270 	strt_blk_num = xp->xb_blkno +
30271 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30272 
30273 	/*
30274 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30275 	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
30276 	 * the disk mutex here.
30277 	 */
30278 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30279 	    strt_blk_num, num_blks_not_xfered);
30280 
30281 	if (rval == 0) {
30282 
30283 		/*
30284 		 * Success.
30285 		 *
30286 		 * Adjust things if there are still more blocks to be
30287 		 * transferred.
30288 		 */
30289 		xp->xb_dma_resid = pkt->pkt_resid;
30290 		pkt->pkt_resid = 0;
30291 
30292 		return (1);
30293 	}
30294 
30295 	/*
30296 	 * There's really only one possible failure return value from
30297 	 * sd_setup_next_rw_pkt, which occurs when scsi_init_pkt
30298 	 * returns NULL.
30299 	 */
30300 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30301 
30302 	bp->b_resid = bp->b_bcount;
30303 	bp->b_flags |= B_ERROR;
30304 
30305 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30306 	    "Error setting up next portion of DMA transfer\n");
30307 
30308 	return (0);
30309 }
30310 #endif
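
/*
 * Worked example for the sd_setup_next_xfer() arithmetic above (values
 * are illustrative): for a 1 MB request (b_bcount = 1048576) starting at
 * xb_blkno = 100 on a 512-byte blocksize target, where the first DMA
 * window moved 256 KB and left xb_dma_resid = 786432:
 *
 *	num_blks_not_xfered = 786432 / 512 = 1536
 *	strt_blk_num = 100 + (1048576 - 786432) / 512 = 100 + 512 = 612
 *
 * i.e. the next window resumes exactly where the previous one ended.
 */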
30311 
30312 /*
30313  *    Function: sd_panic_for_res_conflict
30314  *
30315  * Description: Call panic with a string formatted with "Reservation Conflict"
30316  *		and a human-readable identifier indicating the SD instance
30317  *		that experienced the reservation conflict.
30318  *
30319  *   Arguments: un - pointer to the soft state struct for the instance.
30320  *
30321  *     Context: may execute in interrupt context.
30322  */
30323 
30324 #define	SD_RESV_CONFLICT_FMT_LEN 40
30325 void
30326 sd_panic_for_res_conflict(struct sd_lun *un)
30327 {
30328 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30329 	char path_str[MAXPATHLEN];
30330 
30331 	(void) snprintf(panic_str, sizeof (panic_str),
30332 	    "Reservation Conflict\nDisk: %s",
30333 	    ddi_pathname(SD_DEVINFO(un), path_str));
30334 
30335 	panic(panic_str);
30336 }
30337 
30338 /*
30339  * Note: The following sd_faultinjection_ioctl() routines implement
30340  * driver support for fault injection, which is used for error analysis
30341  * by injecting faults into multiple layers of the driver.
30342  *
30343  */
30344 
30345 #ifdef SD_FAULT_INJECTION
30346 static uint_t   sd_fault_injection_on = 0;
30347 
30348 /*
30349  *    Function: sd_faultinjection_ioctl()
30350  *
30351  * Description: This routine is the driver entry point for handling
30352  *              faultinjection ioctls to inject errors into the
30353  *              fault injection ioctls used to inject errors into the
30354  *              layer model.
30355  *   Arguments: cmd	- the ioctl cmd received
30356  *		arg	- the user-supplied arguments (also used to return data)
30357  */
30358 
30359 static void
30360 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
30361 {
30362 	uint_t i;
30363 	uint_t rval;
30364 
30365 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30366 
30367 	mutex_enter(SD_MUTEX(un));
30368 
30369 	switch (cmd) {
30370 	case SDIOCRUN:
30371 		/* Allow pushed faults to be injected */
30372 		SD_INFO(SD_LOG_SDTEST, un,
30373 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30374 
30375 		sd_fault_injection_on = 1;
30376 
30377 		SD_INFO(SD_LOG_IOERR, un,
30378 		    "sd_faultinjection_ioctl: run finished\n");
30379 		break;
30380 
30381 	case SDIOCSTART:
30382 		/* Start Injection Session */
30383 		SD_INFO(SD_LOG_SDTEST, un,
30384 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30385 
30386 		sd_fault_injection_on = 0;
30387 		un->sd_injection_mask = 0xFFFFFFFF;
30388 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30389 			un->sd_fi_fifo_pkt[i] = NULL;
30390 			un->sd_fi_fifo_xb[i] = NULL;
30391 			un->sd_fi_fifo_un[i] = NULL;
30392 			un->sd_fi_fifo_arq[i] = NULL;
30393 		}
30394 		un->sd_fi_fifo_start = 0;
30395 		un->sd_fi_fifo_end = 0;
30396 
30397 		mutex_enter(&(un->un_fi_mutex));
30398 		un->sd_fi_log[0] = '\0';
30399 		un->sd_fi_buf_len = 0;
30400 		mutex_exit(&(un->un_fi_mutex));
30401 
30402 		SD_INFO(SD_LOG_IOERR, un,
30403 		    "sd_faultinjection_ioctl: start finished\n");
30404 		break;
30405 
30406 	case SDIOCSTOP:
30407 		/* Stop Injection Session */
30408 		SD_INFO(SD_LOG_SDTEST, un,
30409 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30410 		sd_fault_injection_on = 0;
30411 		un->sd_injection_mask = 0x0;
30412 
30413 		/* Empty stray or unused structs from the fifo */
30414 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30415 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30416 				kmem_free(un->sd_fi_fifo_pkt[i],
30417 				    sizeof (struct sd_fi_pkt));
30418 			}
30419 			if (un->sd_fi_fifo_xb[i] != NULL) {
30420 				kmem_free(un->sd_fi_fifo_xb[i],
30421 				    sizeof (struct sd_fi_xb));
30422 			}
30423 			if (un->sd_fi_fifo_un[i] != NULL) {
30424 				kmem_free(un->sd_fi_fifo_un[i],
30425 				    sizeof (struct sd_fi_un));
30426 			}
30427 			if (un->sd_fi_fifo_arq[i] != NULL) {
30428 				kmem_free(un->sd_fi_fifo_arq[i],
30429 				    sizeof (struct sd_fi_arq));
30430 			}
30431 			un->sd_fi_fifo_pkt[i] = NULL;
30432 			un->sd_fi_fifo_un[i] = NULL;
30433 			un->sd_fi_fifo_xb[i] = NULL;
30434 			un->sd_fi_fifo_arq[i] = NULL;
30435 		}
30436 		un->sd_fi_fifo_start = 0;
30437 		un->sd_fi_fifo_end = 0;
30438 
30439 		SD_INFO(SD_LOG_IOERR, un,
30440 		    "sd_faultinjection_ioctl: stop finished\n");
30441 		break;
30442 
30443 	case SDIOCINSERTPKT:
30444 		/* Store a packet struct to be pushed onto the fifo */
30445 		SD_INFO(SD_LOG_SDTEST, un,
30446 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30447 
30448 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30449 
30450 		sd_fault_injection_on = 0;
30451 
30452 		/* No more than SD_FI_MAX_ERROR allowed in the queue */
30453 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30454 			kmem_free(un->sd_fi_fifo_pkt[i],
30455 			    sizeof (struct sd_fi_pkt));
30456 		}
30457 		if (arg != NULL) {
30458 			un->sd_fi_fifo_pkt[i] =
30459 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30460 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30461 				/* Alloc failed; don't store anything */
30462 				break;
30463 			}
30464 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30465 			    sizeof (struct sd_fi_pkt), 0);
30466 			if (rval == -1) {
30467 				kmem_free(un->sd_fi_fifo_pkt[i],
30468 				    sizeof (struct sd_fi_pkt));
30469 				un->sd_fi_fifo_pkt[i] = NULL;
30470 			}
30471 		} else {
30472 			SD_INFO(SD_LOG_IOERR, un,
30473 			    "sd_faultinjection_ioctl: pkt null\n");
30474 		}
30475 		break;
30476 
30477 	case SDIOCINSERTXB:
30478 		/* Store an xb struct to be pushed onto the fifo */
30479 		SD_INFO(SD_LOG_SDTEST, un,
30480 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30481 
30482 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30483 
30484 		sd_fault_injection_on = 0;
30485 
30486 		if (un->sd_fi_fifo_xb[i] != NULL) {
30487 			kmem_free(un->sd_fi_fifo_xb[i],
30488 			    sizeof (struct sd_fi_xb));
30489 			un->sd_fi_fifo_xb[i] = NULL;
30490 		}
30491 		if (arg != NULL) {
30492 			un->sd_fi_fifo_xb[i] =
30493 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30494 			if (un->sd_fi_fifo_xb[i] == NULL) {
30495 				/* Alloc failed; don't store anything */
30496 				break;
30497 			}
30498 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30499 			    sizeof (struct sd_fi_xb), 0);
30500 
30501 			if (rval == -1) {
30502 				kmem_free(un->sd_fi_fifo_xb[i],
30503 				    sizeof (struct sd_fi_xb));
30504 				un->sd_fi_fifo_xb[i] = NULL;
30505 			}
30506 		} else {
30507 			SD_INFO(SD_LOG_IOERR, un,
30508 			    "sd_faultinjection_ioctl: xb null\n");
30509 		}
30510 		break;
30511 
30512 	case SDIOCINSERTUN:
30513 		/* Store a un struct to be pushed onto the fifo */
30514 		SD_INFO(SD_LOG_SDTEST, un,
30515 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30516 
30517 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30518 
30519 		sd_fault_injection_on = 0;
30520 
30521 		if (un->sd_fi_fifo_un[i] != NULL) {
30522 			kmem_free(un->sd_fi_fifo_un[i],
30523 			    sizeof (struct sd_fi_un));
30524 			un->sd_fi_fifo_un[i] = NULL;
30525 		}
30526 		if (arg != NULL) {
30527 			un->sd_fi_fifo_un[i] =
30528 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30529 			if (un->sd_fi_fifo_un[i] == NULL) {
30530 				/* Alloc failed; don't store anything */
30531 				break;
30532 			}
30533 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30534 			    sizeof (struct sd_fi_un), 0);
30535 			if (rval == -1) {
30536 				kmem_free(un->sd_fi_fifo_un[i],
30537 				    sizeof (struct sd_fi_un));
30538 				un->sd_fi_fifo_un[i] = NULL;
30539 			}
30540 
30541 		} else {
30542 			SD_INFO(SD_LOG_IOERR, un,
30543 			    "sd_faultinjection_ioctl: un null\n");
30544 		}
30545 
30546 		break;
30547 
30548 	case SDIOCINSERTARQ:
30549 		/* Store an arq struct to be pushed onto the fifo */
30550 		SD_INFO(SD_LOG_SDTEST, un,
30551 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30552 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30553 
30554 		sd_fault_injection_on = 0;
30555 
30556 		if (un->sd_fi_fifo_arq[i] != NULL) {
30557 			kmem_free(un->sd_fi_fifo_arq[i],
30558 			    sizeof (struct sd_fi_arq));
30559 			un->sd_fi_fifo_arq[i] = NULL;
30560 		}
30561 		if (arg != NULL) {
30562 			un->sd_fi_fifo_arq[i] =
30563 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30564 			if (un->sd_fi_fifo_arq[i] == NULL) {
30565 				/* Alloc failed; don't store anything */
30566 				break;
30567 			}
30568 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30569 			    sizeof (struct sd_fi_arq), 0);
30570 			if (rval == -1) {
30571 				kmem_free(un->sd_fi_fifo_arq[i],
30572 				    sizeof (struct sd_fi_arq));
30573 				un->sd_fi_fifo_arq[i] = NULL;
30574 			}
30575 
30576 		} else {
30577 			SD_INFO(SD_LOG_IOERR, un,
30578 			    "sd_faultinjection_ioctl: arq null\n");
30579 		}
30580 
30581 		break;
30582 
30583 	case SDIOCPUSH:
30584 		/* Push stored xb, pkt, un, and arq onto fifo */
30585 		sd_fault_injection_on = 0;
30586 
30587 		if (arg != NULL) {
30588 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30589 			if (rval != -1 &&
30590 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30591 				un->sd_fi_fifo_end += i;
30592 			}
30593 		} else {
30594 			SD_INFO(SD_LOG_IOERR, un,
30595 			    "sd_faultinjection_ioctl: push arg null\n");
30596 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
30597 				un->sd_fi_fifo_end++;
30598 			}
30599 		}
30600 		SD_INFO(SD_LOG_IOERR, un,
30601 		    "sd_faultinjection_ioctl: push to end=%d\n",
30602 		    un->sd_fi_fifo_end);
30603 		break;
30604 
30605 	case SDIOCRETRIEVE:
30606 		/* Return buffer of log from Injection session */
30607 		SD_INFO(SD_LOG_SDTEST, un,
30608 		    "sd_faultinjection_ioctl: Injecting Fault Retrieve");
30609 
30610 		sd_fault_injection_on = 0;
30611 
30612 		mutex_enter(&(un->un_fi_mutex));
30613 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30614 		    un->sd_fi_buf_len+1, 0);
30615 		mutex_exit(&(un->un_fi_mutex));
30616 
30617 		if (rval == -1) {
30618 			/*
30619 			 * arg is possibly invalid; set it
30620 			 * to NULL for the return.
30621 			 */
30622 			arg = NULL;
30623 		}
30624 		break;
30625 	}
30626 
30627 	mutex_exit(SD_MUTEX(un));
30628 	SD_TRACE(SD_LOG_IOERR, un,
30629 	    "sd_faultinjection_ioctl: exit\n");
30630 }
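
/*
 * Illustrative sketch (not part of the driver): a plausible userland
 * sequence for driving the fault-injection ioctls above.  The device
 * path and the injected pkt_reason value are hypothetical, and the
 * snippet is compiled out; it assumes the SDIOC* and struct sd_fi_*
 * definitions are visible to the caller.
 */
#if 0
	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);	/* hypothetical path */
	struct sd_fi_pkt fi_pkt = { 0 };
	uint_t npush = 1;
	char log[SD_FI_MAX_BUF];

	fi_pkt.pkt_reason = CMD_INCOMPLETE;	/* fault to inject */

	(void) ioctl(fd, SDIOCSTART);		/* begin an injection session */
	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt); /* stage a pkt fault */
	(void) ioctl(fd, SDIOCPUSH, &npush);	/* push it onto the fifo */
	(void) ioctl(fd, SDIOCRUN);		/* arm injection */
	/* ... perform I/O to the device to trigger the fault ... */
	(void) ioctl(fd, SDIOCRETRIEVE, log);	/* fetch the session log */
	(void) ioctl(fd, SDIOCSTOP);		/* end the session */
#endif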
30631 
30632 
30633 /*
30634  *    Function: sd_injection_log()
30635  *
30636  * Description: This routine appends buf to the existing injection log,
30637  *              for later retrieval via sd_faultinjection_ioctl(), for use
30638  *              in fault detection and recovery.
30639  *
30640  *   Arguments: buf - the string to add to the log
30641  */
30642 
30643 static void
30644 sd_injection_log(char *buf, struct sd_lun *un)
30645 {
30646 	uint_t len;
30647 
30648 	ASSERT(un != NULL);
30649 	ASSERT(buf != NULL);
30650 
30651 	mutex_enter(&(un->un_fi_mutex));
30652 
30653 	len = min(strlen(buf), 255);
30654 	/* Add logged value to Injection log to be returned later */
30655 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30656 		uint_t	offset = strlen((char *)un->sd_fi_log);
30657 		char *destp = (char *)un->sd_fi_log + offset;
30658 		int i;
30659 		for (i = 0; i < len; i++) {
30660 			*destp++ = *buf++;
30661 		}
30662 		un->sd_fi_buf_len += len;
30663 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30664 	}
30665 
30666 	mutex_exit(&(un->un_fi_mutex));
30667 }
30668 
30669 
30670 /*
30671  *    Function: sd_faultinjection()
30672  *
30673  * Description: This routine takes the pkt and changes its
30674  *		contents based on the error injection scenario.
30675  *
30676  *   Arguments: pktp	- packet to be changed
30677  */
30678 
30679 static void
30680 sd_faultinjection(struct scsi_pkt *pktp)
30681 {
30682 	uint_t i;
30683 	struct sd_fi_pkt *fi_pkt;
30684 	struct sd_fi_xb *fi_xb;
30685 	struct sd_fi_un *fi_un;
30686 	struct sd_fi_arq *fi_arq;
30687 	struct buf *bp;
30688 	struct sd_xbuf *xb;
30689 	struct sd_lun *un;
30690 
30691 	ASSERT(pktp != NULL);
30692 
30693 	/* pull bp, xb, and un from pktp */
30694 	bp = (struct buf *)pktp->pkt_private;
30695 	xb = SD_GET_XBUF(bp);
30696 	un = SD_GET_UN(bp);
30697 
30698 	ASSERT(un != NULL);
30699 
30700 	mutex_enter(SD_MUTEX(un));
30701 
30702 	SD_TRACE(SD_LOG_SDTEST, un,
30703 	    "sd_faultinjection: entry Injection from sdintr\n");
30704 
30705 	/* If injection is off, return. */
30706 	if (sd_fault_injection_on == 0 ||
30707 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30708 		mutex_exit(SD_MUTEX(un));
30709 		return;
30710 	}
30711 
30712 
30713 	/* take next set off fifo */
30714 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30715 
30716 	fi_pkt = un->sd_fi_fifo_pkt[i];
30717 	fi_xb = un->sd_fi_fifo_xb[i];
30718 	fi_un = un->sd_fi_fifo_un[i];
30719 	fi_arq = un->sd_fi_fifo_arq[i];
30720 
30721 
30722 	/* set variables accordingly */
30723 	/* set pkt if it was on fifo */
30724 	if (fi_pkt != NULL) {
30725 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30726 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30727 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30728 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30729 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30730 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30731 
30732 	}
30733 
30734 	/* set xb if it was on fifo */
30735 	if (fi_xb != NULL) {
30736 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30737 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30738 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30739 		SD_CONDSET(xb, xb, xb_victim_retry_count,
30740 		    "xb_victim_retry_count");
30741 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30742 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30743 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30744 
30745 		/* copy in block data from sense */
30746 		if (fi_xb->xb_sense_data[0] != -1) {
30747 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30748 			    SENSE_LENGTH);
30749 		}
30750 
30751 		/* copy in extended sense codes */
30752 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
30753 		    "es_code");
30754 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
30755 		    "es_key");
30756 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
30757 		    "es_add_code");
30758 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
30759 		    es_qual_code, "es_qual_code");
30760 	}
30761 
30762 	/* set un if it was on fifo */
30763 	if (fi_un != NULL) {
30764 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30765 		SD_CONDSET(un, un, un_ctype, "un_ctype");
30766 		SD_CONDSET(un, un, un_reset_retry_count,
30767 		    "un_reset_retry_count");
30768 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30769 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30770 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30771 		SD_CONDSET(un, un, un_f_geometry_is_valid,
30772 		    "un_f_geometry_is_valid");
30773 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30774 		    "un_f_allow_bus_device_reset");
30775 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30776 
30777 	}
30778 
30779 	/* copy in auto request sense if it was on fifo */
30780 	if (fi_arq != NULL) {
30781 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30782 	}
30783 
30784 	/* free structs */
30785 	if (un->sd_fi_fifo_pkt[i] != NULL) {
30786 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30787 	}
30788 	if (un->sd_fi_fifo_xb[i] != NULL) {
30789 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30790 	}
30791 	if (un->sd_fi_fifo_un[i] != NULL) {
30792 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30793 	}
30794 	if (un->sd_fi_fifo_arq[i] != NULL) {
30795 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30796 	}
30797 
30798 	/*
30799 	 * kmem_free does not guarantee to set the pointer to NULL.
30800 	 * Since we use these pointers to determine whether we set
30801 	 * values or not, make sure they are always NULL after
30802 	 * the free.
30803 	 */
30804 	un->sd_fi_fifo_pkt[i] = NULL;
30805 	un->sd_fi_fifo_un[i] = NULL;
30806 	un->sd_fi_fifo_xb[i] = NULL;
30807 	un->sd_fi_fifo_arq[i] = NULL;
30808 
30809 	un->sd_fi_fifo_start++;
30810 
30811 	mutex_exit(SD_MUTEX(un));
30812 
30813 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30814 }
30815 
30816 #endif /* SD_FAULT_INJECTION */
30817 
30818 /*
30819  * This routine is invoked in sd_unit_attach(). Before calling it, the
30820  * properties in the conf file should already have been processed,
30821  * including the "hotpluggable" property.
30822  *
30823  * The sd driver distinguishes 3 different types of devices: removable media,
30824  * non-removable media, and hotpluggable. The differences are defined below:
30825  *
30826  * 1. Device ID
30827  *
30828  *     The device ID of a device is used to identify this device. Refer to
30829  *     ddi_devid_register(9F).
30830  *
30831  *     For a non-removable media disk device which can provide 0x80 or 0x83
30832  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30833  *     device ID is created to identify this device. For other non-removable
30834  *     media devices, a default device ID is created only if this device has
30835  *     at least 2 alternate cylinders. Otherwise, this device has no devid.
30836  *
30837  *     -------------------------------------------------------
30838  *     removable media   hotpluggable  | Can Have Device ID
30839  *     -------------------------------------------------------
30840  *         false             false     |     Yes
30841  *         false             true      |     Yes
30842  *         true                x       |     No
30843  *     ------------------------------------------------------
30844  *
30845  *
30846  * 2. SCSI group 4 commands
30847  *
30848  *     In SCSI specs, only some commands in group 4 command set can use
30849  *     8-byte addresses that can be used to access >2TB storage spaces.
30850  *     Other commands have no such capability. Without group 4 support,
30851  *     it is impossible to make full use of storage spaces of a disk with
30852  *     capacity larger than 2TB.
30853  *
30854  *     -----------------------------------------------
30855  *     removable media   hotpluggable   LP64  |  Group
30856  *     -----------------------------------------------
30857  *           false          false       false |   1
30858  *           false          false       true  |   4
30859  *           false          true        false |   1
30860  *           false          true        true  |   4
30861  *           true             x           x   |   5
30862  *     -----------------------------------------------
30863  *
30864  *
30865  * 3. Check for VTOC Label
30866  *
30867  *     If a direct-access disk has no EFI label, sd will check if it has a
30868  *     valid VTOC label. Now, sd also does that check for removable media
30869  *     and hotpluggable devices.
30870  *
30871  *     --------------------------------------------------------------
30872  *     Direct-Access   removable media    hotpluggable |  Check Label
30873  *     -------------------------------------------------------------
30874  *         false          false           false        |   No
30875  *         false          false           true         |   No
30876  *         false          true            false        |   Yes
30877  *         false          true            true         |   Yes
30878  *         true            x                x          |   Yes
30879  *     --------------------------------------------------------------
30880  *
30881  *
30882  * 4. Building default VTOC label
30883  *
30884  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
30885  *     If those devices have no valid VTOC label, sd(7d) will attempt to
30886  *     create default VTOC for them. Currently sd creates default VTOC label
30887  *     for all devices on x86 platform (VTOC_16), but only for removable
30888  *     media devices on SPARC (VTOC_8).
30889  *
30890  *     -----------------------------------------------------------
30891  *       removable media hotpluggable platform   |   Default Label
30892  *     -----------------------------------------------------------
30893  *             false          false    sparc     |     No
30894  *             false          true      x86      |     Yes
30895  *             false          true     sparc     |     Yes
30896  *             true             x        x       |     Yes
30897  *     ----------------------------------------------------------
30898  *
30899  *
30900  * 5. Supported blocksizes of target devices
30901  *
30902  *     Sd supports non-512-byte blocksize for removable media devices only.
30903  *     For other devices, only 512-byte blocksize is supported. This may be
30904  *     changed in the near future because some RAID devices require a
30905  *     non-512-byte blocksize.
30906  *
30907  *     -----------------------------------------------------------
30908  *     removable media    hotpluggable    | non-512-byte blocksize
30909  *     -----------------------------------------------------------
30910  *           false          false         |   No
30911  *           false          true          |   No
30912  *           true             x           |   Yes
30913  *     -----------------------------------------------------------
30914  *
30915  *
30916  * 6. Automatic mount & unmount (i.e. vold)
30917  *
30918  *     Sd(7d) driver provides DKIOCREMOVABLE ioctl. This ioctl is used to query
30919  *     if a device is a removable media device. It returns 1 for removable
30920  *     media devices, and 0 for others.
30921  *
30922  *     Vold treats a device as a removable one only if DKIOCREMOVABLE returns
30923  *     1, and it does automounting only for removable media devices. In order to
30924  *     preserve users' experience and let vold continue to do automounting for
30925  *     USB disk devices, DKIOCREMOVABLE ioctl still returns 1 for USB/1394 disk
30926  *     devices.
30927  *
30928  *      ------------------------------------------------------
30929  *       removable media    hotpluggable   |  automatic mount
30930  *      ------------------------------------------------------
30931  *             false          false        |   No
30932  *             false          true         |   Yes
30933  *             true             x          |   Yes
30934  *      ------------------------------------------------------
30935  *
30936  *
30937  * 7. fdisk partition management
30938  *
30939  *     Fdisk is the traditional partitioning method on the x86 platform. The
30940  *     sd(7d) driver supports fdisk partitions only on x86; on SPARC, sd
30941  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
30942  *     fdisk partitions on both the x86 and SPARC platforms.
30943  *
30944  *     -----------------------------------------------------------
30945  *       platform   removable media  USB/1394  |  fdisk supported
30946  *     -----------------------------------------------------------
30947  *        x86         X               X        |       true
30948  *     ------------------------------------------------------------
30949  *        sparc       X               X        |       false
30950  *     ------------------------------------------------------------
30951  *
30952  *
30953  * 8. MBOOT/MBR
30954  *
30955  *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
30956  *     support reading/writing the mboot for removable media devices on SPARC.
30957  *
30958  *     -----------------------------------------------------------
30959  *       platform   removable media  USB/1394  |  mboot supported
30960  *     -----------------------------------------------------------
30961  *        x86         X               X        |       true
30962  *     ------------------------------------------------------------
30963  *        sparc      false           false     |       false
30964  *        sparc      false           true      |       true
30965  *        sparc      true            false     |       true
30966  *        sparc      true            true      |       true
30967  *     ------------------------------------------------------------
30968  *
30969  *
30970  * 9.  error handling during opening device
30971  *
30972  *     If opening a disk device fails, an errno is returned. For some kinds
30973  *     of errors, a different errno is returned depending on whether this is
30974  *     a removable media device. This brings USB/1394 hard disks in line with
30975  *     expected hard disk behavior. It is not expected that this breaks any
30976  *     application.
30977  *
30978  *     ------------------------------------------------------
30979  *       removable media    hotpluggable   |  errno
30980  *     ------------------------------------------------------
30981  *             false          false        |   EIO
30982  *             false          true         |   EIO
30983  *             true             x          |   ENXIO
30984  *     ------------------------------------------------------
30985  *
30986  *
30987  * 11. ioctls: DKIOCEJECT, CDROMEJECT
30988  *
30989  *     These IOCTLs are applicable only to removable media devices.
30990  *
30991  *     -----------------------------------------------------------
30992  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30993  *     -----------------------------------------------------------
30994  *             false          false        |     No
30995  *             false          true         |     No
30996  *             true            x           |     Yes
30997  *     -----------------------------------------------------------
30998  *
30999  *
31000  * 12. Kstats for partitions
31001  *
31002  *     sd creates partition kstats for non-removable media devices. USB and
31003  *     Firewire hard disks now have partition kstats.
31004  *
31005  *      ------------------------------------------------------
31006  *       removable media    hotpluggable   |   kstat
31007  *      ------------------------------------------------------
31008  *             false          false        |    Yes
31009  *             false          true         |    Yes
31010  *             true             x          |    No
31011  *       ------------------------------------------------------
31012  *
31013  *
31014  * 13. Removable media & hotpluggable properties
31015  *
31016  *     Sd driver creates a "removable-media" property for removable media
31017  *     devices. Parent nexus drivers create a "hotpluggable" property if
31018  *     they support hotplugging.
31019  *
31020  *     ---------------------------------------------------------------------
31021  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
31022  *     ---------------------------------------------------------------------
31023  *       false            false       |    No                   No
31024  *       false            true        |    No                   Yes
31025  *       true             false       |    Yes                  No
31026  *       true             true        |    Yes                  Yes
31027  *     ---------------------------------------------------------------------
31028  *
31029  *
31030  * 14. Power Management
31031  *
31032  *     sd only power manages removable media devices or devices that support
31033  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
31034  *
31035  *     A parent nexus that supports hotplugging can also set "pm-capable"
31036  *     if the disk can be power managed.
31037  *
31038  *     ------------------------------------------------------------
31039  *       removable media hotpluggable pm-capable  |   power manage
31040  *     ------------------------------------------------------------
31041  *             false          false     false     |     No
31042  *             false          false     true      |     Yes
31043  *             false          true      false     |     No
31044  *             false          true      true      |     Yes
31045  *             true             x        x        |     Yes
31046  *     ------------------------------------------------------------
31047  *
31048  *      USB and firewire hard disks can now be power managed independently
31049  *      of the framebuffer
31050  *
31051  *
31052  * 15. Support for USB disks with capacity larger than 1TB
31053  *
31054  *     Currently, sd doesn't permit a fixed disk device with capacity
31055  *     larger than 1TB to be used in a 32-bit operating system environment.
31056  *     However, sd doesn't do that for removable media devices. Instead, it
31057  *     However, sd doesn't enforce this for removable media devices. Instead, it
31058  *     than 1TB. Therefore, using those devices on 32-bit system is partially
31059  *     supported, which can cause some unexpected results.
31060  *
31061  *     ---------------------------------------------------------------------
31062  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
31063  *     ---------------------------------------------------------------------
31064  *             false          false  |   true         |     no
31065  *             false          true   |   true         |     no
31066  *             true           false  |   true         |     Yes
31067  *             true           true   |   true         |     Yes
31068  *     ---------------------------------------------------------------------
31069  *
31070  *
31071  * 16. Check write-protection at open time
31072  *
31073  *     When a removable media device is being opened for writing without the
31074  *     NDELAY flag, sd will check if this device is writable. An attempt to
31075  *     open a write-protected device without the NDELAY flag will fail.
31076  *
31077  *     ------------------------------------------------------------
31078  *       removable media    USB/1394   |   WP Check
31079  *     ------------------------------------------------------------
31080  *             false          false    |     No
31081  *             false          true     |     No
31082  *             true           false    |     Yes
31083  *             true           true     |     Yes
31084  *     ------------------------------------------------------------
31085  *
31086  *
31087  * 17. syslog when corrupted VTOC is encountered
31088  *
31089  *      Currently, if an invalid VTOC is encountered, sd only prints a
31090  *      syslog message for fixed SCSI disks.
31091  *     ------------------------------------------------------------
31092  *       removable media    USB/1394   |   print syslog
31093  *     ------------------------------------------------------------
31094  *             false          false    |     Yes
31095  *             false          true     |     No
31096  *             true           false    |     No
31097  *             true           true     |     No
31098  *     ------------------------------------------------------------
31099  */
31100 static void
31101 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
31102 {
31103 	int	pm_capable_prop;
31104 
31105 	ASSERT(un->un_sd);
31106 	ASSERT(un->un_sd->sd_inq);
31107 
31108 #if defined(_SUNOS_VTOC_16)
31109 	/*
31110 	 * For VTOC_16 devices, the default label will be created for all
31111 	 * devices. (see sd_build_default_label)
31112 	 */
31113 	un->un_f_default_vtoc_supported = TRUE;
31114 #endif
31115 
31116 	if (un->un_sd->sd_inq->inq_rmb) {
31117 		/*
31118 		 * The media of this device is removable, and for this kind
31119 		 * of device it is possible to change the medium after
31120 		 * opening it. Thus we should support this operation.
31121 		 */
31122 		un->un_f_has_removable_media = TRUE;
31123 
31124 #if defined(_SUNOS_VTOC_8)
31125 		/*
31126 		 * Note: currently, for VTOC_8 devices, default label is
31127 		 * created for removable and hotpluggable devices only.
31128 		 */
31129 		un->un_f_default_vtoc_supported = TRUE;
31130 #endif
31131 		/*
31132 		 * Support non-512-byte blocksizes for removable media devices.
31133 		 */
31134 		un->un_f_non_devbsize_supported = TRUE;
31135 
31136 		/*
31137 		 * Assume that all removable media devices support DOOR_LOCK
31138 		 */
31139 		un->un_f_doorlock_supported = TRUE;
31140 
31141 		/*
31142 		 * A removable media device can be opened with the NDELAY flag
31143 		 * when there is no media in the drive; in this case we don't
31144 		 * care if the device is writable. But without the NDELAY flag,
31145 		 * we need to check if the media is write-protected.
31146 		 */
31147 		un->un_f_chk_wp_open = TRUE;
31148 
31149 		/*
31150 		 * Need to start a SCSI watch thread to monitor the media
31151 		 * state; when media is inserted or ejected, notify syseventd.
31152 		 */
31153 		un->un_f_monitor_media_state = TRUE;
31154 
31155 		/*
31156 		 * Some devices don't support the START_STOP_UNIT command.
31157 		 * Therefore, we'd better check if a device supports it
31158 		 * before sending it.
31159 		 */
31160 		un->un_f_check_start_stop = TRUE;
31161 
31162 		/*
31163 		 * support eject media ioctl:
31164 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31165 		 */
31166 		un->un_f_eject_media_supported = TRUE;
31167 
31168 		/*
31169 		 * Because many removable-media devices don't support
31170 		 * LOG_SENSE, we cannot use this command to check whether
31171 		 * a removable media device supports power management.
31172 		 * We assume that they support power management via the
31173 		 * START_STOP_UNIT command and can be spun up and down
31174 		 * without limitations.
31175 		 */
31176 		un->un_f_pm_supported = TRUE;
31177 
31178 		/*
31179 		 * Need to create a zero-length (Boolean) property
31180 		 * "removable-media" for the removable media devices.
31181 		 * Note that the return value of ddi_prop_create() is not
31182 		 * checked: if the property cannot be created, we do not
31183 		 * want the attach to fail altogether. This is consistent
31184 		 * with other property creation in attach.
31185 		 */
31186 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31187 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31188 
31189 	} else {
31190 		/*
31191 		 * create device ID for device
31192 		 */
31193 		un->un_f_devid_supported = TRUE;
31194 
31195 		/*
31196 		 * Spin up non-removable-media devices once they are attached.
31197 		 */
31198 		un->un_f_attach_spinup = TRUE;
31199 
31200 		/*
31201 		 * According to the SCSI specification, sense data has two
31202 		 * kinds of format: fixed format and descriptor format. At
31203 		 * present, we don't support descriptor format sense data
31204 		 * for removable media.
31205 		 */
31206 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31207 			un->un_f_descr_format_supported = TRUE;
31208 		}
31209 
31210 		/*
31211 		 * kstats are created only for non-removable media devices.
31212 		 *
31213 		 * Set this in sd.conf to 0 in order to disable kstats.  The
31214 		 * default is 1, so they are enabled by default.
31215 		 */
31216 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31217 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31218 		    "enable-partition-kstats", 1));
31219 
31220 		/*
31221 		 * Check if HBA has set the "pm-capable" property.
31222 		 * If "pm-capable" exists and is non-zero then we can
31223 		 * power manage the device without checking the start/stop
31224 		 * cycle count log sense page.
31225 		 *
31226 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
31227 		 * then we should not power manage the device.
31228 		 *
31229 		 * If "pm-capable" doesn't exist then pm_capable_prop will
31230 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31231 		 * sd will check the start/stop cycle count log sense page
31232 		 * and power manage the device if the cycle count limit has
31233 		 * not been exceeded.
31234 		 */
31235 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31236 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31237 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
31238 			un->un_f_log_sense_supported = TRUE;
31239 		} else {
31240 			/*
31241 			 * pm-capable property exists.
31242 			 *
31243 			 * Convert "TRUE" values for pm_capable_prop to
31244 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
31245 			 * later. "TRUE" values are any values except
31246 			 * SD_PM_CAPABLE_FALSE (0) and
31247 			 * SD_PM_CAPABLE_UNDEFINED (-1)
31248 			 */
31249 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
31250 				un->un_f_log_sense_supported = FALSE;
31251 			} else {
31252 				un->un_f_pm_supported = TRUE;
31253 			}
31254 
31255 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31256 			    "sd_unit_attach: un:0x%p pm-capable "
31257 			    "property set to %d.\n", un, un->un_f_pm_supported);
31258 		}
31259 	}
31260 
31261 	if (un->un_f_is_hotpluggable) {
31262 #if defined(_SUNOS_VTOC_8)
31263 		/*
31264 		 * Note: currently, for VTOC_8 devices, default label is
31265 		 * created for removable and hotpluggable devices only.
31266 		 */
31267 		un->un_f_default_vtoc_supported = TRUE;
31268 #endif
31269 
31270 		/*
31271 		 * Temporarily, let hotpluggable devices pretend to be
31272 		 * removable-media devices for vold.
31273 		 */
31274 		un->un_f_monitor_media_state = TRUE;
31275 
31276 		un->un_f_check_start_stop = TRUE;
31277 
31278 	}
31279 
31280 	/*
31281 	 * By default, only DIRECT ACCESS devices and CDs will have Sun
31282 	 * labels.
31283 	 */
31284 	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
31285 	    (un->un_sd->sd_inq->inq_rmb)) {
31286 		/*
31287 		 * Direct access devices have a disk label.
31288 		 */
31289 		un->un_f_vtoc_label_supported = TRUE;
31290 	}
31291 
31292 	/*
31293 	 * Fdisk partitions are supported for all direct access devices on the
31294 	 * x86 platform, and only for removable media and hotpluggable devices
31295 	 * on the SPARC platform. Below, we will set the following flag to
31296 	 * FALSE if the current device is neither a removable media nor a
31297 	 * hotpluggable device and sd is running on the SPARC platform.
31298 	 */
31299 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31300 		un->un_f_mboot_supported = TRUE;
31301 	}
31302 
31303 	if (!un->un_f_is_hotpluggable &&
31304 	    !un->un_sd->sd_inq->inq_rmb) {
31305 
31306 #if defined(_SUNOS_VTOC_8)
31307 		/*
31308 		 * Don't support fdisk on fixed disks.
31309 		 */
31310 		un->un_f_mboot_supported = FALSE;
31311 #endif
31312 
31313 		/*
31314 		 * Fixed disks support SYNC CACHE.
31315 		 */
31316 		un->un_f_sync_cache_supported = TRUE;
31317 
31318 		/*
31319 		 * For a fixed disk, if its VTOC is not valid, we will write
31320 		 * an error message to the system log.
31321 		 */
31322 		if (un->un_f_vtoc_label_supported)
31323 			un->un_f_vtoc_errlog_supported = TRUE;
31324 	}
31325 }
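
/*
 * Illustrative note (not from the source): "enable-partition-kstats" as
 * consulted above is an ordinary driver property, so partition kstats
 * could be disabled for all sd instances from sd.conf, e.g.:
 *
 *	enable-partition-kstats=0;
 *
 * Per the default argument passed to ddi_prop_get_int() above, partition
 * kstats remain enabled when the property is absent.
 */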
31326