/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>


/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 *
 * Notes for the off-by-1 workaround:
 * ----------------------------------
 *
 *    The SCSI READ_CAPACITY command returns the LBA of the last
 *    logical block, but sd once treated this number as the disk's
 *    capacity on the x86 platform. Since LBAs are addressed from 0,
 *    the last block was lost on x86.
 *
 *    That workaround has now been removed. So that the present sd
 *    driver still works with disks that were labeled/partitioned by
 *    the previous sd, the following compatibility measures are in
 *    place:
 *
 *    1) Locate the backup EFI label: sd searches the next-to-last
 *       block for the backup EFI label if it cannot find it on the
 *       last block;
 *    2) Calculate geometry: refer to sd_convert_geometry(). If a
 *       capacity increase of 1 causes the disk's capacity to cross
 *       over a limit in the CHS_values table, the geometry info will
 *       change. This raises an issue: if the primary VTOC label is
 *       destroyed, the format utility can restore it from the backup
 *       VTOC labels, and format locates the backup VTOC labels using
 *       the geometry reported by the sd driver. So changing the
 *       geometry would prevent format from finding the backup VTOC
 *       labels. To eliminate this side effect for compatibility,
 *       sd uses (capacity - 1) to calculate the geometry;
 *    3) 1TB disks: the VTOC uses a 32-bit signed int, so sd does not
 *       support a VTOC on a disk with more than DK_MAX_BLOCKS LBAs.
 *       However, a disk of exactly 1TB was treated as (1TB - 512B) in
 *       the past, and could have a VTOC. To accommodate this, an
 *       exactly-1TB disk that has a Solaris fdisk partition is still
 *       allowed to work with sd.
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
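
/*
 * Illustrative sketch (compiled out; the helper name is hypothetical
 * and this is not the driver's actual code path) of how the
 * "interconnect-type" property lookup described above could select
 * fibre vs. parallel semantics.
 */
#if 0
static int
sd_example_pick_interconnect(dev_info_t *devi)
{
	int itype = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0,
	    "interconnect-type", -1);

	switch (itype) {
	case INTERCONNECT_FIBRE:
	case INTERCONNECT_SSA:
	case INTERCONNECT_FABRIC:
		/* The HBA reported a fibre-channel style interconnect. */
		return (SD_INTERCONNECT_FIBRE);
	default:
		/* Property absent or unrecognized: fall back to default. */
		return (SD_DEFAULT_INTERCONNECT_TYPE);
	}
}
#endif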

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;
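
/*
 * A minimal sketch (compiled out; the sd_example_* name is
 * hypothetical) of how a logging routine can share sd_log_buf safely:
 * the buffer is only ever formatted while sd_log_mutex is held.
 */
#if 0
static void
sd_example_log(struct sd_lun *un, const char *fmt, ...)
{
	va_list ap;

	mutex_enter(&sd_log_mutex);
	va_start(ap, fmt);
	(void) vsprintf(sd_log_buf, fmt, ap);
	va_end(ap);
	scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", sd_log_buf);
	mutex_exit(&sd_log_mutex);
}
#endif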


/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
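
/*
 * A minimal sketch (compiled out; the helper name is hypothetical) of
 * the cache lookup described above: walk the per-parent-dip list under
 * the mutex and return the cached probe result for a target, if any.
 */
#if 0
static int
sd_example_cached_probe_result(dev_info_t *pdip, int target)
{
	struct sd_scsi_probe_cache *cp;
	int rval = -1;		/* -1: no cached entry for this parent */

	mutex_enter(&sd_scsi_probe_cache_mutex);
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			rval = cp->cache[target];
			break;
		}
	}
	mutex_exit(&sd_scsi_probe_cache_mutex);
	return (rval);
}
#endif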


/*
 * Vendor specific data name property declarations
 */

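/*
 * Note on the positional initializers below: the sd_tunables fields,
 * as laid out in sddef.h, correspond in order to throttle, ctype,
 * not-ready retries, busy retries, reset retries, reserve release
 * time, min throttle, disksort disabled, and LUN reset enabled.
 * Slots not selected by an entry's SD_CONF_BSET_* flags in
 * sd_disk_table are simply zero placeholders.
 */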
#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
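/*
 * For example, SD_TOUPPER('c') evaluates to 'C', while any
 * non-lowercase character such as '3' is returned unchanged.
 */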

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or perhaps
 * something else, as defined by the flags, at a future time).  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
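
/*
 * A minimal sketch (compiled out; the helper name is hypothetical) of
 * the prefix match the table above relies on: the vid and pid fields
 * of the inquiry data are concatenated and compared only as far as the
 * table's device_id string is defined.  The "*" wildcard and
 * blank-folding special cases are omitted here.
 */
#if 0
static int
sd_example_id_match(struct scsi_inquiry *inq, const char *device_id)
{
	char buf[25];	/* 8-byte inq_vid + 16-byte inq_pid + NUL */

	bcopy(inq->inq_vid, buf, 8);
	bcopy(inq->inq_pid, &buf[8], 16);
	buf[24] = '\0';

	return (strncmp(buf, device_id, strlen(device_id)) == 0);
}
#endif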


/*
 * Return codes of sd_uselabel().
 */
#define	SD_LABEL_IS_VALID		0
#define	SD_LABEL_IS_INVALID		1

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd page for "page code" */
#define	WD_NODE			7	/* the whole disk minor */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
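/*
 * New_state() records the current state in un_last_state before
 * switching to the new one (note the comma operator, which keeps the
 * macro usable as a single expression).  Restore_state() must first
 * copy un_last_state into a temporary, because New_state() overwrites
 * un_last_state as part of the transition.
 */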
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
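
/*
 * The sd_cdbtab limits above track the standard per-group CDB
 * addressing capabilities: group 0 (6-byte CDBs) carries a 21-bit LBA
 * and an 8-bit block count, group 1 (10-byte) a 32-bit LBA and 16-bit
 * count, group 5 (12-byte) a 32-bit LBA and 32-bit count, and group 4
 * (16-byte) a 64-bit LBA and 32-bit count.
 */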

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties
#define	sd_validate_geometry		ssd_validate_geometry

#if defined(_SUNOS_VTOC_16)
#define	sd_convert_geometry		ssd_convert_geometry
#endif

#define	sd_resync_geom_caches		ssd_resync_geom_caches
#define	sd_read_fdisk			ssd_read_fdisk
#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_swap_efi_gpt			ssd_swap_efi_gpt
#define	sd_swap_efi_gpe			ssd_swap_efi_gpe
#define	sd_validate_efi			ssd_validate_efi
#define	sd_use_efi			ssd_use_efi
#define	sd_uselabel			ssd_uselabel
#define	sd_build_default_label		ssd_build_default_label
#define	sd_has_max_chs_vals		ssd_has_max_chs_vals
#define	sd_inq_fill			ssd_inq_fill
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid_block		ssd_get_devid_block
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_minor_nodes		ssd_create_minor_nodes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_uscsi_ioctl			ssd_uscsi_ioctl
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_dkio_get_geometry		ssd_dkio_get_geometry
#define	sd_dkio_set_geometry		ssd_dkio_set_geometry
#define	sd_dkio_get_partition		ssd_dkio_get_partition
#define	sd_dkio_set_partition		ssd_dkio_set_partition
#define	sd_dkio_partition		ssd_dkio_partition
#define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
#define	sd_dkio_get_efi			ssd_dkio_get_efi
#define	sd_build_user_vtoc		ssd_build_user_vtoc
#define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
#define	sd_dkio_set_efi			ssd_dkio_set_efi
#define	sd_build_label_vtoc		ssd_build_label_vtoc
#define	sd_write_label			ssd_write_label
#define	sd_clear_vtoc			ssd_clear_vtoc
#define	sd_clear_efi			ssd_clear_efi
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_dkio_get_mboot		ssd_dkio_get_mboot
#define	sd_dkio_set_mboot		ssd_dkio_set_mboot
#define	sd_setup_default_geometry	ssd_setup_default_geometry
#define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel SCSI
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);
static int  sd_validate_geometry(struct sd_lun *un, int path_flag);

#if defined(_SUNOS_VTOC_16)
static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
#endif

static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
	int path_flag);
static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
	int path_flag);
static void sd_get_physical_geometry(struct sd_lun *un,
	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
	int lbasize);
static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
static void sd_swap_efi_gpt(efi_gpt_t *);
static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
static int sd_validate_efi(efi_gpt_t *);
static int sd_use_efi(struct sd_lun *, int);
static void sd_build_default_label(struct sd_lun *un);

#if defined(_FIRMWARE_NEEDS_FDISK)
static int  sd_has_max_chs_vals(struct ipart *fdp);
#endif
static void sd_inq_fill(char *p, int l, char *s);


static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static daddr_t  sd_get_devid_block(struct sd_lun *un);
static int  sd_get_devid(struct sd_lun *un);
static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1
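
/*
 * For example, a caller can enable the write cache while leaving the
 * read cache setting untouched (a sketch of typical usage; see the
 * prototype below for the argument order: rcd_flag, then wce_flag):
 *
 *	(void) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 */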

static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static dev_t sd_make_device(dev_info_t *devi);

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(struct sd_lun *un);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
#if defined(__i386) || defined(__amd64)
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
#endif /* defined(__i386) || defined(__amd64) */

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
	int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code, clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen);
static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1471 	uchar_t *bufaddr, uint_t buflen, char feature);
1472 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1473 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1474 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1475 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1476 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1477 	size_t buflen, daddr_t start_block, int path_flag);
1478 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1479 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1480 	path_flag)
1481 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1482 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1483 	path_flag)
1484 
1485 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1486 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1487 	uint16_t param_ptr, int path_flag);
1488 
1489 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1490 static void sd_free_rqs(struct sd_lun *un);
1491 
1492 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1493 	uchar_t *data, int len, int fmt);
1494 static void sd_panic_for_res_conflict(struct sd_lun *un);
1495 
1496 /*
1497  * Disk Ioctl Function Prototypes
1498  */
1499 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1500 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1501 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1502 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1503 	int geom_validated);
1504 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1505 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1506 	int geom_validated);
1507 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1508 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1509 	int geom_validated);
1510 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1511 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1512 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1513 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1514 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1515 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1516 static int sd_write_label(dev_t dev);
1517 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1518 static void sd_clear_vtoc(struct sd_lun *un);
1519 static void sd_clear_efi(struct sd_lun *un);
1520 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1521 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1522 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1523 static void sd_setup_default_geometry(struct sd_lun *un);
1524 #if defined(__i386) || defined(__amd64)
1525 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1526 #endif
1527 
1528 /*
1529  * Multi-host Ioctl Prototypes
1530  */
1531 static int sd_check_mhd(dev_t dev, int interval);
1532 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1533 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1534 static char *sd_sname(uchar_t status);
1535 static void sd_mhd_resvd_recover(void *arg);
1536 static void sd_resv_reclaim_thread();
1537 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1538 static int sd_reserve_release(dev_t dev, int cmd);
1539 static void sd_rmv_resv_reclaim_req(dev_t dev);
1540 static void sd_mhd_reset_notify_cb(caddr_t arg);
1541 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1542 	mhioc_inkeys_t *usrp, int flag);
1543 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1544 	mhioc_inresvs_t *usrp, int flag);
1545 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1546 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1547 static int sd_mhdioc_release(dev_t dev);
1548 static int sd_mhdioc_register_devid(dev_t dev);
1549 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1550 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1551 
1552 /*
1553  * SCSI removable prototypes
1554  */
1555 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1556 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1557 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1558 static int sr_pause_resume(dev_t dev, int mode);
1559 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1560 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1561 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1562 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1563 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1564 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1565 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1566 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1567 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1568 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1569 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1570 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1571 static int sr_eject(dev_t dev);
1572 static void sr_ejected(register struct sd_lun *un);
1573 static int sr_check_wp(dev_t dev);
1574 static int sd_check_media(dev_t dev, enum dkio_state state);
1575 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1576 static void sd_delayed_cv_broadcast(void *arg);
1577 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1578 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1579 
1580 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1581 
1582 /*
1583  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1584  */
1585 static void sd_check_for_writable_cd(struct sd_lun *un);
1586 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1587 static void sd_wm_cache_destructor(void *wm, void *un);
1588 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1589 	daddr_t endb, ushort_t typ);
1590 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1591 	daddr_t endb);
1592 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1593 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1594 static void sd_read_modify_write_task(void *arg);
1595 static int
1596 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1597 	struct buf **bpp);
1598 
1599 
1600 /*
1601  * Function prototypes for failfast support.
1602  */
1603 static void sd_failfast_flushq(struct sd_lun *un);
1604 static int sd_failfast_flushq_callback(struct buf *bp);
1605 
1606 /*
1607  * Function prototypes to check for lsi devices
1608  */
1609 static void sd_is_lsi(struct sd_lun *un);
1610 
1611 /*
1612  * Function prototypes for x86 support
1613  */
1614 #if defined(__i386) || defined(__amd64)
1615 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1616 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1617 #endif
1618 
1619 /*
1620  * Constants for failfast support:
1621  *
1622  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1623  * failfast processing being performed.
1624  *
1625  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1626  * failfast processing on all bufs with B_FAILFAST set.
1627  */
1628 
1629 #define	SD_FAILFAST_INACTIVE		0
1630 #define	SD_FAILFAST_ACTIVE		1
1631 
1632 /*
1633  * Bitmask to control behavior of buf(9S) flushes when a transition to
1634  * the failfast state occurs. Optional bits include:
1635  *
1636  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1637  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1638  * be flushed.
1639  *
1640  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1641  * driver, in addition to the regular wait queue. This includes the xbuf
1642  * queues. When clear, only the driver's wait queue will be flushed.
1643  */
1644 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1645 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1646 
1647 /*
1648  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1649  * to flush all queues within the driver.
1650  */
1651 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
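/*
 * Editor's illustration (not from the original source): assuming the
 * standard /etc/system driver-tunable syntax, an administrator could
 * override this default to flush ALL bufs and ALL queues on a failfast
 * transition:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * i.e. (SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES).
 */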
1652 
1653 
1654 /*
1655  * SD Testing Fault Injection
1656  */
1657 #ifdef SD_FAULT_INJECTION
1658 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1659 static void sd_faultinjection(struct scsi_pkt *pktp);
1660 static void sd_injection_log(char *buf, struct sd_lun *un);
1661 #endif
1662 
1663 /*
1664  * Device driver ops vector
1665  */
1666 static struct cb_ops sd_cb_ops = {
1667 	sdopen,			/* open */
1668 	sdclose,		/* close */
1669 	sdstrategy,		/* strategy */
1670 	nodev,			/* print */
1671 	sddump,			/* dump */
1672 	sdread,			/* read */
1673 	sdwrite,		/* write */
1674 	sdioctl,		/* ioctl */
1675 	nodev,			/* devmap */
1676 	nodev,			/* mmap */
1677 	nodev,			/* segmap */
1678 	nochpoll,		/* poll */
1679 	sd_prop_op,		/* cb_prop_op */
1680 	0,			/* streamtab  */
1681 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1682 	CB_REV,			/* cb_rev */
1683 	sdaread, 		/* async I/O read entry point */
1684 	sdawrite		/* async I/O write entry point */
1685 };
1686 
1687 static struct dev_ops sd_ops = {
1688 	DEVO_REV,		/* devo_rev, */
1689 	0,			/* refcnt  */
1690 	sdinfo,			/* info */
1691 	nulldev,		/* identify */
1692 	sdprobe,		/* probe */
1693 	sdattach,		/* attach */
1694 	sddetach,		/* detach */
1695 	nodev,			/* reset */
1696 	&sd_cb_ops,		/* driver operations */
1697 	NULL,			/* bus operations */
1698 	sdpower			/* power */
1699 };
1700 
1701 
1702 /*
1703  * This is the loadable module wrapper.
1704  */
1705 #include <sys/modctl.h>
1706 
1707 static struct modldrv modldrv = {
1708 	&mod_driverops,		/* Type of module. This one is a driver */
1709 	SD_MODULE_NAME,		/* Module name. */
1710 	&sd_ops			/* driver ops */
1711 };
1712 
1713 
1714 static struct modlinkage modlinkage = {
1715 	MODREV_1,
1716 	&modldrv,
1717 	NULL
1718 };
1719 
1720 
1721 static struct scsi_asq_key_strings sd_additional_codes[] = {
1722 	0x81, 0, "Logical Unit is Reserved",
1723 	0x85, 0, "Audio Address Not Valid",
1724 	0xb6, 0, "Media Load Mechanism Failed",
1725 	0xB9, 0, "Audio Play Operation Aborted",
1726 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1727 	0x53, 2, "Medium removal prevented",
1728 	0x6f, 0, "Authentication failed during key exchange",
1729 	0x6f, 1, "Key not present",
1730 	0x6f, 2, "Key not established",
1731 	0x6f, 3, "Read without proper authentication",
1732 	0x6f, 4, "Mismatched region to this logical unit",
1733 	0x6f, 5, "Region reset count error",
1734 	0xffff, 0x0, NULL
1735 };
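/*
 * Editor's note (usage is an assumption, hedged): a scsi_asq_key_strings
 * table terminated by an 0xffff ASC entry, as above, is the form taken by
 * the asc_list argument of scsi_vu_errmsg(9F) when decoding vendor-unique
 * ASC/ASCQ pairs into message text. A sketch of such a call:
 *
 *	scsi_vu_errmsg(devp, pktp, sd_label, severity, request_blkno,
 *	    err_blkno, sd_cmds, sensep, sd_additional_codes, NULL);
 *
 * where devp, severity, the block numbers, the command-name table
 * (sd_cmds here) and sensep are assumed to come from the caller's
 * error-reporting context.
 */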
1736 
1737 
1738 /*
1739  * Struct for passing printing information for sense data messages
1740  */
1741 struct sd_sense_info {
1742 	int	ssi_severity;
1743 	int	ssi_pfa_flag;
1744 };
1745 
1746 /*
1747  * Table of function pointers for iostart-side routines. Separate "chains"
1748  * of layered function calls are formed by placing the function pointers
1749  * sequentially in the desired order. Functions are called according to an
1750  * incrementing table index ordering. The last function in each chain must
1751  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1752  * in the sd_iodone_chain[] array.
1753  *
1754  * Note: It may seem more natural to organize both the iostart and iodone
1755  * functions together, into an array of structures (or some similar
1756  * organization) with a common index, rather than two separate arrays which
1757  * must be maintained in synchronization. The purpose of this division is
1758  * to achieve improved performance: individual arrays allow for more
1759  * effective cache line utilization on certain platforms.
1760  */
1761 
1762 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1763 
1764 
1765 static sd_chain_t sd_iostart_chain[] = {
1766 
1767 	/* Chain for buf IO for disk drive targets (PM enabled) */
1768 	sd_mapblockaddr_iostart,	/* Index: 0 */
1769 	sd_pm_iostart,			/* Index: 1 */
1770 	sd_core_iostart,		/* Index: 2 */
1771 
1772 	/* Chain for buf IO for disk drive targets (PM disabled) */
1773 	sd_mapblockaddr_iostart,	/* Index: 3 */
1774 	sd_core_iostart,		/* Index: 4 */
1775 
1776 	/* Chain for buf IO for removable-media targets (PM enabled) */
1777 	sd_mapblockaddr_iostart,	/* Index: 5 */
1778 	sd_mapblocksize_iostart,	/* Index: 6 */
1779 	sd_pm_iostart,			/* Index: 7 */
1780 	sd_core_iostart,		/* Index: 8 */
1781 
1782 	/* Chain for buf IO for removable-media targets (PM disabled) */
1783 	sd_mapblockaddr_iostart,	/* Index: 9 */
1784 	sd_mapblocksize_iostart,	/* Index: 10 */
1785 	sd_core_iostart,		/* Index: 11 */
1786 
1787 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1788 	sd_mapblockaddr_iostart,	/* Index: 12 */
1789 	sd_checksum_iostart,		/* Index: 13 */
1790 	sd_pm_iostart,			/* Index: 14 */
1791 	sd_core_iostart,		/* Index: 15 */
1792 
1793 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1794 	sd_mapblockaddr_iostart,	/* Index: 16 */
1795 	sd_checksum_iostart,		/* Index: 17 */
1796 	sd_core_iostart,		/* Index: 18 */
1797 
1798 	/* Chain for USCSI commands (all targets) */
1799 	sd_pm_iostart,			/* Index: 19 */
1800 	sd_core_iostart,		/* Index: 20 */
1801 
1802 	/* Chain for checksumming USCSI commands (all targets) */
1803 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1804 	sd_pm_iostart,			/* Index: 22 */
1805 	sd_core_iostart,		/* Index: 23 */
1806 
1807 	/* Chain for "direct" USCSI commands (all targets) */
1808 	sd_core_iostart,		/* Index: 24 */
1809 
1810 	/* Chain for "direct priority" USCSI commands (all targets) */
1811 	sd_core_iostart,		/* Index: 25 */
1812 };
1813 
1814 /*
1815  * Macros to locate the first function of each iostart chain in the
1816  * sd_iostart_chain[] array. These are located by the index in the array.
1817  */
1818 #define	SD_CHAIN_DISK_IOSTART			0
1819 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1820 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1821 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1822 #define	SD_CHAIN_CHKSUM_IOSTART			12
1823 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1824 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1825 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1826 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1827 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1828 
1829 
1830 /*
1831  * Table of function pointers for the iodone-side routines for the driver-
1832  * internal layering mechanism.  The calling sequence for iodone routines
1833  * uses a decrementing table index, so the last routine called in a chain
1834  * must be at the lowest array index location for that chain.  The last
1835  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1836  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1837  * of the functions in an iodone side chain must correspond to the ordering
1838  * of the iostart routines for that chain.  Note that there is no iodone
1839  * side routine that corresponds to sd_core_iostart(), so there is no
1840  * entry in the table for this.
1841  */
1842 
1843 static sd_chain_t sd_iodone_chain[] = {
1844 
1845 	/* Chain for buf IO for disk drive targets (PM enabled) */
1846 	sd_buf_iodone,			/* Index: 0 */
1847 	sd_mapblockaddr_iodone,		/* Index: 1 */
1848 	sd_pm_iodone,			/* Index: 2 */
1849 
1850 	/* Chain for buf IO for disk drive targets (PM disabled) */
1851 	sd_buf_iodone,			/* Index: 3 */
1852 	sd_mapblockaddr_iodone,		/* Index: 4 */
1853 
1854 	/* Chain for buf IO for removable-media targets (PM enabled) */
1855 	sd_buf_iodone,			/* Index: 5 */
1856 	sd_mapblockaddr_iodone,		/* Index: 6 */
1857 	sd_mapblocksize_iodone,		/* Index: 7 */
1858 	sd_pm_iodone,			/* Index: 8 */
1859 
1860 	/* Chain for buf IO for removable-media targets (PM disabled) */
1861 	sd_buf_iodone,			/* Index: 9 */
1862 	sd_mapblockaddr_iodone,		/* Index: 10 */
1863 	sd_mapblocksize_iodone,		/* Index: 11 */
1864 
1865 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1866 	sd_buf_iodone,			/* Index: 12 */
1867 	sd_mapblockaddr_iodone,		/* Index: 13 */
1868 	sd_checksum_iodone,		/* Index: 14 */
1869 	sd_pm_iodone,			/* Index: 15 */
1870 
1871 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1872 	sd_buf_iodone,			/* Index: 16 */
1873 	sd_mapblockaddr_iodone,		/* Index: 17 */
1874 	sd_checksum_iodone,		/* Index: 18 */
1875 
1876 	/* Chain for USCSI commands (non-checksum targets) */
1877 	sd_uscsi_iodone,		/* Index: 19 */
1878 	sd_pm_iodone,			/* Index: 20 */
1879 
1880 	/* Chain for USCSI commands (checksum targets) */
1881 	sd_uscsi_iodone,		/* Index: 21 */
1882 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1883 	sd_pm_iodone,			/* Index: 23 */
1884 
1885 	/* Chain for "direct" USCSI commands (all targets) */
1886 	sd_uscsi_iodone,		/* Index: 24 */
1887 
1888 	/* Chain for "direct priority" USCSI commands (all targets) */
1889 	sd_uscsi_iodone,		/* Index: 25 */
1890 };
1891 
1892 
1893 /*
1894  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1895  * each iodone-side chain. These are located by the array index, but as the
1896  * iodone side functions are called in a decrementing-index order, the
1897  * highest index number in each chain must be specified (as these correspond
1898  * to the first function in the iodone chain that will be called by the core
1899  * at IO completion time).
1900  */
1901 
1902 #define	SD_CHAIN_DISK_IODONE			2
1903 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1904 #define	SD_CHAIN_RMMEDIA_IODONE			8
1905 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1906 #define	SD_CHAIN_CHKSUM_IODONE			15
1907 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1908 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1909 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1910 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1911 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1912 
1913 
1914 
1915 
1916 /*
1917  * Array to map a layering chain index to the appropriate initpkt routine.
1918  * The redundant entries are present so that the index used for accessing
1919  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1920  * with this table as well.
1921  */
1922 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1923 
1924 static sd_initpkt_t	sd_initpkt_map[] = {
1925 
1926 	/* Chain for buf IO for disk drive targets (PM enabled) */
1927 	sd_initpkt_for_buf,		/* Index: 0 */
1928 	sd_initpkt_for_buf,		/* Index: 1 */
1929 	sd_initpkt_for_buf,		/* Index: 2 */
1930 
1931 	/* Chain for buf IO for disk drive targets (PM disabled) */
1932 	sd_initpkt_for_buf,		/* Index: 3 */
1933 	sd_initpkt_for_buf,		/* Index: 4 */
1934 
1935 	/* Chain for buf IO for removable-media targets (PM enabled) */
1936 	sd_initpkt_for_buf,		/* Index: 5 */
1937 	sd_initpkt_for_buf,		/* Index: 6 */
1938 	sd_initpkt_for_buf,		/* Index: 7 */
1939 	sd_initpkt_for_buf,		/* Index: 8 */
1940 
1941 	/* Chain for buf IO for removable-media targets (PM disabled) */
1942 	sd_initpkt_for_buf,		/* Index: 9 */
1943 	sd_initpkt_for_buf,		/* Index: 10 */
1944 	sd_initpkt_for_buf,		/* Index: 11 */
1945 
1946 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1947 	sd_initpkt_for_buf,		/* Index: 12 */
1948 	sd_initpkt_for_buf,		/* Index: 13 */
1949 	sd_initpkt_for_buf,		/* Index: 14 */
1950 	sd_initpkt_for_buf,		/* Index: 15 */
1951 
1952 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1953 	sd_initpkt_for_buf,		/* Index: 16 */
1954 	sd_initpkt_for_buf,		/* Index: 17 */
1955 	sd_initpkt_for_buf,		/* Index: 18 */
1956 
1957 	/* Chain for USCSI commands (non-checksum targets) */
1958 	sd_initpkt_for_uscsi,		/* Index: 19 */
1959 	sd_initpkt_for_uscsi,		/* Index: 20 */
1960 
1961 	/* Chain for USCSI commands (checksum targets) */
1962 	sd_initpkt_for_uscsi,		/* Index: 21 */
1963 	sd_initpkt_for_uscsi,		/* Index: 22 */
1964 	sd_initpkt_for_uscsi,		/* Index: 23 */
1965 
1966 	/* Chain for "direct" USCSI commands (all targets) */
1967 	sd_initpkt_for_uscsi,		/* Index: 24 */
1968 
1969 	/* Chain for "direct priority" USCSI commands (all targets) */
1970 	sd_initpkt_for_uscsi,		/* Index: 25 */
1971 
1972 };
1973 
1974 
1975 /*
1976  * Array to map a layering chain index to the appropriate destroypkt routine.
1977  * The redundant entries are present so that the index used for accessing
1978  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1979  * with this table as well.
1980  */
1981 typedef void (*sd_destroypkt_t)(struct buf *);
1982 
1983 static sd_destroypkt_t	sd_destroypkt_map[] = {
1984 
1985 	/* Chain for buf IO for disk drive targets (PM enabled) */
1986 	sd_destroypkt_for_buf,		/* Index: 0 */
1987 	sd_destroypkt_for_buf,		/* Index: 1 */
1988 	sd_destroypkt_for_buf,		/* Index: 2 */
1989 
1990 	/* Chain for buf IO for disk drive targets (PM disabled) */
1991 	sd_destroypkt_for_buf,		/* Index: 3 */
1992 	sd_destroypkt_for_buf,		/* Index: 4 */
1993 
1994 	/* Chain for buf IO for removable-media targets (PM enabled) */
1995 	sd_destroypkt_for_buf,		/* Index: 5 */
1996 	sd_destroypkt_for_buf,		/* Index: 6 */
1997 	sd_destroypkt_for_buf,		/* Index: 7 */
1998 	sd_destroypkt_for_buf,		/* Index: 8 */
1999 
2000 	/* Chain for buf IO for removable-media targets (PM disabled) */
2001 	sd_destroypkt_for_buf,		/* Index: 9 */
2002 	sd_destroypkt_for_buf,		/* Index: 10 */
2003 	sd_destroypkt_for_buf,		/* Index: 11 */
2004 
2005 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2006 	sd_destroypkt_for_buf,		/* Index: 12 */
2007 	sd_destroypkt_for_buf,		/* Index: 13 */
2008 	sd_destroypkt_for_buf,		/* Index: 14 */
2009 	sd_destroypkt_for_buf,		/* Index: 15 */
2010 
2011 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2012 	sd_destroypkt_for_buf,		/* Index: 16 */
2013 	sd_destroypkt_for_buf,		/* Index: 17 */
2014 	sd_destroypkt_for_buf,		/* Index: 18 */
2015 
2016 	/* Chain for USCSI commands (non-checksum targets) */
2017 	sd_destroypkt_for_uscsi,	/* Index: 19 */
2018 	sd_destroypkt_for_uscsi,	/* Index: 20 */
2019 
2020 	/* Chain for USCSI commands (checksum targets) */
2021 	sd_destroypkt_for_uscsi,	/* Index: 21 */
2022 	sd_destroypkt_for_uscsi,	/* Index: 22 */
2023 	sd_destroypkt_for_uscsi,	/* Index: 23 */
2024 
2025 	/* Chain for "direct" USCSI commands (all targets) */
2026 	sd_destroypkt_for_uscsi,	/* Index: 24 */
2027 
2028 	/* Chain for "direct priority" USCSI commands (all targets) */
2029 	sd_destroypkt_for_uscsi,	/* Index: 25 */
2030 
2031 };
2032 
2033 
2034 
2035 /*
2036  * Array to map a layering chain index to the appropriate chain "type".
2037  * The chain type indicates a specific property/usage of the chain.
2038  * The redundant entries are present so that the index used for accessing
2039  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2040  * with this table as well.
2041  */
2042 
2043 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2044 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2045 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2046 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2047 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2048 						/* (for error recovery) */
2049 
2050 static int sd_chain_type_map[] = {
2051 
2052 	/* Chain for buf IO for disk drive targets (PM enabled) */
2053 	SD_CHAIN_BUFIO,			/* Index: 0 */
2054 	SD_CHAIN_BUFIO,			/* Index: 1 */
2055 	SD_CHAIN_BUFIO,			/* Index: 2 */
2056 
2057 	/* Chain for buf IO for disk drive targets (PM disabled) */
2058 	SD_CHAIN_BUFIO,			/* Index: 3 */
2059 	SD_CHAIN_BUFIO,			/* Index: 4 */
2060 
2061 	/* Chain for buf IO for removable-media targets (PM enabled) */
2062 	SD_CHAIN_BUFIO,			/* Index: 5 */
2063 	SD_CHAIN_BUFIO,			/* Index: 6 */
2064 	SD_CHAIN_BUFIO,			/* Index: 7 */
2065 	SD_CHAIN_BUFIO,			/* Index: 8 */
2066 
2067 	/* Chain for buf IO for removable-media targets (PM disabled) */
2068 	SD_CHAIN_BUFIO,			/* Index: 9 */
2069 	SD_CHAIN_BUFIO,			/* Index: 10 */
2070 	SD_CHAIN_BUFIO,			/* Index: 11 */
2071 
2072 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2073 	SD_CHAIN_BUFIO,			/* Index: 12 */
2074 	SD_CHAIN_BUFIO,			/* Index: 13 */
2075 	SD_CHAIN_BUFIO,			/* Index: 14 */
2076 	SD_CHAIN_BUFIO,			/* Index: 15 */
2077 
2078 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2079 	SD_CHAIN_BUFIO,			/* Index: 16 */
2080 	SD_CHAIN_BUFIO,			/* Index: 17 */
2081 	SD_CHAIN_BUFIO,			/* Index: 18 */
2082 
2083 	/* Chain for USCSI commands (non-checksum targets) */
2084 	SD_CHAIN_USCSI,			/* Index: 19 */
2085 	SD_CHAIN_USCSI,			/* Index: 20 */
2086 
2087 	/* Chain for USCSI commands (checksum targets) */
2088 	SD_CHAIN_USCSI,			/* Index: 21 */
2089 	SD_CHAIN_USCSI,			/* Index: 22 */
2090 	SD_CHAIN_USCSI,			/* Index: 23 */
2091 
2092 	/* Chain for "direct" USCSI commands (all targets) */
2093 	SD_CHAIN_DIRECT,		/* Index: 24 */
2094 
2095 	/* Chain for "direct priority" USCSI commands (all targets) */
2096 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2097 };
2098 
2099 
2100 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2101 #define	SD_IS_BUFIO(xp)			\
2102 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2103 
2104 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2105 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2106 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2107 
2108 
2109 
2110 /*
2111  * Struct, array, and macros to map a specific chain to the appropriate
2112  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2113  *
2114  * The sd_chain_index_map[] array is used at attach time to set the various
2115  * un_xxx_chain_type members of the sd_lun softstate to the specific layering
2116  * chain to be used with the instance. This allows different instances to use
2117  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2118  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2119  * values at sd_xbuf init time, this allows (1) layering chains to be changed
2120  * dynamically and without the use of locking; and (2) a layer to update the
2121  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2122  * to allow for deferred processing of an IO within the same chain from a
2123  * different execution context.
2124  */
2125 
2126 struct sd_chain_index {
2127 	int	sci_iostart_index;
2128 	int	sci_iodone_index;
2129 };
2130 
2131 static struct sd_chain_index	sd_chain_index_map[] = {
2132 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2133 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2134 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2135 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2136 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2137 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2138 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2139 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2140 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2141 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2142 };
2143 
2144 
2145 /*
2146  * The following are indexes into the sd_chain_index_map[] array.
2147  */
2148 
2149 /* un->un_buf_chain_type must be set to one of these */
2150 #define	SD_CHAIN_INFO_DISK		0
2151 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2152 #define	SD_CHAIN_INFO_RMMEDIA		2
2153 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2154 #define	SD_CHAIN_INFO_CHKSUM		4
2155 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2156 
2157 /* un->un_uscsi_chain_type must be set to one of these */
2158 #define	SD_CHAIN_INFO_USCSI_CMD		6
2159 /* USCSI with PM disabled is the same as DIRECT */
2160 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2161 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2162 
2163 /* un->un_direct_chain_type must be set to one of these */
2164 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2165 
2166 /* un->un_priority_chain_type must be set to one of these */
2167 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
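/*
 * Editor's illustration (a sketch under the assumptions stated in the
 * comments above, not code copied from this driver): attach-time code
 * records a chain selection in the softstate, and sd_xbuf initialization
 * then seeds the per-IO indexes from sd_chain_index_map[], roughly:
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
 *
 *	xp->xb_chain_iostart =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iostart_index;
 *	xp->xb_chain_iodone =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iodone_index;
 */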
2168 
2169 /* size for devid inquiries */
2170 #define	MAX_INQUIRY_SIZE		0xF0
2171 
2172 /*
2173  * Macros used by functions to pass a given buf(9S) struct along to the
2174  * next function in the layering chain for further processing.
2175  *
2176  * In the following macros, passing more than three arguments to the called
2177  * routines causes the optimizer for the SPARC compiler to stop doing tail
2178  * call elimination, which results in significant performance degradation.
2179  */
2180 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2181 	((*(sd_iostart_chain[index]))(index, un, bp))
2182 
2183 #define	SD_BEGIN_IODONE(index, un, bp)	\
2184 	((*(sd_iodone_chain[index]))(index, un, bp))
2185 
2186 #define	SD_NEXT_IOSTART(index, un, bp)				\
2187 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2188 
2189 #define	SD_NEXT_IODONE(index, un, bp)				\
2190 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
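/*
 * Editor's illustration (a hypothetical layer, not a function in this
 * driver): the intended calling pattern for a mid-chain layer. The
 * iostart side does its per-layer work and passes the buf "down" via
 * SD_NEXT_IOSTART(); the matching iodone side unwinds "up" via
 * SD_NEXT_IODONE(). Both use the three-argument form that, per the
 * comment above, preserves tail call elimination on SPARC.
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		... per-layer processing of bp would go here ...
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 *	static void
 *	sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		... per-layer completion handling would go here ...
 *		SD_NEXT_IODONE(index, un, bp);
 *	}
 */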
2191 
2192 /*
2193  *    Function: _init
2194  *
2195  * Description: This is the driver _init(9E) entry point.
2196  *
2197  * Return Code: Returns the value from mod_install(9F) or
2198  *		ddi_soft_state_init(9F) as appropriate.
2199  *
2200  *     Context: Called when driver module loaded.
2201  */
2202 
2203 int
2204 _init(void)
2205 {
2206 	int	err;
2207 
2208 	/* establish driver name from module name */
2209 	sd_label = mod_modname(&modlinkage);
2210 
2211 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2212 		SD_MAXUNIT);
2213 
2214 	if (err != 0) {
2215 		return (err);
2216 	}
2217 
2218 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2219 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2220 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2221 
2222 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2223 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2224 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2225 
2226 	/*
2227 	 * it's ok to init here even for fibre devices
2228 	 */
2229 	sd_scsi_probe_cache_init();
2230 
2231 	/*
2232 	 * Creating taskq before mod_install ensures that all callers (threads)
2233 	 * that enter the module after a successful mod_install encounter
2234 	 * a valid taskq.
2235 	 */
2236 	sd_taskq_create();
2237 
2238 	err = mod_install(&modlinkage);
2239 	if (err != 0) {
2240 		/* delete taskq if install fails */
2241 		sd_taskq_delete();
2242 
2243 		mutex_destroy(&sd_detach_mutex);
2244 		mutex_destroy(&sd_log_mutex);
2245 		mutex_destroy(&sd_label_mutex);
2246 
2247 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2248 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2249 		cv_destroy(&sd_tr.srq_inprocess_cv);
2250 
2251 		sd_scsi_probe_cache_fini();
2252 
2253 		ddi_soft_state_fini(&sd_state);
2254 		return (err);
2255 	}
2256 
2257 	return (err);
2258 }
2259 
2260 
2261 /*
2262  *    Function: _fini
2263  *
2264  * Description: This is the driver _fini(9E) entry point.
2265  *
2266  * Return Code: Returns the value from mod_remove(9F)
2267  *
2268  *     Context: Called when driver module is unloaded.
2269  */
2270 
2271 int
2272 _fini(void)
2273 {
2274 	int err;
2275 
2276 	if ((err = mod_remove(&modlinkage)) != 0) {
2277 		return (err);
2278 	}
2279 
2280 	sd_taskq_delete();
2281 
2282 	mutex_destroy(&sd_detach_mutex);
2283 	mutex_destroy(&sd_log_mutex);
2284 	mutex_destroy(&sd_label_mutex);
2285 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2286 
2287 	sd_scsi_probe_cache_fini();
2288 
2289 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2290 	cv_destroy(&sd_tr.srq_inprocess_cv);
2291 
2292 	ddi_soft_state_fini(&sd_state);
2293 
2294 	return (err);
2295 }
2296 
2297 
2298 /*
2299  *    Function: _info
2300  *
2301  * Description: This is the driver _info(9E) entry point.
2302  *
2303  *   Arguments: modinfop - pointer to the driver modinfo structure
2304  *
2305  * Return Code: Returns the value from mod_info(9F).
2306  *
2307  *     Context: Kernel thread context
2308  */
2309 
2310 int
2311 _info(struct modinfo *modinfop)
2312 {
2313 	return (mod_info(&modlinkage, modinfop));
2314 }
2315 
2316 
2317 /*
2318  * The following routines implement the driver message logging facility.
2319  * They provide component- and level-based debug output filtering.
2320  * Output may also be restricted to messages for a single instance by
2321  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2322  * to NULL, then messages for all instances are printed.
2323  *
2324  * These routines have been cloned from each other due to the language
2325  * constraints of macros and variable argument list processing.
2326  */
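/*
 * Editor's illustration (an assumed debugging workflow, not part of the
 * source): on a live 64-bit kernel these filter variables could be
 * patched with mdb(1), for example:
 *
 *	# mdb -kw
 *	> sd_level_mask/W 1
 *	> sd_debug_un/Z 0
 *
 * The value written to sd_level_mask is assumed here to include
 * SD_LOGMASK_ERROR; the actual mask and component bit definitions live
 * in the sd headers, so consult them before patching.
 */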
2327 
2328 
2329 /*
2330  *    Function: sd_log_err
2331  *
2332  * Description: This routine is called by the SD_ERROR macro for debug
2333  *		logging of error conditions.
2334  *
2335  *   Arguments: comp - driver component being logged
2336  *		un   - pointer to driver soft state (unit) structure
2337  *		fmt  - error string and format to be logged
2338  */
2339 
2340 static void
2341 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2342 {
2343 	va_list		ap;
2344 	dev_info_t	*dev;
2345 
2346 	ASSERT(un != NULL);
2347 	dev = SD_DEVINFO(un);
2348 	ASSERT(dev != NULL);
2349 
2350 	/*
2351 	 * Filter messages based on the global component and level masks.
2352 	 * Also print if un matches the value of sd_debug_un, or if
2353 	 * sd_debug_un is set to NULL.
2354 	 */
2355 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2356 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2357 		mutex_enter(&sd_log_mutex);
2358 		va_start(ap, fmt);
2359 		(void) vsprintf(sd_log_buf, fmt, ap);
2360 		va_end(ap);
2361 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2362 		mutex_exit(&sd_log_mutex);
2363 	}
2364 #ifdef SD_FAULT_INJECTION
2365 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2366 	if (un->sd_injection_mask & comp) {
2367 		mutex_enter(&sd_log_mutex);
2368 		va_start(ap, fmt);
2369 		(void) vsprintf(sd_log_buf, fmt, ap);
2370 		va_end(ap);
2371 		sd_injection_log(sd_log_buf, un);
2372 		mutex_exit(&sd_log_mutex);
2373 	}
2374 #endif
2375 }
2376 
2377 
2378 /*
2379  *    Function: sd_log_info
2380  *
2381  * Description: This routine is called by the SD_INFO macro for debug
2382  *		logging of general purpose informational conditions.
2383  *
2384  *   Arguments: component - driver component being logged
2385  *		un   - pointer to driver soft state (unit) structure
2386  *		fmt  - info string and format to be logged
2387  */
2388 
2389 static void
2390 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2391 {
2392 	va_list		ap;
2393 	dev_info_t	*dev;
2394 
2395 	ASSERT(un != NULL);
2396 	dev = SD_DEVINFO(un);
2397 	ASSERT(dev != NULL);
2398 
2399 	/*
2400 	 * Filter messages based on the global component and level masks.
2401 	 * Also print if un matches the value of sd_debug_un, or if
2402 	 * sd_debug_un is set to NULL.
2403 	 */
2404 	if ((sd_component_mask & component) &&
2405 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2406 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2407 		mutex_enter(&sd_log_mutex);
2408 		va_start(ap, fmt);
2409 		(void) vsprintf(sd_log_buf, fmt, ap);
2410 		va_end(ap);
2411 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2412 		mutex_exit(&sd_log_mutex);
2413 	}
2414 #ifdef SD_FAULT_INJECTION
2415 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2416 	if (un->sd_injection_mask & component) {
2417 		mutex_enter(&sd_log_mutex);
2418 		va_start(ap, fmt);
2419 		(void) vsprintf(sd_log_buf, fmt, ap);
2420 		va_end(ap);
2421 		sd_injection_log(sd_log_buf, un);
2422 		mutex_exit(&sd_log_mutex);
2423 	}
2424 #endif
2425 }
2426 
2427 
2428 /*
2429  *    Function: sd_log_trace
2430  *
2431  * Description: This routine is called by the SD_TRACE macro for debug
2432  *		logging of trace conditions (i.e. function entry/exit).
2433  *
2434  *   Arguments: component - driver component being logged
2435  *		un   - pointer to driver soft state (unit) structure
2436  *		fmt  - trace string and format to be logged
2437  */
2438 
2439 static void
2440 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2441 {
2442 	va_list		ap;
2443 	dev_info_t	*dev;
2444 
2445 	ASSERT(un != NULL);
2446 	dev = SD_DEVINFO(un);
2447 	ASSERT(dev != NULL);
2448 
2449 	/*
2450 	 * Filter messages based on the global component and level masks.
2451 	 * Also print if un matches the value of sd_debug_un, or if
2452 	 * sd_debug_un is set to NULL.
2453 	 */
2454 	if ((sd_component_mask & component) &&
2455 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2456 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2457 		mutex_enter(&sd_log_mutex);
2458 		va_start(ap, fmt);
2459 		(void) vsprintf(sd_log_buf, fmt, ap);
2460 		va_end(ap);
2461 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2462 		mutex_exit(&sd_log_mutex);
2463 	}
2464 #ifdef SD_FAULT_INJECTION
2465 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2466 	if (un->sd_injection_mask & component) {
2467 		mutex_enter(&sd_log_mutex);
2468 		va_start(ap, fmt);
2469 		(void) vsprintf(sd_log_buf, fmt, ap);
2470 		va_end(ap);
2471 		sd_injection_log(sd_log_buf, un);
2472 		mutex_exit(&sd_log_mutex);
2473 	}
2474 #endif
2475 }
2476 
2477 
2478 /*
2479  *    Function: sdprobe
2480  *
2481  * Description: This is the driver probe(9e) entry point function.
2482  *
2483  *   Arguments: devi - opaque device info handle
2484  *
2485  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2486  *              DDI_PROBE_FAILURE: If the probe failed.
2487  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2488  *				   but may be present in the future.
2489  */
2490 
2491 static int
2492 sdprobe(dev_info_t *devi)
2493 {
2494 	struct scsi_device	*devp;
2495 	int			rval;
2496 	int			instance;
2497 
2498 	/*
2499 	 * if it wasn't for pln, sdprobe could actually be nulldev
2500 	 * in the "__fibre" case.
2501 	 */
2502 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2503 		return (DDI_PROBE_DONTCARE);
2504 	}
2505 
2506 	devp = ddi_get_driver_private(devi);
2507 
2508 	if (devp == NULL) {
2509 		/* Oops... nexus driver is misconfigured... */
2510 		return (DDI_PROBE_FAILURE);
2511 	}
2512 
2513 	instance = ddi_get_instance(devi);
2514 
2515 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2516 		return (DDI_PROBE_PARTIAL);
2517 	}
2518 
2519 	/*
2520 	 * Call the SCSA utility probe routine to see if we actually
2521 	 * have a target at this SCSI nexus.
2522 	 */
2523 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2524 	case SCSIPROBE_EXISTS:
2525 		switch (devp->sd_inq->inq_dtype) {
2526 		case DTYPE_DIRECT:
2527 			rval = DDI_PROBE_SUCCESS;
2528 			break;
2529 		case DTYPE_RODIRECT:
2530 			/* CDs etc. Can be removable media */
2531 			rval = DDI_PROBE_SUCCESS;
2532 			break;
2533 		case DTYPE_OPTICAL:
2534 			/*
2535 			 * Rewritable optical drive HP115AA
2536 			 * Can also be removable media
2537 			 */
2538 
2539 			/*
2540 			 * Do not attempt to bind to DTYPE_OPTICAL if
2541 			 * pre-Solaris 9 SPARC sd behavior is required.
2542 			 *
2543 			 * If first time through and sd_dtype_optical_bind
2544 			 * has not been set in /etc/system, check properties.
2545 			 */
2546 
2547 			if (sd_dtype_optical_bind < 0) {
2548 				sd_dtype_optical_bind =
2549 				    ddi_prop_get_int(DDI_DEV_T_ANY, devi,
2550 				    0, "optical-device-bind", 1);
2551 			}
2552 
2553 			if (sd_dtype_optical_bind == 0) {
2554 				rval = DDI_PROBE_FAILURE;
2555 			} else {
2556 				rval = DDI_PROBE_SUCCESS;
2557 			}
2558 			break;
2559 
2560 		case DTYPE_NOTPRESENT:
2561 		default:
2562 			rval = DDI_PROBE_FAILURE;
2563 			break;
2564 		}
2565 		break;
2566 	default:
2567 		rval = DDI_PROBE_PARTIAL;
2568 		break;
2569 	}
2570 
2571 	/*
2572 	 * This routine checks for resource allocation prior to freeing,
2573 	 * so it will take care of the "smart probing" case where a
2574 	 * scsi_probe() may or may not have been issued and will *not*
2575 	 * free previously-freed resources.
2576 	 */
2577 	scsi_unprobe(devp);
2578 	return (rval);
2579 }
2580 
2581 
2582 /*
2583  *    Function: sdinfo
2584  *
2585  * Description: This is the driver getinfo(9e) entry point function.
2586  * 		Given the device number, return the devinfo pointer from
2587  *		the scsi_device structure or the instance number
2588  *		associated with the dev_t.
2589  *
2590  *   Arguments: dip     - pointer to device info structure
2591  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2592  *			  DDI_INFO_DEVT2INSTANCE)
2593  *		arg     - driver dev_t
2594  *		resultp - user buffer for request response
2595  *
2596  * Return Code: DDI_SUCCESS
2597  *              DDI_FAILURE
2598  */
2599 /* ARGSUSED */
2600 static int
2601 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2602 {
2603 	struct sd_lun	*un;
2604 	dev_t		dev;
2605 	int		instance;
2606 	int		error;
2607 
2608 	switch (infocmd) {
2609 	case DDI_INFO_DEVT2DEVINFO:
2610 		dev = (dev_t)arg;
2611 		instance = SDUNIT(dev);
2612 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2613 			return (DDI_FAILURE);
2614 		}
2615 		*result = (void *) SD_DEVINFO(un);
2616 		error = DDI_SUCCESS;
2617 		break;
2618 	case DDI_INFO_DEVT2INSTANCE:
2619 		dev = (dev_t)arg;
2620 		instance = SDUNIT(dev);
2621 		*result = (void *)(uintptr_t)instance;
2622 		error = DDI_SUCCESS;
2623 		break;
2624 	default:
2625 		error = DDI_FAILURE;
2626 	}
2627 	return (error);
2628 }
2629 
2630 /*
2631  *    Function: sd_prop_op
2632  *
2633  * Description: This is the driver prop_op(9e) entry point function.
2634  *		Return the number of blocks for the partition in question
2635  *		or forward the request to the property facilities.
2636  *
2637  *   Arguments: dev       - device number
2638  *		dip       - pointer to device info structure
2639  *		prop_op   - property operator
2640  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2641  *		name      - pointer to property name
2642  *		valuep    - pointer or address of the user buffer
2643  *		lengthp   - property length
2644  *
2645  * Return Code: DDI_PROP_SUCCESS
2646  *              DDI_PROP_NOT_FOUND
2647  *              DDI_PROP_UNDEFINED
2648  *              DDI_PROP_NO_MEMORY
2649  *              DDI_PROP_BUF_TOO_SMALL
2650  */
2651 
2652 static int
2653 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2654 	char *name, caddr_t valuep, int *lengthp)
2655 {
2656 	int		instance = ddi_get_instance(dip);
2657 	struct sd_lun	*un;
2658 	uint64_t	nblocks64;
2659 
2660 	/*
2661 	 * Our dynamic properties are all device specific and size oriented.
2662 	 * Requests issued under conditions where size is valid are passed
2663  * to ddi_prop_op_nblocks with the size information; otherwise the
2664 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2665 	 */
2666 	un = ddi_get_soft_state(sd_state, instance);
2667 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2668 	    (un->un_f_geometry_is_valid == FALSE)) {
2669 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2670 		    name, valuep, lengthp));
2671 	} else {
2672 		/* get nblocks value */
2673 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2674 		mutex_enter(SD_MUTEX(un));
2675 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2676 		mutex_exit(SD_MUTEX(un));
2677 
2678 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2679 		    name, valuep, lengthp, nblocks64));
2680 	}
2681 }
2682 
2683 /*
2684  * The following functions are for smart probing:
2685  * sd_scsi_probe_cache_init()
2686  * sd_scsi_probe_cache_fini()
2687  * sd_scsi_clear_probe_cache()
2688  * sd_scsi_probe_with_cache()
2689  */
2690 
2691 /*
2692  *    Function: sd_scsi_probe_cache_init
2693  *
2694  * Description: Initializes the probe response cache mutex and head pointer.
2695  *
2696  *     Context: Kernel thread context
2697  */
2698 
2699 static void
2700 sd_scsi_probe_cache_init(void)
2701 {
2702 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2703 	sd_scsi_probe_cache_head = NULL;
2704 }
2705 
2706 
2707 /*
2708  *    Function: sd_scsi_probe_cache_fini
2709  *
2710  * Description: Frees all resources associated with the probe response cache.
2711  *
2712  *     Context: Kernel thread context
2713  */
2714 
2715 static void
2716 sd_scsi_probe_cache_fini(void)
2717 {
2718 	struct sd_scsi_probe_cache *cp;
2719 	struct sd_scsi_probe_cache *ncp;
2720 
2721 	/* Clean up our smart probing linked list */
2722 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2723 		ncp = cp->next;
2724 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2725 	}
2726 	sd_scsi_probe_cache_head = NULL;
2727 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2728 }
2729 
2730 
2731 /*
2732  *    Function: sd_scsi_clear_probe_cache
2733  *
2734  * Description: This routine clears the probe response cache. This is
2735  *		done when open() returns ENXIO so that when deferred
2736  *		attach is attempted (possibly after a device has been
2737  *		turned on) we will retry the probe. Since we don't know
2738  *		which target we failed to open, we just clear the
2739  *		entire cache.
2740  *
2741  *     Context: Kernel thread context
2742  */
2743 
2744 static void
2745 sd_scsi_clear_probe_cache(void)
2746 {
2747 	struct sd_scsi_probe_cache	*cp;
2748 	int				i;
2749 
2750 	mutex_enter(&sd_scsi_probe_cache_mutex);
2751 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2752 		/*
2753 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2754 		 * force probing to be performed the next time
2755 		 * sd_scsi_probe_with_cache is called.
2756 		 */
2757 		for (i = 0; i < NTARGETS_WIDE; i++) {
2758 			cp->cache[i] = SCSIPROBE_EXISTS;
2759 		}
2760 	}
2761 	mutex_exit(&sd_scsi_probe_cache_mutex);
2762 }
2763 
2764 
2765 /*
2766  *    Function: sd_scsi_probe_with_cache
2767  *
2768  * Description: This routine implements support for a scsi device probe
2769  *		with cache. The driver maintains a cache of the target
2770  *		responses to scsi probes. If we get no response from a
2771  *		target during a probe inquiry, we remember that, and we
2772  *		avoid additional calls to scsi_probe on non-zero LUNs
2773  *		on the same target until the cache is cleared. By doing
2774  *		so we avoid the 1/4 sec selection timeout for nonzero
2775  *		LUNs. lun0 of a target is always probed.
2776  *
2777  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2778  *              waitfunc - indicates what the allocator routines should
2779  *			   do when resources are not available. This value
2780  *			   is passed on to scsi_probe() when that routine
2781  *			   is called.
2782  *
2783  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2784  *		otherwise the value returned by scsi_probe(9F).
2785  *
2786  *     Context: Kernel thread context
2787  */
2788 
2789 static int
2790 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2791 {
2792 	struct sd_scsi_probe_cache	*cp;
2793 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2794 	int		lun, tgt;
2795 
2796 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2797 	    SCSI_ADDR_PROP_LUN, 0);
2798 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2799 	    SCSI_ADDR_PROP_TARGET, -1);
2800 
2801 	/* Make sure caching enabled and target in range */
2802 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2803 		/* do it the old way (no cache) */
2804 		return (scsi_probe(devp, waitfn));
2805 	}
2806 
2807 	mutex_enter(&sd_scsi_probe_cache_mutex);
2808 
2809 	/* Find the cache for this scsi bus instance */
2810 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2811 		if (cp->pdip == pdip) {
2812 			break;
2813 		}
2814 	}
2815 
2816 	/* If we can't find a cache for this pdip, create one */
2817 	if (cp == NULL) {
2818 		int i;
2819 
2820 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2821 		    KM_SLEEP);
2822 		cp->pdip = pdip;
2823 		cp->next = sd_scsi_probe_cache_head;
2824 		sd_scsi_probe_cache_head = cp;
2825 		for (i = 0; i < NTARGETS_WIDE; i++) {
2826 			cp->cache[i] = SCSIPROBE_EXISTS;
2827 		}
2828 	}
2829 
2830 	mutex_exit(&sd_scsi_probe_cache_mutex);
2831 
2832 	/* Recompute the cache for this target if LUN zero */
2833 	if (lun == 0) {
2834 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2835 	}
2836 
2837 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2838 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2839 		return (SCSIPROBE_NORESP);
2840 	}
2841 
2842 	/* Do the actual probe; save & return the result */
2843 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2844 }
2845 
2846 
2847 /*
2848  *    Function: sd_spin_up_unit
2849  *
2850  * Description: Issues the following commands to spin-up the device:
2851  *		START STOP UNIT, and INQUIRY.
2852  *
2853  *   Arguments: un - driver soft state (unit) structure
2854  *
2855  * Return Code: 0 - success
2856  *		EIO - failure
2857  *		EACCES - reservation conflict
2858  *
2859  *     Context: Kernel thread context
2860  */
2861 
2862 static int
2863 sd_spin_up_unit(struct sd_lun *un)
2864 {
2865 	size_t	resid		= 0;
2866 	int	has_conflict	= FALSE;
2867 	uchar_t *bufaddr;
2868 
2869 	ASSERT(un != NULL);
2870 
2871 	/*
2872 	 * Send a throwaway START UNIT command.
2873 	 *
2874 	 * If we fail on this, we don't care presently what precisely
2875 	 * is wrong.  EMC's arrays will also fail this with a check
2876 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2877 	 * we don't want to fail the attach because it may become
2878 	 * "active" later.
2879 	 */
2880 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2881 	    == EACCES)
2882 		has_conflict = TRUE;
2883 
2884 	/*
2885 	 * Send another INQUIRY command to the target. This is necessary for
2886 	 * non-removable media direct access devices because their INQUIRY data
2887 	 * may not be fully qualified until they are spun up (perhaps via the
2888 	 * START command above).  Note: This seems to be needed for some
2889 	 * legacy devices only. The INQUIRY command should succeed even if a
2890 	 * Reservation Conflict is present.
2891 	 */
2892 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2893 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2894 		kmem_free(bufaddr, SUN_INQSIZE);
2895 		return (EIO);
2896 	}
2897 
2898 	/*
2899 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2900 	 * Note that this routine does not return a failure here even if the
2901 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2902 	 */
2903 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2904 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2905 	}
2906 
2907 	kmem_free(bufaddr, SUN_INQSIZE);
2908 
2909 	/* If we hit a reservation conflict above, tell the caller. */
2910 	if (has_conflict == TRUE) {
2911 		return (EACCES);
2912 	}
2913 
2914 	return (0);
2915 }
2916 
2917 #ifdef _LP64
2918 /*
2919  *    Function: sd_enable_descr_sense
2920  *
2921  * Description: This routine attempts to select descriptor sense format
2922  *		using the Control mode page.  Devices that support 64 bit
2923  *		LBAs (for >2TB luns) should also implement descriptor
2924  *		sense data so we will call this function whenever we see
2925  *		a lun larger than 2TB.  If for some reason the device
2926  *		supports 64 bit LBAs but doesn't support descriptor sense
2927  *		presumably the mode select will fail.  Everything will
2928  *		continue to work normally except that we will not get
2929  *		complete sense data for commands that fail with an LBA
2930  *		larger than 32 bits.
2931  *
2932  *   Arguments: un - driver soft state (unit) structure
2933  *
2934  *     Context: Kernel thread context only
2935  */
2936 
2937 static void
2938 sd_enable_descr_sense(struct sd_lun *un)
2939 {
2940 	uchar_t			*header;
2941 	struct mode_control_scsi3 *ctrl_bufp;
2942 	size_t			buflen;
2943 	size_t			bd_len;
2944 
2945 	/*
2946 	 * Read MODE SENSE page 0xA, Control Mode Page
2947 	 */
2948 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2949 	    sizeof (struct mode_control_scsi3);
2950 	header = kmem_zalloc(buflen, KM_SLEEP);
2951 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
2952 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
2953 		SD_ERROR(SD_LOG_COMMON, un,
2954 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2955 		goto eds_exit;
2956 	}
2957 
2958 	/*
2959 	 * Determine size of Block Descriptors in order to locate
2960 	 * the mode page data. ATAPI devices return 0, SCSI devices
2961 	 * should return MODE_BLK_DESC_LENGTH.
2962 	 */
2963 	bd_len  = ((struct mode_header *)header)->bdesc_length;
2964 
2965 	ctrl_bufp = (struct mode_control_scsi3 *)
2966 	    (header + MODE_HEADER_LENGTH + bd_len);
2967 
2968 	/*
2969 	 * Clear PS bit for MODE SELECT
2970 	 */
2971 	ctrl_bufp->mode_page.ps = 0;
2972 
2973 	/*
2974 	 * Set D_SENSE to enable descriptor sense format.
2975 	 */
2976 	ctrl_bufp->d_sense = 1;
2977 
2978 	/*
2979 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2980 	 */
2981 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
2982 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
2983 		SD_INFO(SD_LOG_COMMON, un,
2984 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2985 		goto eds_exit;
2986 	}
2987 
2988 eds_exit:
2989 	kmem_free(header, buflen);
2990 }
2991 
2992 /*
2993  *    Function: sd_reenable_dsense_task
2994  *
2995  * Description: Re-enable descriptor sense after device or bus reset
2996  *
2997  *     Context: Executes in a taskq() thread context
2998  */
2999 static void
3000 sd_reenable_dsense_task(void *arg)
3001 {
3002 	struct	sd_lun	*un = arg;
3003 
3004 	ASSERT(un != NULL);
3005 	sd_enable_descr_sense(un);
3006 }
3007 #endif /* _LP64 */
3008 
3009 /*
3010  *    Function: sd_set_mmc_caps
3011  *
3012  * Description: This routine determines if the device is MMC compliant and if
3013  *		the device supports CDDA via a mode sense of the CDVD
3014  *		capabilities mode page. Also checks if the device is a
3015  *		dvdram writable device.
3016  *
3017  *   Arguments: un - driver soft state (unit) structure
3018  *
3019  *     Context: Kernel thread context only
3020  */
3021 
3022 static void
3023 sd_set_mmc_caps(struct sd_lun *un)
3024 {
3025 	struct mode_header_grp2		*sense_mhp;
3026 	uchar_t				*sense_page;
3027 	caddr_t				buf;
3028 	int				bd_len;
3029 	int				status;
3030 	struct uscsi_cmd		com;
3031 	int				rtn;
3032 	uchar_t				*out_data_rw, *out_data_hd;
3033 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3034 
3035 	ASSERT(un != NULL);
3036 
3037 	/*
3038 	 * The flags set in this function are: MMC compliant, DVD-RAM
3039 	 * writable device, and CDDA support. Initialize them to FALSE;
3040 	 * if a capability is detected, set the corresponding flag to TRUE.
3041 	 */
3042 	un->un_f_mmc_cap = FALSE;
3043 	un->un_f_dvdram_writable_device = FALSE;
3044 	un->un_f_cfg_cdda = FALSE;
3045 
3046 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3047 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3048 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3049 
3050 	if (status != 0) {
3051 		/* command failed; just return */
3052 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3053 		return;
3054 	}
3055 	/*
3056 	 * If the mode sense request for the CDROM CAPABILITIES
3057 	 * page (0x2A) succeeds, the device is assumed to be MMC.
3058 	 */
3059 	un->un_f_mmc_cap = TRUE;
3060 
3061 	/* Get to the page data */
3062 	sense_mhp = (struct mode_header_grp2 *)buf;
3063 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3064 	    sense_mhp->bdesc_length_lo;
3065 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3066 		/*
3067 		 * We did not get back the expected block descriptor
3068 		 * length so we cannot determine if the device supports
3069 		 * CDDA. However, we still indicate the device is MMC
3070 		 * according to the successful response to the page
3071 		 * 0x2A mode sense request.
3072 		 */
3073 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3074 		    "sd_set_mmc_caps: Mode Sense returned "
3075 		    "invalid block descriptor length\n");
3076 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3077 		return;
3078 	}
3079 
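	/*
	 * Layout note (per the MMC CD/DVD capabilities mode page, noted
	 * here for orientation): byte 3 bit 5 of the page is the DVD-RAM
	 * write bit and byte 5 bit 0 is the CD-DA command support bit;
	 * these are the two bits tested below.
	 */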
3080 	/* See if read CDDA is supported */
3081 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3082 	    bd_len);
3083 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3084 
3085 	/* See if writing DVD RAM is supported. */
3086 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3087 	if (un->un_f_dvdram_writable_device == TRUE) {
3088 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3089 		return;
3090 	}
3091 
3092 	/*
3093 	 * If the device presents DVD or CD capabilities in the mode
3094 	 * page, we can return here since an RRD will not have
3095 	 * these capabilities.
3096 	 */
3097 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3098 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3099 		return;
3100 	}
3101 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3102 
3103 	/*
3104 	 * If un->un_f_dvdram_writable_device is still FALSE,
3105 	 * check for a Removable Rigid Disk (RRD).  An RRD
3106 	 * device is identified by the features RANDOM_WRITABLE and
3107 	 * HARDWARE_DEFECT_MANAGEMENT.
3108 	 */
3109 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3110 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3111 
3112 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3113 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3114 	    RANDOM_WRITABLE);
3115 	if (rtn != 0) {
3116 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3117 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3118 		return;
3119 	}
3120 
3121 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3122 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3123 
3124 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3125 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3126 	    HARDWARE_DEFECT_MANAGEMENT);
3127 	if (rtn == 0) {
3128 		/*
3129 		 * We have good information, check for random writable
3130 		 * and hardware defect features.
3131 		 */
3132 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3133 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3134 			un->un_f_dvdram_writable_device = TRUE;
3135 		}
3136 	}
3137 
3138 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3139 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3140 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3141 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3142 }
3143 
3144 /*
3145  *    Function: sd_check_for_writable_cd
3146  *
3147  * Description: This routine determines if the media in the device is
3148  *		writable or not. It uses the get configuration command (0x46)
3149 	 *		to determine if the media is writable.
3150  *
3151  *   Arguments: un - driver soft state (unit) structure
3152  *
3153  *     Context: Never called at interrupt context.
3154  */
3155 
3156 static void
3157 sd_check_for_writable_cd(struct sd_lun *un)
3158 {
3159 	struct uscsi_cmd		com;
3160 	uchar_t				*out_data;
3161 	uchar_t				*rqbuf;
3162 	int				rtn;
3163 	uchar_t				*out_data_rw, *out_data_hd;
3164 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3165 	struct mode_header_grp2		*sense_mhp;
3166 	uchar_t				*sense_page;
3167 	caddr_t				buf;
3168 	int				bd_len;
3169 	int				status;
3170 
3171 	ASSERT(un != NULL);
3172 	ASSERT(mutex_owned(SD_MUTEX(un)));
3173 
3174 	/*
3175 	 * Initialize writable media to FALSE; only if the configuration
3176 	 * info tells us otherwise will we set it to TRUE.
3177 	 */
3178 	un->un_f_mmc_writable_media = FALSE;
3179 	mutex_exit(SD_MUTEX(un));
3180 
3181 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3182 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3183 
3184 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3185 	    out_data, SD_PROFILE_HEADER_LEN);
3186 
3187 	mutex_enter(SD_MUTEX(un));
3188 	if (rtn == 0) {
3189 		/*
3190 		 * We have good information, check for writable DVD.
3191 		 */
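		/*
		 * Bytes 6-7 of the GET CONFIGURATION feature header hold
		 * the current profile; profile 0x0012 is DVD-RAM, which
		 * implies writable media.
		 */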
3192 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3193 			un->un_f_mmc_writable_media = TRUE;
3194 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3195 			kmem_free(rqbuf, SENSE_LENGTH);
3196 			return;
3197 		}
3198 	}
3199 
3200 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3201 	kmem_free(rqbuf, SENSE_LENGTH);
3202 
3203 	/*
3204 	 * Determine if this is a RRD type device.
3205 	 */
3206 	mutex_exit(SD_MUTEX(un));
3207 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3208 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3209 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3210 	mutex_enter(SD_MUTEX(un));
3211 	if (status != 0) {
3212 		/* command failed; just return */
3213 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3214 		return;
3215 	}
3216 
3217 	/* Get to the page data */
3218 	sense_mhp = (struct mode_header_grp2 *)buf;
3219 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3220 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3221 		/*
3222 		 * We did not get back the expected block descriptor length so
3223 		 * we cannot check the mode page.
3224 		 */
3225 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3226 		    "sd_check_for_writable_cd: Mode Sense returned "
3227 		    "invalid block descriptor length\n");
3228 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3229 		return;
3230 	}
3231 
3232 	/*
3233 	 * If the device presents DVD or CD capabilities in the mode
3234 	 * page, we can return here since an RRD device will not have
3235 	 * these capabilities.
3236 	 */
3237 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3238 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3239 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3240 		return;
3241 	}
3242 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3243 
3244 	/*
3245 	 * If un->un_f_mmc_writable_media is still FALSE,
3246 	 * check for RRD type media.  An RRD device is identified
3247 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3248 	 */
3249 	mutex_exit(SD_MUTEX(un));
3250 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3251 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3252 
3253 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3254 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3255 	    RANDOM_WRITABLE);
3256 	if (rtn != 0) {
3257 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3258 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3259 		mutex_enter(SD_MUTEX(un));
3260 		return;
3261 	}
3262 
3263 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3264 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3265 
3266 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3267 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3268 	    HARDWARE_DEFECT_MANAGEMENT);
3269 	mutex_enter(SD_MUTEX(un));
3270 	if (rtn == 0) {
3271 		/*
3272 		 * We have good information, check for random writable
3273 		 * and hardware defect features as current.
3274 		 */
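		/*
		 * (Layout note: byte 10 of each returned buffer is byte 2
		 * of the feature descriptor, just past the 8 byte feature
		 * header; per MMC, its bit 0 is the "current" bit.)
		 */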
3275 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3276 		    (out_data_rw[10] & 0x1) &&
3277 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3278 		    (out_data_hd[10] & 0x1)) {
3279 			un->un_f_mmc_writable_media = TRUE;
3280 		}
3281 	}
3282 
3283 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3284 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3285 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3286 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3287 }
3288 
3289 /*
3290  *    Function: sd_read_unit_properties
3291  *
3292  * Description: The following implements a property lookup mechanism.
3293  *		Properties for particular disks (keyed on vendor, model
3294  *		and rev numbers) are sought in the sd.conf file via
3295  *		sd_process_sdconf_file(), and if not found there, are
3296  *		looked for in a list hardcoded in this driver via
3297 	 *		sd_process_sdconf_table(). Once located, the properties
3298 	 *		are used to update the driver unit structure.
3299  *
3300  *   Arguments: un - driver soft state (unit) structure
3301  */
3302 
3303 static void
3304 sd_read_unit_properties(struct sd_lun *un)
3305 {
3306 	/*
3307 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3308 	 * the "sd-config-list" property (from the sd.conf file) or if
3309 	 * there was not a match for the inquiry vid/pid. If this event
3310 	 * occurs the static driver configuration table is searched for
3311 	 * a match.
3312 	 */
3313 	ASSERT(un != NULL);
3314 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3315 		sd_process_sdconf_table(un);
3316 	}
3317 
3318 	/* check for LSI device */
3319 	sd_is_lsi(un);
3320 
3321 
3322 }
3323 
3324 
3325 /*
3326  *    Function: sd_process_sdconf_file
3327  *
3328  * Description: Use ddi_getlongprop to obtain the properties from the
3329  *		driver's config file (i.e., sd.conf) and update the driver
3330  *		soft state structure accordingly.
3331  *
3332  *   Arguments: un - driver soft state (unit) structure
3333  *
3334  * Return Code: SD_SUCCESS - The properties were successfully set according
3335  *			     to the driver configuration file.
3336  *		SD_FAILURE - The driver config list was not obtained or
3337  *			     there was no vid/pid match. This indicates that
3338  *			     the static config table should be used.
3339  *
3340  * The config file has a property, "sd-config-list", which consists of
3341  * one or more duplets as follows:
3342  *
3343  *  sd-config-list=
3344  *	<duplet>,
3345  *	[<duplet>,]
3346  *	[<duplet>];
3347  *
3348  * The structure of each duplet is as follows:
3349  *
3350  *  <duplet>:= <vid+pid>,<data-property-name_list>
3351  *
3352  * The first entry of the duplet is the device ID string (the concatenated
3353  * vid & pid; not to be confused with a device_id).  This is defined in
3354  * the same way as in the sd_disk_table.
3355  *
3356  * The second part of the duplet is a string that identifies a
3357  * data-property-name-list. The data-property-name-list is defined as
3358  * follows:
3359  *
3360  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3361  *
3362  * The syntax of <data-property-name> depends on the <version> field.
3363  *
3364  * If version = SD_CONF_VERSION_1 we have the following syntax:
3365  *
3366  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3367  *
3368  * where the prop0 value will be used to set prop0 if bit0 is set in the
3369  * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3370  *
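 * For example (an illustrative sketch only; the vid/pid, property name,
 * and bit assignment below are hypothetical), an sd.conf fragment using
 * this syntax could look like:
 *
 *	sd-config-list = "ACME    SUPERDISK", "acme-data";
 *	acme-data = 1, 0x1, 32;
 *
 * which, assuming bit0 of the flags selects the throttle, would set the
 * throttle to 32 for matching devices.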
3371  */
3372 
3373 static int
3374 sd_process_sdconf_file(struct sd_lun *un)
3375 {
3376 	char	*config_list = NULL;
3377 	int	config_list_len;
3378 	int	len;
3379 	int	dupletlen = 0;
3380 	char	*vidptr;
3381 	int	vidlen;
3382 	char	*dnlist_ptr;
3383 	char	*dataname_ptr;
3384 	int	dnlist_len;
3385 	int	dataname_len;
3386 	int	*data_list;
3387 	int	data_list_len;
3388 	int	rval = SD_FAILURE;
3389 	int	i;
3390 
3391 	ASSERT(un != NULL);
3392 
3393 	/* Obtain the configuration list associated with the .conf file */
3394 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3395 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3396 	    != DDI_PROP_SUCCESS) {
3397 		return (SD_FAILURE);
3398 	}
3399 
3400 	/*
3401 	 * Compare vids in each duplet to the inquiry vid - if a match is
3402 	 * made, get the data value and update the soft state structure
3403 	 * accordingly.
3404 	 *
3405 	 * Note: This algorithm is complex and difficult to maintain. It should
3406 	 * be replaced with a more robust implementation.
3407 	 */
3408 	for (len = config_list_len, vidptr = config_list; len > 0;
3409 	    vidptr += dupletlen, len -= dupletlen) {
3410 		/*
3411 		 * Note: The assumption here is that each vid entry is on
3412 		 * a separate line from its associated duplet.
3413 		 */
3414 		vidlen = dupletlen = (int)strlen(vidptr);
3415 		if ((vidlen == 0) ||
3416 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3417 			dupletlen++;
3418 			continue;
3419 		}
3420 
3421 		/*
3422 		 * dnlist contains 1 or more blank separated
3423 		 * data-property-name entries
3424 		 */
3425 		dnlist_ptr = vidptr + vidlen + 1;
3426 		dnlist_len = (int)strlen(dnlist_ptr);
3427 		dupletlen += dnlist_len + 2;
3428 
3429 		/*
3430 		 * Set a pointer for the first data-property-name
3431 		 * entry in the list
3432 		 */
3433 		dataname_ptr = dnlist_ptr;
3434 		dataname_len = 0;
3435 
3436 		/*
3437 		 * Loop through all data-property-name entries in the
3438 		 * data-property-name-list setting the properties for each.
3439 		 */
3440 		while (dataname_len < dnlist_len) {
3441 			int version;
3442 
3443 			/*
3444 			 * Determine the length of the current
3445 			 * data-property-name entry by indexing until a
3446 			 * blank or NUL is encountered. When a space is
3447 			 * encountered, replace it with a NUL so the name
3448 			 * is properly terminated for ddi_getlongprop().
3449 			 */
3450 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3451 			    (dataname_ptr[i] != '\0')); i++) {
3452 				;
3453 			}
3454 
3455 			dataname_len += i;
3456 			/* If not NUL terminated, make it so */
3457 			if (dataname_ptr[i] == ' ') {
3458 				dataname_ptr[i] = '\0';
3459 			}
3460 			dataname_len++;
3461 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3462 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3463 			    vidptr, dataname_ptr);
3464 
3465 			/* Get the data list */
3466 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3467 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3468 			    != DDI_PROP_SUCCESS) {
3469 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3470 				    "sd_process_sdconf_file: data property (%s)"
3471 				    " has no value\n", dataname_ptr);
3472 				dataname_ptr = dnlist_ptr + dataname_len;
3473 				continue;
3474 			}
3475 
3476 			version = data_list[0];
3477 
3478 			if (version == SD_CONF_VERSION_1) {
3479 				sd_tunables values;
3480 
3481 				/* Set the properties */
3482 				if (sd_chk_vers1_data(un, data_list[1],
3483 				    &data_list[2], data_list_len, dataname_ptr)
3484 				    == SD_SUCCESS) {
3485 					sd_get_tunables_from_conf(un,
3486 					    data_list[1], &data_list[2],
3487 					    &values);
3488 					sd_set_vers1_properties(un,
3489 					    data_list[1], &values);
3490 					rval = SD_SUCCESS;
3491 				} else {
3492 					rval = SD_FAILURE;
3493 				}
3494 			} else {
3495 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3496 				    "data property %s version 0x%x is invalid.",
3497 				    dataname_ptr, version);
3498 				rval = SD_FAILURE;
3499 			}
3500 			kmem_free(data_list, data_list_len);
3501 			dataname_ptr = dnlist_ptr + dataname_len;
3502 		}
3503 	}
3504 
3505 	/* free up the memory allocated by ddi_getlongprop */
3506 	if (config_list) {
3507 		kmem_free(config_list, config_list_len);
3508 	}
3509 
3510 	return (rval);
3511 }
3512 
3513 /*
3514  *    Function: sd_get_tunables_from_conf()
3515  *
3516  * Description: This function reads the data list from the sd.conf
3517  *    file and pulls out the values that take numeric arguments,
3518  *    placing them in the appropriate sd_tunables members.
3519  *
3520  *    Since the order of the data list members varies across platforms,
3521  *    this function reads them from the data list in a platform
3522  *    specific order and places them into the sd_tunables member
3523  *    whose meaning is consistent across all platforms.
3524  */
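/*
 * For example (illustrative values only): with flags = 0x5 and a data
 * list of { 32, 0, 2 }, bit0 (throttle, under the hypothetical bit
 * assignment above) stores data_list[0] (32) in sdt_throttle, and bit2
 * (assumed here to be the not-ready retry count) stores data_list[2]
 * (2) in sdt_not_rdy_retries; data_list[1] is skipped since bit1 is
 * clear.
 */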
3525 static void
3526 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3527     sd_tunables *values)
3528 {
3529 	int i;
3530 	int mask;
3531 
3532 	bzero(values, sizeof (sd_tunables));
3533 
3534 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3535 
3536 		mask = 1 << i;
3537 		if (mask > flags) {
3538 			break;
3539 		}
3540 
3541 		switch (mask & flags) {
3542 		case 0:	/* This mask bit not set in flags */
3543 			continue;
3544 		case SD_CONF_BSET_THROTTLE:
3545 			values->sdt_throttle = data_list[i];
3546 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3547 			    "sd_get_tunables_from_conf: throttle = %d\n",
3548 			    values->sdt_throttle);
3549 			break;
3550 		case SD_CONF_BSET_CTYPE:
3551 			values->sdt_ctype = data_list[i];
3552 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3553 			    "sd_get_tunables_from_conf: ctype = %d\n",
3554 			    values->sdt_ctype);
3555 			break;
3556 		case SD_CONF_BSET_NRR_COUNT:
3557 			values->sdt_not_rdy_retries = data_list[i];
3558 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3559 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3560 			    values->sdt_not_rdy_retries);
3561 			break;
3562 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3563 			values->sdt_busy_retries = data_list[i];
3564 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3565 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3566 			    values->sdt_busy_retries);
3567 			break;
3568 		case SD_CONF_BSET_RST_RETRIES:
3569 			values->sdt_reset_retries = data_list[i];
3570 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3571 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3572 			    values->sdt_reset_retries);
3573 			break;
3574 		case SD_CONF_BSET_RSV_REL_TIME:
3575 			values->sdt_reserv_rel_time = data_list[i];
3576 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3577 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3578 			    values->sdt_reserv_rel_time);
3579 			break;
3580 		case SD_CONF_BSET_MIN_THROTTLE:
3581 			values->sdt_min_throttle = data_list[i];
3582 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3583 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3584 			    values->sdt_min_throttle);
3585 			break;
3586 		case SD_CONF_BSET_DISKSORT_DISABLED:
3587 			values->sdt_disk_sort_dis = data_list[i];
3588 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3589 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3590 			    values->sdt_disk_sort_dis);
3591 			break;
3592 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3593 			values->sdt_lun_reset_enable = data_list[i];
3594 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3595 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3596 			    "\n", values->sdt_lun_reset_enable);
3597 			break;
3598 		}
3599 	}
3600 }
3601 
3602 /*
3603  *    Function: sd_process_sdconf_table
3604  *
3605  * Description: Search the static configuration table for a match on the
3606  *		inquiry vid/pid and update the driver soft state structure
3607  *		according to the table property values for the device.
3608  *
3609  *		The form of a configuration table entry is:
3610  *		  <vid+pid>,<flags>,<property-data>
3611  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3612  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3613  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3614  *
3615  *   Arguments: un - driver soft state (unit) structure
3616  */
3617 
3618 static void
3619 sd_process_sdconf_table(struct sd_lun *un)
3620 {
3621 	char	*id = NULL;
3622 	int	table_index;
3623 	int	idlen;
3624 
3625 	ASSERT(un != NULL);
3626 	for (table_index = 0; table_index < sd_disk_table_size;
3627 	    table_index++) {
3628 		id = sd_disk_table[table_index].device_id;
3629 		idlen = strlen(id);
3630 		if (idlen == 0) {
3631 			continue;
3632 		}
3633 
3634 		/*
3635 		 * The static configuration table currently does not
3636 		 * implement version 10 properties. Additionally,
3637 		 * multiple data-property-name entries are not
3638 		 * implemented in the static configuration table.
3639 		 */
3640 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3641 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3642 			    "sd_process_sdconf_table: disk %s\n", id);
3643 			sd_set_vers1_properties(un,
3644 			    sd_disk_table[table_index].flags,
3645 			    sd_disk_table[table_index].properties);
3646 			break;
3647 		}
3648 	}
3649 }
3650 
3651 
3652 /*
3653  *    Function: sd_sdconf_id_match
3654  *
3655  * Description: This local function implements a case sensitive vid/pid
3656  *		comparison as well as the boundary cases of wild card and
3657  *		multiple blanks.
3658  *
3659  *		Note: An implicit assumption made here is that the scsi
3660  *		inquiry structure will always keep the vid, pid and
3661  *		revision strings in consecutive sequence, so they can be
3662  *		read as a single string. If this assumption is not the
3663  *		case, a separate string, to be used for the check, needs
3664  *		to be built with these strings concatenated.
3665  *
3666  *   Arguments: un - driver soft state (unit) structure
3667  *		id - table or config file vid/pid
3668  *		idlen  - length of the vid/pid (bytes)
3669  *
3670  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3671  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
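 *
 *		For example, a table id of "*DVD-ROM*" (a hypothetical
 *		entry) matches any device whose 16 byte inquiry pid
 *		contains the substring "DVD-ROM", regardless of vendor.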
3672  */
3673 
3674 static int
3675 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3676 {
3677 	struct scsi_inquiry	*sd_inq;
3678 	int 			rval = SD_SUCCESS;
3679 
3680 	ASSERT(un != NULL);
3681 	sd_inq = un->un_sd->sd_inq;
3682 	ASSERT(id != NULL);
3683 
3684 	/*
3685 	 * We use the inq_vid as a pointer to a buffer containing the
3686 	 * vid and pid and use the entire vid/pid length of the table
3687 	 * entry for the comparison. This works because the inq_pid
3688 	 * data member follows inq_vid in the scsi_inquiry structure.
3689 	 */
3690 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3691 		/*
3692 		 * The user id string is compared to the inquiry vid/pid
3693 		 * using a case insensitive comparison and ignoring
3694 		 * multiple spaces.
3695 		 */
3696 		rval = sd_blank_cmp(un, id, idlen);
3697 		if (rval != SD_SUCCESS) {
3698 			/*
3699 			 * User id strings that start and end with a "*"
3700 			 * are a special case. These do not have a
3701 			 * specific vendor, and the product string can
3702 			 * appear anywhere in the 16 byte PID portion of
3703 			 * the inquiry data. This is a simple strstr()
3704 			 * type search for the user id in the inquiry data.
3705 			 */
3706 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3707 				char	*pidptr = &id[1];
3708 				int	i;
3709 				int	j;
3710 				int	pidstrlen = idlen - 2;
3711 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3712 				    pidstrlen;
3713 
3714 				if (j < 0) {
3715 					return (SD_FAILURE);
3716 				}
3717 				for (i = 0; i < j; i++) {
3718 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3719 					    pidptr, pidstrlen) == 0) {
3720 						rval = SD_SUCCESS;
3721 						break;
3722 					}
3723 				}
3724 			}
3725 		}
3726 	}
3727 	return (rval);
3728 }
3729 
3730 
3731 /*
3732  *    Function: sd_blank_cmp
3733  *
3734  * Description: If the id string starts and ends with a space, treat
3735  *		multiple consecutive spaces as equivalent to a single
3736  *		space. For example, this causes a sd_disk_table entry
3737  *		of " NEC CDROM " to match a device's id string of
3738  *		"NEC       CDROM".
3739  *
3740  *		Note: The success exit condition for this routine is if
3741  *		the table entry pointer points at '\0' and the count of
3742  *		remaining inquiry bytes is zero. This will happen if the inquiry
3743  *		string returned by the device is padded with spaces to be
3744  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3745  *		SCSI spec states that the inquiry string is to be padded with
3746  *		spaces.
3747  *
3748  *   Arguments: un - driver soft state (unit) structure
3749  *		id - table or config file vid/pid
3750  *		idlen  - length of the vid/pid (bytes)
3751  *
3752  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3753  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3754  */
3755 
3756 static int
3757 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3758 {
3759 	char		*p1;
3760 	char		*p2;
3761 	int		cnt;
3762 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3763 	    sizeof (SD_INQUIRY(un)->inq_pid);
3764 
3765 	ASSERT(un != NULL);
3766 	p2 = un->un_sd->sd_inq->inq_vid;
3767 	ASSERT(id != NULL);
3768 	p1 = id;
3769 
3770 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3771 		/*
3772 		 * Note: string p1 is terminated by a NUL but string p2
3773 		 * isn't.  The end of p2 is determined by cnt.
3774 		 */
3775 		for (;;) {
3776 			/* skip over any extra blanks in both strings */
3777 			while ((*p1 != '\0') && (*p1 == ' ')) {
3778 				p1++;
3779 			}
3780 			while ((cnt != 0) && (*p2 == ' ')) {
3781 				p2++;
3782 				cnt--;
3783 			}
3784 
3785 			/* compare the two strings */
3786 			if ((cnt == 0) ||
3787 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3788 				break;
3789 			}
3790 			while ((cnt > 0) &&
3791 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3792 				p1++;
3793 				p2++;
3794 				cnt--;
3795 			}
3796 		}
3797 	}
3798 
3799 	/* return SD_SUCCESS if both strings match */
3800 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3801 }
3802 
3803 
3804 /*
3805  *    Function: sd_chk_vers1_data
3806  *
3807  * Description: Verify the version 1 device properties provided by the
3808  *		user via the configuration file
3809  *
3810  *   Arguments: un	     - driver soft state (unit) structure
3811  *		flags	     - integer mask indicating properties to be set
3812  *		prop_list    - integer list of property values
3813  *		list_len     - length of user provided data
3814  *
3815  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3816  *		SD_FAILURE - Indicates the user provided data is invalid
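 *
 *		For example (illustrative): flags = 0x9 has two bits set,
 *		so the check requires a list of at least four integers:
 *		the version, the flagword, and one value per set bit.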
3817  */
3818 
3819 static int
3820 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3821     int list_len, char *dataname_ptr)
3822 {
3823 	int i;
3824 	int mask = 1;
3825 	int index = 0;
3826 
3827 	ASSERT(un != NULL);
3828 
3829 	/* Check for a NULL property name and list */
3830 	if (dataname_ptr == NULL) {
3831 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3832 		    "sd_chk_vers1_data: NULL data property name.");
3833 		return (SD_FAILURE);
3834 	}
3835 	if (prop_list == NULL) {
3836 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3837 		    "sd_chk_vers1_data: %s NULL data property list.",
3838 		    dataname_ptr);
3839 		return (SD_FAILURE);
3840 	}
3841 
3842 	/* Display a warning if undefined bits are set in the flags */
3843 	if (flags & ~SD_CONF_BIT_MASK) {
3844 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3845 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3846 		    "Properties not set.",
3847 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3848 		return (SD_FAILURE);
3849 	}
3850 
3851 	/*
3852 	 * Verify the length of the list by counting the bits set in the
3853 	 * flags and validating that the property list holds one value for
3854 	 * each set bit (in addition to the version and flagword entries).
3855 	 */
3856 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3857 		mask = 1 << i;
3858 		if (flags & mask) {
3859 			index++;
3860 		}
3861 	}
3862 	if ((list_len / sizeof (int)) < (index + 2)) {
3863 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3864 		    "sd_chk_vers1_data: "
3865 		    "Data property list %s size is incorrect. "
3866 		    "Properties not set.", dataname_ptr);
3867 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3868 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3869 		return (SD_FAILURE);
3870 	}
3871 	return (SD_SUCCESS);
3872 }
3873 
3874 
3875 /*
3876  *    Function: sd_set_vers1_properties
3877  *
3878  * Description: Set version 1 device properties based on a property list
3879  *		retrieved from the driver configuration file or static
3880  *		configuration table. Version 1 properties have the format:
3881  *
3882  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3883  *
3884  *		where the prop0 value will be used to set prop0 if bit0
3885  *		is set in the flags
3886  *
3887  *   Arguments: un	     - driver soft state (unit) structure
3888  *		flags	     - integer mask indicating properties to be set
3889  *		prop_list    - integer list of property values
3890  */
3891 
3892 static void
3893 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3894 {
3895 	ASSERT(un != NULL);
3896 
3897 	/*
3898 	 * Set the flag to indicate cache is to be disabled. An attempt
3899 	 * to disable the cache via sd_cache_control() will be made
3900 	 * later during attach once the basic initialization is complete.
3901 	 */
3902 	if (flags & SD_CONF_BSET_NOCACHE) {
3903 		un->un_f_opt_disable_cache = TRUE;
3904 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3905 		    "sd_set_vers1_properties: caching disabled flag set\n");
3906 	}
3907 
3908 	/* CD-specific configuration parameters */
3909 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3910 		un->un_f_cfg_playmsf_bcd = TRUE;
3911 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3912 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3913 	}
3914 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3915 		un->un_f_cfg_readsub_bcd = TRUE;
3916 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3917 		    "sd_set_vers1_properties: readsub_bcd set\n");
3918 	}
3919 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3920 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3921 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3922 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3923 	}
3924 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3925 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3926 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3927 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3928 	}
3929 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3930 		un->un_f_cfg_no_read_header = TRUE;
3931 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3932 		    "sd_set_vers1_properties: no_read_header set\n");
3933 	}
3934 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3935 		un->un_f_cfg_read_cd_xd4 = TRUE;
3936 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3937 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3938 	}
3939 
3940 	/* Support for devices which do not have valid/unique serial numbers */
3941 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3942 		un->un_f_opt_fab_devid = TRUE;
3943 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3944 		    "sd_set_vers1_properties: fab_devid bit set\n");
3945 	}
3946 
3947 	/* Support for user throttle configuration */
3948 	if (flags & SD_CONF_BSET_THROTTLE) {
3949 		ASSERT(prop_list != NULL);
3950 		un->un_saved_throttle = un->un_throttle =
3951 		    prop_list->sdt_throttle;
3952 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3953 		    "sd_set_vers1_properties: throttle set to %d\n",
3954 		    prop_list->sdt_throttle);
3955 	}
3956 
3957 	/* Set the per disk retry count according to the conf file or table. */
3958 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3959 		ASSERT(prop_list != NULL);
3960 		if (prop_list->sdt_not_rdy_retries) {
3961 			un->un_notready_retry_count =
3962 			    prop_list->sdt_not_rdy_retries;
3963 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3964 			    "sd_set_vers1_properties: not ready retry count"
3965 			    " set to %d\n", un->un_notready_retry_count);
3966 		}
3967 	}
3968 
3969 	/* The controller type is reported for generic disk driver ioctls */
3970 	if (flags & SD_CONF_BSET_CTYPE) {
3971 		ASSERT(prop_list != NULL);
3972 		switch (prop_list->sdt_ctype) {
3973 		case CTYPE_CDROM:
3974 			un->un_ctype = prop_list->sdt_ctype;
3975 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3976 			    "sd_set_vers1_properties: ctype set to "
3977 			    "CTYPE_CDROM\n");
3978 			break;
3979 		case CTYPE_CCS:
3980 			un->un_ctype = prop_list->sdt_ctype;
3981 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3982 			    "sd_set_vers1_properties: ctype set to "
3983 			    "CTYPE_CCS\n");
3984 			break;
3985 		case CTYPE_ROD:		/* RW optical */
3986 			un->un_ctype = prop_list->sdt_ctype;
3987 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3988 			    "sd_set_vers1_properties: ctype set to "
3989 			    "CTYPE_ROD\n");
3990 			break;
3991 		default:
3992 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3993 			    "sd_set_vers1_properties: Could not set "
3994 			    "invalid ctype value (%d)",
3995 			    prop_list->sdt_ctype);
3996 		}
3997 	}
3998 
3999 	/* Purple failover timeout */
4000 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4001 		ASSERT(prop_list != NULL);
4002 		un->un_busy_retry_count =
4003 		    prop_list->sdt_busy_retries;
4004 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4005 		    "sd_set_vers1_properties: "
4006 		    "busy retry count set to %d\n",
4007 		    un->un_busy_retry_count);
4008 	}
4009 
4010 	/* Purple reset retry count */
4011 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4012 		ASSERT(prop_list != NULL);
4013 		un->un_reset_retry_count =
4014 		    prop_list->sdt_reset_retries;
4015 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4016 		    "sd_set_vers1_properties: "
4017 		    "reset retry count set to %d\n",
4018 		    un->un_reset_retry_count);
4019 	}
4020 
4021 	/* Purple reservation release timeout */
4022 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4023 		ASSERT(prop_list != NULL);
4024 		un->un_reserve_release_time =
4025 		    prop_list->sdt_reserv_rel_time;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 		    "sd_set_vers1_properties: "
4028 		    "reservation release timeout set to %d\n",
4029 		    un->un_reserve_release_time);
4030 	}
4031 
4032 	/*
4033 	 * Flag telling the driver to verify that no commands are pending
4034 	 * for a device before issuing a Test Unit Ready. This is a workaround
4035 	 * for a firmware bug in some Seagate eliteI drives.
4036 	 */
4037 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4038 		un->un_f_cfg_tur_check = TRUE;
4039 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4040 		    "sd_set_vers1_properties: tur queue check set\n");
4041 	}
4042 
4043 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4044 		un->un_min_throttle = prop_list->sdt_min_throttle;
4045 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4046 		    "sd_set_vers1_properties: min throttle set to %d\n",
4047 		    un->un_min_throttle);
4048 	}
4049 
4050 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4051 		un->un_f_disksort_disabled =
4052 		    (prop_list->sdt_disk_sort_dis != 0) ?
4053 		    TRUE : FALSE;
4054 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4055 		    "sd_set_vers1_properties: disksort disabled "
4056 		    "flag set to %d\n",
4057 		    prop_list->sdt_disk_sort_dis);
4058 	}
4059 
4060 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4061 		un->un_f_lun_reset_enabled =
4062 		    (prop_list->sdt_lun_reset_enable != 0) ?
4063 		    TRUE : FALSE;
4064 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4065 		    "sd_set_vers1_properties: lun reset enabled "
4066 		    "flag set to %d\n",
4067 		    prop_list->sdt_lun_reset_enable);
4068 	}
4069 
4070 	/*
4071 	 * Validate the throttle values.
4072 	 * If any of the numbers are invalid, set everything to defaults.
4073 	 */
4074 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4075 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4076 	    (un->un_min_throttle > un->un_throttle)) {
4077 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4078 		un->un_min_throttle = sd_min_throttle;
4079 	}
4080 }
4081 
4082 /*
4083  *   Function: sd_is_lsi()
4084  *
4085  *   Description: Check for LSI devices by stepping through the static
4086  *	device table to match vid/pid.
4087  *
4088  *   Args: un - ptr to sd_lun
4089  *
4090  *   Notes:  When creating a new LSI property, it must also be added
4091  *		to this function.
4092  */
4093 static void
4094 sd_is_lsi(struct sd_lun *un)
4095 {
4096 	char	*id = NULL;
4097 	int	table_index;
4098 	int	idlen;
4099 	void	*prop;
4100 
4101 	ASSERT(un != NULL);
4102 	for (table_index = 0; table_index < sd_disk_table_size;
4103 	    table_index++) {
4104 		id = sd_disk_table[table_index].device_id;
4105 		idlen = strlen(id);
4106 		if (idlen == 0) {
4107 			continue;
4108 		}
4109 
4110 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4111 			prop = sd_disk_table[table_index].properties;
4112 			if (prop == &lsi_properties ||
4113 			    prop == &lsi_oem_properties ||
4114 			    prop == &lsi_properties_scsi ||
4115 			    prop == &symbios_properties) {
4116 				un->un_f_cfg_is_lsi = TRUE;
4117 			}
4118 			break;
4119 		}
4120 	}
4121 }
4122 
4123 
4124 /*
4125  * The following routines support reading and interpretation of disk labels,
4126  * including Solaris BE (8-slice) vtoc's, Solaris LE (16-slice) vtoc's, and
4127  * fdisk tables.
4128  */
4129 
4130 /*
4131  *    Function: sd_validate_geometry
4132  *
4133  * Description: Read the label from the disk (if present). Update the unit's
4134  *		geometry and vtoc information from the data in the label.
4135  *		Verify that the label is valid.
4136  *
4137  *   Arguments: un - driver soft state (unit) structure
4138  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4139  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4140  *			to use the USCSI "direct" chain and bypass the normal
4141  *			command waitq.
4142  *
4143  * Return Code: 0 - Successful completion
4144  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4145  *			  un->un_blockcount; or label on disk is corrupted
4146  *			  or unreadable.
4147  *		EACCES  - Reservation conflict at the device.
4148  *		ENOMEM  - Resource allocation error
4149  *		ENOTSUP - geometry not applicable
4150  *
4151  *     Context: Kernel thread only (can sleep).
4152  */
4153 
4154 static int
4155 sd_validate_geometry(struct sd_lun *un, int path_flag)
4156 {
4157 	static	char		labelstring[128];
4158 	static	char		buf[256];
4159 	char	*label		= NULL;
4160 	int	label_error = 0;
4161 	int	gvalid		= un->un_f_geometry_is_valid;
4162 	int	lbasize;
4163 	uint_t	capacity;
4164 	int	count;
4165 
4166 	ASSERT(un != NULL);
4167 	ASSERT(mutex_owned(SD_MUTEX(un)));
4168 
4169 	/*
4170 	 * If the required values are not valid, then try getting them
4171 	 * once via read capacity. If that fails, then fail this call.
4172 	 * This is necessary with the new mpxio failover behavior in
4173 	 * the T300 where we can get an attach for the inactive path
4174 	 * before the active path. The inactive path fails commands with
4175 	 * sense data of 02,04,88; this happens to the read capacity
4176 	 * before mpxio has sufficient knowledge to know whether it should
4177 	 * force a failover or not (which it won't do at attach anyhow).
4178 	 * If the read capacity at attach time fails, un_tgt_blocksize and
4179 	 * un_blockcount won't be valid.
4180 	 */
4181 	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4182 	    (un->un_f_blockcount_is_valid != TRUE)) {
4183 		uint64_t	cap;
4184 		uint32_t	lbasz;
4185 		int		rval;
4186 
4187 		mutex_exit(SD_MUTEX(un));
4188 		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
4189 		    &lbasz, SD_PATH_DIRECT);
4190 		mutex_enter(SD_MUTEX(un));
4191 		if (rval == 0) {
4192 			/*
4193 			 * The following relies on
4194 			 * sd_send_scsi_READ_CAPACITY never
4195 			 * returning 0 for capacity and/or lbasize.
4196 			 */
4197 			sd_update_block_info(un, lbasz, cap);
4198 		}
4199 
4200 		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4201 		    (un->un_f_blockcount_is_valid != TRUE)) {
4202 			return (EINVAL);
4203 		}
4204 	}
4205 
4206 	/*
4207 	 * Copy the lbasize and capacity so that if they're reset while we're
4208 	 * not holding the SD_MUTEX, we will continue to use valid values
4209 	 * after the SD_MUTEX is reacquired. (4119659)
4210 	 */
4211 	lbasize  = un->un_tgt_blocksize;
4212 	capacity = un->un_blockcount;
4213 
4214 #if defined(_SUNOS_VTOC_16)
4215 	/*
4216 	 * Set up the "whole disk" fdisk partition; this should always
4217 	 * exist, regardless of whether the disk contains an fdisk table
4218 	 * or vtoc.
4219 	 */
4220 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
4221 	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
4222 #endif
4223 
4224 	/*
4225 	 * Refresh the logical and physical geometry caches.
4226 	 * (data from MODE SENSE format/rigid disk geometry pages,
4227 	 * and scsi_ifgetcap("geometry")).
4228 	 */
4229 	sd_resync_geom_caches(un, capacity, lbasize, path_flag);
4230 
4231 	label_error = sd_use_efi(un, path_flag);
4232 	if (label_error == 0) {
4233 		/* found a valid EFI label */
4234 		SD_TRACE(SD_LOG_IO_PARTITION, un,
4235 			"sd_validate_geometry: found EFI label\n");
4236 		un->un_solaris_offset = 0;
4237 		un->un_solaris_size = capacity;
4238 		return (ENOTSUP);
4239 	}
4240 	if (un->un_blockcount > DK_MAX_BLOCKS) {
4241 		if (label_error == ESRCH) {
4242 			/*
4243 			 * they've configured a LUN over 1TB, but used
4244 			 * format.dat to restrict format's view of the
4245 			 * capacity to be under 1TB
4246 			 */
4247 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4248 "is >1TB and has a VTOC label: use format(1M) to either decrease the");
4249 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
4250 "size to be < 1TB or relabel the disk with an EFI label");
4251 		} else {
4252 			/* unlabeled disk over 1TB */
4253 #if defined(__i386) || defined(__amd64)
4254 			/*
4255 			 * Refer to comments on off-by-1 at the head of the file.
4256 			 * A 1TB disk was treated as (1T - 512)B in the past;
4257 			 * thus, it might have a valid Solaris partition. We
4258 			 * will return ENOTSUP later only if this disk has no
4259 			 * valid Solaris partition.
4260 			 */
4261 			if ((un->un_tgt_blocksize != un->un_sys_blocksize) ||
4262 			    (un->un_blockcount - 1 > DK_MAX_BLOCKS) ||
4263 			    un->un_f_has_removable_media ||
4264 			    un->un_f_is_hotpluggable)
4265 #endif
4266 				return (ENOTSUP);
4267 		}
4268 	}
4269 	label_error = 0;
4270 
4271 	/*
4272 	 * at this point it is either labeled with a VTOC or it is
4273 	 * under 1TB (<= 1TB actually for off-by-1)
4274 	 */
4275 	if (un->un_f_vtoc_label_supported) {
4276 		struct	dk_label *dkl;
4277 		offset_t dkl1;
4278 		offset_t label_addr, real_addr;
4279 		int	rval;
4280 		size_t	buffer_size;
4281 
4282 		/*
4283 		 * Note: This will set up un->un_solaris_size and
4284 		 * un->un_solaris_offset.
4285 		 */
4286 		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
4287 		case SD_CMD_RESERVATION_CONFLICT:
4288 			ASSERT(mutex_owned(SD_MUTEX(un)));
4289 			return (EACCES);
4290 		case SD_CMD_FAILURE:
4291 			ASSERT(mutex_owned(SD_MUTEX(un)));
4292 			return (ENOMEM);
4293 		}
4294 
4295 		if (un->un_solaris_size <= DK_LABEL_LOC) {
4296 
4297 #if defined(__i386) || defined(__amd64)
4298 			/*
4299 			 * Refer to comments on off-by-1 at the head of the file.
4300 			 * This is for 1TB disks only. Since there are no
4301 			 * Solaris partitions, return ENOTSUP as we do for
4302 			 * >1TB disks.
4303 			 */
4304 			if (un->un_blockcount > DK_MAX_BLOCKS)
4305 				return (ENOTSUP);
4306 #endif
4307 			/*
4308 			 * Found fdisk table but no Solaris partition entry,
4309 			 * so don't call sd_uselabel() and don't create
4310 			 * a default label.
4311 			 */
4312 			label_error = 0;
4313 			un->un_f_geometry_is_valid = TRUE;
4314 			goto no_solaris_partition;
4315 		}
4316 		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);
4317 
4318 #if defined(__i386) || defined(__amd64)
4319 		/*
4320 		 * Refer to comments on off-by-1 at the head of the file.
4321 		 * Now, this 1TB disk has a valid Solaris partition. It
4322 		 * must have been created by a previous sd driver, so we
4323 		 * have to treat it as (1T-512)B.
4324 		 */
4325 		if (un->un_blockcount > DK_MAX_BLOCKS) {
4326 			un->un_f_capacity_adjusted = 1;
4327 			un->un_blockcount = DK_MAX_BLOCKS;
4328 			un->un_map[P0_RAW_DISK].dkl_nblk  = DK_MAX_BLOCKS;
4329 
4330 			/*
4331 			 * Refer to sd_read_fdisk: when there is no
4332 			 * fdisk partition table, un_solaris_size is
4333 			 * set to the disk's capacity. In this case, we
4334 			 * need to adjust it.
4335 			 */
4336 			if (un->un_solaris_size > DK_MAX_BLOCKS)
4337 				un->un_solaris_size = DK_MAX_BLOCKS;
4338 			sd_resync_geom_caches(un, DK_MAX_BLOCKS,
4339 			    lbasize, path_flag);
4340 		}
4341 #endif
4342 
4343 		/*
4344 		 * When sys_blocksize != tgt_blocksize, we need to re-adjust
4345 		 * the blkno and save the index to the beginning of dk_label.
4346 		 */
4347 		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
4348 		buffer_size = SD_REQBYTES2TGTBYTES(un,
4349 		    sizeof (struct dk_label));
4350 
4351 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
4352 		    "label_addr: 0x%x allocation size: 0x%x\n",
4353 		    label_addr, buffer_size);
4354 		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
4355 		if (dkl == NULL) {
4356 			return (ENOMEM);
4357 		}
4358 
4359 		mutex_exit(SD_MUTEX(un));
4360 		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
4361 		    path_flag);
4362 		mutex_enter(SD_MUTEX(un));
4363 
4364 		switch (rval) {
4365 		case 0:
4366 			/*
4367 			 * sd_uselabel will establish that the geometry
4368 			 * is valid.
4369 			 * For sys_blocksize != tgt_blocksize, we need
4370 			 * to index into the beginning of dk_label.
4371 			 */
4372 			dkl1 = (daddr_t)dkl
4373 				+ SD_TGTBYTEOFFSET(un, label_addr, real_addr);
4374 			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
4375 			    path_flag) != SD_LABEL_IS_VALID) {
4376 				label_error = EINVAL;
4377 			}
4378 			break;
4379 		case EACCES:
4380 			label_error = EACCES;
4381 			break;
4382 		default:
4383 			label_error = EINVAL;
4384 			break;
4385 		}
4386 
4387 		kmem_free(dkl, buffer_size);
4388 
4389 #if defined(_SUNOS_VTOC_8)
4390 		label = (char *)un->un_asciilabel;
4391 #elif defined(_SUNOS_VTOC_16)
4392 		label = (char *)un->un_vtoc.v_asciilabel;
4393 #else
4394 #error "No VTOC format defined."
4395 #endif
4396 	}
4397 
4398 	/*
4399 	 * If a valid label was not found, AND if no reservation conflict
4400 	 * was detected, then go ahead and create a default label (4069506).
4401 	 */
4402 	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
4403 		if (un->un_f_geometry_is_valid == FALSE) {
4404 			sd_build_default_label(un);
4405 		}
4406 		label_error = 0;
4407 	}
4408 
4409 no_solaris_partition:
4410 	if ((!un->un_f_has_removable_media ||
4411 	    (un->un_f_has_removable_media &&
4412 	    un->un_mediastate == DKIO_EJECTED)) &&
4413 	    (un->un_state == SD_STATE_NORMAL && !gvalid)) {
4414 		/*
4415 		 * Print out a message indicating who and what we are.
4416 		 * We do this only when we happen to really validate the
4417 		 * geometry. We may call sd_validate_geometry() at other
4418 		 * times, e.g., ioctl()'s like Get VTOC in which case we
4419 		 * don't want to print the label.
4420 		 * If the geometry is valid, print the label string,
4421 		 * else print vendor and product info, if available
4422 		 */
4423 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
4424 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
4425 		} else {
4426 			mutex_enter(&sd_label_mutex);
4427 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
4428 			    labelstring);
4429 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
4430 			    &labelstring[64]);
4431 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
4432 			    labelstring, &labelstring[64]);
4433 			if (un->un_f_blockcount_is_valid == TRUE) {
4434 				(void) sprintf(&buf[strlen(buf)],
4435 				    ", %llu %u byte blocks\n",
4436 				    (longlong_t)un->un_blockcount,
4437 				    un->un_tgt_blocksize);
4438 			} else {
4439 				(void) sprintf(&buf[strlen(buf)],
4440 				    ", (unknown capacity)\n");
4441 			}
4442 			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
4443 			mutex_exit(&sd_label_mutex);
4444 		}
4445 	}
4446 
4447 #if defined(_SUNOS_VTOC_16)
4448 	/*
4449 	 * If we have valid geometry, set up the remaining fdisk partitions.
4450 	 * Note that dkl_cylno is not used for the fdisk map entries, so
4451 	 * we set it to an entirely bogus value.
4452 	 */
4453 	for (count = 0; count < FD_NUMPART; count++) {
4454 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
4455 		un->un_map[FDISK_P1 + count].dkl_nblk =
4456 		    un->un_fmap[count].fmap_nblk;
4457 
4458 		un->un_offset[FDISK_P1 + count] =
4459 		    un->un_fmap[count].fmap_start;
4460 	}
4461 #endif
4462 
4463 	for (count = 0; count < NDKMAP; count++) {
4464 #if defined(_SUNOS_VTOC_8)
4465 		struct dk_map *lp  = &un->un_map[count];
4466 		un->un_offset[count] =
4467 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
4468 #elif defined(_SUNOS_VTOC_16)
4469 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
4470 
4471 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
4472 #else
4473 #error "No VTOC format defined."
4474 #endif
4475 	}
4476 
4477 	return (label_error);
4478 }
4479 
4480 
4481 #if defined(_SUNOS_VTOC_16)
4482 /*
4483  * Macro: MAX_BLKS
4484  *
4485  *	This macro is used for table entries where we need to have the largest
4486  *	possible sector value for that head & SPT (sectors per track)
4487  *	combination.  Other entries for some smaller disk sizes are set by
4488  *	convention to match common X86 BIOS usage.
4489  */
4490 #define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt
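
/*
 * For example, MAX_BLKS(255, 63) expands to 1052819775 blocks
 * (65535 * 255 * 63), about 502.02GB at 512 bytes per block, which is
 * where the corresponding table comment below gets its figure.
 */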
4491 
4492 /*
4493  *    Function: sd_convert_geometry
4494  *
4495  * Description: Convert physical geometry into a dk_geom structure. In
4496  *		other words, make sure we don't wrap 16-bit values.
4497  *		e.g. converting from geom_cache to dk_geom
4498  *
4499  *     Context: Kernel thread only
4500  */
4501 static void
4502 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4503 {
4504 	int i;
4505 	static const struct chs_values {
4506 		uint_t max_cap;		/* Max Capacity for this HS. */
4507 		uint_t nhead;		/* Heads to use. */
4508 		uint_t nsect;		/* SPT to use. */
4509 	} CHS_values[] = {
4510 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4511 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4512 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4513 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4514 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4515 	};
4516 
4517 	/* Unlabeled SCSI floppy device */
4518 	if (capacity <= 0x1000) {
4519 		un_g->dkg_nhead = 2;
4520 		un_g->dkg_ncyl = 80;
4521 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4522 		return;
4523 	}
4524 
4525 	/*
4526 	 * For all devices we calculate cylinders using the
4527 	 * heads and sectors we assign based on capacity of the
4528 	 * device.  The table is designed to be compatible with the
4529 	 * way other operating systems lay out fdisk tables for X86
4530 	 * and to ensure that the cylinders never exceed 65535 to
4531 	 * prevent problems with X86 ioctls that report geometry.
4532 	 * We use SPT values that are multiples of 63; since other OSes
4533 	 * that are not limited to 16-bit cylinders stop at 63 SPT,
4534 	 * we make do by using multiples of 63 SPT.
4535 	 *
4536 	 * Note that capacities greater than or equal to 1TB will simply
4537 	 * get the largest geometry from the table. This should be okay
4538 	 * since disks this large shouldn't be using CHS values anyway.
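	 *
	 * Worked example: an 8388608-block device (4GB at 512 bytes per
	 * block) is too big for the 1GB entry but fits the 8GB entry,
	 * so it is assigned 128 heads and 32 SPT; cylinders are then
	 * derived as 8388608 / (128 * 32) = 2048.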
4539 	 */
4540 	for (i = 0; CHS_values[i].max_cap < capacity &&
4541 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4542 		;
4543 
4544 	un_g->dkg_nhead = CHS_values[i].nhead;
4545 	un_g->dkg_nsect = CHS_values[i].nsect;
4546 }
4547 #endif
4548 
4549 
4550 /*
4551  *    Function: sd_resync_geom_caches
4552  *
4553  * Description: (Re)initialize both geometry caches: the virtual geometry
4554  *		information is extracted from the HBA (the "geometry"
4555  *		capability), and the physical geometry cache data is
4556  *		generated by issuing MODE SENSE commands.
4557  *
4558  *   Arguments: un - driver soft state (unit) structure
4559  *		capacity - disk capacity in #blocks
4560  *		lbasize - disk block size in bytes
4561  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4562  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4563  *			to use the USCSI "direct" chain and bypass the normal
4564  *			command waitq.
4565  *
4566  *     Context: Kernel thread only (can sleep).
4567  */
4568 
4569 static void
4570 sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4571 	int path_flag)
4572 {
4573 	struct 	geom_cache 	pgeom;
4574 	struct 	geom_cache	*pgeom_p = &pgeom;
4575 	int 	spc;
4576 	unsigned short nhead;
4577 	unsigned short nsect;
4578 
4579 	ASSERT(un != NULL);
4580 	ASSERT(mutex_owned(SD_MUTEX(un)));
4581 
4582 	/*
4583 	 * Ask the controller for its logical geometry.
4584 	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4585 	 * then the lgeom cache will be invalid.
4586 	 */
4587 	sd_get_virtual_geometry(un, capacity, lbasize);
4588 
4589 	/*
4590 	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4591 	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4592 	 */
4593 	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4594 		/*
4595 		 * Note: Perhaps this needs to be more adaptive? The rationale
4596 		 * is that, if there's no HBA geometry from the HBA driver, any
4597 		 * guess is good, since this is the physical geometry. If MODE
4598 		 * SENSE fails this gives a max cylinder size for non-LBA access
4599 		 * SENSE fails, this gives a max cylinder size for non-LBA access.
4600 		nhead = 255;
4601 		nsect = 63;
4602 	} else {
4603 		nhead = un->un_lgeom.g_nhead;
4604 		nsect = un->un_lgeom.g_nsect;
4605 	}
4606 
4607 	if (ISCD(un)) {
4608 		pgeom_p->g_nhead = 1;
4609 		pgeom_p->g_nsect = nsect * nhead;
4610 	} else {
4611 		pgeom_p->g_nhead = nhead;
4612 		pgeom_p->g_nsect = nsect;
4613 	}
4614 
4615 	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4616 	pgeom_p->g_capacity = capacity;
4617 	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4618 	pgeom_p->g_acyl = 0;
4619 
4620 	/*
4621 	 * Retrieve fresh geometry data from the hardware, stash it
4622 	 * here temporarily before we rebuild the incore label.
4623 	 *
4624 	 * We want to use the MODE SENSE commands to derive the
4625 	 * physical geometry of the device, but if either command
4626 	 * fails, the logical geometry is used as the fallback for
4627 	 * disk label geometry.
4628 	 */
4629 	mutex_exit(SD_MUTEX(un));
4630 	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4631 	mutex_enter(SD_MUTEX(un));
4632 
4633 	/*
4634 	 * Now update the real copy while holding the mutex. This
4635 	 * way the global copy is never in an inconsistent state.
4636 	 */
4637 	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4638 
4639 	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4640 	    "(cached from lgeom)\n");
4641 	SD_INFO(SD_LOG_COMMON, un,
4642 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4643 	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4644 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4645 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4646 	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4647 	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4648 	    un->un_pgeom.g_rpm);
4649 }
4650 
4651 
4652 /*
4653  *    Function: sd_read_fdisk
4654  *
4655  * Description: Utility routine to read the fdisk table.
4656  *
4657  *   Arguments: un - driver soft state (unit) structure
4658  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4659  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4660  *			to use the USCSI "direct" chain and bypass the normal
4661  *			command waitq.
4662  *
4663  * Return Code: SD_CMD_SUCCESS
4664  *		SD_CMD_FAILURE
4665  *
4666  *     Context: Kernel thread only (can sleep).
4667  */
4668 /* ARGSUSED */
4669 static int
4670 sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
4671 {
4672 #if defined(_NO_FDISK_PRESENT)
4673 
4674 	un->un_solaris_offset = 0;
4675 	un->un_solaris_size = capacity;
4676 	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4677 	return (SD_CMD_SUCCESS);
4678 
4679 #elif defined(_FIRMWARE_NEEDS_FDISK)
4680 
4681 	struct ipart	*fdp;
4682 	struct mboot	*mbp;
4683 	struct ipart	fdisk[FD_NUMPART];
4684 	int		i;
4685 	char		sigbuf[2];
4686 	caddr_t		bufp;
4687 	int		uidx;
4688 	int		rval;
4689 	int		lba = 0;
4690 	uint_t		solaris_offset;	/* offset to solaris part. */
4691 	daddr_t		solaris_size;	/* size of solaris partition */
4692 	uint32_t	blocksize;
4693 
4694 	ASSERT(un != NULL);
4695 	ASSERT(mutex_owned(SD_MUTEX(un)));
4696 	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);
4697 
4698 	blocksize = un->un_tgt_blocksize;
4699 
4700 	/*
4701 	 * Start off assuming no fdisk table
4702 	 */
4703 	solaris_offset = 0;
4704 	solaris_size   = capacity;
4705 
4706 	mutex_exit(SD_MUTEX(un));
4707 	bufp = kmem_zalloc(blocksize, KM_SLEEP);
4708 	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
4709 	mutex_enter(SD_MUTEX(un));
4710 
4711 	if (rval != 0) {
4712 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4713 		    "sd_read_fdisk: fdisk read err\n");
4714 		kmem_free(bufp, blocksize);
4715 		return (SD_CMD_FAILURE);
4716 	}
4717 
4718 	mbp = (struct mboot *)bufp;
4719 
4720 	/*
4721 	 * The fdisk table does not begin on a 4-byte boundary within the
4722 	 * master boot record, so we copy it to an aligned structure to avoid
4723 	 * alignment exceptions on some processors.
4724 	 */
4725 	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
4726 
4727 	/*
4728 	 * Check for lba support before verifying sig; sig might not be
4729 	 * there, say on a blank disk, but the max_chs mark may still
4730 	 * be present.
4731 	 *
4732 	 * Note: LBA support and BEFs are an x86-only concept but this
4733 	 * code should work OK on SPARC as well.
4734 	 */
4735 
4736 	/*
4737 	 * First, check for lba-access-ok on the root node (or prom root node);
4738 	 * if it is present there, we don't need to search the fdisk table.
4739 	 */
4740 	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
4741 	    "lba-access-ok", 0) != 0) {
4742 		/* All drives do LBA; don't search fdisk table */
4743 		lba = 1;
4744 	} else {
4745 		/* Okay, look for mark in fdisk table */
4746 		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4747 			/* accumulate "lba" value from all partitions */
4748 			lba = (lba || sd_has_max_chs_vals(fdp));
4749 		}
4750 	}
4751 
4752 	if (lba != 0) {
4753 		dev_t dev = sd_make_device(SD_DEVINFO(un));
4754 
4755 		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
4756 		    "lba-access-ok", 0) == 0) {
4757 			/* not found; create it */
4758 			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
4759 			    "lba-access-ok", (caddr_t)NULL, 0) !=
4760 			    DDI_PROP_SUCCESS) {
4761 				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4762 				    "sd_read_fdisk: Can't create lba property "
4763 				    "for instance %d\n",
4764 				    ddi_get_instance(SD_DEVINFO(un)));
4765 			}
4766 		}
4767 	}
4768 
4769 	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));
4770 
4771 	/*
4772 	 * Endian-independent signature check
4773 	 */
4774 	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
4775 	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
4776 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4777 		    "sd_read_fdisk: no fdisk\n");
4778 		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4779 		rval = SD_CMD_SUCCESS;
4780 		goto done;
4781 	}
4782 
4783 #ifdef SDDEBUG
4784 	if (sd_level_mask & SD_LOGMASK_INFO) {
4785 		fdp = fdisk;
4786 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
4787 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
4788 		    "numsect         sysid       bootid\n");
4789 		for (i = 0; i < FD_NUMPART; i++, fdp++) {
4790 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4791 			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
4792 			    i, fdp->relsect, fdp->numsect,
4793 			    fdp->systid, fdp->bootid);
4794 		}
4795 	}
4796 #endif
4797 
4798 	/*
4799 	 * Try to find the unix partition
4800 	 */
4801 	uidx = -1;
4802 	solaris_offset = 0;
4803 	solaris_size   = 0;
4804 
4805 	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4806 		int	relsect;
4807 		int	numsect;
4808 
4809 		if (fdp->numsect == 0) {
4810 			un->un_fmap[i].fmap_start = 0;
4811 			un->un_fmap[i].fmap_nblk  = 0;
4812 			continue;
4813 		}
4814 
4815 		/*
4816 		 * Data in the fdisk table is little-endian.
4817 		 */
4818 		relsect = LE_32(fdp->relsect);
4819 		numsect = LE_32(fdp->numsect);
4820 
4821 		un->un_fmap[i].fmap_start = relsect;
4822 		un->un_fmap[i].fmap_nblk  = numsect;
4823 
4824 		if (fdp->systid != SUNIXOS &&
4825 		    fdp->systid != SUNIXOS2 &&
4826 		    fdp->systid != EFI_PMBR) {
4827 			continue;
4828 		}
4829 
4830 		/*
4831 		 * Use the last active Solaris partition id found
4832 		 * (there should be only 1 active partition id).
4833 		 *
4834 		 * If there is no active Solaris partition id,
4835 		 * use the first inactive Solaris partition id.
4836 		 */
4837 		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
4838 			uidx = i;
4839 			solaris_offset = relsect;
4840 			solaris_size   = numsect;
4841 		}
4842 	}
4843 
4844 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
4845 	    un->un_solaris_offset, un->un_solaris_size);
4846 
4847 	rval = SD_CMD_SUCCESS;
4848 
4849 done:
4850 
4851 	/*
4852 	 * Clear the VTOC info only if the Solaris partition entry
4853 	 * has moved, changed size, been deleted, or if the size of
4854 	 * the partition is too small to even fit the label sector.
4855 	 */
4856 	if ((un->un_solaris_offset != solaris_offset) ||
4857 	    (un->un_solaris_size != solaris_size) ||
4858 	    solaris_size <= DK_LABEL_LOC) {
4859 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
4860 			solaris_offset, solaris_size);
4861 		bzero(&un->un_g, sizeof (struct dk_geom));
4862 		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
4863 		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
4864 		un->un_f_geometry_is_valid = FALSE;
4865 	}
4866 	un->un_solaris_offset = solaris_offset;
4867 	un->un_solaris_size = solaris_size;
4868 	kmem_free(bufp, blocksize);
4869 	return (rval);
4870 
4871 #else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
4872 #error "fdisk table presence undetermined for this platform."
4873 #endif	/* #if defined(_NO_FDISK_PRESENT) */
4874 }
4875 
4876 
4877 /*
4878  *    Function: sd_get_physical_geometry
4879  *
4880  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4881  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4882  *		target, and use this information to initialize the physical
4883  *		geometry cache specified by pgeom_p.
4884  *
4885  *		MODE SENSE is an optional command, so failure in this case
4886  *		does not necessarily denote an error. We want to use the
4887  *		MODE SENSE commands to derive the physical geometry of the
4888  *		device, but if either command fails, the logical geometry is
4889  *		used as the fallback for disk label geometry.
4890  *
4891  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4892  *		have already been initialized for the current target and
4893  *		that the current values be passed as args so that we don't
4894  *		end up ever trying to use -1 as a valid value. This could
4895  *		happen if either value is reset while we're not holding
4896  *		the mutex.
4897  *
4898  *   Arguments: un - driver soft state (unit) structure
4899  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4900  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4901  *			to use the USCSI "direct" chain and bypass the normal
4902  *			command waitq.
4903  *
4904  *     Context: Kernel thread only (can sleep).
4905  */
4906 
4907 static void
4908 sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
4909 	int capacity, int lbasize, int path_flag)
4910 {
4911 	struct	mode_format	*page3p;
4912 	struct	mode_geometry	*page4p;
4913 	struct	mode_header	*headerp;
4914 	int	sector_size;
4915 	int	nsect;
4916 	int	nhead;
4917 	int	ncyl;
4918 	int	intrlv;
4919 	int	spc;
4920 	int	modesense_capacity;
4921 	int	rpm;
4922 	int	bd_len;
4923 	int	mode_header_length;
4924 	uchar_t	*p3bufp;
4925 	uchar_t	*p4bufp;
4926 	int	cdbsize;
4927 
4928 	ASSERT(un != NULL);
4929 	ASSERT(!(mutex_owned(SD_MUTEX(un))));
4930 
4931 	if (un->un_f_blockcount_is_valid != TRUE) {
4932 		return;
4933 	}
4934 
4935 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
4936 		return;
4937 	}
4938 
4939 	if (lbasize == 0) {
4940 		if (ISCD(un)) {
4941 			lbasize = 2048;
4942 		} else {
4943 			lbasize = un->un_sys_blocksize;
4944 		}
4945 	}
4946 	pgeom_p->g_secsize = (unsigned short)lbasize;
4947 
4948 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4949 
4950 	/*
4951 	 * Retrieve MODE SENSE page 3 - Format Device Page
4952 	 */
4953 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4954 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4955 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4956 	    != 0) {
4957 		SD_ERROR(SD_LOG_COMMON, un,
4958 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4959 		goto page3_exit;
4960 	}
4961 
4962 	/*
4963 	 * Determine size of Block Descriptors in order to locate the mode
4964 	 * page data.  ATAPI devices return 0, SCSI devices should return
4965 	 * MODE_BLK_DESC_LENGTH.
4966 	 */
4967 	headerp = (struct mode_header *)p3bufp;
4968 	if (un->un_f_cfg_is_atapi == TRUE) {
4969 		struct mode_header_grp2 *mhp =
4970 		    (struct mode_header_grp2 *)headerp;
4971 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4972 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4973 	} else {
4974 		mode_header_length = MODE_HEADER_LENGTH;
4975 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4976 	}
4977 
4978 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4979 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4980 		    "received unexpected bd_len of %d, page3\n", bd_len);
4981 		goto page3_exit;
4982 	}
4983 
4984 	page3p = (struct mode_format *)
4985 	    ((caddr_t)headerp + mode_header_length + bd_len);
4986 
4987 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4988 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4989 		    "mode sense pg3 code mismatch %d\n",
4990 		    page3p->mode_page.code);
4991 		goto page3_exit;
4992 	}
4993 
4994 	/*
4995 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4996 	 * complete successfully; otherwise, revert to the logical geometry.
4997 	 * So, we need to save everything in temporary variables.
4998 	 */
4999 	sector_size = BE_16(page3p->data_bytes_sect);
5000 
5001 	/*
5002 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
5003 	 */
5004 	if (sector_size == 0) {
5005 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
5006 	} else {
5007 		sector_size &= ~(un->un_sys_blocksize - 1);
5008 	}
5009 
5010 	nsect  = BE_16(page3p->sect_track);
5011 	intrlv = BE_16(page3p->interleave);
5012 
5013 	SD_INFO(SD_LOG_COMMON, un,
5014 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
5015 	SD_INFO(SD_LOG_COMMON, un,
5016 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
5017 	    page3p->mode_page.code, nsect, sector_size);
5018 	SD_INFO(SD_LOG_COMMON, un,
5019 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
5020 	    BE_16(page3p->track_skew),
5021 	    BE_16(page3p->cylinder_skew));
5022 
5023 
5024 	/*
5025 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
5026 	 */
5027 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
5028 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
5029 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
5030 	    != 0) {
5031 		SD_ERROR(SD_LOG_COMMON, un,
5032 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
5033 		goto page4_exit;
5034 	}
5035 
5036 	/*
5037 	 * Determine size of Block Descriptors in order to locate the mode
5038 	 * page data.  ATAPI devices return 0, SCSI devices should return
5039 	 * MODE_BLK_DESC_LENGTH.
5040 	 */
5041 	headerp = (struct mode_header *)p4bufp;
5042 	if (un->un_f_cfg_is_atapi == TRUE) {
5043 		struct mode_header_grp2 *mhp =
5044 		    (struct mode_header_grp2 *)headerp;
5045 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5046 	} else {
5047 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5048 	}
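
	/*
	 * mode_header_length still holds the value computed during the
	 * page 3 parse above; the header format is the same for both
	 * MODE SENSE commands, so it need not be recomputed here.
	 */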
5049 
5050 	if (bd_len > MODE_BLK_DESC_LENGTH) {
5051 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5052 		    "received unexpected bd_len of %d, page4\n", bd_len);
5053 		goto page4_exit;
5054 	}
5055 
5056 	page4p = (struct mode_geometry *)
5057 	    ((caddr_t)headerp + mode_header_length + bd_len);
5058 
5059 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
5060 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5061 		    "mode sense pg4 code mismatch %d\n",
5062 		    page4p->mode_page.code);
5063 		goto page4_exit;
5064 	}
5065 
5066 	/*
5067 	 * Stash the data now, after we know that both commands completed.
5068 	 */
5069 
5070 	mutex_enter(SD_MUTEX(un));
5071 
5072 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
5073 	spc   = nhead * nsect;
5074 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
5075 	rpm   = BE_16(page4p->rpm);
5076 
5077 	modesense_capacity = spc * ncyl;
5078 
5079 	SD_INFO(SD_LOG_COMMON, un,
5080 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
5081 	SD_INFO(SD_LOG_COMMON, un,
5082 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
5083 	SD_INFO(SD_LOG_COMMON, un,
5084 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
5085 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
5086 	    (void *)pgeom_p, capacity);
5087 
5088 	/*
5089 	 * Compensate if the drive's geometry is not rectangular, i.e.,
5090 	 * the product of C * H * S returned by MODE SENSE >= that returned
5091 	 * by read capacity. This is an idiosyncrasy of the original x86
5092 	 * disk subsystem.
5093 	 */
5094 	if (modesense_capacity >= capacity) {
5095 		SD_INFO(SD_LOG_COMMON, un,
5096 		    "sd_get_physical_geometry: adjusting acyl; "
5097 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5098 		    (modesense_capacity - capacity + spc - 1) / spc);
5099 		if (sector_size != 0) {
5100 			/* 1243403: NEC D38x7 drives don't support sec size */
5101 			pgeom_p->g_secsize = (unsigned short)sector_size;
5102 		}
5103 		pgeom_p->g_nsect    = (unsigned short)nsect;
5104 		pgeom_p->g_nhead    = (unsigned short)nhead;
5105 		pgeom_p->g_capacity = capacity;
5106 		pgeom_p->g_acyl	    =
5107 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5108 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
5109 	}
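
	/*
	 * Worked example (figures illustrative only): if MODE SENSE reports
	 * 16383 cyls, 16 heads and 63 sectors (16,514,064 blocks) while READ
	 * CAPACITY reports 16,000,000 blocks, then spc = 1008 and
	 *
	 *	acyl = (514064 + 1007) / 1008 = 510
	 *	ncyl = 16383 - 510 = 15873
	 *
	 * so that g_ncyl * spc (15,999,984) stays within the reported
	 * capacity.
	 */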
5110 
5111 	pgeom_p->g_rpm    = (unsigned short)rpm;
5112 	pgeom_p->g_intrlv = (unsigned short)intrlv;
5113 
5114 	SD_INFO(SD_LOG_COMMON, un,
5115 	    "sd_get_physical_geometry: mode sense geometry:\n");
5116 	SD_INFO(SD_LOG_COMMON, un,
5117 	    "   nsect: %d; sector size: %d; interlv: %d\n",
5118 	    nsect, sector_size, intrlv);
5119 	SD_INFO(SD_LOG_COMMON, un,
5120 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5121 	    nhead, ncyl, rpm, modesense_capacity);
5122 	SD_INFO(SD_LOG_COMMON, un,
5123 	    "sd_get_physical_geometry: (cached)\n");
5124 	SD_INFO(SD_LOG_COMMON, un,
5125 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5126 	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
5127 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
5128 	SD_INFO(SD_LOG_COMMON, un,
5129 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5130 	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
5131 	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);
5132 
5133 	mutex_exit(SD_MUTEX(un));
5134 
5135 page4_exit:
5136 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5137 page3_exit:
5138 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5139 }
5140 
5141 
5142 /*
5143  *    Function: sd_get_virtual_geometry
5144  *
5145  * Description: Ask the controller to tell us about the target device.
5146  *
5147  *   Arguments: un - pointer to softstate
5148  *		capacity - disk capacity in #blocks
5149  *		lbasize - disk block size in bytes
5150  *
5151  *     Context: Kernel thread only
5152  */
5153 
5154 static void
5155 sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5156 {
5157 	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5158 	uint_t	geombuf;
5159 	int	spc;
5160 
5161 	ASSERT(un != NULL);
5162 	ASSERT(mutex_owned(SD_MUTEX(un)));
5163 
5164 	mutex_exit(SD_MUTEX(un));
5165 
5166 	/* Set sector size, and total number of sectors */
5167 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5168 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5169 
5170 	/* Let the HBA tell us its geometry */
5171 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5172 
5173 	mutex_enter(SD_MUTEX(un));
5174 
5175 	/* A value of -1 indicates an undefined "geometry" property */
5176 	if (geombuf == (-1)) {
5177 		return;
5178 	}
5179 
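	/*
	 * The "geometry" capability packs the HBA's head count into the
	 * upper 16 bits of the returned value and its sectors-per-track
	 * count into the lower 16 bits; e.g. 255 heads and 63 sectors
	 * arrive as 0x00FF003F, which the decode below unpacks.
	 */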
5180 	/* Initialize the logical geometry cache. */
5181 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5182 	lgeom_p->g_nsect   = geombuf & 0xffff;
5183 	lgeom_p->g_secsize = un->un_sys_blocksize;
5184 
5185 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5186 
5187 	/*
5188 	 * Note: The driver originally converted the capacity value from
5189 	 * target blocks to system blocks. However, the capacity value passed
5190 	 * to this routine is already in terms of system blocks (this scaling
5191 	 * is done when the READ CAPACITY command is issued and processed).
5192 	 * This 'error' may have gone undetected because the usage of g_ncyl
5193 	 * (which is based upon g_capacity) is very limited within the driver
5194 	 */
5195 	lgeom_p->g_capacity = capacity;
5196 
5197 	/*
5198 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value. The
5199 	 * HBA may return zero values if the device has been removed.
5200 	 */
5201 	if (spc == 0) {
5202 		lgeom_p->g_ncyl = 0;
5203 	} else {
5204 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5205 	}
5206 	lgeom_p->g_acyl = 0;
5207 
5208 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5209 	SD_INFO(SD_LOG_COMMON, un,
5210 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5211 	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5212 	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5213 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5214 	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5215 	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5216 }
5217 
5218 
5219 /*
5220  *    Function: sd_update_block_info
5221  *
5222  * Description: Record the new target sector size and capacity in the
5223  *		soft state, marking each value as valid when it is nonzero.
5224  *
5225  *   Arguments: un: unit struct.
5226  *		lbasize: new target sector size
5227  *		capacity: new target capacity, i.e. block count
5228  *
5229  *     Context: Kernel thread context
5230  */
5231 
5232 static void
5233 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5234 {
5235 	if (lbasize != 0) {
5236 		un->un_tgt_blocksize = lbasize;
5237 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5238 	}
5239 
5240 	if (capacity != 0) {
5241 		un->un_blockcount		= capacity;
5242 		un->un_f_blockcount_is_valid	= TRUE;
5243 	}
5244 }
5245 
5246 
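/*
 * On-disk GPT structures are little-endian per the EFI specification, so
 * the LE_*() and UUID_LE_CONVERT() conversions below are no-ops on
 * little-endian hosts and byte swaps on big-endian (SPARC) hosts.
 */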
5247 static void
5248 sd_swap_efi_gpt(efi_gpt_t *e)
5249 {
5250 	_NOTE(ASSUMING_PROTECTED(*e))
5251 	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5252 	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5253 	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5254 	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5255 	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5256 	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5257 	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5258 	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5259 	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5260 	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5261 	e->efi_gpt_NumberOfPartitionEntries =
5262 	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5263 	e->efi_gpt_SizeOfPartitionEntry =
5264 	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5265 	e->efi_gpt_PartitionEntryArrayCRC32 =
5266 	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5267 }
5268 
5269 static void
5270 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5271 {
5272 	int i;
5273 
5274 	_NOTE(ASSUMING_PROTECTED(*p))
5275 	for (i = 0; i < nparts; i++) {
5276 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5277 		    p[i].efi_gpe_PartitionTypeGUID);
5278 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5279 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5280 		/* PartitionAttrs */
5281 	}
5282 }
5283 
5284 static int
5285 sd_validate_efi(efi_gpt_t *labp)
5286 {
5287 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5288 		return (EINVAL);
5289 	/* at least 96 bytes in this version of the spec. */
5290 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5291 	    labp->efi_gpt_HeaderSize)
5292 		return (EINVAL);
5293 	/* this should be 128 bytes */
5294 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5295 		return (EINVAL);
5296 	return (0);
5297 }
5298 
5299 static int
5300 sd_use_efi(struct sd_lun *un, int path_flag)
5301 {
5302 	int		i;
5303 	int		rval = 0;
5304 	efi_gpe_t	*partitions;
5305 	uchar_t		*buf;
5306 	uint_t		lbasize;
5307 	uint64_t	cap;
5308 	uint_t		nparts;
5309 	diskaddr_t	gpe_lba;
5310 
5311 	ASSERT(mutex_owned(SD_MUTEX(un)));
5312 	lbasize = un->un_tgt_blocksize;
5313 
5314 	mutex_exit(SD_MUTEX(un));
5315 
5316 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5317 
5318 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5319 		rval = EINVAL;
5320 		goto done_err;
5321 	}
5322 
5323 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5324 	if (rval) {
5325 		goto done_err;
5326 	}
5327 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5328 		/* not ours */
5329 		rval = ESRCH;
5330 		goto done_err;
5331 	}
5332 
5333 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5334 	if (rval) {
5335 		goto done_err;
5336 	}
5337 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5338 
5339 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5340 		/*
5341 		 * Couldn't read the primary, try the backup.  Our
5342 		 * capacity at this point could be based on CHS, so
5343 		 * check what the device reports.
5344 		 */
5345 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5346 		    path_flag);
5347 		if (rval) {
5348 			goto done_err;
5349 		}
5350 
5351 		/*
5352 		 * The MMC standard allows READ CAPACITY to be
5353 		 * inaccurate by a bounded amount (in the interest of
5354 		 * response latency).  As a result, failed READs are
5355 		 * commonplace (due to the reading of metadata and not
5356 		 * data). Depending on the per-Vendor/drive Sense data,
5357 		 * the failed READ can cause many (unnecessary) retries.
5358 		 */
5359 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5360 		    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5361 			path_flag)) != 0) {
5362 				goto done_err;
5363 		}
5364 
5365 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5366 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5367 
5368 			/*
5369 			 * Refer to comments related to off-by-1 at the
5370 			 * header of this file. Search the next-to-last
5371 			 * block for the backup EFI label.
5372 			 */
5373 			if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5374 			    cap - 2, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5375 				path_flag)) != 0) {
5376 					goto done_err;
5377 			}
5378 			sd_swap_efi_gpt((efi_gpt_t *)buf);
5379 			if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5380 				goto done_err;
5381 		}
5382 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5383 		    "primary label corrupt; using backup\n");
5384 	}
5385 
5386 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5387 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5388 
5389 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5390 	    path_flag);
5391 	if (rval) {
5392 		goto done_err;
5393 	}
5394 	partitions = (efi_gpe_t *)buf;
5395 
5396 	if (nparts > MAXPART) {
5397 		nparts = MAXPART;
5398 	}
5399 	sd_swap_efi_gpe(nparts, partitions);
5400 
5401 	mutex_enter(SD_MUTEX(un));
5402 
5403 	/* Fill in partition table. */
5404 	for (i = 0; i < nparts; i++) {
5405 		if (partitions->efi_gpe_StartingLBA != 0 ||
5406 		    partitions->efi_gpe_EndingLBA != 0) {
5407 			un->un_map[i].dkl_cylno =
5408 			    partitions->efi_gpe_StartingLBA;
5409 			un->un_map[i].dkl_nblk =
5410 			    partitions->efi_gpe_EndingLBA -
5411 			    partitions->efi_gpe_StartingLBA + 1;
5412 			un->un_offset[i] =
5413 			    partitions->efi_gpe_StartingLBA;
5414 		}
5415 		if (i == WD_NODE) {
5416 			/*
5417 			 * minor number 7 corresponds to the whole disk
5418 			 */
5419 			un->un_map[i].dkl_cylno = 0;
5420 			un->un_map[i].dkl_nblk = un->un_blockcount;
5421 			un->un_offset[i] = 0;
5422 		}
5423 		partitions++;
5424 	}
5425 	un->un_solaris_offset = 0;
5426 	un->un_solaris_size = cap;
5427 	un->un_f_geometry_is_valid = TRUE;
5428 
5429 	/* clear the vtoc label */
5430 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5431 
5432 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5433 	return (0);
5434 
5435 done_err:
5436 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5437 	mutex_enter(SD_MUTEX(un));
5438 	/*
5439 	 * if we didn't find something that could look like a VTOC
5440 	 * and the disk is over 1TB, we know there isn't a valid label.
5441 	 * Otherwise let sd_uselabel decide what to do.  We only
5442 	 * want to invalidate this if we're certain the label isn't
5443 	 * valid because sd_prop_op will now fail, which in turn
5444 	 * causes things like opens and stats on the partition to fail.
5445 	 */
5446 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5447 		un->un_f_geometry_is_valid = FALSE;
5448 	}
5449 	return (rval);
5450 }
5451 
5452 
5453 /*
5454  *    Function: sd_uselabel
5455  *
5456  * Description: Validate the disk label and update the relevant data (geometry,
5457  *		partition, vtoc, and capacity data) in the sd_lun struct.
5458  *		Marks the geometry of the unit as being valid.
5459  *
5460  *   Arguments: un: unit struct.
5461  *		dk_label: disk label
5462  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5463  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5464  *			to use the USCSI "direct" chain and bypass the normal
5465  *			command waitq.
5466  *
5467  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5468  *		partition, vtoc, and capacity data are good.
5469  *
5470  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5471  *		label; or computed capacity does not jibe with capacity
5472  *		reported from the READ CAPACITY command.
5473  *
5474  *     Context: Kernel thread only (can sleep).
5475  */
5476 
5477 static int
5478 sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
5479 {
5480 	short	*sp;
5481 	short	sum;
5482 	short	count;
5483 	int	label_error = SD_LABEL_IS_VALID;
5484 	int	i;
5485 	int	capacity;
5486 	int	part_end;
5487 	int	track_capacity;
5488 	int	err;
5489 #if defined(_SUNOS_VTOC_16)
5490 	struct	dkl_partition	*vpartp;
5491 #endif
5492 	ASSERT(un != NULL);
5493 	ASSERT(mutex_owned(SD_MUTEX(un)));
5494 
5495 	/* Validate the magic number of the label. */
5496 	if (labp->dkl_magic != DKL_MAGIC) {
5497 #if defined(__sparc)
5498 		if ((un->un_state == SD_STATE_NORMAL) &&
5499 			un->un_f_vtoc_errlog_supported) {
5500 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5501 			    "Corrupt label; wrong magic number\n");
5502 		}
5503 #endif
5504 		return (SD_LABEL_IS_INVALID);
5505 	}
5506 
5507 	/* Validate the checksum of the label. */
5508 	sp  = (short *)labp;
5509 	sum = 0;
5510 	count = sizeof (struct dk_label) / sizeof (short);
5511 	while (count--)	 {
5512 		sum ^= *sp++;
5513 	}
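	/*
	 * A label writer stores dkl_cksum as the XOR of every other 16-bit
	 * word in the structure, so the XOR across the entire label
	 * (checksum word included) must come out to zero here if the label
	 * is intact.
	 */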
5514 
5515 	if (sum != 0) {
5516 #if	defined(_SUNOS_VTOC_16)
5517 		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
5518 #elif defined(_SUNOS_VTOC_8)
5519 		if ((un->un_state == SD_STATE_NORMAL) &&
5520 		    un->un_f_vtoc_errlog_supported) {
5521 #endif
5522 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5523 			    "Corrupt label - label checksum failed\n");
5524 		}
5525 		return (SD_LABEL_IS_INVALID);
5526 	}
5527 
5528 
5529 	/*
5530 	 * Fill in geometry structure with data from label.
5531 	 */
5532 	bzero(&un->un_g, sizeof (struct dk_geom));
5533 	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
5534 	un->un_g.dkg_acyl   = labp->dkl_acyl;
5535 	un->un_g.dkg_bcyl   = 0;
5536 	un->un_g.dkg_nhead  = labp->dkl_nhead;
5537 	un->un_g.dkg_nsect  = labp->dkl_nsect;
5538 	un->un_g.dkg_intrlv = labp->dkl_intrlv;
5539 
5540 #if defined(_SUNOS_VTOC_8)
5541 	un->un_g.dkg_gap1   = labp->dkl_gap1;
5542 	un->un_g.dkg_gap2   = labp->dkl_gap2;
5543 	un->un_g.dkg_bhead  = labp->dkl_bhead;
5544 #endif
5545 #if defined(_SUNOS_VTOC_16)
5546 	un->un_dkg_skew = labp->dkl_skew;
5547 #endif
5548 
5549 #if defined(__i386) || defined(__amd64)
5550 	un->un_g.dkg_apc = labp->dkl_apc;
5551 #endif
5552 
5553 	/*
5554 	 * Currently we rely on the values in the label being accurate. If
5555 	 * dkl_rpm or dkl_pcyl is zero in the label, use a default value.
5556 	 *
5557 	 * Note: In the future a MODE SENSE may be used to retrieve this data,
5558 	 * although this command is optional in SCSI-2.
5559 	 */
5560 	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
5561 	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
5562 	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);
5563 
5564 	/*
5565 	 * The Read and Write reinstruct values may not be valid
5566 	 * for older disks.
5567 	 */
5568 	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
5569 	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;
5570 
5571 	/* Fill in partition table. */
5572 #if defined(_SUNOS_VTOC_8)
5573 	for (i = 0; i < NDKMAP; i++) {
5574 		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
5575 		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
5576 	}
5577 #endif
5578 #if  defined(_SUNOS_VTOC_16)
5579 	vpartp		= labp->dkl_vtoc.v_part;
5580 	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;
5581 
5582 	/* Prevent divide by zero */
5583 	if (track_capacity == 0) {
5584 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5585 		    "Corrupt label - zero nhead or nsect value\n");
5586 
5587 		return (SD_LABEL_IS_INVALID);
5588 	}
5589 
5590 	for (i = 0; i < NDKMAP; i++, vpartp++) {
5591 		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
5592 		un->un_map[i].dkl_nblk  = vpartp->p_size;
5593 	}
5594 #endif
5595 
5596 	/* Fill in VTOC Structure. */
5597 	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
5598 #if defined(_SUNOS_VTOC_8)
5599 	/*
5600 	 * The 8-slice vtoc does not include the ascii label; save it into
5601 	 * the device's soft state structure here.
5602 	 */
5603 	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
5604 #endif
5605 
5606 	/* Now look for a valid capacity. */
5607 	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
5608 	capacity	= (un->un_g.dkg_ncyl  * track_capacity);
5609 
5610 	if (un->un_g.dkg_acyl) {
5611 #if defined(__i386) || defined(__amd64)
5612 		/* we may have more than one alternate cylinder */
5613 		capacity += (track_capacity * un->un_g.dkg_acyl);
5614 #else
5615 		capacity += track_capacity;
5616 #endif
5617 	}
5618 
5619 	/*
5620 	 * Force a check here to ensure the computed capacity is valid.
5621 	 * A capacity of zero indicates an invalid label, in which
5622 	 * case we abort without updating the relevant data.
5623 	 */
5624 	if (capacity == 0) {
5625 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5626 		    "Corrupt label - no valid capacity could be retrieved\n");
5627 
5628 		return (SD_LABEL_IS_INVALID);
5629 	}
5630 
5631 	/* Mark the geometry as valid. */
5632 	un->un_f_geometry_is_valid = TRUE;
5633 
5634 	/*
5635 	 * At this point, un->un_blockcount should contain valid data from
5636 	 * the READ CAPACITY command.
5637 	 */
5638 	if (un->un_f_blockcount_is_valid != TRUE) {
5639 		/*
5640 		 * We have a situation where the target didn't give us a good
5641 		 * READ CAPACITY value, yet there appears to be a valid label.
5642 		 * In this case, we'll fake the capacity.
5643 		 */
5644 		un->un_blockcount = capacity;
5645 		un->un_f_blockcount_is_valid = TRUE;
5646 		goto done;
5647 	}
5648 
5649 
5650 	if ((capacity <= un->un_blockcount) ||
5651 	    (un->un_state != SD_STATE_NORMAL)) {
5652 #if defined(_SUNOS_VTOC_8)
5653 		/*
5654 		 * We can't let this happen on drives that are subdivided
5655 		 * into logical disks (i.e., that have an fdisk table).
5656 		 * The un_blockcount field should always hold the full media
5657 		 * size in sectors, period.  This code would overwrite
5658 		 * un_blockcount with the size of the Solaris fdisk partition.
5659 		 */
5660 		SD_ERROR(SD_LOG_COMMON, un,
5661 		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
5662 		    capacity, un->un_blockcount);
5663 		un->un_blockcount = capacity;
5664 		un->un_f_blockcount_is_valid = TRUE;
5665 #endif	/* defined(_SUNOS_VTOC_8) */
5666 		goto done;
5667 	}
5668 
5669 	if (ISCD(un)) {
5670 		/* For CDROMs, we trust that the data in the label is OK. */
5671 #if defined(_SUNOS_VTOC_8)
5672 		for (i = 0; i < NDKMAP; i++) {
5673 			part_end = labp->dkl_nhead * labp->dkl_nsect *
5674 			    labp->dkl_map[i].dkl_cylno +
5675 			    labp->dkl_map[i].dkl_nblk  - 1;
5676 
5677 			if ((labp->dkl_map[i].dkl_nblk) &&
5678 			    (part_end > un->un_blockcount)) {
5679 				un->un_f_geometry_is_valid = FALSE;
5680 				break;
5681 			}
5682 		}
5683 #endif
5684 #if defined(_SUNOS_VTOC_16)
5685 		vpartp = &(labp->dkl_vtoc.v_part[0]);
5686 		for (i = 0; i < NDKMAP; i++, vpartp++) {
5687 			part_end = vpartp->p_start + vpartp->p_size;
5688 			if ((vpartp->p_size > 0) &&
5689 			    (part_end > un->un_blockcount)) {
5690 				un->un_f_geometry_is_valid = FALSE;
5691 				break;
5692 			}
5693 		}
5694 #endif
5695 	} else {
5696 		uint64_t t_capacity;
5697 		uint32_t t_lbasize;
5698 
5699 		mutex_exit(SD_MUTEX(un));
5700 		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
5701 		    path_flag);
5702 		ASSERT(t_capacity <= DK_MAX_BLOCKS);
5703 		mutex_enter(SD_MUTEX(un));
5704 
5705 		if (err == 0) {
5706 			sd_update_block_info(un, t_lbasize, t_capacity);
5707 		}
5708 
5709 		if (capacity > un->un_blockcount) {
5710 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5711 			    "Corrupt label - bad geometry\n");
5712 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
5713 			    "Label says %u blocks; Drive says %llu blocks\n",
5714 			    capacity, (unsigned long long)un->un_blockcount);
5715 			un->un_f_geometry_is_valid = FALSE;
5716 			label_error = SD_LABEL_IS_INVALID;
5717 		}
5718 	}
5719 
5720 done:
5721 
5722 	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
5723 	SD_INFO(SD_LOG_COMMON, un,
5724 	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
5725 	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
5726 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5727 	SD_INFO(SD_LOG_COMMON, un,
5728 	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
5729 	    un->un_tgt_blocksize, un->un_blockcount,
5730 	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
5731 	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
5732 	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);
5733 
5734 	ASSERT(mutex_owned(SD_MUTEX(un)));
5735 
5736 	return (label_error);
5737 }
5738 
5739 
5740 /*
5741  *    Function: sd_build_default_label
5742  *
5743  * Description: Generate a default label for those devices that do not have
5744  *		one, e.g., new media, removable cartridges, etc..
5745  *
5746  *     Context: Kernel thread only
5747  */
5748 
5749 static void
5750 sd_build_default_label(struct sd_lun *un)
5751 {
5752 #if defined(_SUNOS_VTOC_16)
5753 	uint_t	phys_spc;
5754 	uint_t	disksize;
5755 	struct	dk_geom un_g;
5756 	uint64_t capacity;
5757 #endif
5758 
5759 	ASSERT(un != NULL);
5760 	ASSERT(mutex_owned(SD_MUTEX(un)));
5761 
5762 #if defined(_SUNOS_VTOC_8)
5763 	/*
5764 	 * Note: This is a legacy check for non-removable devices on VTOC_8
5765 	 * only. This may be a valid check for VTOC_16 as well.
5766 	 * Once we understand why there is this difference between the SPARC
5767 	 * and x86 platforms, we could remove this legacy check.
5768 	 */
5769 	ASSERT(un->un_f_default_vtoc_supported);
5770 #endif
5771 
5772 	bzero(&un->un_g, sizeof (struct dk_geom));
5773 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5774 	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
5775 
5776 #if defined(_SUNOS_VTOC_8)
5777 
5778 	/*
5779 	 * It's REMOVABLE media, therefore there is no label (on sparc, anyway).
5780 	 * But it is still necessary to set up various geometry information,
5781 	 * and we do that here.
5782 	 */
5783 
5784 	/*
5785 	 * For the rpm, we use the minimum for the disk.  For the head, cyl,
5786 	 * and number of sectors per track: if the capacity is <= 1GB, use
5787 	 * head = 64 and sect = 32; else use head = 255 and sect = 63.
5788 	 * Note: the capacity should equal the C*H*S product, so this causes
5789 	 * some truncation of size due to round-off errors.  For CD-ROMs this
5790 	 * truncation can have adverse side effects, so we return ncyl and
5791 	 * nhead as 1 (nsect, a ushort, would overflow for most CD-ROMs). (4190569)
5792 	 */
5793 	if (ISCD(un)) {
5794 		/*
5795 		 * Preserve the old behavior for non-writable
5796 		 * media.  Since dkg_nsect is a ushort, it
5797 		 * will lose bits, as CD-ROMs have more than
5798 		 * 65536 sectors, so if we recalculated the
5799 		 * capacity it would become much smaller.
5800 		 * The dkg_* information is not used for
5801 		 * CD-ROMs, so that is acceptable; but for
5802 		 * writable CDs we need this information
5803 		 * to be valid (for newfs, say).  So we
5804 		 * make nsect and nhead > 1, and that way
5805 		 * nsect can still stay within the ushort
5806 		 * limit without losing any bits.
5807 		 */
5808 		if (un->un_f_mmc_writable_media == TRUE) {
5809 			un->un_g.dkg_nhead = 64;
5810 			un->un_g.dkg_nsect = 32;
5811 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
5812 			un->un_blockcount = un->un_g.dkg_ncyl *
5813 			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5814 		} else {
5815 			un->un_g.dkg_ncyl  = 1;
5816 			un->un_g.dkg_nhead = 1;
5817 			un->un_g.dkg_nsect = un->un_blockcount;
5818 		}
5819 	} else {
5820 		if (un->un_blockcount <= 0x1000) {
5821 			/* unlabeled SCSI floppy device */
5822 			un->un_g.dkg_nhead = 2;
5823 			un->un_g.dkg_ncyl = 80;
5824 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
5825 		} else if (un->un_blockcount <= 0x200000) {
5826 			un->un_g.dkg_nhead = 64;
5827 			un->un_g.dkg_nsect = 32;
5828 			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
5829 		} else {
5830 			un->un_g.dkg_nhead = 255;
5831 			un->un_g.dkg_nsect = 63;
5832 			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
5833 		}
5834 		un->un_blockcount =
5835 		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5836 	}
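	/*
	 * Worked example: a 0x200000-block (1GB at 512 bytes/block) disk
	 * lands in the middle bucket above, giving nhead = 64, nsect = 32
	 * and ncyl = 0x200000 / (64 * 32) = 1024; the recomputation of
	 * un_blockcount as 1024 * 64 * 32 is lossless in this case.
	 */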
5837 
5838 	un->un_g.dkg_acyl	= 0;
5839 	un->un_g.dkg_bcyl	= 0;
5840 	un->un_g.dkg_rpm	= 200;
5841 	un->un_asciilabel[0]	= '\0';
5842 	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;
5843 
5844 	un->un_map[0].dkl_cylno = 0;
5845 	un->un_map[0].dkl_nblk  = un->un_blockcount;
5846 	un->un_map[2].dkl_cylno = 0;
5847 	un->un_map[2].dkl_nblk  = un->un_blockcount;
5848 
5849 #elif defined(_SUNOS_VTOC_16)
5850 
5851 	if (un->un_solaris_size == 0) {
5852 		/*
5853 		 * We have an fdisk table but no Solaris entry, so
5854 		 * don't create a default label.
5855 		 */
5856 		un->un_f_geometry_is_valid = TRUE;
5857 		return;
5858 	}
5859 
5860 	/*
5861 	 * For CDs we continue to use the physical geometry to calculate
5862 	 * the number of cylinders. All other devices must convert the
5863 	 * physical geometry (geom_cache) to values that will fit
5864 	 * in a dk_geom structure.
5865 	 */
5866 	if (ISCD(un)) {
5867 		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
5868 	} else {
5869 		/* Convert physical geometry to disk geometry */
5870 		bzero(&un_g, sizeof (struct dk_geom));
5871 
5872 		/*
5873 		 * Refer to comments related to off-by-1 at the
5874 		 * header of this file.
5875 		 * Before calculating geometry, the capacity should be
5876 		 * decreased by 1.  If un_f_capacity_adjusted is TRUE,
5877 		 * we are treating a 1TB disk as (1T - 512)B, meaning
5878 		 * the capacity of the disk has already been decreased
5879 		 * by 1, so no further adjustment is needed here.
5880 		 */
5881 		if (!un->un_f_capacity_adjusted &&
5882 		    !un->un_f_has_removable_media &&
5883 		    !un->un_f_is_hotpluggable &&
5884 			un->un_tgt_blocksize == un->un_sys_blocksize)
5885 			capacity = un->un_blockcount - 1;
5886 		else
5887 			capacity = un->un_blockcount;
5888 
5889 		sd_convert_geometry(capacity, &un_g);
5890 		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
5891 		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5892 	}
5893 
5894 	ASSERT(phys_spc != 0);
5895 	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
5896 	un->un_g.dkg_acyl = DK_ACYL;
5897 	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
5898 	disksize = un->un_g.dkg_ncyl * phys_spc;
5899 
5900 	if (ISCD(un)) {
5901 		/*
5902 		 * CDs don't use the "heads * sectors * cyls" type of
5903 		 * geometry, but instead use the entire capacity of the media.
5904 		 */
5905 		disksize = un->un_solaris_size;
5906 		un->un_g.dkg_nhead = 1;
5907 		un->un_g.dkg_nsect = 1;
5908 		un->un_g.dkg_rpm =
5909 		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;
5910 
5911 		un->un_vtoc.v_part[0].p_start = 0;
5912 		un->un_vtoc.v_part[0].p_size  = disksize;
5913 		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
5914 		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;
5915 
5916 		un->un_map[0].dkl_cylno = 0;
5917 		un->un_map[0].dkl_nblk  = disksize;
5918 		un->un_offset[0] = 0;
5919 
5920 	} else {
5921 		/*
5922 		 * Hard disks and removable media cartridges
5923 		 */
5924 		un->un_g.dkg_rpm =
5925 		    (un->un_pgeom.g_rpm == 0) ? 3600: un->un_pgeom.g_rpm;
5926 		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;
5927 
5928 		/* Add boot slice */
5929 		un->un_vtoc.v_part[8].p_start = 0;
5930 		un->un_vtoc.v_part[8].p_size  = phys_spc;
5931 		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
5932 		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;
5933 
5934 		un->un_map[8].dkl_cylno = 0;
5935 		un->un_map[8].dkl_nblk  = phys_spc;
5936 		un->un_offset[8] = 0;
5937 	}
5938 
5939 	un->un_g.dkg_apc = 0;
5940 	un->un_vtoc.v_nparts = V_NUMPAR;
5941 	un->un_vtoc.v_version = V_VERSION;
5942 
5943 	/* Add backup slice */
5944 	un->un_vtoc.v_part[2].p_start = 0;
5945 	un->un_vtoc.v_part[2].p_size  = disksize;
5946 	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
5947 	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;
5948 
5949 	un->un_map[2].dkl_cylno = 0;
5950 	un->un_map[2].dkl_nblk  = disksize;
5951 	un->un_offset[2] = 0;
5952 
5953 	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
5954 	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
5955 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5956 
5957 #else
5958 #error "No VTOC format defined."
5959 #endif
5960 
5961 	un->un_g.dkg_read_reinstruct  = 0;
5962 	un->un_g.dkg_write_reinstruct = 0;
5963 
5964 	un->un_g.dkg_intrlv = 1;
5965 
5966 	un->un_vtoc.v_sanity  = VTOC_SANE;
5967 
5968 	un->un_f_geometry_is_valid = TRUE;
5969 
5970 	SD_INFO(SD_LOG_COMMON, un,
5971 	    "sd_build_default_label: Default label created: "
5972 	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
5973 	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
5974 	    un->un_g.dkg_nsect, un->un_blockcount);
5975 }
5976 
5977 
5978 #if defined(_FIRMWARE_NEEDS_FDISK)
5979 /*
5980  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5981  */
5982 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5983 #define	LBA_MAX_CYL	(1022 & 0xFF)
5984 #define	LBA_MAX_HEAD	(254)
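
/*
 * In an fdisk partition entry the sector byte carries the sector number
 * in its low 6 bits and cylinder bits 8-9 in its top 2 bits, so
 * LBA_MAX_SECT evaluates to 0xFF (63 | 0xC0) and LBA_MAX_CYL to 0xFE,
 * together encoding cylinder 1022, head 254, sector 63.
 */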
5985 
5986 
5987 /*
5988  *    Function: sd_has_max_chs_vals
5989  *
5990  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
5991  *
5992  *   Arguments: fdp - ptr to CHS info
5993  *
5994  * Return Code: True or false
5995  *
5996  *     Context: Any.
5997  */
5998 
5999 static int
6000 sd_has_max_chs_vals(struct ipart *fdp)
6001 {
6002 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
6003 	    (fdp->beghead == LBA_MAX_HEAD)	&&
6004 	    (fdp->begsect == LBA_MAX_SECT)	&&
6005 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
6006 	    (fdp->endhead == LBA_MAX_HEAD)	&&
6007 	    (fdp->endsect == LBA_MAX_SECT));
6008 }
6009 #endif
6010 
6011 
6012 /*
6013  *    Function: sd_inq_fill
6014  *
6015  * Description: Print a piece of inquiry data, cleaned up for non-printable
6016  *		characters and stopping at the first space character after
6017  *		the beginning of the passed string.
6018  *
6019  *   Arguments: p - source string
6020  *		l - maximum length to copy
6021  *		s - destination string
6022  *
6023  *     Context: Any.
6024  */
6025 
6026 static void
6027 sd_inq_fill(char *p, int l, char *s)
6028 {
6029 	unsigned i = 0;
6030 	char c;
6031 
6032 	while (i++ < l) {
6033 		if ((c = *p++) < ' ' || c >= 0x7F) {
6034 			c = '*';
6035 		} else if (i != 1 && c == ' ') {
6036 			break;
6037 		}
6038 		*s++ = c;
6039 	}
6040 	*s++ = 0;
6041 }
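
/*
 * Illustrative use (the local buffer name is hypothetical): copy the
 * 8-byte inquiry vendor id into a clean, NUL-terminated string.  Note
 * that the destination must hold l + 1 bytes:
 *
 *	char vid[9];
 *
 *	sd_inq_fill(SD_INQUIRY(un)->inq_vid, 8, vid);
 */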
6042 
6043 
6044 /*
6045  *    Function: sd_register_devid
6046  *
6047  * Description: This routine will obtain the device id information from the
6048  *		target, obtain the serial number, and register the device
6049  *		id with the ddi framework.
6050  *
6051  *   Arguments: devi - the system's dev_info_t for the device.
6052  *		un - driver soft state (unit) structure
6053  *		reservation_flag - indicates if a reservation conflict
6054  *		occurred during attach
6055  *
6056  *     Context: Kernel Thread
6057  */
6058 static void
6059 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
6060 {
6061 	int		rval		= 0;
6062 	uchar_t		*inq80		= NULL;
6063 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
6064 	size_t		inq80_resid	= 0;
6065 	uchar_t		*inq83		= NULL;
6066 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
6067 	size_t		inq83_resid	= 0;
6068 
6069 	ASSERT(un != NULL);
6070 	ASSERT(mutex_owned(SD_MUTEX(un)));
6071 	ASSERT((SD_DEVINFO(un)) == devi);
6072 
6073 	/*
6074 	 * This is the case of antiquated Sun disk drives that have the
6075 	 * FAB_DEVID property set in the disk_table.  These drives
6076 	 * manage their devids by storing them in the last 2 available sectors
6077 	 * on the drive and have them fabricated by the ddi layer by calling
6078 	 * ddi_devid_init and passing the DEVID_FAB flag.
6079 	 */
6080 	if (un->un_f_opt_fab_devid == TRUE) {
6081 		/*
6082 		 * Depending on EINVAL isn't reliable, since a reserved disk
6083 		 * may result in invalid geometry, so check to make sure a
6084 		 * reservation conflict did not occur during attach.
6085 		 */
6086 		if ((sd_get_devid(un) == EINVAL) &&
6087 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
6088 			/*
6089 			 * The devid is invalid AND there is no reservation
6090 			 * conflict.  Fabricate a new devid.
6091 			 */
6092 			(void) sd_create_devid(un);
6093 		}
6094 
6095 		/* Register the devid if it exists */
6096 		if (un->un_devid != NULL) {
6097 			(void) ddi_devid_register(SD_DEVINFO(un),
6098 			    un->un_devid);
6099 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6100 			    "sd_register_devid: Devid Fabricated\n");
6101 		}
6102 		return;
6103 	}
6104 
6105 	/*
6106 	 * We check the availability of the World Wide Name (0x83) and Unit
6107 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
6108 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
6109 	 * 0x83 is available, that is the best choice.  Our next choice is
6110 	 * 0x80.  If neither is available, we munge the devid from the device
6111 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
6112 	 * to fabricate a devid for non-Sun qualified disks.
6113 	 */
6114 	if (sd_check_vpd_page_support(un) == 0) {
6115 		/* collect page 80 data if available */
6116 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
6117 
6118 			mutex_exit(SD_MUTEX(un));
6119 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
6120 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
6121 			    0x01, 0x80, &inq80_resid);
6122 
6123 			if (rval != 0) {
6124 				kmem_free(inq80, inq80_len);
6125 				inq80 = NULL;
6126 				inq80_len = 0;
6127 			}
6128 			mutex_enter(SD_MUTEX(un));
6129 		}
6130 
6131 		/* collect page 83 data if available */
6132 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
6133 			mutex_exit(SD_MUTEX(un));
6134 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
6135 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
6136 			    0x01, 0x83, &inq83_resid);
6137 
6138 			if (rval != 0) {
6139 				kmem_free(inq83, inq83_len);
6140 				inq83 = NULL;
6141 				inq83_len = 0;
6142 			}
6143 			mutex_enter(SD_MUTEX(un));
6144 		}
6145 	}
6146 
6147 	/* encode best devid possible based on data available */
6148 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
6149 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
6150 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
6151 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
6152 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
6153 
6154 		/* devid successfully encoded, register devid */
6155 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
6156 
6157 	} else {
6158 		/*
6159 		 * Unable to encode a devid based on data available.
6160 		 * This is not a Sun qualified disk.  Older Sun disk
6161 		 * drives that have the SD_FAB_DEVID property
6162 		 * set in the disk_table and non Sun qualified
6163 		 * disks are treated in the same manner.  These
6164 		 * drives manage their devids by storing them in
6165 		 * the last 2 available sectors on the drive and
6166 		 * have them fabricated by the ddi layer by
6167 		 * calling ddi_devid_init and passing the
6168 		 * DEVID_FAB flag.
6169 		 * Create a fabricated devid only if one does
6170 		 * not already exist.
6171 		 */
6172 		if (sd_get_devid(un) == EINVAL) {
6173 			(void) sd_create_devid(un);
6174 			un->un_f_opt_fab_devid = TRUE;
6175 		}
6176 
6177 		/* Register the devid if it exists */
6178 		if (un->un_devid != NULL) {
6179 			(void) ddi_devid_register(SD_DEVINFO(un),
6180 			    un->un_devid);
6181 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6182 			    "sd_register_devid: devid fabricated using "
6183 			    "ddi framework\n");
6184 		}
6185 	}
6186 
6187 	/* clean up resources */
6188 	if (inq80 != NULL) {
6189 		kmem_free(inq80, inq80_len);
6190 	}
6191 	if (inq83 != NULL) {
6192 		kmem_free(inq83, inq83_len);
6193 	}
6194 }
6195 
6196 static daddr_t
6197 sd_get_devid_block(struct sd_lun *un)
6198 {
6199 	daddr_t			spc, blk, head, cyl;
6200 
6201 	if (un->un_blockcount <= DK_MAX_BLOCKS) {
6202 		/* this geometry doesn't allow us to write a devid */
6203 		if (un->un_g.dkg_acyl < 2) {
6204 			return (-1);
6205 		}
6206 
6207 		/*
6208 		 * Subtracting 2 guarantees that the next-to-last
6209 		 * cylinder is used.
6210 		 */
6211 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6212 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6213 		head = un->un_g.dkg_nhead - 1;
6214 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6215 		    (head * un->un_g.dkg_nsect) + 1;
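		/*
		 * That is, the second sector on the last track of the
		 * next-to-last cylinder: the trailing "+ 1" skips that
		 * track's first sector.
		 */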
6216 	} else {
6217 		if (un->un_reserved != -1) {
6218 			blk = un->un_map[un->un_reserved].dkl_cylno + 1;
6219 		} else {
6220 			return (-1);
6221 		}
6222 	}
6223 	return (blk);
6224 }
6225 
6226 /*
6227  *    Function: sd_get_devid
6228  *
6229  * Description: This routine will return 0 if a valid device id has been
6230  *		obtained from the target and stored in the soft state. If a
6231  *		valid device id has not been previously read and stored, a
6232  *		read attempt will be made.
6233  *
6234  *   Arguments: un - driver soft state (unit) structure
6235  *
6236  * Return Code: 0 if we successfully get the device id
6237  *
6238  *     Context: Kernel Thread
6239  */
6240 
6241 static int
6242 sd_get_devid(struct sd_lun *un)
6243 {
6244 	struct dk_devid		*dkdevid;
6245 	ddi_devid_t		tmpid;
6246 	uint_t			*ip;
6247 	size_t			sz;
6248 	daddr_t			blk;
6249 	int			status;
6250 	int			chksum;
6251 	int			i;
6252 	size_t			buffer_size;
6253 
6254 	ASSERT(un != NULL);
6255 	ASSERT(mutex_owned(SD_MUTEX(un)));
6256 
6257 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6258 	    un);
6259 
6260 	if (un->un_devid != NULL) {
6261 		return (0);
6262 	}
6263 
6264 	blk = sd_get_devid_block(un);
6265 	if (blk < 0)
6266 		return (EINVAL);
6267 
6268 	/*
6269 	 * Read and verify device id, stored in the reserved cylinders at the
6270 	 * end of the disk. The backup label is on the odd sectors of the last
6271 	 * track of the last cylinder. The device id will be on the track of
6272 	 * the next-to-last cylinder.
6273 	 */
6274 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6275 	mutex_exit(SD_MUTEX(un));
6276 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6277 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6278 	    SD_PATH_DIRECT);
6279 	if (status != 0) {
6280 		goto error;
6281 	}
6282 
6283 	/* Validate the revision */
6284 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6285 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6286 		status = EINVAL;
6287 		goto error;
6288 	}
6289 
6290 	/* Calculate the checksum */
6291 	chksum = 0;
6292 	ip = (uint_t *)dkdevid;
6293 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6294 	    i++) {
6295 		chksum ^= ip[i];
6296 	}
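	/*
	 * The XOR runs over every 32-bit word in the sector except the
	 * last one, which holds the stored checksum that DKD_GETCHKSUM()
	 * fetches below.
	 */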
6297 
6298 	/* Compare the checksums */
6299 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6300 		status = EINVAL;
6301 		goto error;
6302 	}
6303 
6304 	/* Validate the device id */
6305 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6306 		status = EINVAL;
6307 		goto error;
6308 	}
6309 
6310 	/*
6311 	 * Store the device id in the driver soft state
6312 	 */
6313 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6314 	tmpid = kmem_alloc(sz, KM_SLEEP);
6315 
6316 	mutex_enter(SD_MUTEX(un));
6317 
6318 	un->un_devid = tmpid;
6319 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6320 
6321 	kmem_free(dkdevid, buffer_size);
6322 
6323 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6324 
6325 	return (status);
6326 error:
6327 	mutex_enter(SD_MUTEX(un));
6328 	kmem_free(dkdevid, buffer_size);
6329 	return (status);
6330 }
6331 
6332 
6333 /*
6334  *    Function: sd_create_devid
6335  *
6336  * Description: This routine will fabricate the device id and write it
6337  *		to the disk.
6338  *
6339  *   Arguments: un - driver soft state (unit) structure
6340  *
6341  * Return Code: the fabricated device id, or NULL on failure
6342  *
6343  *     Context: Kernel Thread
6344  */
6345 
6346 static ddi_devid_t
6347 sd_create_devid(struct sd_lun *un)
6348 {
6349 	ASSERT(un != NULL);
6350 
6351 	/* Fabricate the devid */
6352 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6353 	    == DDI_FAILURE) {
6354 		return (NULL);
6355 	}
6356 
6357 	/* Write the devid to disk */
6358 	if (sd_write_deviceid(un) != 0) {
6359 		ddi_devid_free(un->un_devid);
6360 		un->un_devid = NULL;
6361 	}
6362 
6363 	return (un->un_devid);
6364 }
6365 
6366 
6367 /*
6368  *    Function: sd_write_deviceid
6369  *
6370  * Description: This routine will write the device id to the disk
6371  *		reserved sector.
6372  *
6373  *   Arguments: un - driver soft state (unit) structure
6374  *
6375  * Return Code: -1 if the devid block cannot be located
6376  *		value returned by sd_send_scsi_WRITE otherwise
6377  *
6378  *     Context: Kernel Thread
6379  */
6380 
6381 static int
6382 sd_write_deviceid(struct sd_lun *un)
6383 {
6384 	struct dk_devid		*dkdevid;
6385 	daddr_t			blk;
6386 	uint_t			*ip, chksum;
6387 	int			status;
6388 	int			i;
6389 
6390 	ASSERT(mutex_owned(SD_MUTEX(un)));
6391 
6392 	blk = sd_get_devid_block(un);
6393 	if (blk < 0)
6394 		return (-1);
6395 	mutex_exit(SD_MUTEX(un));
6396 
6397 	/* Allocate the buffer */
6398 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6399 
6400 	/* Fill in the revision */
6401 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6402 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6403 
6404 	/* Copy in the device id */
6405 	mutex_enter(SD_MUTEX(un));
6406 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6407 	    ddi_devid_sizeof(un->un_devid));
6408 	mutex_exit(SD_MUTEX(un));
6409 
6410 	/* Calculate the checksum */
6411 	chksum = 0;
6412 	ip = (uint_t *)dkdevid;
6413 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6414 	    i++) {
6415 		chksum ^= ip[i];
6416 	}
6417 
6418 	/* Fill-in checksum */
6419 	DKD_FORMCHKSUM(chksum, dkdevid);
6420 
6421 	/* Write the reserved sector */
6422 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6423 	    SD_PATH_DIRECT);
6424 
6425 	kmem_free(dkdevid, un->un_sys_blocksize);
6426 
6427 	mutex_enter(SD_MUTEX(un));
6428 	return (status);
6429 }
6430 
6431 
6432 /*
6433  *    Function: sd_check_vpd_page_support
6434  *
6435  * Description: This routine sends an inquiry command with the EVPD bit set and
6436  *		a page code of 0x00 to the device. It is used to determine which
6437  *		vital product pages are available for finding the devid. We
6438  *		are looking for pages 0x83 or 0x80.  If we return -1, the
6439  *		device does not support that command.
6440  *
6441  *   Arguments: un  - driver soft state (unit) structure
6442  *
6443  * Return Code: 0 - success
6444  *		-1 - the device does not support VPD pages
6445  *
6446  *     Context: This routine can sleep.
6447  */
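/*
 * For reference, the Supported VPD Pages response parsed below is laid out
 * as follows (a sketch of the standard INQUIRY EVPD page 0x00 format):
 *
 *	byte 0		peripheral qualifier / device type
 *	byte 1		page code (0x00)
 *	byte 2		reserved
 *	byte 3		page length (n)
 *	bytes 4..n+3	supported page codes, in ascending order
 */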
6448 
6449 static int
6450 sd_check_vpd_page_support(struct sd_lun *un)
6451 {
6452 	uchar_t	*page_list	= NULL;
6453 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6454 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6455 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6456 	int    	rval		= 0;
6457 	int	counter;
6458 
6459 	ASSERT(un != NULL);
6460 	ASSERT(mutex_owned(SD_MUTEX(un)));
6461 
6462 	mutex_exit(SD_MUTEX(un));
6463 
6464 	/*
6465 	 * We'll set the page length to the maximum to save figuring it out
6466 	 * with an additional call.
6467 	 */
6468 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6469 
6470 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6471 	    page_code, NULL);
6472 
6473 	mutex_enter(SD_MUTEX(un));
6474 
6475 	/*
6476 	 * Now we must validate that the device accepted the command, as some
6477 	 * drives do not support it.  If the drive does support it, we will
6478 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6479 	 * not, we return -1.
6480 	 */
6481 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6482 		/* Loop to find one of the 2 pages we need */
6483 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6484 
6485 		/*
6486 		 * Pages are returned in ascending order, and 0x83 is what we
6487 		 * are hoping for.
6488 		 */
6489 		while ((page_list[counter] <= 0x83) &&
6490 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6491 		    VPD_HEAD_OFFSET))) {
6492 			/*
6493 			 * page_list[VPD_PAGE_LENGTH] is the length of the
6494 			 * page-code list, bounding the loop index above.
6495 			 */
6496 
6497 			switch (page_list[counter]) {
6498 			case 0x00:
6499 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6500 				break;
6501 			case 0x80:
6502 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6503 				break;
6504 			case 0x81:
6505 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6506 				break;
6507 			case 0x82:
6508 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6509 				break;
6510 			case 0x83:
6511 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6512 				break;
6513 			}
6514 			counter++;
6515 		}
6516 
6517 	} else {
6518 		rval = -1;
6519 
6520 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6521 		    "sd_check_vpd_page_support: This drive does not implement "
6522 		    "VPD pages.\n");
6523 	}
6524 
6525 	kmem_free(page_list, page_length);
6526 
6527 	return (rval);
6528 }
6529 
6530 
6531 /*
6532  *    Function: sd_setup_pm
6533  *
6534  * Description: Initialize Power Management on the device
6535  *
6536  *     Context: Kernel Thread
6537  */
6538 
6539 static void
6540 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6541 {
6542 	uint_t	log_page_size;
6543 	uchar_t	*log_page_data;
6544 	int	rval;
6545 
6546 	/*
6547 	 * Since we are called from attach, holding a mutex for
6548 	 * un is unnecessary. Because some of the routines called
6549 	 * from here require SD_MUTEX to not be held, assert this
6550 	 * right up front.
6551 	 */
6552 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6553 	/*
6554 	 * Since the sd device does not have the 'reg' property,
6555 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6556 	 * The following code is to tell cpr that this device
6557 	 * DOES need to be suspended and resumed.
6558 	 */
6559 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6560 	    "pm-hardware-state", "needs-suspend-resume");
6561 
6562 	/*
6563 	 * This complies with the new power management framework
6564 	 * for certain desktop machines. Create the pm_components
6565 	 * property as a string array property.
6566 	 */
6567 	if (un->un_f_pm_supported) {
6568 		/*
6569 		 * Not all devices have a motor, so try it first. Some
6570 		 * devices may return ILLEGAL REQUEST, and some will
6571 		 * hang.
6572 		 * The following START_STOP_UNIT is used to check whether
6573 		 * the target device has a motor.
6574 		 */
6575 		un->un_f_start_stop_supported = TRUE;
6576 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6577 		    SD_PATH_DIRECT) != 0) {
6578 			un->un_f_start_stop_supported = FALSE;
6579 		}
6580 
6581 		/*
6582 		 * Create the pm properties anyway; otherwise the parent
6583 		 * can't go to sleep.
6584 		 */
6585 		(void) sd_create_pm_components(devi, un);
6586 		un->un_f_pm_is_enabled = TRUE;
6587 		return;
6588 	}
6589 
6590 	if (!un->un_f_log_sense_supported) {
6591 		un->un_power_level = SD_SPINDLE_ON;
6592 		un->un_f_pm_is_enabled = FALSE;
6593 		return;
6594 	}
6595 
6596 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6597 
6598 #ifdef	SDDEBUG
6599 	if (sd_force_pm_supported) {
6600 		/* Force a successful result */
6601 		rval = 1;
6602 	}
6603 #endif
6604 
6605 	/*
6606 	 * If the start-stop cycle counter log page is not supported
6607 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6608 	 * then we should not create the pm_components property.
6609 	 */
6610 	if (rval == -1) {
6611 		/*
6612 		 * Error.
6613 		 * Reading log sense failed, most likely this is
6614 		 * an older drive that does not support log sense.
6615 		 * If this fails auto-pm is not supported.
6616 		 */
6617 		un->un_power_level = SD_SPINDLE_ON;
6618 		un->un_f_pm_is_enabled = FALSE;
6619 
6620 	} else if (rval == 0) {
6621 		/*
6622 		 * Page not found.
6623 		 * The start stop cycle counter is implemented as page
6624 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
6625 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6626 		 */
6627 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6628 			/*
6629 			 * Page found, use this one.
6630 			 */
6631 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6632 			un->un_f_pm_is_enabled = TRUE;
6633 		} else {
6634 			/*
6635 			 * Error or page not found.
6636 			 * auto-pm is not supported for this device.
6637 			 */
6638 			un->un_power_level = SD_SPINDLE_ON;
6639 			un->un_f_pm_is_enabled = FALSE;
6640 		}
6641 	} else {
6642 		/*
6643 		 * Page found, use it.
6644 		 */
6645 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6646 		un->un_f_pm_is_enabled = TRUE;
6647 	}
6648 
6649 
6650 	if (un->un_f_pm_is_enabled == TRUE) {
6651 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6652 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6653 
6654 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6655 		    log_page_size, un->un_start_stop_cycle_page,
6656 		    0x01, 0, SD_PATH_DIRECT);
6657 #ifdef	SDDEBUG
6658 		if (sd_force_pm_supported) {
6659 			/* Force a successful result */
6660 			rval = 0;
6661 		}
6662 #endif
6663 
6664 		/*
6665 		 * If the LOG SENSE for the start/stop cycle counter page
6666 		 * succeeds, then power management is supported and we can
6667 		 * enable auto-pm.
6668 		 */
6669 		if (rval == 0)  {
6670 			(void) sd_create_pm_components(devi, un);
6671 		} else {
6672 			un->un_power_level = SD_SPINDLE_ON;
6673 			un->un_f_pm_is_enabled = FALSE;
6674 		}
6675 
6676 		kmem_free(log_page_data, log_page_size);
6677 	}
6678 }
6679 
6680 
6681 /*
6682  *    Function: sd_create_pm_components
6683  *
6684  * Description: Initialize PM property.
6685  *
6686  *     Context: Kernel thread context
6687  */
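/*
 * For illustration, the pm-components property created below would appear
 * in prtconf(1M) output roughly as:
 *
 *	pm-components:	'NAME=spindle-motor' + '0=off' + '1=on'
 *
 * i.e. a single component (the spindle motor) with power level 0 meaning
 * off and power level 1 meaning on.
 */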
6688 
6689 static void
6690 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6691 {
6692 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6693 
6694 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6695 
6696 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6697 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6698 		/*
6699 		 * When components are initially created they are idle;
6700 		 * power up any non-removables.
6701 		 * Note: the return value of pm_raise_power can't be used
6702 		 * for determining if PM should be enabled for this device.
6703 		 * Even if you check the return values and remove this
6704 		 * property created above, the PM framework will not honor the
6705 		 * change after the first call to pm_raise_power. Hence,
6706 		 * removal of that property does not help if pm_raise_power
6707 		 * fails. In the case of removable media, the start/stop
6708 		 * will fail if the media is not present.
6709 		 */
6710 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6711 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6712 			mutex_enter(SD_MUTEX(un));
6713 			un->un_power_level = SD_SPINDLE_ON;
6714 			mutex_enter(&un->un_pm_mutex);
6715 			/* Set to on and not busy. */
6716 			un->un_pm_count = 0;
6717 		} else {
6718 			mutex_enter(SD_MUTEX(un));
6719 			un->un_power_level = SD_SPINDLE_OFF;
6720 			mutex_enter(&un->un_pm_mutex);
6721 			/* Set to off. */
6722 			un->un_pm_count = -1;
6723 		}
6724 		mutex_exit(&un->un_pm_mutex);
6725 		mutex_exit(SD_MUTEX(un));
6726 	} else {
6727 		un->un_power_level = SD_SPINDLE_ON;
6728 		un->un_f_pm_is_enabled = FALSE;
6729 	}
6730 }
6731 
6732 
6733 /*
6734  *    Function: sd_ddi_suspend
6735  *
6736  * Description: Performs system power-down operations. This includes
6737  *		setting the drive state to indicate it's suspended so
6738  *		that no new commands will be accepted. Also, wait for
6739  *		all commands that are in transport or queued to a timer
6740  *		for retry to complete. All timeout threads are cancelled.
6741  *
6742  * Return Code: DDI_FAILURE or DDI_SUCCESS
6743  *
6744  *     Context: Kernel thread context
6745  */
6746 
6747 static int
6748 sd_ddi_suspend(dev_info_t *devi)
6749 {
6750 	struct	sd_lun	*un;
6751 	clock_t		wait_cmds_complete;
6752 
6753 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6754 	if (un == NULL) {
6755 		return (DDI_FAILURE);
6756 	}
6757 
6758 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6759 
6760 	mutex_enter(SD_MUTEX(un));
6761 
6762 	/* Return success if the device is already suspended. */
6763 	if (un->un_state == SD_STATE_SUSPENDED) {
6764 		mutex_exit(SD_MUTEX(un));
6765 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6766 		    "device already suspended, exiting\n");
6767 		return (DDI_SUCCESS);
6768 	}
6769 
6770 	/* Return failure if the device is being used by HA */
6771 	if (un->un_resvd_status &
6772 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6773 		mutex_exit(SD_MUTEX(un));
6774 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6775 		    "device in use by HA, exiting\n");
6776 		return (DDI_FAILURE);
6777 	}
6778 
6779 	/*
6780 	 * Return failure if the device is in a resource wait
6781 	 * or power changing state.
6782 	 */
6783 	if ((un->un_state == SD_STATE_RWAIT) ||
6784 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6785 		mutex_exit(SD_MUTEX(un));
6786 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6787 		    "device in resource wait state, exiting\n");
6788 		return (DDI_FAILURE);
6789 	}
6790 
6791 
6792 	un->un_save_state = un->un_last_state;
6793 	New_state(un, SD_STATE_SUSPENDED);
6794 
6795 	/*
6796 	 * Wait for all commands that are in transport or queued to a timer
6797 	 * for retry to complete.
6798 	 *
6799 	 * While waiting, no new commands will be accepted or sent because of
6800 	 * the new state we set above.
6801 	 *
6802 	 * Wait till current operation has completed. If we are in the resource
6803 	 * wait state (with an intr outstanding) then we need to wait till the
6804 	 * intr completes and starts the next cmd. We want to wait for
6805 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6806 	 */
6807 	wait_cmds_complete = ddi_get_lbolt() +
6808 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
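	/*
	 * Note: cv_timedwait(9F) takes an absolute wakeup time expressed in
	 * lbolt ticks, hence the ddi_get_lbolt() plus seconds-to-ticks
	 * conversion above; it returns -1 if that time passes without the
	 * condition variable being signalled.
	 */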
6809 
6810 	while (un->un_ncmds_in_transport != 0) {
6811 		/*
6812 		 * Fail if commands do not finish in the specified time.
6813 		 */
6814 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6815 		    wait_cmds_complete) == -1) {
6816 			/*
6817 			 * Undo the state changes made above. Everything
6818 			 * must go back to its original value.
6819 			 */
6820 			Restore_state(un);
6821 			un->un_last_state = un->un_save_state;
6822 			/* Wake up any threads that might be waiting. */
6823 			cv_broadcast(&un->un_suspend_cv);
6824 			mutex_exit(SD_MUTEX(un));
6825 			SD_ERROR(SD_LOG_IO_PM, un,
6826 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6827 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6828 			return (DDI_FAILURE);
6829 		}
6830 	}
6831 
6832 	/*
6833 	 * Cancel SCSI watch thread and timeouts, if any are active
6834 	 */
6835 
6836 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6837 		opaque_t temp_token = un->un_swr_token;
6838 		mutex_exit(SD_MUTEX(un));
6839 		scsi_watch_suspend(temp_token);
6840 		mutex_enter(SD_MUTEX(un));
6841 	}
6842 
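	/*
	 * Note on the pattern repeated below: the timeout id is captured
	 * and cleared under SD_MUTEX, and the mutex is dropped before
	 * calling untimeout(9F). Each handler acquires SD_MUTEX itself,
	 * so calling untimeout() with the mutex held could deadlock
	 * against a handler that has already fired and is blocked on the
	 * mutex. A sketch of the idiom (un_foo_timeid is a hypothetical
	 * field, standing in for the real ones below):
	 *
	 *	timeout_id_t temp_id = un->un_foo_timeid;
	 *	un->un_foo_timeid = NULL;
	 *	mutex_exit(SD_MUTEX(un));
	 *	(void) untimeout(temp_id);
	 *	mutex_enter(SD_MUTEX(un));
	 */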
6843 	if (un->un_reset_throttle_timeid != NULL) {
6844 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6845 		un->un_reset_throttle_timeid = NULL;
6846 		mutex_exit(SD_MUTEX(un));
6847 		(void) untimeout(temp_id);
6848 		mutex_enter(SD_MUTEX(un));
6849 	}
6850 
6851 	if (un->un_dcvb_timeid != NULL) {
6852 		timeout_id_t temp_id = un->un_dcvb_timeid;
6853 		un->un_dcvb_timeid = NULL;
6854 		mutex_exit(SD_MUTEX(un));
6855 		(void) untimeout(temp_id);
6856 		mutex_enter(SD_MUTEX(un));
6857 	}
6858 
6859 	mutex_enter(&un->un_pm_mutex);
6860 	if (un->un_pm_timeid != NULL) {
6861 		timeout_id_t temp_id = un->un_pm_timeid;
6862 		un->un_pm_timeid = NULL;
6863 		mutex_exit(&un->un_pm_mutex);
6864 		mutex_exit(SD_MUTEX(un));
6865 		(void) untimeout(temp_id);
6866 		mutex_enter(SD_MUTEX(un));
6867 	} else {
6868 		mutex_exit(&un->un_pm_mutex);
6869 	}
6870 
6871 	if (un->un_retry_timeid != NULL) {
6872 		timeout_id_t temp_id = un->un_retry_timeid;
6873 		un->un_retry_timeid = NULL;
6874 		mutex_exit(SD_MUTEX(un));
6875 		(void) untimeout(temp_id);
6876 		mutex_enter(SD_MUTEX(un));
6877 	}
6878 
6879 	if (un->un_direct_priority_timeid != NULL) {
6880 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6881 		un->un_direct_priority_timeid = NULL;
6882 		mutex_exit(SD_MUTEX(un));
6883 		(void) untimeout(temp_id);
6884 		mutex_enter(SD_MUTEX(un));
6885 	}
6886 
6887 	if (un->un_f_is_fibre == TRUE) {
6888 		/*
6889 		 * Remove callbacks for insert and remove events
6890 		 */
6891 		if (un->un_insert_event != NULL) {
6892 			mutex_exit(SD_MUTEX(un));
6893 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6894 			mutex_enter(SD_MUTEX(un));
6895 			un->un_insert_event = NULL;
6896 		}
6897 
6898 		if (un->un_remove_event != NULL) {
6899 			mutex_exit(SD_MUTEX(un));
6900 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6901 			mutex_enter(SD_MUTEX(un));
6902 			un->un_remove_event = NULL;
6903 		}
6904 	}
6905 
6906 	mutex_exit(SD_MUTEX(un));
6907 
6908 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6909 
6910 	return (DDI_SUCCESS);
6911 }
6912 
6913 
6914 /*
6915  *    Function: sd_ddi_pm_suspend
6916  *
6917  * Description: Set the drive state to low power.
6918  *		Someone else is required to actually change the drive
6919  *		power level.
6920  *
6921  *   Arguments: un - driver soft state (unit) structure
6922  *
6923  * Return Code: DDI_FAILURE or DDI_SUCCESS
6924  *
6925  *     Context: Kernel thread context
6926  */
6927 
6928 static int
6929 sd_ddi_pm_suspend(struct sd_lun *un)
6930 {
6931 	ASSERT(un != NULL);
6932 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6933 
6934 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6935 	mutex_enter(SD_MUTEX(un));
6936 
6937 	/*
6938 	 * Exit if power management is not enabled for this device, or if
6939 	 * the device is being used by HA.
6940 	 */
6941 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6942 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6943 		mutex_exit(SD_MUTEX(un));
6944 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6945 		return (DDI_SUCCESS);
6946 	}
6947 
6948 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6949 	    un->un_ncmds_in_driver);
6950 
6951 	/*
6952 	 * See if the device is not busy, i.e.:
6953 	 *    - we have no commands in the driver for this device
6954 	 *    - not waiting for resources
6955 	 */
6956 	if ((un->un_ncmds_in_driver == 0) &&
6957 	    (un->un_state != SD_STATE_RWAIT)) {
6958 		/*
6959 		 * The device is not busy, so it is OK to go to low power state.
6960 		 * Indicate low power, but rely on someone else to actually
6961 		 * change it.
6962 		 */
6963 		mutex_enter(&un->un_pm_mutex);
6964 		un->un_pm_count = -1;
6965 		mutex_exit(&un->un_pm_mutex);
6966 		un->un_power_level = SD_SPINDLE_OFF;
6967 	}
6968 
6969 	mutex_exit(SD_MUTEX(un));
6970 
6971 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6972 
6973 	return (DDI_SUCCESS);
6974 }
6975 
6976 
6977 /*
6978  *    Function: sd_ddi_resume
6979  *
6980  * Description: Performs system power-up operations.
6981  *
6982  * Return Code: DDI_SUCCESS
6983  *		DDI_FAILURE
6984  *
6985  *     Context: Kernel thread context
6986  */
6987 
6988 static int
6989 sd_ddi_resume(dev_info_t *devi)
6990 {
6991 	struct	sd_lun	*un;
6992 
6993 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6994 	if (un == NULL) {
6995 		return (DDI_FAILURE);
6996 	}
6997 
6998 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6999 
7000 	mutex_enter(SD_MUTEX(un));
7001 	Restore_state(un);
7002 
7003 	/*
7004 	 * Restore the state which was saved, to give the
7005 	 * right state in un_last_state.
7006 	 */
7007 	un->un_last_state = un->un_save_state;
7008 	/*
7009 	 * Note: throttle comes back at full.
7010 	 * Also note: this MUST be done before calling pm_raise_power
7011 	 * otherwise the system can get hung in biowait. The scenario where
7012 	 * this'll happen is under cpr suspend. Writing of the system
7013 	 * state goes through sddump, which writes 0 to un_throttle. If
7014 	 * writing the system state then fails, for example if the partition is
7015 	 * too small, then cpr attempts a resume. If throttle isn't restored
7016 	 * from the saved value until after calling pm_raise_power then
7017 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
7018 	 * in biowait.
7019 	 */
7020 	un->un_throttle = un->un_saved_throttle;
7021 
7022 	/*
7023 	 * The chance of failure is very rare, as the only command done in the
7024 	 * power entry point is the START command on a transition from 0->1 or
7025 	 * unknown->1. Put the device in the SPINDLE_ON state irrespective of
7026 	 * the state in which suspend was done. Ignore the return value, as the
7027 	 * resume should not be failed. In the case of removable media the
7028 	 * media need not be inserted, so there is a chance that raise power
7029 	 * will fail with media not present.
7030 	 */
7031 	if (un->un_f_attach_spinup) {
7032 		mutex_exit(SD_MUTEX(un));
7033 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
7034 		mutex_enter(SD_MUTEX(un));
7035 	}
7036 
7037 	/*
7038 	 * Don't broadcast to the suspend cv and therefore possibly
7039 	 * start I/O until after power has been restored.
7040 	 */
7041 	cv_broadcast(&un->un_suspend_cv);
7042 	cv_broadcast(&un->un_state_cv);
7043 
7044 	/* restart thread */
7045 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
7046 		scsi_watch_resume(un->un_swr_token);
7047 	}
7048 
7049 #if (defined(__fibre))
7050 	if (un->un_f_is_fibre == TRUE) {
7051 		/*
7052 		 * Add callbacks for insert and remove events
7053 		 */
7054 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7055 			sd_init_event_callbacks(un);
7056 		}
7057 	}
7058 #endif
7059 
7060 	/*
7061 	 * Transport any pending commands to the target.
7062 	 *
7063 	 * If this is a low-activity device commands in queue will have to wait
7064 	 * until new commands come in, which may take a while. Also, we
7065 	 * specifically don't check un_ncmds_in_transport because we know that
7066 	 * there really are no commands in progress after the unit was
7067 	 * suspended and we could have reached the throttle level, been
7068 	 * suspended, and have no new commands coming in for a while. Highly
7069 	 * unlikely, but so is the low-activity disk scenario.
7070 	 */
7071 	ddi_xbuf_dispatch(un->un_xbuf_attr);
7072 
7073 	sd_start_cmds(un, NULL);
7074 	mutex_exit(SD_MUTEX(un));
7075 
7076 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
7077 
7078 	return (DDI_SUCCESS);
7079 }
7080 
7081 
7082 /*
7083  *    Function: sd_ddi_pm_resume
7084  *
7085  * Description: Set the drive state to powered on.
7086  *		Someone else is required to actually change the drive
7087  *		power level.
7088  *
7089  *   Arguments: un - driver soft state (unit) structure
7090  *
7091  * Return Code: DDI_SUCCESS
7092  *
7093  *     Context: Kernel thread context
7094  */
7095 
7096 static int
7097 sd_ddi_pm_resume(struct sd_lun *un)
7098 {
7099 	ASSERT(un != NULL);
7100 
7101 	ASSERT(!mutex_owned(SD_MUTEX(un)));
7102 	mutex_enter(SD_MUTEX(un));
7103 	un->un_power_level = SD_SPINDLE_ON;
7104 
7105 	ASSERT(!mutex_owned(&un->un_pm_mutex));
7106 	mutex_enter(&un->un_pm_mutex);
7107 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
7108 		un->un_pm_count++;
7109 		ASSERT(un->un_pm_count == 0);
7110 		/*
7111 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
7112 		 * un_suspend_cv is for a system resume, not a power management
7113 		 * device resume. (4297749)
7114 		 *	 cv_broadcast(&un->un_suspend_cv);
7115 		 */
7116 	}
7117 	mutex_exit(&un->un_pm_mutex);
7118 	mutex_exit(SD_MUTEX(un));
7119 
7120 	return (DDI_SUCCESS);
7121 }
7122 
7123 
7124 /*
7125  *    Function: sd_pm_idletimeout_handler
7126  *
7127  * Description: A timer routine that's active only while a device is busy.
7128  *		The purpose is to extend slightly the pm framework's busy
7129  *		view of the device to prevent busy/idle thrashing for
7130  *		back-to-back commands. Do this by comparing the current time
7131 	 *		to the time at which the last command completed; when the
7132  *		difference is greater than sd_pm_idletime, call
7133  *		pm_idle_component. In addition to indicating idle to the pm
7134  *		framework, update the chain type to again use the internal pm
7135  *		layers of the driver.
7136  *
7137  *   Arguments: arg - driver soft state (unit) structure
7138  *
7139  *     Context: Executes in a timeout(9F) thread context
7140  */
7141 
7142 static void
7143 sd_pm_idletimeout_handler(void *arg)
7144 {
7145 	struct sd_lun *un = arg;
7146 
7147 	time_t	now;
7148 
7149 	mutex_enter(&sd_detach_mutex);
7150 	if (un->un_detach_count != 0) {
7151 		/* Abort if the instance is detaching */
7152 		mutex_exit(&sd_detach_mutex);
7153 		return;
7154 	}
7155 	mutex_exit(&sd_detach_mutex);
7156 
7157 	now = ddi_get_time();
7158 	/*
7159 	 * Grab both mutexes, in the proper order, since we're accessing
7160 	 * both PM and softstate variables.
7161 	 */
7162 	mutex_enter(SD_MUTEX(un));
7163 	mutex_enter(&un->un_pm_mutex);
7164 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7165 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7166 		/*
7167 		 * Update the chain types.
7168 		 * This takes effect on the next new command received.
7169 		 */
7170 		if (un->un_f_non_devbsize_supported) {
7171 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7172 		} else {
7173 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7174 		}
7175 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7176 
7177 		SD_TRACE(SD_LOG_IO_PM, un,
7178 		    "sd_pm_idletimeout_handler: idling device\n");
7179 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7180 		un->un_pm_idle_timeid = NULL;
7181 	} else {
7182 		un->un_pm_idle_timeid =
7183 		    timeout(sd_pm_idletimeout_handler, un,
7184 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
7185 	}
7186 	mutex_exit(&un->un_pm_mutex);
7187 	mutex_exit(SD_MUTEX(un));
7188 }
7189 
7190 
7191 /*
7192  *    Function: sd_pm_timeout_handler
7193  *
7194  * Description: Callback to tell framework we are idle.
7195  *
7196  *     Context: timeout(9f) thread context.
7197  */
7198 
7199 static void
7200 sd_pm_timeout_handler(void *arg)
7201 {
7202 	struct sd_lun *un = arg;
7203 
7204 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7205 	mutex_enter(&un->un_pm_mutex);
7206 	un->un_pm_timeid = NULL;
7207 	mutex_exit(&un->un_pm_mutex);
7208 }
7209 
7210 
7211 /*
7212  *    Function: sdpower
7213  *
7214  * Description: PM entry point.
7215  *
7216  * Return Code: DDI_SUCCESS
7217  *		DDI_FAILURE
7218  *
7219  *     Context: Kernel thread context
7220  */
7221 
7222 static int
7223 sdpower(dev_info_t *devi, int component, int level)
7224 {
7225 	struct sd_lun	*un;
7226 	int		instance;
7227 	int		rval = DDI_SUCCESS;
7228 	uint_t		i, log_page_size, maxcycles, ncycles;
7229 	uchar_t		*log_page_data;
7230 	int		log_sense_page;
7231 	int		medium_present;
7232 	time_t		intvlp;
7233 	dev_t		dev;
7234 	struct pm_trans_data	sd_pm_tran_data;
7235 	uchar_t		save_state;
7236 	int		sval;
7237 	uchar_t		state_before_pm;
7238 	int		got_semaphore_here;
7239 
7240 	instance = ddi_get_instance(devi);
7241 
7242 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7243 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7244 	    component != 0) {
7245 		return (DDI_FAILURE);
7246 	}
7247 
7248 	dev = sd_make_device(SD_DEVINFO(un));
7249 
7250 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7251 
7252 	/*
7253 	 * Must synchronize power down with close.
7254 	 * Attempt to decrement/acquire the open/close semaphore,
7255 	 * but do NOT wait on it. If it's not greater than zero,
7256 	 * i.e. it can't be decremented without waiting, then
7257 	 * someone else, either open or close, already has it
7258 	 * and the try returns 0. Use that knowledge here to determine
7259 	 * if it's OK to change the device power level.
7260 	 * Also, only increment it on exit if it was decremented, i.e. gotten,
7261 	 * here.
7262 	 */
7263 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7264 
7265 	mutex_enter(SD_MUTEX(un));
7266 
7267 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7268 	    un->un_ncmds_in_driver);
7269 
7270 	/*
7271 	 * If un_ncmds_in_driver is non-zero, commands are already being
7272 	 * processed in the driver; if the semaphore was not gotten here,
7273 	 * an open or close is being processed. In either case the
7274 	 * concurrent request to go to low power can't be honored, so we
7275 	 * need to return failure.
7276 	 */
7277 	if ((level == SD_SPINDLE_OFF) &&
7278 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7279 		mutex_exit(SD_MUTEX(un));
7280 
7281 		if (got_semaphore_here != 0) {
7282 			sema_v(&un->un_semoclose);
7283 		}
7284 		SD_TRACE(SD_LOG_IO_PM, un,
7285 		    "sdpower: exit, device has queued cmds.\n");
7286 		return (DDI_FAILURE);
7287 	}
7288 
7289 	/*
7290 	 * If the state is OFFLINE, the disk is completely dead; in our case
7291 	 * we would have to power the disk on or off by sending commands,
7292 	 * which of course would fail anyway, so return here.
7293 	 *
7294 	 * Power changes to a device that's OFFLINE or SUSPENDED
7295 	 * are not allowed.
7296 	 */
7297 	if ((un->un_state == SD_STATE_OFFLINE) ||
7298 	    (un->un_state == SD_STATE_SUSPENDED)) {
7299 		mutex_exit(SD_MUTEX(un));
7300 
7301 		if (got_semaphore_here != 0) {
7302 			sema_v(&un->un_semoclose);
7303 		}
7304 		SD_TRACE(SD_LOG_IO_PM, un,
7305 		    "sdpower: exit, device is off-line.\n");
7306 		return (DDI_FAILURE);
7307 	}
7308 
7309 	/*
7310 	 * Change the device's state to indicate its power level
7311 	 * is being changed. Do this to prevent a power off in the
7312 	 * middle of commands, which is especially bad on devices
7313 	 * that are really powered off instead of just spun down.
7314 	 */
7315 	state_before_pm = un->un_state;
7316 	un->un_state = SD_STATE_PM_CHANGING;
7317 
7318 	mutex_exit(SD_MUTEX(un));
7319 
7320 	/*
7321 	 * If "pm-capable" property is set to TRUE by HBA drivers,
7322 	 * bypass the following checking, otherwise, check the log
7323 	 * sense information for this device
7324 	 */
7325 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
7326 		/*
7327 		 * Get the log sense information to understand whether the
7328 		 * power-cycle counts have gone beyond the threshold.
7329 		 */
7330 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7331 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7332 
7333 		mutex_enter(SD_MUTEX(un));
7334 		log_sense_page = un->un_start_stop_cycle_page;
7335 		mutex_exit(SD_MUTEX(un));
7336 
7337 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7338 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7339 #ifdef	SDDEBUG
7340 		if (sd_force_pm_supported) {
7341 			/* Force a successful result */
7342 			rval = 0;
7343 		}
7344 #endif
7345 		if (rval != 0) {
7346 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7347 			    "Log Sense Failed\n");
7348 			kmem_free(log_page_data, log_page_size);
7349 			/* Cannot support power management on those drives */
7350 
7351 			if (got_semaphore_here != 0) {
7352 				sema_v(&un->un_semoclose);
7353 			}
7354 			/*
7355 			 * On exit put the state back to its original value
7356 			 * and broadcast to anyone waiting for the power
7357 			 * change completion.
7358 			 */
7359 			mutex_enter(SD_MUTEX(un));
7360 			un->un_state = state_before_pm;
7361 			cv_broadcast(&un->un_suspend_cv);
7362 			mutex_exit(SD_MUTEX(un));
7363 			SD_TRACE(SD_LOG_IO_PM, un,
7364 			    "sdpower: exit, Log Sense Failed.\n");
7365 			return (DDI_FAILURE);
7366 		}
7367 
7368 		/*
7369 		 * From the page data, convert the essential information to
7370 		 * pm_trans_data
7371 		 */
7372 		maxcycles =
7373 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7374 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7375 
7376 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7377 
7378 		ncycles =
7379 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7380 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7381 
7382 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7383 
7384 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7385 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7386 			    log_page_data[8+i];
7387 		}
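		/*
		 * Offsets used above, per the start/stop cycle counter log
		 * page as consumed by this code (parameter values are
		 * big-endian):
		 *
		 *	bytes 0x08 ..		service date (DC_SCSI_MFR_LEN
		 *				bytes)
		 *	bytes 0x1c - 0x1f	maximum cycle count (lifemax)
		 *	bytes 0x24 - 0x27	accumulated start/stop cycles
		 */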
7388 
7389 		kmem_free(log_page_data, log_page_size);
7390 
7391 		/*
7392 		 * Call pm_trans_check routine to get the Ok from
7393 		 * the global policy
7394 		 */
7395 
7396 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7397 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7398 
7399 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7400 #ifdef	SDDEBUG
7401 		if (sd_force_pm_supported) {
7402 			/* Force a successful result */
7403 			rval = 1;
7404 		}
7405 #endif
7406 		switch (rval) {
7407 		case 0:
7408 			/*
7409 			 * Not OK to power cycle, or an error in the parameters
7410 			 * passed. The advised time to consider a power cycle
7411 			 * was returned in intvlp. Based on that parameter we
7412 			 * are supposed to pretend we are busy so that the pm
7413 			 * framework will never call our power entry point.
7414 			 * Because of that, install a timeout handler and wait
7415 			 * for the recommended time to elapse so that power
7416 			 * management can be effective again.
7417 			 *
7418 			 * To effect this behavior, call pm_busy_component to
7419 			 * indicate to the framework this device is busy.
7420 			 * By not adjusting un_pm_count, the rest of PM in
7421 			 * the driver will function normally, independent of
7422 			 * this; but because the framework is told the device
7423 			 * is busy it won't attempt powering down until it gets
7424 			 * a matching idle. The timeout handler sends this.
7425 			 * Note: sd_pm_entry can't be called here to do this
7426 			 * because sdpower may have been called as a result
7427 			 * of a call to pm_raise_power from within sd_pm_entry.
7428 			 *
7429 			 * If a timeout handler is already active then
7430 			 * don't install another.
7431 			 */
7432 			mutex_enter(&un->un_pm_mutex);
7433 			if (un->un_pm_timeid == NULL) {
7434 				un->un_pm_timeid =
7435 				    timeout(sd_pm_timeout_handler,
7436 				    un, intvlp * drv_usectohz(1000000));
7437 				mutex_exit(&un->un_pm_mutex);
7438 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7439 			} else {
7440 				mutex_exit(&un->un_pm_mutex);
7441 			}
7442 			if (got_semaphore_here != 0) {
7443 				sema_v(&un->un_semoclose);
7444 			}
7445 			/*
7446 			 * On exit put the state back to its original value
7447 			 * and broadcast to anyone waiting for the power
7448 			 * change completion.
7449 			 */
7450 			mutex_enter(SD_MUTEX(un));
7451 			un->un_state = state_before_pm;
7452 			cv_broadcast(&un->un_suspend_cv);
7453 			mutex_exit(SD_MUTEX(un));
7454 
7455 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7456 			    "trans check Failed, not ok to power cycle.\n");
7457 			return (DDI_FAILURE);
7458 
7459 		case -1:
7460 			if (got_semaphore_here != 0) {
7461 				sema_v(&un->un_semoclose);
7462 			}
7463 			/*
7464 			 * On exit put the state back to its original value
7465 			 * and broadcast to anyone waiting for the power
7466 			 * change completion.
7467 			 */
7468 			mutex_enter(SD_MUTEX(un));
7469 			un->un_state = state_before_pm;
7470 			cv_broadcast(&un->un_suspend_cv);
7471 			mutex_exit(SD_MUTEX(un));
7472 			SD_TRACE(SD_LOG_IO_PM, un,
7473 			    "sdpower: exit, trans check command Failed.\n");
7474 			return (DDI_FAILURE);
7475 		}
7476 	}
7477 
7478 	if (level == SD_SPINDLE_OFF) {
7479 		/*
7480 		 * Save the last state... if the STOP FAILS we need it
7481 		 * for restoring
7482 		 */
7483 		mutex_enter(SD_MUTEX(un));
7484 		save_state = un->un_last_state;
7485 		/*
7486 		 * There must not be any commands being processed
7487 		 * in the driver when we get here. Power to the
7488 		 * device is potentially going off.
7489 		 */
7490 		ASSERT(un->un_ncmds_in_driver == 0);
7491 		mutex_exit(SD_MUTEX(un));
7492 
7493 		/*
7494 		 * For now suspend the device completely before spindle is
7495 		 * turned off
7496 		 */
7497 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7498 			if (got_semaphore_here != 0) {
7499 				sema_v(&un->un_semoclose);
7500 			}
7501 			/*
7502 			 * On exit put the state back to its original value
7503 			 * and broadcast to anyone waiting for the power
7504 			 * change completion.
7505 			 */
7506 			mutex_enter(SD_MUTEX(un));
7507 			un->un_state = state_before_pm;
7508 			cv_broadcast(&un->un_suspend_cv);
7509 			mutex_exit(SD_MUTEX(un));
7510 			SD_TRACE(SD_LOG_IO_PM, un,
7511 			    "sdpower: exit, PM suspend Failed.\n");
7512 			return (DDI_FAILURE);
7513 		}
7514 	}
7515 
7516 	/*
7517 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7518 	 * close, or strategy. Dump no longer uses this routine; it uses its
7519 	 * own code so it can be done in polled mode.
7520 	 */
7521 
7522 	medium_present = TRUE;
7523 
7524 	/*
7525 	 * When powering up, issue a TUR in case the device is at unit
7526 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7527 	 * a deadlock on un_pm_busy_cv will occur.
7528 	 */
7529 	if (level == SD_SPINDLE_ON) {
7530 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7531 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7532 	}
7533 
7534 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7535 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7536 
7537 	sval = sd_send_scsi_START_STOP_UNIT(un,
7538 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7539 	    SD_PATH_DIRECT);
7540 	/* Command failed, check for media present. */
7541 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7542 		medium_present = FALSE;
7543 	}
7544 
7545 	/*
7546 	 * The conditions of interest here are:
7547 	 *   if a spindle off with media present fails,
7548 	 *	then restore the state and return an error.
7549 	 *   else if a spindle on fails,
7550 	 *	then return an error (there's no state to restore).
7551 	 * In all other cases we setup for the new state
7552 	 * and return success.
7553 	 */
7554 	switch (level) {
7555 	case SD_SPINDLE_OFF:
7556 		if ((medium_present == TRUE) && (sval != 0)) {
7557 			/* The stop command from above failed */
7558 			rval = DDI_FAILURE;
7559 			/*
7560 			 * The stop command failed, and we have media
7561 			 * present. Put the level back by calling
7562 			 * sd_ddi_pm_resume() and set the state back to
7563 			 * its previous value.
7564 			 */
7565 			(void) sd_ddi_pm_resume(un);
7566 			mutex_enter(SD_MUTEX(un));
7567 			un->un_last_state = save_state;
7568 			mutex_exit(SD_MUTEX(un));
7569 			break;
7570 		}
7571 		/*
7572 		 * The stop command from above succeeded.
7573 		 */
7574 		if (un->un_f_monitor_media_state) {
7575 			/*
7576 			 * Terminate watch thread in case of removable media
7577 			 * devices going into low power state. This is as per
7578 			 * the requirements of pm framework, otherwise commands
7579 			 * will be generated for the device (through watch
7580 			 * thread), even when the device is in low power state.
7581 			 */
7582 			mutex_enter(SD_MUTEX(un));
7583 			un->un_f_watcht_stopped = FALSE;
7584 			if (un->un_swr_token != NULL) {
7585 				opaque_t temp_token = un->un_swr_token;
7586 				un->un_f_watcht_stopped = TRUE;
7587 				un->un_swr_token = NULL;
7588 				mutex_exit(SD_MUTEX(un));
7589 				(void) scsi_watch_request_terminate(temp_token,
7590 				    SCSI_WATCH_TERMINATE_WAIT);
7591 			} else {
7592 				mutex_exit(SD_MUTEX(un));
7593 			}
7594 		}
7595 		break;
7596 
7597 	default:	/* The level requested is spindle on... */
7598 		/*
7599 		 * Legacy behavior: return success on a failed spinup
7600 		 * if there is no media in the drive.
7601 		 * Do this by looking at medium_present here.
7602 		 */
7603 		if ((sval != 0) && medium_present) {
7604 			/* The start command from above failed */
7605 			rval = DDI_FAILURE;
7606 			break;
7607 		}
7608 		/*
7609 		 * The start command from above succeeded.
7610 		 * Resume the devices now that we have
7611 		 * started the disks.
7612 		 */
7613 		(void) sd_ddi_pm_resume(un);
7614 
7615 		/*
7616 		 * Resume the watch thread since it was suspended
7617 		 * when the device went into low power mode.
7618 		 */
7619 		if (un->un_f_monitor_media_state) {
7620 			mutex_enter(SD_MUTEX(un));
7621 			if (un->un_f_watcht_stopped == TRUE) {
7622 				opaque_t temp_token;
7623 
7624 				un->un_f_watcht_stopped = FALSE;
7625 				mutex_exit(SD_MUTEX(un));
7626 				temp_token = scsi_watch_request_submit(
7627 				    SD_SCSI_DEVP(un),
7628 				    sd_check_media_time,
7629 				    SENSE_LENGTH, sd_media_watch_cb,
7630 				    (caddr_t)dev);
7631 				mutex_enter(SD_MUTEX(un));
7632 				un->un_swr_token = temp_token;
7633 			}
7634 			mutex_exit(SD_MUTEX(un));
7635 		}
7636 	}
7637 	if (got_semaphore_here != 0) {
7638 		sema_v(&un->un_semoclose);
7639 	}
7640 	/*
7641 	 * On exit put the state back to its original value
7642 	 * and broadcast to anyone waiting for the power
7643 	 * change completion.
7644 	 */
7645 	mutex_enter(SD_MUTEX(un));
7646 	un->un_state = state_before_pm;
7647 	cv_broadcast(&un->un_suspend_cv);
7648 	mutex_exit(SD_MUTEX(un));
7649 
7650 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7651 
7652 	return (rval);
7653 }
7654 
7655 
7656 
7657 /*
7658  *    Function: sdattach
7659  *
7660  * Description: Driver's attach(9e) entry point function.
7661  *
7662  *   Arguments: devi - opaque device info handle
7663  *		cmd  - attach  type
7664  *
7665  * Return Code: DDI_SUCCESS
7666  *		DDI_FAILURE
7667  *
7668  *     Context: Kernel thread context
7669  */
7670 
7671 static int
7672 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7673 {
7674 	switch (cmd) {
7675 	case DDI_ATTACH:
7676 		return (sd_unit_attach(devi));
7677 	case DDI_RESUME:
7678 		return (sd_ddi_resume(devi));
7679 	default:
7680 		break;
7681 	}
7682 	return (DDI_FAILURE);
7683 }
7684 
7685 
7686 /*
7687  *    Function: sddetach
7688  *
7689  * Description: Driver's detach(9E) entry point function.
7690  *
7691  *   Arguments: devi - opaque device info handle
7692  *		cmd  - detach  type
7693  *
7694  * Return Code: DDI_SUCCESS
7695  *		DDI_FAILURE
7696  *
7697  *     Context: Kernel thread context
7698  */
7699 
7700 static int
7701 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7702 {
7703 	switch (cmd) {
7704 	case DDI_DETACH:
7705 		return (sd_unit_detach(devi));
7706 	case DDI_SUSPEND:
7707 		return (sd_ddi_suspend(devi));
7708 	default:
7709 		break;
7710 	}
7711 	return (DDI_FAILURE);
7712 }
7713 
7714 
7715 /*
7716  *     Function: sd_sync_with_callback
7717  *
7718  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7719  *		 state while the callback routine is active.
7720  *
7721  *    Arguments: un: softstate structure for the instance
7722  *
7723  *	Context: Kernel thread context
7724  */
7725 
7726 static void
7727 sd_sync_with_callback(struct sd_lun *un)
7728 {
7729 	ASSERT(un != NULL);
7730 
7731 	mutex_enter(SD_MUTEX(un));
7732 
7733 	ASSERT(un->un_in_callback >= 0);
7734 
7735 	while (un->un_in_callback > 0) {
7736 		mutex_exit(SD_MUTEX(un));
7737 		delay(2);
7738 		mutex_enter(SD_MUTEX(un));
7739 	}
7740 
7741 	mutex_exit(SD_MUTEX(un));
7742 }
7743 
7744 /*
7745  *    Function: sd_unit_attach
7746  *
7747  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7748  *		the soft state structure for the device and performs
7749  *		all necessary structure and device initializations.
7750  *
7751  *   Arguments: devi: the system's dev_info_t for the device.
7752  *
7753  * Return Code: DDI_SUCCESS if attach is successful.
7754  *		DDI_FAILURE if any part of the attach fails.
7755  *
7756  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7757  *		Kernel thread context only.  Can sleep.
7758  */
7759 
7760 static int
7761 sd_unit_attach(dev_info_t *devi)
7762 {
7763 	struct	scsi_device	*devp;
7764 	struct	sd_lun		*un;
7765 	char			*variantp;
7766 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7767 	int	instance;
7768 	int	rval;
7769 	int	wc_enabled;
7770 	uint64_t	capacity;
7771 	uint_t		lbasize;
7772 
7773 	/*
7774 	 * Retrieve the target driver's private data area. This was set
7775 	 * up by the HBA.
7776 	 */
7777 	devp = ddi_get_driver_private(devi);
7778 
7779 	/*
7780 	 * Since we have no idea what state things were left in by the last
7781 	 * user of the device, set up some 'default' settings, ie. turn 'em
7782 	 * user of the device, set up some 'default' settings, i.e. turn 'em
7783 	 * Do this before the scsi_probe, which sends an inquiry.
7784 	 * This is a fix for bug (4430280).
7785 	 * Of special importance is wide-xfer. The drive could have been left
7786 	 * in wide transfer mode by the last driver to communicate with it,
7787 	 * this includes us. If that's the case, and if the following is not
7788 	 * setup properly or we don't re-negotiate with the drive prior to
7789 	 * transferring data to/from the drive, it causes bus parity errors,
7790 	 * data overruns, and unexpected interrupts. This first occurred when
7791 	 * the fix for bug (4378686) was made.
7792 	 */
7793 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7794 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7795 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7796 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7797 
7798 	/*
7799 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7800 	 * This call will allocate and fill in the scsi_inquiry structure
7801 	 * and point the sd_inq member of the scsi_device structure to it.
7802 	 * If the attach succeeds, then this memory will not be de-allocated
7803 	 * (via scsi_unprobe()) until the instance is detached.
7804 	 */
7805 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7806 		goto probe_failed;
7807 	}
7808 
7809 	/*
7810 	 * Check the device type as specified in the inquiry data and
7811 	 * claim it if it is of a type that we support.
7812 	 */
7813 	switch (devp->sd_inq->inq_dtype) {
7814 	case DTYPE_DIRECT:
7815 		break;
7816 	case DTYPE_RODIRECT:
7817 		break;
7818 	case DTYPE_OPTICAL:
7819 		break;
7820 	case DTYPE_NOTPRESENT:
7821 	default:
7822 		/* Unsupported device type; fail the attach. */
7823 		goto probe_failed;
7824 	}
7825 
7826 	/*
7827 	 * Allocate the soft state structure for this unit.
7828 	 *
7829 	 * We rely upon this memory being set to all zeroes by
7830 	 * ddi_soft_state_zalloc().  We assume that any member of the
7831 	 * soft state structure that is not explicitly initialized by
7832 	 * this routine will have a value of zero.
7833 	 */
7834 	instance = ddi_get_instance(devp->sd_dev);
7835 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7836 		goto probe_failed;
7837 	}
7838 
7839 	/*
7840 	 * Retrieve a pointer to the newly-allocated soft state.
7841 	 *
7842 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7843 	 * was successful, unless something has gone horribly wrong and the
7844 	 * ddi's soft state internals are corrupt (in which case it is
7845 	 * probably better to halt here than just fail the attach....)
7846 	 */
7847 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7848 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7849 		    instance);
7850 		/*NOTREACHED*/
7851 	}
7852 
7853 	/*
7854 	 * Link the back ptr of the driver soft state to the scsi_device
7855 	 * struct for this lun.
7856 	 * Save a pointer to the softstate in the driver-private area of
7857 	 * the scsi_device struct.
7858 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7859 	 * we first set un->un_sd below.
7860 	 */
7861 	un->un_sd = devp;
7862 	devp->sd_private = (opaque_t)un;
7863 
7864 	/*
7865 	 * The following must be after devp is stored in the soft state struct.
7866 	 */
7867 #ifdef SDDEBUG
7868 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7869 	    "%s_unit_attach: un:0x%p instance:%d\n",
7870 	    ddi_driver_name(devi), un, instance);
7871 #endif
7872 
7873 	/*
7874 	 * Set up the device type and node type (for the minor nodes).
7875 	 * By default we assume that the device can at least support the
7876 	 * Common Command Set. Call it a CD-ROM if it reports itself
7877 	 * as a RODIRECT device.
7878 	 */
7879 	switch (devp->sd_inq->inq_dtype) {
7880 	case DTYPE_RODIRECT:
7881 		un->un_node_type = DDI_NT_CD_CHAN;
7882 		un->un_ctype	 = CTYPE_CDROM;
7883 		break;
7884 	case DTYPE_OPTICAL:
7885 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7886 		un->un_ctype	 = CTYPE_ROD;
7887 		break;
7888 	default:
7889 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7890 		un->un_ctype	 = CTYPE_CCS;
7891 		break;
7892 	}
7893 
7894 	/*
7895 	 * Try to read the interconnect type from the HBA.
7896 	 *
7897 	 * Note: This driver is currently compiled as two binaries, a parallel
7898 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7899 	 * differences are determined at compile time. In the future a single
7900 	 * binary will be provided and the interconnect type will be used to
7901 	 * differentiate between fibre and parallel scsi behaviors. At that time
7902 	 * it will be necessary for all fibre channel HBAs to support this
7903 	 * property.
7904 	 *
7905 	 * Set un_f_is_fibre to TRUE (default to fibre).
7906 	 */
7907 	un->un_f_is_fibre = TRUE;
7908 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7909 	case INTERCONNECT_SSA:
7910 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7911 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7912 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7913 		break;
7914 	case INTERCONNECT_PARALLEL:
7915 		un->un_f_is_fibre = FALSE;
7916 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7917 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7918 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7919 		break;
7920 	case INTERCONNECT_SATA:
7921 		un->un_f_is_fibre = FALSE;
7922 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7923 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7924 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7925 		break;
7926 	case INTERCONNECT_FIBRE:
7927 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7928 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7929 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7930 		break;
7931 	case INTERCONNECT_FABRIC:
7932 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7933 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7934 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7935 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7936 		break;
7937 	default:
7938 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7939 		/*
7940 		 * The HBA does not support the "interconnect-type" property
7941 		 * (or did not provide a recognized type).
7942 		 *
7943 		 * Note: This will be obsoleted when a single fibre channel
7944 		 * and parallel scsi driver is delivered. In the meantime the
7945 	 * interconnect type will be set to the platform default. If that
7946 		 * type is not parallel SCSI, it means that we should be
7947 		 * assuming "ssd" semantics. However, here this also means that
7948 		 * the FC HBA is not supporting the "interconnect-type" property
7949 		 * like we expect it to, so log this occurrence.
7950 		 */
7951 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7952 		if (!SD_IS_PARALLEL_SCSI(un)) {
7953 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7954 			    "sd_unit_attach: un:0x%p Assuming "
7955 			    "INTERCONNECT_FIBRE\n", un);
7956 		} else {
7957 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7958 			    "sd_unit_attach: un:0x%p Assuming "
7959 			    "INTERCONNECT_PARALLEL\n", un);
7960 			un->un_f_is_fibre = FALSE;
7961 		}
7962 #else
7963 		/*
7964 		 * Note: This source will be implemented when a single fibre
7965 		 * channel and parallel scsi driver is delivered. The default
7966 		 * will be to assume that if a device does not support the
7967 		 * "interconnect-type" property it is a parallel SCSI HBA and
7968 		 * we will set the interconnect type for parallel scsi.
7969 		 */
7970 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7971 		un->un_f_is_fibre = FALSE;
7972 #endif
7973 		break;
7974 	}
7975 
7976 	if (un->un_f_is_fibre == TRUE) {
7977 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7978 		    SCSI_VERSION_3) {
7979 			switch (un->un_interconnect_type) {
7980 			case SD_INTERCONNECT_FIBRE:
7981 			case SD_INTERCONNECT_SSA:
7982 				un->un_node_type = DDI_NT_BLOCK_WWN;
7983 				break;
7984 			default:
7985 				break;
7986 			}
7987 		}
7988 	}
7989 
7990 	/*
7991 	 * Initialize the Request Sense command for the target
7992 	 */
7993 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7994 		goto alloc_rqs_failed;
7995 	}
7996 
7997 	/*
7998 	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for SPARC,
7999 	 * which has separate binaries for sd and ssd.
8000 	 *
8001 	 * x86 has one binary, so un_retry_count is set based on the
8002 	 * interconnect type. These hardcoded values will go away when
8003 	 * SPARC also uses one binary for sd and ssd; they need to match
8004 	 * SD_RETRY_COUNT in sddef.h.
8005 	 * The value used is based on interconnect type:
8006 	 * fibre = 3, parallel = 5.
8007 	 */
8007 	 */
8008 #if defined(__i386) || defined(__amd64)
8009 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
8010 #else
8011 	un->un_retry_count = SD_RETRY_COUNT;
8012 #endif
8013 
8014 	/*
8015 	 * Set the per disk retry count to the default number of retries
8016 	 * for disks and CDROMs. This value can be overridden by the
8017 	 * disk property list or an entry in sd.conf.
8018 	 */
8019 	un->un_notready_retry_count =
8020 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
8021 			: DISK_NOT_READY_RETRY_COUNT(un);
8022 
8023 	/*
8024 	 * Set the busy retry count to the default value of un_retry_count.
8025 	 * This can be overridden by entries in sd.conf or the device
8026 	 * config table.
8027 	 */
8028 	un->un_busy_retry_count = un->un_retry_count;
8029 
8030 	/*
8031 	 * Init the reset threshold for retries.  This number determines
8032 	 * how many retries must be performed before a reset can be issued
8033 	 * (for certain error conditions). This can be overridden by entries
8034 	 * in sd.conf or the device config table.
8035 	 */
8036 	un->un_reset_retry_count = (un->un_retry_count / 2);
8037 
8038 	/*
8039 	 * Set the victim_retry_count to the default un_retry_count
8040 	 * Set the victim_retry_count to twice the default un_retry_count
8041 	un->un_victim_retry_count = (2 * un->un_retry_count);
8042 
8043 	/*
8044 	 * Set the reservation release timeout to the default value of
8045 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
8046 	 * device config table.
8047 	 */
8048 	un->un_reserve_release_time = 5;
8049 
8050 	/*
8051 	 * Set up the default maximum transfer size. Note that this may
8052 	 * get updated later in the attach, when setting up default wide
8053 	 * operations for disks.
8054 	 */
8055 #if defined(__i386) || defined(__amd64)
8056 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
8057 #else
8058 	un->un_max_xfer_size = (uint_t)maxphys;
8059 #endif
8060 
8061 	/*
8062 	 * Get "allow bus device reset" property (defaults to "enabled" if
8063 	 * the property was not defined). This is to disable bus resets for
8064 	 * certain kinds of error recovery. Note: In the future when a run-time
8065 	 * fibre check is available the soft state flag should default to
8066 	 * enabled.
8067 	 */
8068 	if (un->un_f_is_fibre == TRUE) {
8069 		un->un_f_allow_bus_device_reset = TRUE;
8070 	} else {
8071 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8072 		    "allow-bus-device-reset", 1) != 0) {
8073 			un->un_f_allow_bus_device_reset = TRUE;
8074 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8075 			    "sd_unit_attach: un:0x%p Bus device reset enabled\n",
8076 			    un);
8077 		} else {
8078 			un->un_f_allow_bus_device_reset = FALSE;
8079 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8080 			    "sd_unit_attach: un:0x%p Bus device reset disabled\n",
8081 			    un);
8082 		}
8083 	}
8084 
8085 	/*
8086 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
8087 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
8088 	 *
8089 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
8090 	 * property. The new "variant" property with a value of "atapi" has been
8091 	 * introduced so that future 'variants' of standard SCSI behavior (like
8092 	 * atapi) could be specified by the underlying HBA drivers by supplying
8093 	 * a new value for the "variant" property, instead of having to define a
8094 	 * new property.
8095 	 */
8096 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
8097 		un->un_f_cfg_is_atapi = TRUE;
8098 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8099 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
8100 	}
8101 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
8102 	    &variantp) == DDI_PROP_SUCCESS) {
8103 		if (strcmp(variantp, "atapi") == 0) {
8104 			un->un_f_cfg_is_atapi = TRUE;
8105 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8106 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
8107 		}
8108 		ddi_prop_free(variantp);
8109 	}
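	/*
	 * (Illustrative sketch only, not code from this driver: an HBA
	 * nexus wanting ATAPI semantics for a child target could publish
	 * the property on the child devinfo node, e.g. at tran_tgt_init()
	 * time:
	 *
	 *	(void) ndi_prop_update_string(DDI_DEV_T_NONE, child_dip,
	 *	    "variant", "atapi");
	 *
	 * where "child_dip" is a hypothetical name for that node.)
	 */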
8110 
8111 	un->un_cmd_timeout	= SD_IO_TIME;
8112 
8113 	/* Info on current states, statuses, etc. (Updated frequently) */
8114 	un->un_state		= SD_STATE_NORMAL;
8115 	un->un_last_state	= SD_STATE_NORMAL;
8116 
8117 	/* Control & status info for command throttling */
8118 	un->un_throttle		= sd_max_throttle;
8119 	un->un_saved_throttle	= sd_max_throttle;
8120 	un->un_min_throttle	= sd_min_throttle;
8121 
8122 	if (un->un_f_is_fibre == TRUE) {
8123 		un->un_f_use_adaptive_throttle = TRUE;
8124 	} else {
8125 		un->un_f_use_adaptive_throttle = FALSE;
8126 	}
8127 
8128 	/* Removable media support. */
8129 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
8130 	un->un_mediastate		= DKIO_NONE;
8131 	un->un_specified_mediastate	= DKIO_NONE;
8132 
8133 	/* CVs for suspend/resume (PM or DR) */
8134 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
8135 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
8136 
8137 	/* Power management support. */
8138 	un->un_power_level = SD_SPINDLE_UNINIT;
8139 
8140 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8141 	un->un_f_wcc_inprog = 0;
8142 
8143 	/*
8144 	 * The open/close semaphore is used to serialize threads executing
8145 	 * in the driver's open & close entry point routines for a given
8146 	 * instance.
8147 	 */
8148 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
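	/*
	 * (sdopen() and sdclose() bracket their work with
	 * sema_p(&un->un_semoclose) ... sema_v(&un->un_semoclose), so at
	 * most one open or close is in progress per instance at a time.)
	 */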
8149 
8150 	/*
8151 	 * The conf file entry and softstate variable are a forceful override,
8152 	 * meaning a non-zero value must be entered to change the default.
8153 	 */
8154 	un->un_f_disksort_disabled = FALSE;
8155 
8156 	/*
8157 	 * Retrieve the properties from the static driver table or the driver
8158 	 * configuration file (.conf) for this unit and update the soft state
8159 	 * for the device as needed for the indicated properties.
8160 	 * Note: the property configuration needs to occur here as some of the
8161 	 * following routines may have dependencies on soft state flags set
8162 	 * as part of the driver property configuration.
8163 	 */
8164 	sd_read_unit_properties(un);
8165 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8166 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8167 
8168 	/*
8169 	 * A device is treated as hotpluggable only if it has the
8170 	 * "hotpluggable" property; otherwise it is regarded as
8171 	 * non-hotpluggable.
8172 	 */
8173 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8174 	    -1) != -1) {
8175 		un->un_f_is_hotpluggable = TRUE;
8176 	}
8177 
8178 	/*
8179 	 * Set the unit's attributes (flags) according to "hotpluggable" and
8180 	 * the RMB bit in the INQUIRY data.
8181 	 */
8182 	sd_set_unit_attributes(un, devi);
8183 
8184 	/*
8185 	 * By default, we mark the capacity, lbasize, and geometry
8186 	 * as invalid. Only if we successfully read a valid capacity
8187 	 * will we update the un_blockcount and un_tgt_blocksize with the
8188 	 * valid values (the geometry will be validated later).
8189 	 */
8190 	un->un_f_blockcount_is_valid	= FALSE;
8191 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8192 	un->un_f_geometry_is_valid	= FALSE;
8193 
8194 	/*
8195 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8196 	 * otherwise.
8197 	 */
8198 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8199 	un->un_blockcount = 0;
8200 
8201 	/*
8202 	 * Set up the per-instance info needed to determine the correct
8203 	 * CDBs and other info for issuing commands to the target.
8204 	 */
8205 	sd_init_cdb_limits(un);
8206 
8207 	/*
8208 	 * Set up the IO chains to use, based upon the target type.
8209 	 */
8210 	if (un->un_f_non_devbsize_supported) {
8211 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8212 	} else {
8213 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8214 	}
8215 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8216 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8217 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8218 
8219 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8220 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8221 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8222 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
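	/*
	 * (The xbuf attribute created above caps the number of concurrently
	 * active transfers per instance at sd_xbuf_active_limit and queues
	 * the overflow; queued bufs are fed back through sd_xbuf_strategy()
	 * as xbufs become available.)
	 */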
8223 
8224 
8225 	if (ISCD(un)) {
8226 		un->un_additional_codes = sd_additional_codes;
8227 	} else {
8228 		un->un_additional_codes = NULL;
8229 	}
8230 
8231 	/*
8232 	 * Create the kstats here so they can be available for attach-time
8233 	 * routines that send commands to the unit (either polled or via
8234 	 * sd_send_scsi_cmd).
8235 	 *
8236 	 * Note: This is a critical sequence that needs to be maintained:
8237 	 *	1) Instantiate the kstats here, before any routines using the
8238 	 *	   iopath (i.e. sd_send_scsi_cmd).
8239 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8240 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8241 	 *	   sd_register_devid(), and sd_cache_control().
8242 	 */
8243 
8244 	un->un_stats = kstat_create(sd_label, instance,
8245 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8246 	if (un->un_stats != NULL) {
8247 		un->un_stats->ks_lock = SD_MUTEX(un);
8248 		kstat_install(un->un_stats);
8249 	}
8250 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8251 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8252 
8253 	sd_create_errstats(un, instance);
8254 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8255 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8256 
8257 	/*
8258 	 * The following if/else code was relocated here from below as part
8259 	 * of the fix for bug (4430280). However with the default setup added
8260 	 * on entry to this routine, it's no longer absolutely necessary for
8261 	 * this to be before the call to sd_spin_up_unit.
8262 	 */
8263 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
8264 		/*
8265 		 * If SCSI-2 tagged queueing is supported by the target
8266 		 * and by the host adapter then we will enable it.
8267 		 */
8268 		un->un_tagflags = 0;
8269 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8270 		    (devp->sd_inq->inq_cmdque) &&
8271 		    (un->un_f_arq_enabled == TRUE)) {
8272 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8273 			    1, 1) == 1) {
8274 				un->un_tagflags = FLAG_STAG;
8275 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8276 				    "sd_unit_attach: un:0x%p tag queueing "
8277 				    "enabled\n", un);
8278 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8279 			    "untagged-qing", 0) == 1) {
8280 				un->un_f_opt_queueing = TRUE;
8281 				un->un_saved_throttle = un->un_throttle =
8282 				    min(un->un_throttle, 3);
8283 			} else {
8284 				un->un_f_opt_queueing = FALSE;
8285 				un->un_saved_throttle = un->un_throttle = 1;
8286 			}
8287 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8288 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8289 			/* The Host Adapter supports internal queueing. */
8290 			un->un_f_opt_queueing = TRUE;
8291 			un->un_saved_throttle = un->un_throttle =
8292 			    min(un->un_throttle, 3);
8293 		} else {
8294 			un->un_f_opt_queueing = FALSE;
8295 			un->un_saved_throttle = un->un_throttle = 1;
8296 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8297 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8298 		}
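		/*
		 * To summarize the decision above:
		 *	tagged queueing accepted by target and HBA
		 *		-> un_tagflags = FLAG_STAG, throttle unchanged
		 *	only untagged (internal HBA) queueing available
		 *		-> throttle clamped to min(throttle, 3)
		 *	no queueing support
		 *		-> throttle forced to 1
		 */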
8299 
8300 		/*
8301 		 * Enable large transfers for SATA/SAS drives
8302 		 */
8303 		if (SD_IS_SERIAL(un)) {
8304 			un->un_max_xfer_size =
8305 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8306 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8307 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8308 			    "sd_unit_attach: un:0x%p max transfer "
8309 			    "size=0x%x\n", un, un->un_max_xfer_size);
8310 
8311 		}
8312 
8313 		/* Setup or tear down default wide operations for disks */
8314 
8315 		/*
8316 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8317 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8318 		 * system and be set to different values. In the future this
8319 		 * code may need to be updated when the ssd module is
8320 		 * obsoleted and removed from the system. (4299588)
8321 		 */
8322 		if (SD_IS_PARALLEL_SCSI(un) &&
8323 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8324 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8325 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8326 			    1, 1) == 1) {
8327 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8328 				    "sd_unit_attach: un:0x%p Wide Transfer "
8329 				    "enabled\n", un);
8330 			}
8331 
8332 			/*
8333 			 * If tagged queuing has also been enabled, then
8334 			 * enable large xfers
8335 			 */
8336 			if (un->un_saved_throttle == sd_max_throttle) {
8337 				un->un_max_xfer_size =
8338 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8339 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8340 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8341 				    "sd_unit_attach: un:0x%p max transfer "
8342 				    "size=0x%x\n", un, un->un_max_xfer_size);
8343 			}
8344 		} else {
8345 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8346 			    0, 1) == 1) {
8347 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8348 				    "sd_unit_attach: un:0x%p "
8349 				    "Wide Transfer disabled\n", un);
8350 			}
8351 		}
8352 	} else {
8353 		un->un_tagflags = FLAG_STAG;
8354 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8355 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8356 	}
8357 
8358 	/*
8359 	 * If this target supports LUN reset, try to enable it.
8360 	 */
8361 	if (un->un_f_lun_reset_enabled) {
8362 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8363 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8364 			    "un:0x%p lun_reset capability set\n", un);
8365 		} else {
8366 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8367 			    "un:0x%p lun-reset capability not set\n", un);
8368 		}
8369 	}
8370 
8371 	/*
8372 	 * At this point in the attach, we have enough info in the
8373 	 * soft state to be able to issue commands to the target.
8374 	 *
8375 	 * All command paths used below MUST issue their commands as
8376 	 * SD_PATH_DIRECT. This is important as intermediate layers
8377 	 * are not all initialized yet (such as PM).
8378 	 */
8379 
8380 	/*
8381 	 * Send a TEST UNIT READY command to the device. This should clear
8382 	 * any outstanding UNIT ATTENTION that may be present.
8383 	 *
8384 	 * Note: Don't check for success, just track if there is a reservation,
8385 	 * this is a throw away command to clear any unit attentions.
8386 	 *
8387 	 * Note: This MUST be the first command issued to the target during
8388 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8389 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8390 	 * with attempts at spinning up a device with no media.
8391 	 */
8392 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8393 		reservation_flag = SD_TARGET_IS_RESERVED;
8394 	}
8395 
8396 	/*
8397 	 * If the device is NOT a removable media device, attempt to spin
8398 	 * it up (using the START_STOP_UNIT command) and read its capacity
8399 	 * (using the READ CAPACITY command).  Note, however, that either
8400 	 * of these could fail and in some cases we would continue with
8401 	 * the attach despite the failure (see below).
8402 	 */
8403 	if (un->un_f_descr_format_supported) {
8404 		switch (sd_spin_up_unit(un)) {
8405 		case 0:
8406 			/*
8407 			 * Spin-up was successful; now try to read the
8408 			 * capacity.  If successful then save the results
8409 			 * and mark the capacity & lbasize as valid.
8410 			 */
8411 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8412 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8413 
8414 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8415 			    &lbasize, SD_PATH_DIRECT)) {
8416 			case 0: {
8417 				if (capacity > DK_MAX_BLOCKS) {
8418 #ifdef _LP64
8419 					if (capacity + 1 >
8420 					    SD_GROUP1_MAX_ADDRESS) {
8421 						/*
8422 						 * Enable descriptor format
8423 						 * sense data so that we can
8424 						 * get 64 bit sense data
8425 						 * fields.
8426 						 */
8427 						sd_enable_descr_sense(un);
8428 					}
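					/*
					 * (Group 1 (10-byte) CDBs carry a
					 * 32-bit LBA, which is the limit
					 * SD_GROUP1_MAX_ADDRESS denotes;
					 * past it, the fixed sense format's
					 * 32-bit information field can no
					 * longer hold a block address, hence
					 * descriptor sense.)
					 */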
8429 #else
8430 					/* 32-bit kernels can't handle this */
8431 					scsi_log(SD_DEVINFO(un),
8432 					    sd_label, CE_WARN,
8433 					    "disk has %llu blocks, which "
8434 					    "is too large for a 32-bit "
8435 					    "kernel", capacity);
8436 
8437 #if defined(__i386) || defined(__amd64)
8438 					/*
8439 					 * Refer to comments related to off-by-1
8440 					 * at the header of this file.
8441 					 * A 1TB disk was treated as (1T - 512)B
8442 					 * in the past, so it might have a
8443 					 * valid VTOC and Solaris partitions;
8444 					 * we have to allow it to continue to
8445 					 * work.
8446 					 */
8447 					if (capacity - 1 > DK_MAX_BLOCKS)
8448 #endif
8449 					goto spinup_failed;
8450 #endif
8451 				}
8452 
8453 				/*
8454 				 * It is not necessary to check here whether
8455 				 * the device capacity exceeds what the max
8456 				 * hba cdb can support, because
8457 				 * sd_send_scsi_READ_CAPACITY retrieves the
8458 				 * capacity via a USCSI command, which is
8459 				 * itself constrained by the max hba cdb.
8460 				 * It returns EINVAL when a bigger cdb than
8461 				 * the supported cdb length would be needed;
8462 				 * that case is handled in "case EINVAL"
8463 				 * below.
8464 				 */
8465 
8466 				/*
8467 				 * The following relies on
8468 				 * sd_send_scsi_READ_CAPACITY never
8469 				 * returning 0 for capacity and/or lbasize.
8470 				 */
8471 				sd_update_block_info(un, lbasize, capacity);
8472 
8473 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8474 				    "sd_unit_attach: un:0x%p capacity = %ld "
8475 				    "blocks; lbasize= %ld.\n", un,
8476 				    un->un_blockcount, un->un_tgt_blocksize);
8477 
8478 				break;
8479 			}
8480 			case EINVAL:
8481 				/*
8482 				 * In the case where the max-cdb-length property
8483 				 * is smaller than the required CDB length for
8484 				 * a SCSI device, a target driver can fail to
8485 				 * attach to that device.
8486 				 */
8487 				scsi_log(SD_DEVINFO(un),
8488 				    sd_label, CE_WARN,
8489 				    "disk capacity is too large "
8490 				    "for current cdb length");
8491 				goto spinup_failed;
8492 			case EACCES:
8493 				/*
8494 				 * Should never get here if the spin-up
8495 				 * succeeded, but code it in anyway.
8496 				 * From here, just continue with the attach...
8497 				 */
8498 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8499 				    "sd_unit_attach: un:0x%p "
8500 				    "sd_send_scsi_READ_CAPACITY "
8501 				    "returned reservation conflict\n", un);
8502 				reservation_flag = SD_TARGET_IS_RESERVED;
8503 				break;
8504 			default:
8505 				/*
8506 				 * Likewise, should never get here if the
8507 				 * spin-up succeeded. Just continue with
8508 				 * the attach...
8509 				 */
8510 				break;
8511 			}
8512 			break;
8513 		case EACCES:
8514 			/*
8515 			 * Device is reserved by another host.  In this case
8516 			 * we could not spin it up or read the capacity, but
8517 			 * we continue with the attach anyway.
8518 			 */
8519 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8520 			    "sd_unit_attach: un:0x%p spin-up reservation "
8521 			    "conflict.\n", un);
8522 			reservation_flag = SD_TARGET_IS_RESERVED;
8523 			break;
8524 		default:
8525 			/* Fail the attach if the spin-up failed. */
8526 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8527 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8528 			goto spinup_failed;
8529 		}
8530 	}
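	/*
	 * Net effect of the spin-up handling above: only an outright
	 * spin-up failure, an EINVAL from READ CAPACITY (capacity needs a
	 * larger cdb than supported), or an over-size capacity on a 32-bit
	 * kernel aborts the attach; EACCES (reservation conflict) merely
	 * sets reservation_flag and the attach continues.
	 */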
8531 
8532 	/*
8533 	 * Check to see if this is a MMC drive
8534 	 */
8535 	if (ISCD(un)) {
8536 		sd_set_mmc_caps(un);
8537 	}
8538 
8539 	/*
8540 	 * Create the minor nodes for the device.
8541 	 * Note: If we want to support fdisk on both sparc and intel, this will
8542 	 * have to separate out the notion that VTOC8 is always sparc, and
8543 	 * VTOC16 is always intel (though these can be the defaults).  The vtoc
8544 	 * type will have to be determined at run-time, and the fdisk
8545 	 * partitioning will have to have been read & set up before we
8546 	 * create the minor nodes. (any other inits (such as kstats) that
8547 	 * also ought to be done before creating the minor nodes?) (Doesn't
8548 	 * setting up the minor nodes kind of imply that we're ready to
8549 	 * handle an open from userland?)
8550 	 */
8551 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8552 		goto create_minor_nodes_failed;
8553 	}
8554 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8555 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8556 
8557 	/*
8558 	 * Add a zero-length attribute to tell the world we support
8559 	 * kernel ioctls (for layered drivers)
8560 	 */
8561 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8562 	    DDI_KERNEL_IOCTL, NULL, 0);
8563 
8564 	/*
8565 	 * Add a boolean property to tell the world we support
8566 	 * the B_FAILFAST flag (for layered drivers)
8567 	 */
8568 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8569 	    "ddi-failfast-supported", NULL, 0);
8570 
8571 	/*
8572 	 * Initialize power management
8573 	 */
8574 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8575 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8576 	sd_setup_pm(un, devi);
8577 	if (un->un_f_pm_is_enabled == FALSE) {
8578 		/*
8579 		 * For performance, point to a jump table that does
8580 		 * not include pm.
8581 		 * The direct and priority chains don't change with PM.
8582 		 *
8583 		 * Note: this is currently done based on individual device
8584 		 * capabilities. When an interface for determining system
8585 		 * power enabled state becomes available, or when additional
8586 		 * layers are added to the command chain, these values will
8587 		 * have to be re-evaluated for correctness.
8588 		 */
8589 		if (un->un_f_non_devbsize_supported) {
8590 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8591 		} else {
8592 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8593 		}
8594 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8595 	}
8596 
8597 	/*
8598 	 * This property is set to 0 by HA software to avoid retries
8599 	 * on a reserved disk. (The preferred property name is
8600 	 * "retry-on-reservation-conflict") (1189689)
8601 	 *
8602 	 * Note: The use of a global here can have unintended consequences. A
8603 	 * per-instance variable is preferable to match the capabilities of
8604 	 * different underlying hba's (4402600)
8605 	 */
8606 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8607 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8608 	    sd_retry_on_reservation_conflict);
8609 	if (sd_retry_on_reservation_conflict != 0) {
8610 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8611 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8612 		    sd_retry_on_reservation_conflict);
8613 	}
8614 
8615 	/* Set up options for QFULL handling. */
8616 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8617 	    "qfull-retries", -1)) != -1) {
8618 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8619 		    rval, 1);
8620 	}
8621 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8622 	    "qfull-retry-interval", -1)) != -1) {
8623 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8624 		    rval, 1);
8625 	}
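	/*
	 * (For example, a hypothetical .conf fragment exercising these
	 * properties might read:
	 *
	 *	qfull-retries=20;
	 *	qfull-retry-interval=100;
	 *
	 * The numeric values are illustrative only.)
	 */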
8626 
8627 	/*
8628 	 * This just prints a message that announces the existence of the
8629 	 * device. The message is always printed in the system logfile, but
8630 	 * only appears on the console if the system is booted with the
8631 	 * -v (verbose) argument.
8632 	 */
8633 	ddi_report_dev(devi);
8634 
8635 	/*
8636 	 * The framework calls driver attach routines single-threaded
8637 	 * for a given instance.  However we still acquire SD_MUTEX here
8638 	 * for a given instance.  However, we still acquire SD_MUTEX here
8639 	 * because it is required for calling the sd_validate_geometry()
8640 	 */
8641 	mutex_enter(SD_MUTEX(un));
8642 	un->un_f_geometry_is_valid = FALSE;
8643 	un->un_mediastate = DKIO_NONE;
8644 	un->un_reserved = -1;
8645 
8646 	/*
8647 	 * Read and validate the device's geometry (i.e., the disk label).
8648 	 * A new unformatted drive will not have a valid geometry, but
8649 	 * the driver needs to successfully attach to this device so
8650 	 * the drive can be formatted via ioctls.
8651 	 */
8652 	if (((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8653 	    ENOTSUP)) &&
8654 	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8655 		/*
8656 		 * We found a small disk with an EFI label on it;
8657 		 * we need to fix up the minor nodes accordingly.
8658 		 */
8659 		ddi_remove_minor_node(devi, "h");
8660 		ddi_remove_minor_node(devi, "h,raw");
8661 		(void) ddi_create_minor_node(devi, "wd",
8662 		    S_IFBLK,
8663 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8664 		    un->un_node_type, NULL);
8665 		(void) ddi_create_minor_node(devi, "wd,raw",
8666 		    S_IFCHR,
8667 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8668 		    un->un_node_type, NULL);
8669 	}
8670 #if defined(__i386) || defined(__amd64)
8671 	else if (un->un_f_capacity_adjusted == 1) {
8672 		/*
8673 		 * Refer to comments related to off-by-1 at the
8674 		 * header of this file.
8675 		 * Adjust minor node for 1TB disk.
8676 		 */
8677 		ddi_remove_minor_node(devi, "wd");
8678 		ddi_remove_minor_node(devi, "wd,raw");
8679 		(void) ddi_create_minor_node(devi, "h",
8680 		    S_IFBLK,
8681 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8682 		    un->un_node_type, NULL);
8683 		(void) ddi_create_minor_node(devi, "h,raw",
8684 		    S_IFCHR,
8685 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8686 		    un->un_node_type, NULL);
8687 	}
8688 #endif
8689 	/*
8690 	 * Read and initialize the devid for the unit.
8691 	 */
8692 	ASSERT(un->un_errstats != NULL);
8693 	if (un->un_f_devid_supported) {
8694 		sd_register_devid(un, devi, reservation_flag);
8695 	}
8696 	mutex_exit(SD_MUTEX(un));
8697 
8698 #if (defined(__fibre))
8699 	/*
8700 	 * Register callbacks for fibre only.  You can't do this solely
8701 	 * on the basis of the devid_type because this is hba specific.
8702 	 * We need to query our hba capabilities to find out whether to
8703 	 * register or not.
8704 	 */
8705 	if (un->un_f_is_fibre) {
8706 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8707 			sd_init_event_callbacks(un);
8708 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8709 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8710 		}
8711 	}
8712 #endif
8713 
8714 	if (un->un_f_opt_disable_cache == TRUE) {
8715 		/*
8716 		 * Disable both read cache and write cache.  This is
8717 		 * the historic behavior of the keywords in the config file.
8718 		 */
8719 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8720 		    0) {
8721 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8722 			    "sd_unit_attach: un:0x%p Could not disable "
8723 			    "caching", un);
8724 			goto devid_failed;
8725 		}
8726 	}
8727 
8728 	/*
8729 	 * Check the value of the WCE bit now and
8730 	 * set un_f_write_cache_enabled accordingly.
8731 	 */
8732 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8733 	mutex_enter(SD_MUTEX(un));
8734 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8735 	mutex_exit(SD_MUTEX(un));
8736 
8737 	/*
8738 	 * Set the pstat and error stat values here, so data obtained during the
8739 	 * previous attach-time routines is available.
8740 	 *
8741 	 * Note: This is a critical sequence that needs to be maintained:
8742 	 *	1) Instantiate the kstats before any routines using the iopath
8743 	 *	   (i.e. sd_send_scsi_cmd).
8744 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8745 	 *	   stats (sd_set_pstats)here, following sd_validate_geometry(),
8746 	 *	   sd_register_devid(), and sd_cache_control().
8747 	 */
8748 	if (un->un_f_pkstats_enabled) {
8749 		sd_set_pstats(un);
8750 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8751 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8752 	}
8753 
8754 	sd_set_errstats(un);
8755 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8756 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8757 
8758 	/*
8759 	 * Find out what type of reservation this disk supports.
8760 	 */
8761 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8762 	case 0:
8763 		/*
8764 		 * SCSI-3 reservations are supported.
8765 		 */
8766 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8767 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8768 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8769 		break;
8770 	case ENOTSUP:
8771 		/*
8772 		 * The PERSISTENT RESERVE IN command would not be recognized by
8773 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8774 		 */
8775 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8776 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8777 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8778 		break;
8779 	default:
8780 		/*
8781 		 * default to SCSI-3 reservations
8782 		 */
8783 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8784 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8785 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8786 		break;
8787 	}
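	/*
	 * (Summary: only an explicit ENOTSUP from PERSISTENT RESERVE IN
	 * selects SCSI-2 reservations; success and any other failure both
	 * leave the unit using SCSI-3 reservations.)
	 */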
8788 
8789 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8790 	    "sd_unit_attach: un:0x%p exit success\n", un);
8791 
8792 	return (DDI_SUCCESS);
8793 
8794 	/*
8795 	 * An error occurred during the attach; clean up & return failure.
8796 	 */
8797 
8798 devid_failed:
8799 
8800 setup_pm_failed:
8801 	ddi_remove_minor_node(devi, NULL);
8802 
8803 create_minor_nodes_failed:
8804 	/*
8805 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8806 	 */
8807 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8808 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8809 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8810 
8811 	if (un->un_f_is_fibre == FALSE) {
8812 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8813 	}
8814 
8815 spinup_failed:
8816 
8817 	mutex_enter(SD_MUTEX(un));
8818 
8819 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8820 	if (un->un_direct_priority_timeid != NULL) {
8821 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8822 		un->un_direct_priority_timeid = NULL;
8823 		mutex_exit(SD_MUTEX(un));
8824 		(void) untimeout(temp_id);
8825 		mutex_enter(SD_MUTEX(un));
8826 	}
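	/*
	 * (Pattern note for the timeout cancellation above and those below:
	 * each one saves the timeout id, NULLs the soft state field, and
	 * drops SD_MUTEX before calling untimeout(). untimeout() may wait
	 * for a callback that is already running, and that callback may
	 * need SD_MUTEX, so holding the mutex across the call could
	 * deadlock.)
	 */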
8827 
8828 	/* Cancel any pending start/stop timeouts */
8829 	if (un->un_startstop_timeid != NULL) {
8830 		timeout_id_t temp_id = un->un_startstop_timeid;
8831 		un->un_startstop_timeid = NULL;
8832 		mutex_exit(SD_MUTEX(un));
8833 		(void) untimeout(temp_id);
8834 		mutex_enter(SD_MUTEX(un));
8835 	}
8836 
8837 	/* Cancel any pending reset-throttle timeouts */
8838 	if (un->un_reset_throttle_timeid != NULL) {
8839 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8840 		un->un_reset_throttle_timeid = NULL;
8841 		mutex_exit(SD_MUTEX(un));
8842 		(void) untimeout(temp_id);
8843 		mutex_enter(SD_MUTEX(un));
8844 	}
8845 
8846 	/* Cancel any pending retry timeouts */
8847 	if (un->un_retry_timeid != NULL) {
8848 		timeout_id_t temp_id = un->un_retry_timeid;
8849 		un->un_retry_timeid = NULL;
8850 		mutex_exit(SD_MUTEX(un));
8851 		(void) untimeout(temp_id);
8852 		mutex_enter(SD_MUTEX(un));
8853 	}
8854 
8855 	/* Cancel any pending delayed cv broadcast timeouts */
8856 	if (un->un_dcvb_timeid != NULL) {
8857 		timeout_id_t temp_id = un->un_dcvb_timeid;
8858 		un->un_dcvb_timeid = NULL;
8859 		mutex_exit(SD_MUTEX(un));
8860 		(void) untimeout(temp_id);
8861 		mutex_enter(SD_MUTEX(un));
8862 	}
8863 
8864 	mutex_exit(SD_MUTEX(un));
8865 
8866 	/* There should not be any in-progress I/O so ASSERT this check */
8867 	ASSERT(un->un_ncmds_in_transport == 0);
8868 	ASSERT(un->un_ncmds_in_driver == 0);
8869 
8870 	/* Do not free the softstate if the callback routine is active */
8871 	sd_sync_with_callback(un);
8872 
8873 	/*
8874 	 * Partition stats apparently are not used with removables. These would
8875 	 * not have been created during attach, so no need to clean them up...
8876 	 */
8877 	if (un->un_stats != NULL) {
8878 		kstat_delete(un->un_stats);
8879 		un->un_stats = NULL;
8880 	}
8881 	if (un->un_errstats != NULL) {
8882 		kstat_delete(un->un_errstats);
8883 		un->un_errstats = NULL;
8884 	}
8885 
8886 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8887 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8888 
8889 	ddi_prop_remove_all(devi);
8890 	sema_destroy(&un->un_semoclose);
8891 	cv_destroy(&un->un_state_cv);
8892 
8893 getrbuf_failed:
8894 
8895 	sd_free_rqs(un);
8896 
8897 alloc_rqs_failed:
8898 
8899 	devp->sd_private = NULL;
8900 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8901 
8902 get_softstate_failed:
8903 	/*
8904 	 * Note: the man pages are unclear as to whether or not doing a
8905 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8906 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8907 	 * ddi_get_soft_state() fails.  The implication seems to be
8908 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8909 	 */
8910 	ddi_soft_state_free(sd_state, instance);
8911 
8912 probe_failed:
8913 	scsi_unprobe(devp);
8914 #ifdef SDDEBUG
8915 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8916 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8917 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8918 		    (void *)un);
8919 	}
8920 #endif
8921 	return (DDI_FAILURE);
8922 }
8923 
8924 
8925 /*
8926  *    Function: sd_unit_detach
8927  *
8928  * Description: Performs DDI_DETACH processing for sddetach().
8929  *
8930  * Return Code: DDI_SUCCESS
8931  *		DDI_FAILURE
8932  *
8933  *     Context: Kernel thread context
8934  */
8935 
8936 static int
8937 sd_unit_detach(dev_info_t *devi)
8938 {
8939 	struct scsi_device	*devp;
8940 	struct sd_lun		*un;
8941 	int			i;
8942 	dev_t			dev;
8943 	int			instance = ddi_get_instance(devi);
8944 
8945 	mutex_enter(&sd_detach_mutex);
8946 
8947 	/*
8948 	 * Fail the detach for any of the following:
8949 	 *  - Unable to get the sd_lun struct for the instance
8950 	 *  - A layered driver has an outstanding open on the instance
8951 	 *  - Another thread is already detaching this instance
8952 	 *  - Another thread is currently performing an open
8953 	 */
8954 	devp = ddi_get_driver_private(devi);
8955 	if ((devp == NULL) ||
8956 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8957 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8958 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8959 		mutex_exit(&sd_detach_mutex);
8960 		return (DDI_FAILURE);
8961 	}
8962 
8963 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8964 
8965 	/*
8966 	 * Mark this instance as currently in a detach, to inhibit any
8967 	 * opens from a layered driver.
8968 	 */
8969 	un->un_detach_count++;
8970 	mutex_exit(&sd_detach_mutex);
8971 
8972 	dev = sd_make_device(SD_DEVINFO(un));
8973 
8974 	_NOTE(COMPETING_THREADS_NOW);
8975 
8976 	mutex_enter(SD_MUTEX(un));
8977 
8978 	/*
8979 	 * Fail the detach if there are any outstanding layered
8980 	 * opens on this device.
8981 	 */
8982 	for (i = 0; i < NDKMAP; i++) {
8983 		if (un->un_ocmap.lyropen[i] != 0) {
8984 			goto err_notclosed;
8985 		}
8986 	}
8987 
8988 	/*
8989 	 * Verify there are NO outstanding commands issued to this device.
8990 	 * i.e., un_ncmds_in_transport == 0.
8991 	 * It's possible to have outstanding commands through the physio
8992 	 * code path, even though everything's closed.
8993 	 */
8994 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8995 	    (un->un_direct_priority_timeid != NULL) ||
8996 	    (un->un_state == SD_STATE_RWAIT)) {
8997 		mutex_exit(SD_MUTEX(un));
8998 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8999 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
9000 		goto err_stillbusy;
9001 	}
9002 
9003 	/*
9004 	 * If we have the device reserved, release the reservation.
9005 	 */
9006 	if ((un->un_resvd_status & SD_RESERVE) &&
9007 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
9008 		mutex_exit(SD_MUTEX(un));
9009 		/*
9010 		 * Note: sd_reserve_release sends a command to the device
9011 		 * via the sd_ioctlcmd() path, and can sleep.
9012 		 */
9013 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
9014 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9015 			    "sd_dr_detach: Cannot release reservation \n");
9016 		}
9017 	} else {
9018 		mutex_exit(SD_MUTEX(un));
9019 	}
9020 
9021 	/*
9022 	 * Untimeout any reserve recover, throttle reset, restart unit
9023 	 * and delayed broadcast timeout threads. Protect the timeout pointers
9024 	 * from getting nulled by their callback functions.
9025 	 */
9026 	mutex_enter(SD_MUTEX(un));
9027 	if (un->un_resvd_timeid != NULL) {
9028 		timeout_id_t temp_id = un->un_resvd_timeid;
9029 		un->un_resvd_timeid = NULL;
9030 		mutex_exit(SD_MUTEX(un));
9031 		(void) untimeout(temp_id);
9032 		mutex_enter(SD_MUTEX(un));
9033 	}
9034 
9035 	if (un->un_reset_throttle_timeid != NULL) {
9036 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
9037 		un->un_reset_throttle_timeid = NULL;
9038 		mutex_exit(SD_MUTEX(un));
9039 		(void) untimeout(temp_id);
9040 		mutex_enter(SD_MUTEX(un));
9041 	}
9042 
9043 	if (un->un_startstop_timeid != NULL) {
9044 		timeout_id_t temp_id = un->un_startstop_timeid;
9045 		un->un_startstop_timeid = NULL;
9046 		mutex_exit(SD_MUTEX(un));
9047 		(void) untimeout(temp_id);
9048 		mutex_enter(SD_MUTEX(un));
9049 	}
9050 
9051 	if (un->un_dcvb_timeid != NULL) {
9052 		timeout_id_t temp_id = un->un_dcvb_timeid;
9053 		un->un_dcvb_timeid = NULL;
9054 		mutex_exit(SD_MUTEX(un));
9055 		(void) untimeout(temp_id);
9056 	} else {
9057 		mutex_exit(SD_MUTEX(un));
9058 	}
9059 
9060 	/* Remove any pending reservation reclaim requests for this device */
9061 	sd_rmv_resv_reclaim_req(dev);
9062 
9063 	mutex_enter(SD_MUTEX(un));
9064 
9065 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
9066 	if (un->un_direct_priority_timeid != NULL) {
9067 		timeout_id_t temp_id = un->un_direct_priority_timeid;
9068 		un->un_direct_priority_timeid = NULL;
9069 		mutex_exit(SD_MUTEX(un));
9070 		(void) untimeout(temp_id);
9071 		mutex_enter(SD_MUTEX(un));
9072 	}
9073 
9074 	/* Cancel any active multi-host disk watch thread requests */
9075 	if (un->un_mhd_token != NULL) {
9076 		mutex_exit(SD_MUTEX(un));
9077 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
9078 		if (scsi_watch_request_terminate(un->un_mhd_token,
9079 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9080 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9081 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
9082 			/*
9083 			 * Note: We are returning here after having removed
9084 			 * some driver timeouts above. This is consistent with
9085 			 * the legacy implementation but perhaps the watch
9086 			 * terminate call should be made with the wait flag set.
9087 			 */
9088 			goto err_stillbusy;
9089 		}
9090 		mutex_enter(SD_MUTEX(un));
9091 		un->un_mhd_token = NULL;
9092 	}
9093 
9094 	if (un->un_swr_token != NULL) {
9095 		mutex_exit(SD_MUTEX(un));
9096 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
9097 		if (scsi_watch_request_terminate(un->un_swr_token,
9098 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9099 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9100 			    "sd_dr_detach: Cannot cancel swr watch request\n");
9101 			/*
9102 			 * Note: We are returning here after having removed
9103 			 * some driver timeouts above. This is consistent with
9104 			 * the legacy implementation but perhaps the watch
9105 			 * terminate call should be made with the wait flag set.
9106 			 */
9107 			goto err_stillbusy;
9108 		}
9109 		mutex_enter(SD_MUTEX(un));
9110 		un->un_swr_token = NULL;
9111 	}
9112 
9113 	mutex_exit(SD_MUTEX(un));
9114 
9115 	/*
9116 	 * Clear any scsi_reset_notifies. This is safe to do even
9117 	 * if we have not registered one.
9118 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
9119 	 */
9120 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
9121 	    sd_mhd_reset_notify_cb, (caddr_t)un);
9122 
9123 	/*
9124 	 * Protect the timeout pointers from getting nulled by
9125 	 * their callback functions during the cancellation process.
9126 	 * In such a scenario untimeout can be invoked with a null value.
9127 	 */
9128 	_NOTE(NO_COMPETING_THREADS_NOW);
9129 
9130 	mutex_enter(&un->un_pm_mutex);
9131 	if (un->un_pm_idle_timeid != NULL) {
9132 		timeout_id_t temp_id = un->un_pm_idle_timeid;
9133 		un->un_pm_idle_timeid = NULL;
9134 		mutex_exit(&un->un_pm_mutex);
9135 
9136 		/*
9137 		 * Timeout is active; cancel it.
9138 		 * Note that it'll never be active on a device
9139 		 * that does not support PM; therefore we don't
9140 		 * have to check before calling pm_idle_component.
9141 		 */
9142 		(void) untimeout(temp_id);
9143 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9144 		mutex_enter(&un->un_pm_mutex);
9145 	}
9146 
9147 	/*
9148 	 * Check whether there is already a timeout scheduled for power
9149 	 * management. If yes, then don't lower the power here; that's
9150 	 * the timeout handler's job.
9151 	 */
9152 	if (un->un_pm_timeid != NULL) {
9153 		timeout_id_t temp_id = un->un_pm_timeid;
9154 		un->un_pm_timeid = NULL;
9155 		mutex_exit(&un->un_pm_mutex);
9156 		/*
9157 		 * Timeout is active; cancel it.
9158 		 * Note that it'll never be active on a device
9159 		 * that does not support PM; therefore we don't
9160 		 * have to check before calling pm_idle_component.
9161 		 */
9162 		(void) untimeout(temp_id);
9163 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9164 
9165 	} else {
9166 		mutex_exit(&un->un_pm_mutex);
9167 		if ((un->un_f_pm_is_enabled == TRUE) &&
9168 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
9169 		    DDI_SUCCESS)) {
9170 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9171 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
9172 			/*
9173 			 * Fix for bug: 4297749, item # 13
9174 			 * The above test now includes a check to see if PM is
9175 			 * supported by this device before calling
9176 			 * pm_lower_power().
9177 			 * Note, the following is not dead code. The call to
9178 			 * pm_lower_power above will generate a call back into
9179 			 * our sdpower routine which might result in a timeout
9180 			 * handler getting activated. Therefore the following
9181 			 * code is valid and necessary.
9182 			 */
9183 			mutex_enter(&un->un_pm_mutex);
9184 			if (un->un_pm_timeid != NULL) {
9185 				timeout_id_t temp_id = un->un_pm_timeid;
9186 				un->un_pm_timeid = NULL;
9187 				mutex_exit(&un->un_pm_mutex);
9188 				(void) untimeout(temp_id);
9189 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9190 			} else {
9191 				mutex_exit(&un->un_pm_mutex);
9192 			}
9193 		}
9194 	}
9195 
9196 	/*
9197 	 * Cleanup from the scsi_ifsetcap() calls (437868)
9198 	 * Relocated here from above to be after the call to
9199 	 * pm_lower_power, which was getting errors.
9200 	 */
9201 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9202 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9203 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9204 
9205 	if (un->un_f_is_fibre == FALSE) {
9206 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9207 	}
9208 
9209 	/*
9210 	 * Remove any event callbacks, fibre only
9211 	 */
9212 	if (un->un_f_is_fibre == TRUE) {
9213 		if ((un->un_insert_event != NULL) &&
9214 			(ddi_remove_event_handler(un->un_insert_cb_id) !=
9215 				DDI_SUCCESS)) {
9216 			/*
9217 			 * Note: We are returning here after having done
9218 			 * substantial cleanup above. This is consistent
9219 			 * with the legacy implementation but this may not
9220 			 * be the right thing to do.
9221 			 */
9222 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9223 				"sd_dr_detach: Cannot cancel insert event\n");
9224 			goto err_remove_event;
9225 		}
9226 		un->un_insert_event = NULL;
9227 
9228 		if ((un->un_remove_event != NULL) &&
9229 			(ddi_remove_event_handler(un->un_remove_cb_id) !=
9230 				DDI_SUCCESS)) {
9231 			/*
9232 			 * Note: We are returning here after having done
9233 			 * substantial cleanup above. This is consistent
9234 			 * with the legacy implementation but this may not
9235 			 * be the right thing to do.
9236 			 */
9237 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9238 				"sd_dr_detach: Cannot cancel remove event\n");
9239 			goto err_remove_event;
9240 		}
9241 		un->un_remove_event = NULL;
9242 	}
9243 
9244 	/* Do not free the softstate if the callback routine is active */
9245 	sd_sync_with_callback(un);
9246 
9247 	/*
9248 	 * Hold the detach mutex here, to make sure that no other threads ever
9249 	 * can access a (partially) freed soft state structure.
9250 	 */
9251 	mutex_enter(&sd_detach_mutex);
9252 
9253 	/*
9254 	 * Clean up the soft state struct.
9255 	 * Cleanup is done in reverse order of allocs/inits.
9256 	 * At this point there should be no competing threads anymore.
9257 	 */
9258 
9259 	/* Unregister and free device id. */
9260 	ddi_devid_unregister(devi);
9261 	if (un->un_devid) {
9262 		ddi_devid_free(un->un_devid);
9263 		un->un_devid = NULL;
9264 	}
9265 
9266 	/*
9267 	 * Destroy wmap cache if it exists.
9268 	 */
9269 	if (un->un_wm_cache != NULL) {
9270 		kmem_cache_destroy(un->un_wm_cache);
9271 		un->un_wm_cache = NULL;
9272 	}
9273 
9274 	/* Remove minor nodes */
9275 	ddi_remove_minor_node(devi, NULL);
9276 
9277 	/*
9278 	 * kstat cleanup is done in detach for all device types (4363169).
9279 	 * We do not want to fail detach if the device kstats are not deleted
9280 	 * since there is confusion about the devo_refcnt for the device.
9281 	 * We just delete the kstats and let detach complete successfully.
9282 	 */
9283 	if (un->un_stats != NULL) {
9284 		kstat_delete(un->un_stats);
9285 		un->un_stats = NULL;
9286 	}
9287 	if (un->un_errstats != NULL) {
9288 		kstat_delete(un->un_errstats);
9289 		un->un_errstats = NULL;
9290 	}
9291 
9292 	/* Remove partition stats */
9293 	if (un->un_f_pkstats_enabled) {
9294 		for (i = 0; i < NSDMAP; i++) {
9295 			if (un->un_pstats[i] != NULL) {
9296 				kstat_delete(un->un_pstats[i]);
9297 				un->un_pstats[i] = NULL;
9298 			}
9299 		}
9300 	}
9301 
9302 	/* Remove xbuf registration */
9303 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9304 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9305 
9306 	/* Remove driver properties */
9307 	ddi_prop_remove_all(devi);
9308 
9309 	mutex_destroy(&un->un_pm_mutex);
9310 	cv_destroy(&un->un_pm_busy_cv);
9311 
9312 	cv_destroy(&un->un_wcc_cv);
9313 
9314 	/* Open/close semaphore */
9315 	sema_destroy(&un->un_semoclose);
9316 
9317 	/* Removable media condvar. */
9318 	cv_destroy(&un->un_state_cv);
9319 
9320 	/* Suspend/resume condvar. */
9321 	cv_destroy(&un->un_suspend_cv);
9322 	cv_destroy(&un->un_disk_busy_cv);
9323 
9324 	sd_free_rqs(un);
9325 
9326 	/* Free up soft state */
9327 	devp->sd_private = NULL;
9328 	bzero(un, sizeof (struct sd_lun));
9329 	ddi_soft_state_free(sd_state, instance);
9330 
9331 	mutex_exit(&sd_detach_mutex);
9332 
9333 	/* This frees up the INQUIRY data associated with the device. */
9334 	scsi_unprobe(devp);
9335 
9336 	return (DDI_SUCCESS);
9337 
9338 err_notclosed:
9339 	mutex_exit(SD_MUTEX(un));
9340 
9341 err_stillbusy:
9342 	_NOTE(NO_COMPETING_THREADS_NOW);
9343 
9344 err_remove_event:
9345 	mutex_enter(&sd_detach_mutex);
9346 	un->un_detach_count--;
9347 	mutex_exit(&sd_detach_mutex);
9348 
9349 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9350 	return (DDI_FAILURE);
9351 }
9352 
9353 
9354 /*
9355  * Driver minor node structure and data table
9356  */
9357 struct driver_minor_data {
9358 	char	*name;
9359 	minor_t	minor;
9360 	int	type;
9361 };
9362 
9363 static struct driver_minor_data sd_minor_data[] = {
9364 	{"a", 0, S_IFBLK},
9365 	{"b", 1, S_IFBLK},
9366 	{"c", 2, S_IFBLK},
9367 	{"d", 3, S_IFBLK},
9368 	{"e", 4, S_IFBLK},
9369 	{"f", 5, S_IFBLK},
9370 	{"g", 6, S_IFBLK},
9371 	{"h", 7, S_IFBLK},
9372 #if defined(_SUNOS_VTOC_16)
9373 	{"i", 8, S_IFBLK},
9374 	{"j", 9, S_IFBLK},
9375 	{"k", 10, S_IFBLK},
9376 	{"l", 11, S_IFBLK},
9377 	{"m", 12, S_IFBLK},
9378 	{"n", 13, S_IFBLK},
9379 	{"o", 14, S_IFBLK},
9380 	{"p", 15, S_IFBLK},
9381 #endif			/* defined(_SUNOS_VTOC_16) */
9382 #if defined(_FIRMWARE_NEEDS_FDISK)
9383 	{"q", 16, S_IFBLK},
9384 	{"r", 17, S_IFBLK},
9385 	{"s", 18, S_IFBLK},
9386 	{"t", 19, S_IFBLK},
9387 	{"u", 20, S_IFBLK},
9388 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9389 	{"a,raw", 0, S_IFCHR},
9390 	{"b,raw", 1, S_IFCHR},
9391 	{"c,raw", 2, S_IFCHR},
9392 	{"d,raw", 3, S_IFCHR},
9393 	{"e,raw", 4, S_IFCHR},
9394 	{"f,raw", 5, S_IFCHR},
9395 	{"g,raw", 6, S_IFCHR},
9396 	{"h,raw", 7, S_IFCHR},
9397 #if defined(_SUNOS_VTOC_16)
9398 	{"i,raw", 8, S_IFCHR},
9399 	{"j,raw", 9, S_IFCHR},
9400 	{"k,raw", 10, S_IFCHR},
9401 	{"l,raw", 11, S_IFCHR},
9402 	{"m,raw", 12, S_IFCHR},
9403 	{"n,raw", 13, S_IFCHR},
9404 	{"o,raw", 14, S_IFCHR},
9405 	{"p,raw", 15, S_IFCHR},
9406 #endif			/* defined(_SUNOS_VTOC_16) */
9407 #if defined(_FIRMWARE_NEEDS_FDISK)
9408 	{"q,raw", 16, S_IFCHR},
9409 	{"r,raw", 17, S_IFCHR},
9410 	{"s,raw", 18, S_IFCHR},
9411 	{"t,raw", 19, S_IFCHR},
9412 	{"u,raw", 20, S_IFCHR},
9413 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9414 	{0}
9415 };
9416 
9417 static struct driver_minor_data sd_minor_data_efi[] = {
9418 	{"a", 0, S_IFBLK},
9419 	{"b", 1, S_IFBLK},
9420 	{"c", 2, S_IFBLK},
9421 	{"d", 3, S_IFBLK},
9422 	{"e", 4, S_IFBLK},
9423 	{"f", 5, S_IFBLK},
9424 	{"g", 6, S_IFBLK},
9425 	{"wd", 7, S_IFBLK},
9426 #if defined(_FIRMWARE_NEEDS_FDISK)
9427 	{"q", 16, S_IFBLK},
9428 	{"r", 17, S_IFBLK},
9429 	{"s", 18, S_IFBLK},
9430 	{"t", 19, S_IFBLK},
9431 	{"u", 20, S_IFBLK},
9432 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9433 	{"a,raw", 0, S_IFCHR},
9434 	{"b,raw", 1, S_IFCHR},
9435 	{"c,raw", 2, S_IFCHR},
9436 	{"d,raw", 3, S_IFCHR},
9437 	{"e,raw", 4, S_IFCHR},
9438 	{"f,raw", 5, S_IFCHR},
9439 	{"g,raw", 6, S_IFCHR},
9440 	{"wd,raw", 7, S_IFCHR},
9441 #if defined(_FIRMWARE_NEEDS_FDISK)
9442 	{"q,raw", 16, S_IFCHR},
9443 	{"r,raw", 17, S_IFCHR},
9444 	{"s,raw", 18, S_IFCHR},
9445 	{"t,raw", 19, S_IFCHR},
9446 	{"u,raw", 20, S_IFCHR},
9447 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9448 	{0}
9449 };
9450 
9451 
9452 /*
9453  *    Function: sd_create_minor_nodes
9454  *
9455  * Description: Create the minor device nodes for the instance.
9456  *
9457  *   Arguments: un - driver soft state (unit) structure
9458  *		devi - pointer to device info structure
9459  *
9460  * Return Code: DDI_SUCCESS
9461  *		DDI_FAILURE
9462  *
9463  *     Context: Kernel thread context
9464  */
9465 
9466 static int
9467 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9468 {
9469 	struct driver_minor_data	*dmdp;
9470 	struct scsi_device		*devp;
9471 	int				instance;
9472 	char				name[48];
9473 
9474 	ASSERT(un != NULL);
9475 	devp = ddi_get_driver_private(devi);
9476 	instance = ddi_get_instance(devp->sd_dev);
9477 
9478 	/*
9479 	 * Create all the minor nodes for this target.
9480 	 */
9481 	if (un->un_blockcount > DK_MAX_BLOCKS)
9482 		dmdp = sd_minor_data_efi;
9483 	else
9484 		dmdp = sd_minor_data;
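	/*
	 * Each node's minor number encodes both the instance and the
	 * partition: (instance << SDUNIT_SHIFT) | partition. For example,
	 * partition "a" (minor index 0) of instance 1 maps to minor
	 * number (1 << SDUNIT_SHIFT).
	 */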
9485 	while (dmdp->name != NULL) {
9486 
9487 		(void) sprintf(name, "%s", dmdp->name);
9488 
9489 		if (ddi_create_minor_node(devi, name, dmdp->type,
9490 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9491 		    un->un_node_type, NULL) == DDI_FAILURE) {
9492 			/*
9493 			 * Clean up any nodes that may have been created, in
9494 			 * case this fails in the middle of the loop.
9495 			 */
9496 			ddi_remove_minor_node(devi, NULL);
9497 			return (DDI_FAILURE);
9498 		}
9499 		dmdp++;
9500 	}
9501 
9502 	return (DDI_SUCCESS);
9503 }
9504 
9505 
9506 /*
9507  *    Function: sd_create_errstats
9508  *
9509  * Description: This routine instantiates the device error stats.
9510  *
9511  *		Note: During attach the stats are instantiated first so they are
9512  *		available for attach-time routines that utilize the driver
9513  *		iopath to send commands to the device. The stats are initialized
9514  *		separately so data obtained during some attach-time routines is
9515  *		available. (4362483)
9516  *
9517  *   Arguments: un - driver soft state (unit) structure
9518  *		instance - driver instance
9519  *
9520  *     Context: Kernel thread context
9521  */
9522 
9523 static void
9524 sd_create_errstats(struct sd_lun *un, int instance)
9525 {
9526 	struct	sd_errstats	*stp;
9527 	char	kstatmodule_err[KSTAT_STRLEN];
9528 	char	kstatname[KSTAT_STRLEN];
9529 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9530 
9531 	ASSERT(un != NULL);
9532 
9533 	if (un->un_errstats != NULL) {
9534 		return;
9535 	}
9536 
9537 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9538 	    "%serr", sd_label);
9539 	(void) snprintf(kstatname, sizeof (kstatname),
9540 	    "%s%d,err", sd_label, instance);
9541 
9542 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9543 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9544 
9545 	if (un->un_errstats == NULL) {
9546 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9547 		    "sd_create_errstats: Failed kstat_create\n");
9548 		return;
9549 	}
9550 
9551 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9552 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9553 	    KSTAT_DATA_UINT32);
9554 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9555 	    KSTAT_DATA_UINT32);
9556 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9557 	    KSTAT_DATA_UINT32);
9558 	kstat_named_init(&stp->sd_vid,		"Vendor",
9559 	    KSTAT_DATA_CHAR);
9560 	kstat_named_init(&stp->sd_pid,		"Product",
9561 	    KSTAT_DATA_CHAR);
9562 	kstat_named_init(&stp->sd_revision,	"Revision",
9563 	    KSTAT_DATA_CHAR);
9564 	kstat_named_init(&stp->sd_serial,	"Serial No",
9565 	    KSTAT_DATA_CHAR);
9566 	kstat_named_init(&stp->sd_capacity,	"Size",
9567 	    KSTAT_DATA_ULONGLONG);
9568 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9569 	    KSTAT_DATA_UINT32);
9570 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9571 	    KSTAT_DATA_UINT32);
9572 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9573 	    KSTAT_DATA_UINT32);
9574 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9575 	    KSTAT_DATA_UINT32);
9576 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9577 	    KSTAT_DATA_UINT32);
9578 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9579 	    KSTAT_DATA_UINT32);
9580 
9581 	un->un_errstats->ks_private = un;
9582 	un->un_errstats->ks_update  = nulldev;
9583 
9584 	kstat_install(un->un_errstats);
9585 }
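/*
 * (These named kstats are what "iostat -E" reports; the Vendor, Product,
 * Revision, Serial No, and Size entries are filled in later by
 * sd_set_errstats() below.)
 */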
9586 
9587 
9588 /*
9589  *    Function: sd_set_errstats
9590  *
9591  * Description: This routine sets the value of the vendor id, product id,
9592  *		revision, serial number, and capacity device error stats.
9593  *
9594  *		Note: During attach the stats are instantiated first so they are
9595  *		available for attach-time routines that utilize the driver
9596  *		iopath to send commands to the device. The stats are initialized
9597  *		separately so data obtained during some attach-time routines is
9598  *		available. (4362483)
9599  *
9600  *   Arguments: un - driver soft state (unit) structure
9601  *
9602  *     Context: Kernel thread context
9603  */
9604 
9605 static void
9606 sd_set_errstats(struct sd_lun *un)
9607 {
9608 	struct	sd_errstats	*stp;
9609 
9610 	ASSERT(un != NULL);
9611 	ASSERT(un->un_errstats != NULL);
9612 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9613 	ASSERT(stp != NULL);
9614 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9615 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9616 	(void) strncpy(stp->sd_revision.value.c,
9617 	    un->un_sd->sd_inq->inq_revision, 4);
9618 
9619 	/*
9620 	 * All the errstats are persistent across detach/attach,
9621 	 * so reset all the errstats here in case a disk drive
9622 	 * has been hot-replaced, except for unchanged
9623 	 * Sun-qualified drives.
9624 	 */
9625 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
9626 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9627 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
9628 		stp->sd_softerrs.value.ui32 = 0;
9629 		stp->sd_harderrs.value.ui32 = 0;
9630 		stp->sd_transerrs.value.ui32 = 0;
9631 		stp->sd_rq_media_err.value.ui32 = 0;
9632 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
9633 		stp->sd_rq_nodev_err.value.ui32 = 0;
9634 		stp->sd_rq_recov_err.value.ui32 = 0;
9635 		stp->sd_rq_illrq_err.value.ui32 = 0;
9636 		stp->sd_rq_pfa_err.value.ui32 = 0;
9637 	}
9638 
9639 	/*
9640 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9641 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9642 	 * (4376302))
9643 	 */
9644 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9645 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9646 		    sizeof (SD_INQUIRY(un)->inq_serial));
9647 	}
9648 
9649 	if (un->un_f_blockcount_is_valid != TRUE) {
9650 		/*
9651 		 * Set capacity error stat to 0 for no media. This ensures
9652 		 * a valid capacity is displayed in response to 'iostat -E'
9653 		 * when no media is present in the device.
9654 		 */
9655 		stp->sd_capacity.value.ui64 = 0;
9656 	} else {
9657 		/*
9658 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9659 		 * capacity.
9660 		 *
9661 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9662 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9663 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9664 		 */
9665 		stp->sd_capacity.value.ui64 = (uint64_t)
9666 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
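		/*
		 * Example: un_blockcount == 2097152 with a 512-byte
		 * un_sys_blocksize yields 2097152 * 512 = 1073741824
		 * bytes (1 GB) for the Size errstat.
		 */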
9667 	}
9668 }
9669 
9670 
9671 /*
9672  *    Function: sd_set_pstats
9673  *
9674  * Description: This routine instantiates and initializes the partition
9675  *              stats for each partition with more than zero blocks.
9676  *		(4363169)
9677  *
9678  *   Arguments: un - driver soft state (unit) structure
9679  *
9680  *     Context: Kernel thread context
9681  */
9682 
9683 static void
9684 sd_set_pstats(struct sd_lun *un)
9685 {
9686 	char	kstatname[KSTAT_STRLEN];
9687 	int	instance;
9688 	int	i;
9689 
9690 	ASSERT(un != NULL);
9691 
9692 	instance = ddi_get_instance(SD_DEVINFO(un));
9693 
9694 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9695 	for (i = 0; i < NSDMAP; i++) {
9696 		if ((un->un_pstats[i] == NULL) &&
9697 		    (un->un_map[i].dkl_nblk != 0)) {
9698 			(void) snprintf(kstatname, sizeof (kstatname),
9699 			    "%s%d,%s", sd_label, instance,
9700 			    sd_minor_data[i].name);
9701 			un->un_pstats[i] = kstat_create(sd_label,
9702 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9703 			    1, KSTAT_FLAG_PERSISTENT);
9704 			if (un->un_pstats[i] != NULL) {
9705 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9706 				kstat_install(un->un_pstats[i]);
9707 			}
9708 		}
9709 	}
9710 }
9711 
9712 
9713 #if (defined(__fibre))
9714 /*
9715  *    Function: sd_init_event_callbacks
9716  *
9717  * Description: This routine initializes the insertion and removal event
9718  *		callbacks. (fibre only)
9719  *
9720  *   Arguments: un - driver soft state (unit) structure
9721  *
9722  *     Context: Kernel thread context
9723  */
9724 
9725 static void
9726 sd_init_event_callbacks(struct sd_lun *un)
9727 {
9728 	ASSERT(un != NULL);
9729 
9730 	if ((un->un_insert_event == NULL) &&
9731 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9732 	    &un->un_insert_event) == DDI_SUCCESS)) {
9733 		/*
9734 		 * Add the callback for an insertion event
9735 		 */
9736 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9737 		    un->un_insert_event, sd_event_callback, (void *)un,
9738 		    &(un->un_insert_cb_id));
9739 	}
9740 
9741 	if ((un->un_remove_event == NULL) &&
9742 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9743 	    &un->un_remove_event) == DDI_SUCCESS)) {
9744 		/*
9745 		 * Add the callback for a removal event
9746 		 */
9747 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9748 		    un->un_remove_event, sd_event_callback, (void *)un,
9749 		    &(un->un_remove_cb_id));
9750 	}
9751 }
9752 
9753 
9754 /*
9755  *    Function: sd_event_callback
9756  *
9757  * Description: This routine handles insert/remove events (photon). The
9758  *		state is changed to OFFLINE, which can be used to suppress
9759  *		error msgs. (fibre only)
9760  *
9761  *   Arguments: un - driver soft state (unit) structure
9762  *
9763  *     Context: Callout thread context
9764  */
9765 /* ARGSUSED */
9766 static void
9767 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9768     void *bus_impldata)
9769 {
9770 	struct sd_lun *un = (struct sd_lun *)arg;
9771 
9772 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9773 	if (event == un->un_insert_event) {
9774 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9775 		mutex_enter(SD_MUTEX(un));
9776 		if (un->un_state == SD_STATE_OFFLINE) {
9777 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9778 				un->un_state = un->un_last_state;
9779 			} else {
9780 				/*
9781 				 * We have gone through SUSPEND/RESUME while
9782 				 * we were offline. Restore the last state
9783 				 */
9784 				un->un_state = un->un_save_state;
9785 			}
9786 		}
9787 		mutex_exit(SD_MUTEX(un));
9788 
9789 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9790 	} else if (event == un->un_remove_event) {
9791 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9792 		mutex_enter(SD_MUTEX(un));
9793 		/*
9794 		 * We need to handle an event callback that occurs during
9795 		 * the suspend operation, since we don't prevent it.
9796 		 */
9797 		if (un->un_state != SD_STATE_OFFLINE) {
9798 			if (un->un_state != SD_STATE_SUSPENDED) {
9799 				New_state(un, SD_STATE_OFFLINE);
9800 			} else {
9801 				un->un_last_state = SD_STATE_OFFLINE;
9802 			}
9803 		}
9804 		mutex_exit(SD_MUTEX(un));
9805 	} else {
9806 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9807 		    "!Unknown event\n");
9808 	}
9809 
9810 }
9811 #endif
9812 
9813 /*
9814  *    Function: sd_cache_control()
9815  *
9816  * Description: This routine is the driver entry point for setting
9817  *		read and write caching by modifying the WCE (write cache
9818  *		enable) and RCD (read cache disable) bits of mode
9819  *		page 8 (MODEPAGE_CACHING).
9820  *
9821  *   Arguments: un - driver soft state (unit) structure
9822  *		rcd_flag - flag for controlling the read cache
9823  *		wce_flag - flag for controlling the write cache
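 *			(Both flags take SD_CACHE_ENABLE or SD_CACHE_DISABLE;
 *			any other value leaves that cache setting untouched,
 *			since only those two values are acted upon below.)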
9824  *
9825  * Return Code: EIO
9826  *		code returned by sd_send_scsi_MODE_SENSE and
9827  *		sd_send_scsi_MODE_SELECT
9828  *
9829  *     Context: Kernel Thread
9830  */
9831 
9832 static int
9833 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
9834 {
9835 	struct mode_caching	*mode_caching_page;
9836 	uchar_t			*header;
9837 	size_t			buflen;
9838 	int			hdrlen;
9839 	int			bd_len;
9840 	int			rval = 0;
9841 	struct mode_header_grp2	*mhp;
9842 
9843 	ASSERT(un != NULL);
9844 
9845 	/*
9846 	 * Do a test unit ready, otherwise a mode sense may not work if this
9847 	 * is the first command sent to the device after boot.
9848 	 */
9849 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9850 
9851 	if (un->un_f_cfg_is_atapi == TRUE) {
9852 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9853 	} else {
9854 		hdrlen = MODE_HEADER_LENGTH;
9855 	}
9856 
9857 	/*
9858 	 * Allocate memory for the retrieved mode page and its headers.  Set
9859 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
9860 	 * we get all of the mode sense data; otherwise the mode select
9861 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9862 	 */
9863 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9864 	    sizeof (struct mode_cache_scsi3);
9865 
9866 	header = kmem_zalloc(buflen, KM_SLEEP);
9867 
9868 	/* Get the information from the device. */
9869 	if (un->un_f_cfg_is_atapi == TRUE) {
9870 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9871 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9872 	} else {
9873 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9874 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9875 	}
9876 	if (rval != 0) {
9877 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9878 		    "sd_cache_control: Mode Sense Failed\n");
9879 		kmem_free(header, buflen);
9880 		return (rval);
9881 	}
9882 
9883 	/*
9884 	 * Determine size of Block Descriptors in order to locate
9885 	 * the mode page data. ATAPI devices return 0, SCSI devices
9886 	 * should return MODE_BLK_DESC_LENGTH.
9887 	 */
9888 	if (un->un_f_cfg_is_atapi == TRUE) {
9889 		mhp	= (struct mode_header_grp2 *)header;
9890 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9891 	} else {
9892 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9893 	}
9894 
9895 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9896 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9897 		    "sd_cache_control: Mode Sense returned invalid "
9898 		    "block descriptor length\n");
9899 		kmem_free(header, buflen);
9900 		return (EIO);
9901 	}
9902 
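	/*
	 * The returned data is laid out as:
	 *
	 *	+-------------+---------------------+-------------------+
	 *	| mode header | block descriptor(s) | caching mode page |
	 *	|  (hdrlen)   |      (bd_len)       |                   |
	 *	+-------------+---------------------+-------------------+
	 *
	 * so the caching page begins at (header + hdrlen + bd_len).
	 */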
9903 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9904 
9905 	/* Check the relevant bits on successful mode sense. */
9906 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9907 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9908 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9909 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9910 
9911 		size_t sbuflen;
9912 		uchar_t save_pg;
9913 
9914 		/*
9915 		 * Construct select buffer length based on the
9916 		 * length of the sense data returned.
9917 		 */
9918 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
9919 		    sizeof (struct mode_page) +
9920 		    (int)mode_caching_page->mode_page.length;
9921 
9922 		/*
9923 		 * Set the caching bits as requested.
9924 		 */
9925 		if (rcd_flag == SD_CACHE_ENABLE)
9926 			mode_caching_page->rcd = 0;
9927 		else if (rcd_flag == SD_CACHE_DISABLE)
9928 			mode_caching_page->rcd = 1;
9929 
9930 		if (wce_flag == SD_CACHE_ENABLE)
9931 			mode_caching_page->wce = 1;
9932 		else if (wce_flag == SD_CACHE_DISABLE)
9933 			mode_caching_page->wce = 0;
9934 
9935 		/*
9936 		 * Save the page if the mode sense says the
9937 		 * drive supports it.
9938 		 */
9939 		save_pg = mode_caching_page->mode_page.ps ?
9940 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9941 
9942 		/* Clear reserved bits before mode select. */
9943 		mode_caching_page->mode_page.ps = 0;
9944 
9945 		/*
9946 		 * Clear out mode header for mode select.
9947 		 * The rest of the retrieved page will be reused.
9948 		 */
9949 		bzero(header, hdrlen);
9950 
9951 		if (un->un_f_cfg_is_atapi == TRUE) {
9952 			mhp = (struct mode_header_grp2 *)header;
9953 			mhp->bdesc_length_hi = bd_len >> 8;
9954 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9955 		} else {
9956 			((struct mode_header *)header)->bdesc_length = bd_len;
9957 		}
9958 
9959 		/* Issue mode select to change the cache settings */
9960 		if (un->un_f_cfg_is_atapi == TRUE) {
9961 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
9962 			    sbuflen, save_pg, SD_PATH_DIRECT);
9963 		} else {
9964 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
9965 			    sbuflen, save_pg, SD_PATH_DIRECT);
9966 		}
9967 	}
9968 
9969 	kmem_free(header, buflen);
9970 	return (rval);
9971 }
9972 
9973 
9974 /*
9975  *    Function: sd_get_write_cache_enabled()
9976  *
9977  * Description: This routine is the driver entry point for determining if
9978  *		write caching is enabled.  It examines the WCE (write cache
9979  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9980  *
9981  *   Arguments: un - driver soft state (unit) structure
9982  *		is_enabled - pointer to int where write cache enabled
9983  *			state is returned
9984  *			(non-zero -> write cache enabled)
9985  *
9986  * Return Code: EIO
9987  *		code returned by sd_send_scsi_MODE_SENSE
9988  *
9989  *     Context: Kernel Thread
9990  *
9991  * NOTE: If ioctl is added to disable write cache, this sequence should
9992  * be followed so that no locking is required for accesses to
9993  * un->un_f_write_cache_enabled:
9994  * 	do mode select to clear wce
9995  * 	do synchronize cache to flush cache
9996  * 	set un->un_f_write_cache_enabled = FALSE
9997  *
9998  * Conversely, an ioctl to enable the write cache should be done
9999  * in this order:
10000  * 	set un->un_f_write_cache_enabled = TRUE
10001  * 	do mode select to set wce
10002  */
10003 
10004 static int
10005 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
10006 {
10007 	struct mode_caching	*mode_caching_page;
10008 	uchar_t			*header;
10009 	size_t			buflen;
10010 	int			hdrlen;
10011 	int			bd_len;
10012 	int			rval = 0;
10013 
10014 	ASSERT(un != NULL);
10015 	ASSERT(is_enabled != NULL);
10016 
10017 	/* in case of error, flag as enabled */
10018 	*is_enabled = TRUE;
10019 
10020 	/*
10021 	 * Do a test unit ready, otherwise a mode sense may not work if this
10022 	 * is the first command sent to the device after boot.
10023 	 */
10024 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10025 
10026 	if (un->un_f_cfg_is_atapi == TRUE) {
10027 		hdrlen = MODE_HEADER_LENGTH_GRP2;
10028 	} else {
10029 		hdrlen = MODE_HEADER_LENGTH;
10030 	}
10031 
10032 	/*
10033 	 * Allocate memory for the retrieved mode page and its headers.  Set
10034 	 * a pointer to the page itself.
10035 	 */
10036 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
10037 	header = kmem_zalloc(buflen, KM_SLEEP);
10038 
10039 	/* Get the information from the device. */
10040 	if (un->un_f_cfg_is_atapi == TRUE) {
10041 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
10042 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10043 	} else {
10044 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
10045 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10046 	}
10047 	if (rval != 0) {
10048 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
10049 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
10050 		kmem_free(header, buflen);
10051 		return (rval);
10052 	}
10053 
10054 	/*
10055 	 * Determine size of Block Descriptors in order to locate
10056 	 * the mode page data. ATAPI devices return 0, SCSI devices
10057 	 * should return MODE_BLK_DESC_LENGTH.
10058 	 */
10059 	if (un->un_f_cfg_is_atapi == TRUE) {
10060 		struct mode_header_grp2	*mhp;
10061 		mhp	= (struct mode_header_grp2 *)header;
10062 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
10063 	} else {
10064 		bd_len  = ((struct mode_header *)header)->bdesc_length;
10065 	}
10066 
10067 	if (bd_len > MODE_BLK_DESC_LENGTH) {
10068 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10069 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
10070 		    "block descriptor length\n");
10071 		kmem_free(header, buflen);
10072 		return (EIO);
10073 	}
10074 
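	/*
	 * As in sd_cache_control(), the caching page follows the mode
	 * header and any block descriptors; the WCE bit is read from it.
	 */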
10075 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
10076 	*is_enabled = mode_caching_page->wce;
10077 
10078 	kmem_free(header, buflen);
10079 	return (0);
10080 }
10081 
10082 
10083 /*
10084  *    Function: sd_make_device
10085  *
10086  * Description: Utility routine to return the Solaris device number from
10087  *		the data in the device's dev_info structure.
10088  *
10089  * Return Code: The Solaris device number
10090  *
10091  *     Context: Any
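 *
 *       Notes: The instance number occupies the minor-number bits above
 *		SDUNIT_SHIFT; the low bits select the partition and are
 *		left zero here. (Layout inferred from the SDUNIT()/SDPART()
 *		usage elsewhere in this file.)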
10092  */
10093 
10094 static dev_t
10095 sd_make_device(dev_info_t *devi)
10096 {
10097 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
10098 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
10099 }
10100 
10101 
10102 /*
10103  *    Function: sd_pm_entry
10104  *
10105  * Description: Called at the start of a new command to manage power
10106  *		and busy status of a device. This includes determining whether
10107  *		the current power state of the device is sufficient for
10108  *		performing the command or whether it must be changed.
10109  *		The PM framework is notified appropriately.
10110  *		Only with a return status of DDI_SUCCESS will the
10111  *		component be marked busy to the framework.
10112  *
10113  *		All callers of sd_pm_entry must check the return status
10114  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
10115  *		of DDI_FAILURE indicates the device failed to power up.
10116  *		In this case un_pm_count has been adjusted so the result
10117  *		on exit is still powered down, i.e. count is less than 0.
10118  *		Calling sd_pm_exit with this count value hits an ASSERT.
10119  *
10120  * Return Code: DDI_SUCCESS or DDI_FAILURE
10121  *
10122  *     Context: Kernel thread context.
10123  */
10124 
10125 static int
10126 sd_pm_entry(struct sd_lun *un)
10127 {
10128 	int return_status = DDI_SUCCESS;
10129 
10130 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10131 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10132 
10133 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
10134 
10135 	if (un->un_f_pm_is_enabled == FALSE) {
10136 		SD_TRACE(SD_LOG_IO_PM, un,
10137 		    "sd_pm_entry: exiting, PM not enabled\n");
10138 		return (return_status);
10139 	}
10140 
10141 	/*
10142 	 * Just increment a counter if PM is enabled. On the transition from
10143 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
10144 	 * the count with each IO and mark the device as idle when the count
10145 	 * hits 0.
10146 	 *
10147 	 * If the count is less than 0 the device is powered down. If a powered
10148 	 * down device is successfully powered up then the count must be
10149 	 * incremented to reflect the power up. Note that it'll get incremented
10150 	 * a second time to become busy.
10151 	 *
10152 	 * Because the following has the potential to change the device state
10153 	 * and must release the un_pm_mutex to do so, only one thread can be
10154 	 * allowed through at a time.
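	 *
	 * In summary, un_pm_count means:
	 *	< 0	the device is powered down
	 *	  0	the device is powered up and idle
	 *	> 0	the device is powered up with commands outstanding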
10155 	 */
10156 
10157 	mutex_enter(&un->un_pm_mutex);
10158 	while (un->un_pm_busy == TRUE) {
10159 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
10160 	}
10161 	un->un_pm_busy = TRUE;
10162 
10163 	if (un->un_pm_count < 1) {
10164 
10165 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
10166 
10167 		/*
10168 		 * Indicate we are now busy so the framework won't attempt to
10169 		 * power down the device. This call will only fail if either
10170 		 * we passed a bad component number or the device has no
10171 		 * components. Neither of these should ever happen.
10172 		 */
10173 		mutex_exit(&un->un_pm_mutex);
10174 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
10175 		ASSERT(return_status == DDI_SUCCESS);
10176 
10177 		mutex_enter(&un->un_pm_mutex);
10178 
10179 		if (un->un_pm_count < 0) {
10180 			mutex_exit(&un->un_pm_mutex);
10181 
10182 			SD_TRACE(SD_LOG_IO_PM, un,
10183 			    "sd_pm_entry: power up component\n");
10184 
10185 			/*
10186 			 * pm_raise_power will cause sdpower to be called
10187 			 * which brings the device power level to the
10188 			 * desired state, ON in this case. If successful,
10189 			 * un_pm_count and un_power_level will be updated
10190 			 * appropriately.
10191 			 */
10192 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
10193 			    SD_SPINDLE_ON);
10194 
10195 			mutex_enter(&un->un_pm_mutex);
10196 
10197 			if (return_status != DDI_SUCCESS) {
10198 				/*
10199 				 * Power up failed.
10200 				 * Idle the device and adjust the count
10201 				 * so the result on exit is that we're
10202 				 * still powered down, i.e. count is less than 0.
10203 				 */
10204 				SD_TRACE(SD_LOG_IO_PM, un,
10205 				    "sd_pm_entry: power up failed,"
10206 				    " idle the component\n");
10207 
10208 				(void) pm_idle_component(SD_DEVINFO(un), 0);
10209 				un->un_pm_count--;
10210 			} else {
10211 				/*
10212 				 * Device is powered up, verify the
10213 				 * count is non-negative.
10214 				 * This is debug only.
10215 				 */
10216 				ASSERT(un->un_pm_count == 0);
10217 			}
10218 		}
10219 
10220 		if (return_status == DDI_SUCCESS) {
10221 			/*
10222 			 * For performance, now that the device has been tagged
10223 			 * as busy, and it's known to be powered up, update the
10224 			 * chain types to use jump tables that do not include
10225 			 * pm. This significantly lowers the overhead and
10226 			 * therefore improves performance.
10227 			 */
10228 
10229 			mutex_exit(&un->un_pm_mutex);
10230 			mutex_enter(SD_MUTEX(un));
10231 			SD_TRACE(SD_LOG_IO_PM, un,
10232 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10233 			    un->un_uscsi_chain_type);
10234 
10235 			if (un->un_f_non_devbsize_supported) {
10236 				un->un_buf_chain_type =
10237 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10238 			} else {
10239 				un->un_buf_chain_type =
10240 				    SD_CHAIN_INFO_DISK_NO_PM;
10241 			}
10242 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10243 
10244 			SD_TRACE(SD_LOG_IO_PM, un,
10245 			    "             changed  uscsi_chain_type to   %d\n",
10246 			    un->un_uscsi_chain_type);
10247 			mutex_exit(SD_MUTEX(un));
10248 			mutex_enter(&un->un_pm_mutex);
10249 
10250 			if (un->un_pm_idle_timeid == NULL) {
10251 				/* 300 ms. */
10252 				un->un_pm_idle_timeid =
10253 				    timeout(sd_pm_idletimeout_handler, un,
10254 				    (drv_usectohz((clock_t)300000)));
10255 				/*
10256 				 * Include an extra call to busy which keeps the
10257 				 * device busy with respect to the PM layer
10258 				 * until the timer fires, at which time it'll
10259 				 * get the extra idle call.
10260 				 */
10261 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10262 			}
10263 		}
10264 	}
10265 	un->un_pm_busy = FALSE;
10266 	/* Next... */
10267 	cv_signal(&un->un_pm_busy_cv);
10268 
10269 	un->un_pm_count++;
10270 
10271 	SD_TRACE(SD_LOG_IO_PM, un,
10272 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10273 
10274 	mutex_exit(&un->un_pm_mutex);
10275 
10276 	return (return_status);
10277 }
10278 
10279 
10280 /*
10281  *    Function: sd_pm_exit
10282  *
10283  * Description: Called at the completion of a command to manage busy
10284  *		status for the device. If the device becomes idle the
10285  *		PM framework is notified.
10286  *
10287  *     Context: Kernel thread context
10288  */
10289 
10290 static void
10291 sd_pm_exit(struct sd_lun *un)
10292 {
10293 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10294 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10295 
10296 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10297 
10298 	/*
10299 	 * After attach the following flag is only read, so don't
10300 	 * take the penalty of acquiring a mutex for it.
10301 	 */
10302 	if (un->un_f_pm_is_enabled == TRUE) {
10303 
10304 		mutex_enter(&un->un_pm_mutex);
10305 		un->un_pm_count--;
10306 
10307 		SD_TRACE(SD_LOG_IO_PM, un,
10308 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10309 
10310 		ASSERT(un->un_pm_count >= 0);
10311 		if (un->un_pm_count == 0) {
10312 			mutex_exit(&un->un_pm_mutex);
10313 
10314 			SD_TRACE(SD_LOG_IO_PM, un,
10315 			    "sd_pm_exit: idle component\n");
10316 
10317 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10318 
10319 		} else {
10320 			mutex_exit(&un->un_pm_mutex);
10321 		}
10322 	}
10323 
10324 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10325 }
10326 
10327 
10328 /*
10329  *    Function: sdopen
10330  *
10331  * Description: Driver's open(9e) entry point function.
10332  *
10333  *   Arguments: dev_p   - pointer to device number
10334  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10335  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10336  *		cred_p  - user credential pointer
10337  *
10338  * Return Code: EINVAL
10339  *		ENXIO
10340  *		EIO
10341  *		EROFS
10342  *		EBUSY
10343  *
10344  *     Context: Kernel thread context
10345  */
10346 /* ARGSUSED */
10347 static int
10348 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10349 {
10350 	struct sd_lun	*un;
10351 	int		nodelay;
10352 	int		part;
10353 	uint64_t	partmask;
10354 	int		instance;
10355 	dev_t		dev;
10356 	int		rval = EIO;
10357 
10358 	/* Validate the open type */
10359 	if (otyp >= OTYPCNT) {
10360 		return (EINVAL);
10361 	}
10362 
10363 	dev = *dev_p;
10364 	instance = SDUNIT(dev);
10365 	mutex_enter(&sd_detach_mutex);
10366 
10367 	/*
10368 	 * Fail the open if there is no softstate for the instance, or
10369 	 * if another thread somewhere is trying to detach the instance.
10370 	 */
10371 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10372 	    (un->un_detach_count != 0)) {
10373 		mutex_exit(&sd_detach_mutex);
10374 		/*
10375 		 * The probe cache only needs to be cleared when open(9e)
10376 		 * fails with ENXIO (4238046).
10377 		 *
10378 		 * Note: clearing the probe cache unconditionally is ok
10379 		 * with separate sd/ssd binaries; on the x86 platform,
10380 		 * where both parallel SCSI and fibre can be handled in
10381 		 * a single binary, an unconditional clear could be an
10382 		 * issue.
10383 		 */
10384 		sd_scsi_clear_probe_cache();
10385 		return (ENXIO);
10386 	}
10387 
10388 	/*
10389 	 * The un_layer_count is to prevent another thread in specfs from
10390 	 * trying to detach the instance, which can happen when we are
10391 	 * called from a higher-layer driver instead of thru specfs.
10392 	 * This will not be needed when DDI provides a layered driver
10393 	 * interface that allows specfs to know that an instance is in
10394 	 * use by a layered driver & should not be detached.
10395 	 *
10396 	 * Note: the semantics for layered driver opens are exactly one
10397 	 * close for every open.
10398 	 */
10399 	if (otyp == OTYP_LYR) {
10400 		un->un_layer_count++;
10401 	}
10402 
10403 	/*
10404 	 * Keep a count of the current # of opens in progress. This is because
10405 	 * some layered drivers try to call us as a regular open. This can
10406 	 * cause problems that we cannot prevent; however, by keeping this
10407 	 * count we can at least keep our open and detach routines from
10408 	 * racing against each other under such conditions.
10409 	 */
10410 	un->un_opens_in_progress++;
10411 	mutex_exit(&sd_detach_mutex);
10412 
10413 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10414 	part	 = SDPART(dev);
10415 	partmask = 1 << part;
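	/*
	 * partmask selects this partition's bit in the un_exclopen and
	 * regopen bitmaps, e.g. part 2 yields a partmask of 0x04.
	 */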
10416 
10417 	/*
10418 	 * We use a semaphore here in order to serialize
10419 	 * open and close requests on the device.
10420 	 */
10421 	sema_p(&un->un_semoclose);
10422 
10423 	mutex_enter(SD_MUTEX(un));
10424 
10425 	/*
10426 	 * All device accesses go thru sdstrategy() where we check
10427 	 * on suspend status but there could be a scsi_poll command,
10428 	 * which bypasses sdstrategy(), so we need to check pm
10429 	 * status.
10430 	 */
10431 
10432 	if (!nodelay) {
10433 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10434 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10435 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10436 		}
10437 
10438 		mutex_exit(SD_MUTEX(un));
10439 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10440 			rval = EIO;
10441 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10442 			    "sdopen: sd_pm_entry failed\n");
10443 			goto open_failed_with_pm;
10444 		}
10445 		mutex_enter(SD_MUTEX(un));
10446 	}
10447 
10448 	/* check for previous exclusive open */
10449 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10450 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10451 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10452 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10453 
10454 	if (un->un_exclopen & (partmask)) {
10455 		goto excl_open_fail;
10456 	}
10457 
10458 	if (flag & FEXCL) {
10459 		int i;
10460 		if (un->un_ocmap.lyropen[part]) {
10461 			goto excl_open_fail;
10462 		}
10463 		for (i = 0; i < (OTYPCNT - 1); i++) {
10464 			if (un->un_ocmap.regopen[i] & (partmask)) {
10465 				goto excl_open_fail;
10466 			}
10467 		}
10468 	}
10469 
10470 	/*
10471 	 * Check the write permission if this is a removable media device,
10472 	 * NDELAY has not been set, and writable permission is requested.
10473 	 *
10474 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10475 	 * attempt will fail with EIO as part of the I/O processing. This is a
10476 	 * more permissive implementation that allows the open to succeed and
10477 	 * WRITE attempts to fail when appropriate.
10478 	 */
10479 	if (un->un_f_chk_wp_open) {
10480 		if ((flag & FWRITE) && (!nodelay)) {
10481 			mutex_exit(SD_MUTEX(un));
10482 			/*
10483 			 * For a writable DVD drive, defer the check for write
10484 			 * permission until sdstrategy, and do not fail the open
10485 			 * even if FWRITE is set: whether the device is writable
10486 			 * depends upon the media, and the media can change
10487 			 * after the call to open().
10488 			 */
10489 			if (un->un_f_dvdram_writable_device == FALSE) {
10490 				if (ISCD(un) || sr_check_wp(dev)) {
10491 					rval = EROFS;
10492 					mutex_enter(SD_MUTEX(un));
10493 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen:"
10494 					    " write to cd or write protected media\n");
10495 					goto open_fail;
10496 				}
10497 			}
10498 			mutex_enter(SD_MUTEX(un));
10499 		}
10500 	}
10501 
10502 	/*
10503 	 * If opening in NDELAY/NONBLOCK mode, just return; the check that
10504 	 * the disk is ready and has a valid geometry is deferred until later.
10505 	 */
10506 	if (!nodelay) {
10507 		mutex_exit(SD_MUTEX(un));
10508 		rval = sd_ready_and_valid(un);
10509 		mutex_enter(SD_MUTEX(un));
10510 		/*
10511 		 * Fail if the device is not ready or if the number of disk
10512 		 * blocks is zero or negative for non-CD devices.
10513 		 */
10514 		if ((rval != SD_READY_VALID) ||
10515 		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10516 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10517 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10518 			    "device not ready or invalid disk block value\n");
10519 			goto open_fail;
10520 		}
10521 #if defined(__i386) || defined(__amd64)
10522 	} else {
10523 		uchar_t *cp;
10524 		/*
10525 		 * x86 requires special nodelay handling, so that p0 is
10526 		 * always defined and accessible.
10527 		 * Invalidate geometry only if device is not already open.
10528 		 */
10529 		cp = &un->un_ocmap.chkd[0];
10530 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10531 			if (*cp != (uchar_t)0) {
10532 				break;
10533 			}
10534 			cp++;
10535 		}
10536 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10537 			un->un_f_geometry_is_valid = FALSE;
10538 		}
10539 
10540 #endif
10541 	}
10542 
10543 	if (otyp == OTYP_LYR) {
10544 		un->un_ocmap.lyropen[part]++;
10545 	} else {
10546 		un->un_ocmap.regopen[otyp] |= partmask;
10547 	}
10548 
10549 	/* Set up open and exclusive open flags */
10550 	if (flag & FEXCL) {
10551 		un->un_exclopen |= (partmask);
10552 	}
10553 
10554 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10555 	    "open of part %d type %d\n", part, otyp);
10556 
10557 	mutex_exit(SD_MUTEX(un));
10558 	if (!nodelay) {
10559 		sd_pm_exit(un);
10560 	}
10561 
10562 	sema_v(&un->un_semoclose);
10563 
10564 	mutex_enter(&sd_detach_mutex);
10565 	un->un_opens_in_progress--;
10566 	mutex_exit(&sd_detach_mutex);
10567 
10568 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10569 	return (DDI_SUCCESS);
10570 
10571 excl_open_fail:
10572 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10573 	rval = EBUSY;
10574 
10575 open_fail:
10576 	mutex_exit(SD_MUTEX(un));
10577 
10578 	/*
10579 	 * On a failed open we must exit the pm management.
10580 	 */
10581 	if (!nodelay) {
10582 		sd_pm_exit(un);
10583 	}
10584 open_failed_with_pm:
10585 	sema_v(&un->un_semoclose);
10586 
10587 	mutex_enter(&sd_detach_mutex);
10588 	un->un_opens_in_progress--;
10589 	if (otyp == OTYP_LYR) {
10590 		un->un_layer_count--;
10591 	}
10592 	mutex_exit(&sd_detach_mutex);
10593 
10594 	return (rval);
10595 }
10596 
10597 
10598 /*
10599  *    Function: sdclose
10600  *
10601  * Description: Driver's close(9e) entry point function.
10602  *
10603  *   Arguments: dev    - device number
10604  *		flag   - file status flag, informational only
10605  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10606  *		cred_p - user credential pointer
10607  *
10608  * Return Code: ENXIO
10609  *
10610  *     Context: Kernel thread context
10611  */
10612 /* ARGSUSED */
10613 static int
10614 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10615 {
10616 	struct sd_lun	*un;
10617 	uchar_t		*cp;
10618 	int		part;
10619 	int		nodelay;
10620 	int		rval = 0;
10621 
10622 	/* Validate the open type */
10623 	if (otyp >= OTYPCNT) {
10624 		return (ENXIO);
10625 	}
10626 
10627 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10628 		return (ENXIO);
10629 	}
10630 
10631 	part = SDPART(dev);
10632 	nodelay = flag & (FNDELAY | FNONBLOCK);
10633 
10634 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10635 	    "sdclose: close of part %d type %d\n", part, otyp);
10636 
10637 	/*
10638 	 * We use a semaphore here in order to serialize
10639 	 * open and close requests on the device.
10640 	 */
10641 	sema_p(&un->un_semoclose);
10642 
10643 	mutex_enter(SD_MUTEX(un));
10644 
10645 	/* Don't proceed if power is being changed. */
10646 	while (un->un_state == SD_STATE_PM_CHANGING) {
10647 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10648 	}
10649 
10650 	if (un->un_exclopen & (1 << part)) {
10651 		un->un_exclopen &= ~(1 << part);
10652 	}
10653 
10654 	/* Update the open partition map */
10655 	if (otyp == OTYP_LYR) {
10656 		un->un_ocmap.lyropen[part] -= 1;
10657 	} else {
10658 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10659 	}
10660 
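	/*
	 * Scan the open map: chkd[] provides a byte view of the
	 * lyropen/regopen open maps, so an all-zero scan means no opens
	 * of any type remain and this is the last close of the device
	 * (sdopen uses the same scan on x86).
	 */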
10661 	cp = &un->un_ocmap.chkd[0];
10662 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10663 		if (*cp != (uchar_t)0) {
10664 			break;
10665 		}
10666 		cp++;
10667 	}
10668 
10669 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10670 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10671 
10672 		/*
10673 		 * We avoid persistence upon the last close, and set
10674 		 * the throttle back to the maximum.
10675 		 */
10676 		un->un_throttle = un->un_saved_throttle;
10677 
10678 		if (un->un_state == SD_STATE_OFFLINE) {
10679 			if (un->un_f_is_fibre == FALSE) {
10680 				scsi_log(SD_DEVINFO(un), sd_label,
10681 				    CE_WARN, "offline\n");
10682 			}
10683 			un->un_f_geometry_is_valid = FALSE;
10684 
10685 		} else {
10686 			/*
10687 			 * Flush any outstanding writes in NVRAM cache.
10688 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10689 			 * cmd, it may not work for non-Pluto devices.
10690 			 * SYNCHRONIZE CACHE is not required for removables,
10691 			 * except DVD-RAM drives.
10692 			 *
10693 			 * Also note: because SYNCHRONIZE CACHE is currently
10694 			 * the only command issued here that requires the
10695 			 * drive be powered up, only do the power up before
10696 			 * sending the Sync Cache command. If additional
10697 			 * commands are added which require a powered up
10698 			 * drive, the following sequence may have to change.
10699 			 *
10700 			 * And finally, note that parallel SCSI on SPARC
10701 			 * only issues a Sync Cache to DVD-RAM, a newly
10702 			 * supported device.
10703 			 */
10704 #if defined(__i386) || defined(__amd64)
10705 			if (un->un_f_sync_cache_supported ||
10706 			    un->un_f_dvdram_writable_device == TRUE) {
10707 #else
10708 			if (un->un_f_dvdram_writable_device == TRUE) {
10709 #endif
10710 				mutex_exit(SD_MUTEX(un));
10711 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10712 					rval =
10713 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10714 					    NULL);
10715 					/* ignore error if not supported */
10716 					if (rval == ENOTSUP) {
10717 						rval = 0;
10718 					} else if (rval != 0) {
10719 						rval = EIO;
10720 					}
10721 					sd_pm_exit(un);
10722 				} else {
10723 					rval = EIO;
10724 				}
10725 				mutex_enter(SD_MUTEX(un));
10726 			}
10727 
10728 			/*
10729 			 * For devices which support DOOR_LOCK, send an ALLOW
10730 			 * MEDIA REMOVAL command, but don't get upset if it
10731 			 * fails. We need to raise the power of the drive before
10732 			 * we can call sd_send_scsi_DOORLOCK().
10733 			 */
10734 			if (un->un_f_doorlock_supported) {
10735 				mutex_exit(SD_MUTEX(un));
10736 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10737 					rval = sd_send_scsi_DOORLOCK(un,
10738 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10739 
10740 					sd_pm_exit(un);
10741 					if (ISCD(un) && (rval != 0) &&
10742 					    (nodelay != 0)) {
10743 						rval = ENXIO;
10744 					}
10745 				} else {
10746 					rval = EIO;
10747 				}
10748 				mutex_enter(SD_MUTEX(un));
10749 			}
10750 
10751 			/*
10752 			 * If a device has removable media, invalidate all
10753 			 * parameters related to media, such as geometry,
10754 			 * blocksize, and blockcount.
10755 			 */
10756 			if (un->un_f_has_removable_media) {
10757 				sr_ejected(un);
10758 			}
10759 
10760 			/*
10761 			 * Destroy the cache (if it exists) which was
10762 			 * allocated for the write maps since this is
10763 			 * the last close for this media.
10764 			 */
10765 			if (un->un_wm_cache) {
10766 				/*
10767 				 * Check if there are pending commands;
10768 				 * if there are, give a warning and
10769 				 * do not destroy the cache.
10770 				 */
10771 				if (un->un_ncmds_in_driver > 0) {
10772 					scsi_log(SD_DEVINFO(un),
10773 					    sd_label, CE_WARN,
10774 					    "Unable to clean up memory "
10775 					    "because of pending I/O\n");
10776 				} else {
10777 					kmem_cache_destroy(
10778 					    un->un_wm_cache);
10779 					un->un_wm_cache = NULL;
10780 				}
10781 			}
10782 		}
10783 	}
10784 
10785 	mutex_exit(SD_MUTEX(un));
10786 	sema_v(&un->un_semoclose);
10787 
10788 	if (otyp == OTYP_LYR) {
10789 		mutex_enter(&sd_detach_mutex);
10790 		/*
10791 		 * The detach routine may run when the layer count
10792 		 * drops to zero.
10793 		 */
10794 		un->un_layer_count--;
10795 		mutex_exit(&sd_detach_mutex);
10796 	}
10797 
10798 	return (rval);
10799 }
10800 
10801 
10802 /*
10803  *    Function: sd_ready_and_valid
10804  *
10805  * Description: Test if device is ready and has a valid geometry.
10806  *
10807  *   Arguments: dev - device number
10808  *		un  - driver soft state (unit) structure
10809  *
10810  * Return Code: SD_READY_VALID		ready and valid label
10811  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10812  *		SD_NOT_READY_VALID	not ready, no label
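 *		ENOMEM			wmap cache could not be allocated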
10813  *
10814  *     Context: Never called at interrupt context.
10815  */
10816 
10817 static int
10818 sd_ready_and_valid(struct sd_lun *un)
10819 {
10820 	struct sd_errstats	*stp;
10821 	uint64_t		capacity;
10822 	uint_t			lbasize;
10823 	int			rval = SD_READY_VALID;
10824 	char			name_str[48];
10825 
10826 	ASSERT(un != NULL);
10827 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10828 
10829 	mutex_enter(SD_MUTEX(un));
10830 	/*
10831 	 * If a device has removable media, we must check if media is
10832 	 * ready when checking if this device is ready and valid.
10833 	 */
10834 	if (un->un_f_has_removable_media) {
10835 		mutex_exit(SD_MUTEX(un));
10836 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
10837 			rval = SD_NOT_READY_VALID;
10838 			mutex_enter(SD_MUTEX(un));
10839 			goto done;
10840 		}
10841 
10842 		mutex_enter(SD_MUTEX(un));
10843 		if ((un->un_f_geometry_is_valid == FALSE) ||
10844 		    (un->un_f_blockcount_is_valid == FALSE) ||
10845 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10846 
10847 			/* Capacity has to be read on every open. */
10848 			mutex_exit(SD_MUTEX(un));
10849 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
10850 			    &lbasize, SD_PATH_DIRECT) != 0) {
10851 				mutex_enter(SD_MUTEX(un));
10852 				un->un_f_geometry_is_valid = FALSE;
10853 				rval = SD_NOT_READY_VALID;
10854 				goto done;
10855 			} else {
10856 				mutex_enter(SD_MUTEX(un));
10857 				sd_update_block_info(un, lbasize, capacity);
10858 			}
10859 		}
10860 
10861 		/*
10862 		 * Check whether the media in the device is writable.
10863 		 */
10864 		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
10865 			sd_check_for_writable_cd(un);
10866 		}
10867 
10868 	} else {
10869 		/*
10870 		 * Do a test unit ready to clear any unit attention from non-cd
10871 		 * devices.
10872 		 */
10873 		mutex_exit(SD_MUTEX(un));
10874 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10875 		mutex_enter(SD_MUTEX(un));
10876 	}
10877 
10878 
10879 	/*
10880 	 * If this is a non-512-block device, allocate space for
10881 	 * the wmap cache. This is done here since this routine is
10882 	 * called every time the media is changed, and the block
10883 	 * size is a function of the media rather than the device.
10884 	 */
10885 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
10886 		if (!(un->un_wm_cache)) {
10887 			(void) snprintf(name_str, sizeof (name_str),
10888 			    "%s%d_cache",
10889 			    ddi_driver_name(SD_DEVINFO(un)),
10890 			    ddi_get_instance(SD_DEVINFO(un)));
10891 			un->un_wm_cache = kmem_cache_create(
10892 			    name_str, sizeof (struct sd_w_map),
10893 			    8, sd_wm_cache_constructor,
10894 			    sd_wm_cache_destructor, NULL,
10895 			    (void *)un, NULL, 0);
10896 			if (!(un->un_wm_cache)) {
10897 				rval = ENOMEM;
10898 				goto done;
10899 			}
10900 		}
10901 	}
10902 
10903 	if (un->un_state == SD_STATE_NORMAL) {
10904 		/*
10905 		 * If the target is not yet ready here (defined by a TUR
10906 		 * failure), invalidate the geometry and print an 'offline'
10907 		 * message. This is a legacy message, as the state of the
10908 		 * target is not actually changed to SD_STATE_OFFLINE.
10909 		 *
10910 		 * If the TUR fails for EACCES (Reservation Conflict), it
10911 		 * means there actually is nothing wrong with the target that
10912 		 * would require invalidating the geometry, so continue in
10913 		 * that case as if the TUR was successful.
10914 		 */
10915 		int err;
10916 
10917 		mutex_exit(SD_MUTEX(un));
10918 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
10919 		mutex_enter(SD_MUTEX(un));
10920 
10921 		if ((err != 0) && (err != EACCES)) {
10922 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10923 			    "offline\n");
10924 			un->un_f_geometry_is_valid = FALSE;
10925 			rval = SD_NOT_READY_VALID;
10926 			goto done;
10927 		}
10928 	}
10929 
10930 	if (un->un_f_format_in_progress == FALSE) {
10931 		/*
10932 		 * Note: sd_validate_geometry may return TRUE, but that does
10933 		 * not necessarily mean un_f_geometry_is_valid == TRUE!
10934 		 */
10935 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
10936 		if (rval == ENOTSUP) {
10937 			if (un->un_f_geometry_is_valid == TRUE)
10938 			if (un->un_f_geometry_is_valid == TRUE) {
10939 				rval = 0;
10940 			} else {
10941 				goto done;
10942 			}
10943 		}
10944 		if (rval != 0) {
10945 			/*
10946 			 * We don't check the validity of geometry for
10947 			 * CDROMs. Also we assume we have a good label
10948 			 * even if sd_validate_geometry returned ENOMEM.
10949 			 */
10950 			if (!ISCD(un) && rval != ENOMEM) {
10951 				rval = SD_NOT_READY_VALID;
10952 				goto done;
10953 			}
10954 		}
10955 	}
10956 
10957 	/*
10958 	 * If this device supports the DOOR_LOCK command, try to send
10959 	 * it to PREVENT MEDIA REMOVAL, but don't get upset if it
10960 	 * fails. For a CD, however, a failure is an error.
10961 	 */
10962 	if (un->un_f_doorlock_supported) {
10963 		mutex_exit(SD_MUTEX(un));
10964 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
10965 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
10966 			rval = SD_NOT_READY_VALID;
10967 			mutex_enter(SD_MUTEX(un));
10968 			goto done;
10969 		}
10970 		mutex_enter(SD_MUTEX(un));
10971 	}
10972 
10973 	/* The state has changed, inform the media watch routines */
10974 	un->un_mediastate = DKIO_INSERTED;
10975 	cv_broadcast(&un->un_state_cv);
10976 	rval = SD_READY_VALID;
10977 
10978 done:
10979 
10980 	/*
10981 	 * Initialize the capacity kstat value, if no media previously
10982 	 * (capacity kstat is 0) and a media has been inserted
10983 	 * (un_blockcount > 0).
10984 	 */
10985 	if (un->un_errstats != NULL) {
10986 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10987 		if ((stp->sd_capacity.value.ui64 == 0) &&
10988 		    (un->un_f_blockcount_is_valid == TRUE)) {
10989 			stp->sd_capacity.value.ui64 =
10990 			    (uint64_t)((uint64_t)un->un_blockcount *
10991 			    un->un_sys_blocksize);
10992 		}
10993 	}
10994 
10995 	mutex_exit(SD_MUTEX(un));
10996 	return (rval);
10997 }
10998 
10999 
11000 /*
11001  *    Function: sdmin
11002  *
11003  * Description: Routine to limit the size of a data transfer. Used in
11004  *		conjunction with physio(9F).
11005  *
11006  *   Arguments: bp - pointer to the indicated buf(9S) struct.
11007  *
11008  *     Context: Kernel thread context.
11009  */
11010 
11011 static void
11012 sdmin(struct buf *bp)
11013 {
11014 	struct sd_lun	*un;
11015 	int		instance;
11016 
11017 	instance = SDUNIT(bp->b_edev);
11018 
11019 	un = ddi_get_soft_state(sd_state, instance);
11020 	ASSERT(un != NULL);
11021 
11022 	if (bp->b_bcount > un->un_max_xfer_size) {
11023 		bp->b_bcount = un->un_max_xfer_size;
11024 	}
11025 }
11026 
11027 
11028 /*
11029  *    Function: sdread
11030  *
11031  * Description: Driver's read(9e) entry point function.
11032  *
11033  *   Arguments: dev   - device number
11034  *		uio   - structure pointer describing where data is to be stored
11035  *			in user's space
11036  *		cred_p  - user credential pointer
11037  *
11038  * Return Code: ENXIO
11039  *		EIO
11040  *		EINVAL
11041  *		value returned by physio
11042  *
11043  *     Context: Kernel thread context.
11044  */
11045 /* ARGSUSED */
11046 static int
11047 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
11048 {
11049 	struct sd_lun	*un = NULL;
11050 	int		secmask;
11051 	int		err;
11052 
11053 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11054 		return (ENXIO);
11055 	}
11056 
11057 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11058 
11059 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11060 		mutex_enter(SD_MUTEX(un));
11061 		/*
11062 		 * Because the call to sd_ready_and_valid will issue I/O we
11063 		 * must wait here if either the device is suspended or
11064 		 * if its power level is changing.
11065 		 */
11066 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11067 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11068 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11069 		}
11070 		un->un_ncmds_in_driver++;
11071 		mutex_exit(SD_MUTEX(un));
11072 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11073 			mutex_enter(SD_MUTEX(un));
11074 			un->un_ncmds_in_driver--;
11075 			ASSERT(un->un_ncmds_in_driver >= 0);
11076 			mutex_exit(SD_MUTEX(un));
11077 			return (EIO);
11078 		}
11079 		mutex_enter(SD_MUTEX(un));
11080 		un->un_ncmds_in_driver--;
11081 		ASSERT(un->un_ncmds_in_driver >= 0);
11082 		mutex_exit(SD_MUTEX(un));
11083 	}
11084 
11085 	/*
11086 	 * Read requests are restricted to multiples of the system block size.
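	 * For example, with a 512-byte system block size, secmask is 0x1FF,
	 * so both the file offset and the transfer length must be multiples
	 * of 512.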
11087 	 */
11088 	secmask = un->un_sys_blocksize - 1;
11089 
11090 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11091 		SD_ERROR(SD_LOG_READ_WRITE, un,
11092 		    "sdread: file offset not modulo %d\n",
11093 		    un->un_sys_blocksize);
11094 		err = EINVAL;
11095 	} else if (uio->uio_iov->iov_len & (secmask)) {
11096 		SD_ERROR(SD_LOG_READ_WRITE, un,
11097 		    "sdread: transfer length not modulo %d\n",
11098 		    un->un_sys_blocksize);
11099 		err = EINVAL;
11100 	} else {
11101 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
11102 	}
11103 	return (err);
11104 }
11105 
11106 
11107 /*
11108  *    Function: sdwrite
11109  *
11110  * Description: Driver's write(9e) entry point function.
11111  *
11112  *   Arguments: dev   - device number
11113  *		uio   - structure pointer describing where data is stored in
11114  *			user's space
11115  *		cred_p  - user credential pointer
11116  *
11117  * Return Code: ENXIO
11118  *		EIO
11119  *		EINVAL
11120  *		value returned by physio
11121  *
11122  *     Context: Kernel thread context.
11123  */
11124 /* ARGSUSED */
11125 static int
11126 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
11127 {
11128 	struct sd_lun	*un = NULL;
11129 	int		secmask;
11130 	int		err;
11131 
11132 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11133 		return (ENXIO);
11134 	}
11135 
11136 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11137 
11138 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11139 		mutex_enter(SD_MUTEX(un));
11140 		/*
11141 		 * Because the call to sd_ready_and_valid will issue I/O we
11142 		 * must wait here if either the device is suspended or
11143 		 * if its power level is changing.
11144 		 */
11145 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11146 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11147 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11148 		}
11149 		un->un_ncmds_in_driver++;
11150 		mutex_exit(SD_MUTEX(un));
11151 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11152 			mutex_enter(SD_MUTEX(un));
11153 			un->un_ncmds_in_driver--;
11154 			ASSERT(un->un_ncmds_in_driver >= 0);
11155 			mutex_exit(SD_MUTEX(un));
11156 			return (EIO);
11157 		}
11158 		mutex_enter(SD_MUTEX(un));
11159 		un->un_ncmds_in_driver--;
11160 		ASSERT(un->un_ncmds_in_driver >= 0);
11161 		mutex_exit(SD_MUTEX(un));
11162 	}
11163 
11164 	/*
11165 	 * Write requests are restricted to multiples of the system block size.
11166 	 */
11167 	secmask = un->un_sys_blocksize - 1;
11168 
11169 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11170 		SD_ERROR(SD_LOG_READ_WRITE, un,
11171 		    "sdwrite: file offset not modulo %d\n",
11172 		    un->un_sys_blocksize);
11173 		err = EINVAL;
11174 	} else if (uio->uio_iov->iov_len & (secmask)) {
11175 		SD_ERROR(SD_LOG_READ_WRITE, un,
11176 		    "sdwrite: transfer length not modulo %d\n",
11177 		    un->un_sys_blocksize);
11178 		err = EINVAL;
11179 	} else {
11180 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
11181 	}
11182 	return (err);
11183 }
11184 
11185 
11186 /*
11187  *    Function: sdaread
11188  *
11189  * Description: Driver's aread(9e) entry point function.
11190  *
11191  *   Arguments: dev   - device number
11192  *		aio   - structure pointer describing where data is to be stored
11193  *		cred_p  - user credential pointer
11194  *
11195  * Return Code: ENXIO
11196  *		EIO
11197  *		EINVAL
11198  *		value returned by aphysio
11199  *
11200  *     Context: Kernel thread context.
11201  */
11202 /* ARGSUSED */
11203 static int
11204 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11205 {
11206 	struct sd_lun	*un = NULL;
11207 	struct uio	*uio = aio->aio_uio;
11208 	int		secmask;
11209 	int		err;
11210 
11211 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11212 		return (ENXIO);
11213 	}
11214 
11215 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11216 
11217 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11218 		mutex_enter(SD_MUTEX(un));
11219 		/*
11220 		 * Because the call to sd_ready_and_valid will issue I/O we
11221 		 * must wait here if either the device is suspended or
11222 		 * if its power level is changing.
11223 		 */
11224 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11225 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11226 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11227 		}
11228 		un->un_ncmds_in_driver++;
11229 		mutex_exit(SD_MUTEX(un));
11230 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11231 			mutex_enter(SD_MUTEX(un));
11232 			un->un_ncmds_in_driver--;
11233 			ASSERT(un->un_ncmds_in_driver >= 0);
11234 			mutex_exit(SD_MUTEX(un));
11235 			return (EIO);
11236 		}
11237 		mutex_enter(SD_MUTEX(un));
11238 		un->un_ncmds_in_driver--;
11239 		ASSERT(un->un_ncmds_in_driver >= 0);
11240 		mutex_exit(SD_MUTEX(un));
11241 	}
11242 
11243 	/*
11244 	 * Read requests are restricted to multiples of the system block size.
11245 	 */
11246 	secmask = un->un_sys_blocksize - 1;
11247 
11248 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11249 		SD_ERROR(SD_LOG_READ_WRITE, un,
11250 		    "sdaread: file offset not modulo %d\n",
11251 		    un->un_sys_blocksize);
11252 		err = EINVAL;
11253 	} else if (uio->uio_iov->iov_len & (secmask)) {
11254 		SD_ERROR(SD_LOG_READ_WRITE, un,
11255 		    "sdaread: transfer length not modulo %d\n",
11256 		    un->un_sys_blocksize);
11257 		err = EINVAL;
11258 	} else {
11259 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11260 	}
11261 	return (err);
11262 }
11263 
11264 
11265 /*
11266  *    Function: sdawrite
11267  *
11268  * Description: Driver's awrite(9e) entry point function.
11269  *
11270  *   Arguments: dev   - device number
11271  *		aio   - structure pointer describing where data is stored
11272  *		cred_p  - user credential pointer
11273  *
11274  * Return Code: ENXIO
11275  *		EIO
11276  *		EINVAL
11277  *		value returned by aphysio
11278  *
11279  *     Context: Kernel thread context.
11280  */
11281 /* ARGSUSED */
11282 static int
11283 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11284 {
11285 	struct sd_lun	*un = NULL;
11286 	struct uio	*uio = aio->aio_uio;
11287 	int		secmask;
11288 	int		err;
11289 
11290 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11291 		return (ENXIO);
11292 	}
11293 
11294 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11295 
11296 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11297 		mutex_enter(SD_MUTEX(un));
11298 		/*
11299 		 * Because the call to sd_ready_and_valid will issue I/O we
11300 		 * must wait here if either the device is suspended or
11301 		 * if its power level is changing.
11302 		 */
11303 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11304 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11305 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11306 		}
11307 		un->un_ncmds_in_driver++;
11308 		mutex_exit(SD_MUTEX(un));
11309 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11310 			mutex_enter(SD_MUTEX(un));
11311 			un->un_ncmds_in_driver--;
11312 			ASSERT(un->un_ncmds_in_driver >= 0);
11313 			mutex_exit(SD_MUTEX(un));
11314 			return (EIO);
11315 		}
11316 		mutex_enter(SD_MUTEX(un));
11317 		un->un_ncmds_in_driver--;
11318 		ASSERT(un->un_ncmds_in_driver >= 0);
11319 		mutex_exit(SD_MUTEX(un));
11320 	}
11321 
11322 	/*
11323 	 * Write requests are restricted to multiples of the system block size.
11324 	 */
11325 	secmask = un->un_sys_blocksize - 1;
11326 
11327 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11328 		SD_ERROR(SD_LOG_READ_WRITE, un,
11329 		    "sdawrite: file offset not modulo %d\n",
11330 		    un->un_sys_blocksize);
11331 		err = EINVAL;
11332 	} else if (uio->uio_iov->iov_len & (secmask)) {
11333 		SD_ERROR(SD_LOG_READ_WRITE, un,
11334 		    "sdawrite: transfer length not modulo %d\n",
11335 		    un->un_sys_blocksize);
11336 		err = EINVAL;
11337 	} else {
11338 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11339 	}
11340 	return (err);
11341 }
11342 
11343 
11344 
11345 
11346 
11347 /*
11348  * Driver IO processing follows the following sequence:
11349  *
11350  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11351  *         |                |                     ^
11352  *         v                v                     |
11353  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11354  *         |                |                     |                   |
11355  *         v                |                     |                   |
11356  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11357  *         |                |                     ^                   ^
11358  *         v                v                     |                   |
11359  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11360  *         |                |                     |                   |
11361  *     +---+                |                     +------------+      +-------+
11362  *     |                    |                                  |              |
11363  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11364  *     |                    v                                  |              |
11365  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11366  *     |                    |                                  ^              |
11367  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11368  *     |                    v                                  |              |
11369  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11370  *     |                    |                                  ^              |
11371  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11372  *     |                    v                                  |              |
11373  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11374  *     |                    |                                  ^              |
11375  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11376  *     |                    v                                  |              |
11377  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11378  *     |                    |                                  ^              |
11379  *     |                    |                                  |              |
11380  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11381  *                          |                           ^
11382  *                          v                           |
11383  *                   sd_core_iostart()                  |
11384  *                          |                           |
11385  *                          |                           +------>(*destroypkt)()
11386  *                          +-> sd_start_cmds() <-+     |           |
11387  *                          |                     |     |           v
11388  *                          |                     |     |  scsi_destroy_pkt(9F)
11389  *                          |                     |     |
11390  *                          +->(*initpkt)()       +- sdintr()
11391  *                          |  |                        |  |
11392  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11393  *                          |  +-> scsi_setup_cdb(9F)   |
11394  *                          |                           |
11395  *                          +--> scsi_transport(9F)     |
11396  *                                     |                |
11397  *                                     +----> SCSA ---->+
11398  *
11399  *
11400  * This code is based upon the following presumptions:
11401  *
11402  *   - iostart and iodone functions operate on buf(9S) structures. These
11403  *     functions perform the necessary operations on the buf(9S) and pass
11404  *     them along to the next function in the chain by using the macros
11405  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11406  *     (for iodone side functions).
11407  *
11408  *   - The iostart side functions may sleep. The iodone side functions
11409  *     are called under interrupt context and may NOT sleep. Therefore
11410  *     iodone side functions also may not call iostart side functions.
11411  *     (NOTE: iostart side functions should NOT sleep for memory, as
11412  *     this could result in deadlock.)
11413  *
11414  *   - An iostart side function may call its corresponding iodone side
11415  *     function directly (if necessary).
11416  *
11417  *   - In the event of an error, an iostart side function can return a buf(9S)
11418  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11419  *     b_error in the usual way of course).
11420  *
11421  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11422  *     requests to the iostart side functions.  The iostart side functions in
11423  *     this case would be called under the context of a taskq thread, so it's
11424  *     OK for them to block/sleep/spin in this case.
11425  *
11426  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11427  *     pass them along to the next function in the chain.  The corresponding
11428  *     iodone side functions must coalesce the "shadow" bufs and return
11429  *     the "original" buf to the next higher layer.
11430  *
11431  *   - The b_private field of the buf(9S) struct holds a pointer to
11432  *     an sd_xbuf struct, which contains information needed to
11433  *     construct the scsi_pkt for the command.
11434  *
11435  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11436  *     layer must acquire & release the SD_MUTEX(un) as needed.
11437  */
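
/*
 * To make the layering above concrete, the following is a minimal sketch
 * (illustrative only; it is NOT the driver's actual table) of how an
 * iostart chain can be built as an array of function pointers that each
 * layer advances via a "next" macro.  The names example_chain[],
 * example_top(), example_core() and EXAMPLE_NEXT_IOSTART() are
 * hypothetical; the real equivalents in this driver are the iostart/
 * iodone chain tables and the SD_NEXT_IOSTART()/SD_NEXT_IODONE() macros.
 *
 *	typedef void (*example_iofn_t)(int index, struct sd_lun *un,
 *	    struct buf *bp);
 *
 *	static void example_top(int index, struct sd_lun *un, struct buf *bp);
 *	static void example_core(int index, struct sd_lun *un, struct buf *bp);
 *
 *	static example_iofn_t example_chain[] = {
 *		example_top,		(index 0: highest layer)
 *		example_core		(index 1: issues the command)
 *	};
 *
 *	#define	EXAMPLE_NEXT_IOSTART(index, un, bp)	\
 *		((*(example_chain[(index) + 1]))((index) + 1, (un), (bp)))
 *
 *	static void
 *	example_top(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(do this layer's work on bp, then pass it down the chain)
 *		EXAMPLE_NEXT_IOSTART(index, un, bp);
 *	}
 */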
11438 
11439 
11440 /*
11441  * Create taskq for all targets in the system. This is created at
11442  * _init(9E) and destroyed at _fini(9E).
11443  *
11444  * Note: here we set the minalloc to a reasonably high number to ensure that
11445  * we will have an adequate supply of task entries available at interrupt time.
11446  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11447  * sd_taskq_create().  Since we do not want to sleep for allocations at
11448  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11449  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11450  * requests at any one instant in time; see the dispatch sketch below.
11451  */
11452 #define	SD_TASKQ_NUMTHREADS	8
11453 #define	SD_TASKQ_MINALLOC	256
11454 #define	SD_TASKQ_MAXALLOC	256
11455 
11456 static taskq_t	*sd_tq = NULL;
11457 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11458 
11459 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11460 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
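
/*
 * For illustration, a minimal sketch (with hypothetical helper names) of
 * the dispatch pattern this sizing supports: a caller at interrupt time
 * dispatches with KM_NOSLEEP, and if all SD_TASKQ_MAXALLOC entries are
 * already in use, taskq_dispatch() returns 0 rather than sleeping, so
 * the caller simply fails the request.
 *
 *	static void
 *	example_dispatch(struct buf *bp)
 *	{
 *		if (taskq_dispatch(sd_tq, example_task, bp,
 *		    KM_NOSLEEP) == 0) {
 *			(fail the command; do not sleep for an entry)
 *			bp->b_resid = bp->b_bcount;
 *			bioerror(bp, EIO);
 *			biodone(bp);
 *		}
 *	}
 */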
11461 
11462 /*
11463  * The following task queue is being created for the write part of
11464  * read-modify-write of non-512 block size devices.
11465  * Limit the number of threads to 1 for now. This number was chosen because
11466  * the queue currently applies only to DVD-RAM and MO drives, for which
11467  * performance is not the main criterion at this stage.
11468  * Note: It remains to be explored whether a single taskq could be used.
11469  */
11470 #define	SD_WMR_TASKQ_NUMTHREADS	1
11471 static taskq_t	*sd_wmr_tq = NULL;
11472 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11473 
11474 /*
11475  *    Function: sd_taskq_create
11476  *
11477  * Description: Create taskq thread(s) and preallocate task entries
11478  *
11479  * Return Code: None; the created taskqs are stored in sd_tq and sd_wmr_tq.
11480  *
11481  *     Context: Can sleep. Requires blockable context.
11482  *
11483  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11484  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11485  *		- taskq_create() will block for memory, also it will panic
11486  *		  if it cannot create the requested number of threads.
11487  *		- Currently taskq_create() creates threads that cannot be
11488  *		  swapped.
11489  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11490  *		  supply of taskq entries at interrupt time (i.e., so that
11491  *		  we do not have to sleep for memory).
11492  */
11493 
11494 static void
11495 sd_taskq_create(void)
11496 {
11497 	char	taskq_name[TASKQ_NAMELEN];
11498 
11499 	ASSERT(sd_tq == NULL);
11500 	ASSERT(sd_wmr_tq == NULL);
11501 
11502 	(void) snprintf(taskq_name, sizeof (taskq_name),
11503 	    "%s_drv_taskq", sd_label);
11504 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11505 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11506 	    TASKQ_PREPOPULATE));
11507 
11508 	(void) snprintf(taskq_name, sizeof (taskq_name),
11509 	    "%s_rmw_taskq", sd_label);
11510 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11511 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11512 	    TASKQ_PREPOPULATE));
11513 }
11514 
11515 
11516 /*
11517  *    Function: sd_taskq_delete
11518  *
11519  * Description: Complementary cleanup routine for sd_taskq_create().
11520  *
11521  *     Context: Kernel thread context.
11522  */
11523 
11524 static void
11525 sd_taskq_delete(void)
11526 {
11527 	ASSERT(sd_tq != NULL);
11528 	ASSERT(sd_wmr_tq != NULL);
11529 	taskq_destroy(sd_tq);
11530 	taskq_destroy(sd_wmr_tq);
11531 	sd_tq = NULL;
11532 	sd_wmr_tq = NULL;
11533 }
11534 
11535 
11536 /*
11537  *    Function: sdstrategy
11538  *
11539  * Description: Driver's strategy (9E) entry point function.
11540  *
11541  *   Arguments: bp - pointer to buf(9S)
11542  *
11543  * Return Code: Always returns zero
11544  *
11545  *     Context: Kernel thread context.
11546  */
11547 
11548 static int
11549 sdstrategy(struct buf *bp)
11550 {
11551 	struct sd_lun *un;
11552 
11553 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11554 	if (un == NULL) {
11555 		bioerror(bp, EIO);
11556 		bp->b_resid = bp->b_bcount;
11557 		biodone(bp);
11558 		return (0);
11559 	}
11560 	/* As was done in the past, fail new commands if the state is dumping. */
11561 	if (un->un_state == SD_STATE_DUMPING) {
11562 		bioerror(bp, ENXIO);
11563 		bp->b_resid = bp->b_bcount;
11564 		biodone(bp);
11565 		return (0);
11566 	}
11567 
11568 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11569 
11570 	/*
11571 	 * Commands may sneak in while the mutex is released during
11572 	 * DDI_SUSPEND, so we must block new commands here. However, old
11573 	 * commands that are still in the driver at this point should
11574 	 * still be allowed to drain.
11575 	 */
11576 	mutex_enter(SD_MUTEX(un));
11577 	/*
11578 	 * Must wait here if either the device is suspended or
11579 	 * if its power level is changing.
11580 	 */
11581 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11582 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11583 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11584 	}
11585 
11586 	un->un_ncmds_in_driver++;
11587 
11588 	/*
11589 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11590 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11591 	 * context from the HBA's init_pkt routine.
11592 	 */
11593 	if (un->un_f_cfg_is_atapi == TRUE) {
11594 		mutex_exit(SD_MUTEX(un));
11595 		bp_mapin(bp);
11596 		mutex_enter(SD_MUTEX(un));
11597 	}
11598 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11599 	    un->un_ncmds_in_driver);
11600 
11601 	mutex_exit(SD_MUTEX(un));
11602 
11603 	/*
11604 	 * This will (eventually) allocate the sd_xbuf area and
11605 	 * call sd_xbuf_strategy().  We just want to return the
11606 	 * result of ddi_xbuf_qstrategy so that we have an optimized
11607 	 * tail call which saves us a stack frame.
11608 	 */
11609 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11610 }
11611 
11612 
11613 /*
11614  *    Function: sd_xbuf_strategy
11615  *
11616  * Description: Function for initiating IO operations via the
11617  *		ddi_xbuf_qstrategy() mechanism.
11618  *
11619  *     Context: Kernel thread context.
11620  */
11621 
11622 static void
11623 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11624 {
11625 	struct sd_lun *un = arg;
11626 
11627 	ASSERT(bp != NULL);
11628 	ASSERT(xp != NULL);
11629 	ASSERT(un != NULL);
11630 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11631 
11632 	/*
11633 	 * Initialize the fields in the xbuf and save a pointer to the
11634 	 * xbuf in bp->b_private.
11635 	 */
11636 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11637 
11638 	/* Send the buf down the iostart chain */
11639 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11640 }
11641 
11642 
11643 /*
11644  *    Function: sd_xbuf_init
11645  *
11646  * Description: Prepare the given sd_xbuf struct for use.
11647  *
11648  *   Arguments: un - ptr to softstate
11649  *		bp - ptr to associated buf(9S)
11650  *		xp - ptr to associated sd_xbuf
11651  *		chain_type - IO chain type to use:
11652  *			SD_CHAIN_NULL
11653  *			SD_CHAIN_BUFIO
11654  *			SD_CHAIN_USCSI
11655  *			SD_CHAIN_DIRECT
11656  *			SD_CHAIN_DIRECT_PRIORITY
11657  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11658  *			initialization; may be NULL if none.
11659  *
11660  *     Context: Kernel thread context
11661  */
11662 
11663 static void
11664 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11665 	uchar_t chain_type, void *pktinfop)
11666 {
11667 	int index;
11668 
11669 	ASSERT(un != NULL);
11670 	ASSERT(bp != NULL);
11671 	ASSERT(xp != NULL);
11672 
11673 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11674 	    bp, chain_type);
11675 
11676 	xp->xb_un	= un;
11677 	xp->xb_pktp	= NULL;
11678 	xp->xb_pktinfo	= pktinfop;
11679 	xp->xb_private	= bp->b_private;
11680 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11681 
11682 	/*
11683 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11684 	 * upon the specified chain type to use.
11685 	 */
11686 	switch (chain_type) {
11687 	case SD_CHAIN_NULL:
11688 		/*
11689 		 * Fall through to just use the values for the buf type, even
11690 		 * though for the NULL chain these values will never be used.
11691 		 */
11692 		/* FALLTHRU */
11693 	case SD_CHAIN_BUFIO:
11694 		index = un->un_buf_chain_type;
11695 		break;
11696 	case SD_CHAIN_USCSI:
11697 		index = un->un_uscsi_chain_type;
11698 		break;
11699 	case SD_CHAIN_DIRECT:
11700 		index = un->un_direct_chain_type;
11701 		break;
11702 	case SD_CHAIN_DIRECT_PRIORITY:
11703 		index = un->un_priority_chain_type;
11704 		break;
11705 	default:
11706 		/* We're really broken if we ever get here... */
11707 		panic("sd_xbuf_init: illegal chain type!");
11708 		/*NOTREACHED*/
11709 	}
11710 
11711 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11712 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11713 
11714 	/*
11715 	 * It might be a bit easier to simply bzero the entire xbuf above,
11716 	 * but it turns out that since we init a fair number of members anyway,
11717 	 * we save a fair number of cycles by doing explicit assignments of zero.
11718 	 */
11719 	xp->xb_pkt_flags	= 0;
11720 	xp->xb_dma_resid	= 0;
11721 	xp->xb_retry_count	= 0;
11722 	xp->xb_victim_retry_count = 0;
11723 	xp->xb_ua_retry_count	= 0;
11724 	xp->xb_sense_bp		= NULL;
11725 	xp->xb_sense_status	= 0;
11726 	xp->xb_sense_state	= 0;
11727 	xp->xb_sense_resid	= 0;
11728 
11729 	bp->b_private	= xp;
11730 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11731 	bp->b_resid	= 0;
11732 	bp->av_forw	= NULL;
11733 	bp->av_back	= NULL;
11734 	bioerror(bp, 0);
11735 
11736 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11737 }
11738 
11739 
11740 /*
11741  *    Function: sd_uscsi_strategy
11742  *
11743  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11744  *
11745  *   Arguments: bp - buf struct ptr
11746  *
11747  * Return Code: Always returns 0
11748  *
11749  *     Context: Kernel thread context
11750  */
11751 
11752 static int
11753 sd_uscsi_strategy(struct buf *bp)
11754 {
11755 	struct sd_lun		*un;
11756 	struct sd_uscsi_info	*uip;
11757 	struct sd_xbuf		*xp;
11758 	uchar_t			chain_type;
11759 
11760 	ASSERT(bp != NULL);
11761 
11762 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11763 	if (un == NULL) {
11764 		bioerror(bp, EIO);
11765 		bp->b_resid = bp->b_bcount;
11766 		biodone(bp);
11767 		return (0);
11768 	}
11769 
11770 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11771 
11772 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11773 
11774 	mutex_enter(SD_MUTEX(un));
11775 	/*
11776 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11777 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11778 	 * context from the HBA's init_pkt routine.
11779 	 */
11780 	if (un->un_f_cfg_is_atapi == TRUE) {
11781 		mutex_exit(SD_MUTEX(un));
11782 		bp_mapin(bp);
11783 		mutex_enter(SD_MUTEX(un));
11784 	}
11785 	un->un_ncmds_in_driver++;
11786 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11787 	    un->un_ncmds_in_driver);
11788 	mutex_exit(SD_MUTEX(un));
11789 
11790 	/*
11791 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11792 	 */
11793 	ASSERT(bp->b_private != NULL);
11794 	uip = (struct sd_uscsi_info *)bp->b_private;
11795 
11796 	switch (uip->ui_flags) {
11797 	case SD_PATH_DIRECT:
11798 		chain_type = SD_CHAIN_DIRECT;
11799 		break;
11800 	case SD_PATH_DIRECT_PRIORITY:
11801 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11802 		break;
11803 	default:
11804 		chain_type = SD_CHAIN_USCSI;
11805 		break;
11806 	}
11807 
11808 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
11809 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11810 
11811 	/* Use the index obtained within xbuf_init */
11812 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11813 
11814 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11815 
11816 	return (0);
11817 }
11818 
11819 
11820 /*
11821  * These routines perform raw i/o operations.
11822  */
11823 /*ARGSUSED*/
11824 static void
11825 sduscsimin(struct buf *bp)
11826 {
11827 	/*
11828 	 * Do not break up the request: the CDB count would then
11829 	 * be incorrect and data underruns would result (incomplete
11830 	 * read/writes, which would be retried and then fail; see
11831 	 * sdintr()).
11832 	 */
11833 }
11834 
11835 
11836 
11837 /*
11838  *    Function: sd_send_scsi_cmd
11839  *
11840  * Description: Runs a USCSI command for the user (when called through
11841  *		sdioctl), or for the driver.
11842  *
11843  *   Arguments: dev - the dev_t for the device
11844  *		incmd - ptr to a valid uscsi_cmd struct
11845  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11846  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11847  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11848  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11849  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11850  *			to use the USCSI "direct" chain and bypass the normal
11851  *			command waitq.
11852  *
11853  * Return Code: 0 -  successful completion of the given command
11854  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11855  *		ENXIO  - soft state not found for specified dev
11856  *		EINVAL - invalid CDB length, or forced-async setup failed
11857  *		EFAULT - copyin/copyout error
11858  *		return code of biowait(9F) or physio(9F):
11859  *			EIO - IO error, caller may check incmd->uscsi_status
11860  *			ENXIO
11861  *			EACCES - reservation conflict
11862  *
11863  *     Context: Waits for command to complete. Can sleep.
11864  */
11865 
11866 static int
11867 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
11868 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
11869 	int path_flag)
11870 {
11871 	struct sd_uscsi_info	*uip;
11872 	struct uscsi_cmd	*uscmd;
11873 	struct sd_lun	*un;
11874 	struct buf	*bp;
11875 	int	rval;
11876 	int	flags;
11877 
11878 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11879 	if (un == NULL) {
11880 		return (ENXIO);
11881 	}
11882 
11883 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11884 
11885 #ifdef SDDEBUG
11886 	switch (dataspace) {
11887 	case UIO_USERSPACE:
11888 		SD_TRACE(SD_LOG_IO, un,
11889 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
11890 		break;
11891 	case UIO_SYSSPACE:
11892 		SD_TRACE(SD_LOG_IO, un,
11893 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
11894 		break;
11895 	default:
11896 		SD_TRACE(SD_LOG_IO, un,
11897 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
11898 		break;
11899 	}
11900 #endif
11901 
11902 	/*
11903 	 * Perform resets directly; no need to generate a command to do it.
11904 	 */
11905 	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
11906 		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
11907 		    RESET_ALL : RESET_TARGET;
11908 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
11909 		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
11910 			/* Reset attempt was unsuccessful */
11911 			SD_TRACE(SD_LOG_IO, un,
11912 			    "sd_send_scsi_cmd: reset: failure\n");
11913 			return (EIO);
11914 		}
11915 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
11916 		return (0);
11917 	}
11918 
11919 	/* Perfunctory sanity check... */
11920 	if (incmd->uscsi_cdblen <= 0) {
11921 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11922 		    "invalid uscsi_cdblen, returning EINVAL\n");
11923 		return (EINVAL);
11924 	} else if (incmd->uscsi_cdblen > un->un_max_hba_cdb) {
11925 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11926 		    "unsupported uscsi_cdblen, returning EINVAL\n");
11927 		return (EINVAL);
11928 	}
11929 
11930 	/*
11931 	 * In order to not worry about where the uscsi structure came from
11932 	 * (or where the cdb it points to came from) we're going to make
11933 	 * kmem_alloc'd copies of them here. This will also allow reference
11934 	 * to the data they contain long after this process has gone to
11935 	 * sleep and its kernel stack has been unmapped, etc.
11936 	 *
11937 	 * First get some memory for the uscsi_cmd struct and copy the
11938 	 * contents of the given uscsi_cmd struct into it.
11939 	 */
11940 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
11941 	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
11942 
11943 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
11944 	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
11945 
11946 	/*
11947 	 * Now get some space for the CDB, and copy the given CDB into
11948 	 * it. Use ddi_copyin() in case the data is in user space.
11949 	 */
11950 	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
11951 	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
11952 	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
11953 	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
11954 		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
11955 		kmem_free(uscmd, sizeof (struct uscsi_cmd));
11956 		return (EFAULT);
11957 	}
11958 
11959 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
11960 	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
11961 
11962 	bp = getrbuf(KM_SLEEP);
11963 
11964 	/*
11965 	 * Allocate an sd_uscsi_info struct and fill it with the info
11966 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11967 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11968 	 * since we allocate the buf here in this function, we do not
11969 	 * need to preserve the prior contents of b_private.
11970 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11971 	 */
11972 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11973 	uip->ui_flags = path_flag;
11974 	uip->ui_cmdp  = uscmd;
11975 	bp->b_private = uip;
11976 
11977 	/*
11978 	 * Initialize Request Sense buffering, if requested.
11979 	 */
11980 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11981 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11982 		/*
11983 		 * Here uscmd->uscsi_rqbuf currently points to the caller's
11984 		 * buffer, but we replace this with a kernel buffer that
11985 		 * we allocate to use with the sense data. The sense data
11986 		 * (if present) gets copied into this new buffer before the
11987 		 * command is completed.  Then we copy the sense data from
11988 		 * our allocated buf into the caller's buffer below. Note
11989 		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
11990 		 * below to perform the copy back to the caller's buf.
11991 		 */
11992 		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
11993 		if (rqbufspace == UIO_USERSPACE) {
11994 			uscmd->uscsi_rqlen   = SENSE_LENGTH;
11995 			uscmd->uscsi_rqresid = SENSE_LENGTH;
11996 		} else {
11997 			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
11998 			uscmd->uscsi_rqlen   = rlen;
11999 			uscmd->uscsi_rqresid = rlen;
12000 		}
12001 	} else {
12002 		uscmd->uscsi_rqbuf = NULL;
12003 		uscmd->uscsi_rqlen   = 0;
12004 		uscmd->uscsi_rqresid = 0;
12005 	}
12006 
12007 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
12008 	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
12009 
12010 	if (un->un_f_is_fibre == FALSE) {
12011 		/*
12012 		 * Force asynchronous mode, if necessary.  Doing this here
12013 		 * has the unfortunate effect of running other queued
12014 		 * commands async also, but since the main purpose of this
12015 		 * capability is downloading new drive firmware, we can
12016 		 * probably live with it.
12017 		 */
12018 		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
12019 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12020 				== 1) {
12021 				if (scsi_ifsetcap(SD_ADDRESS(un),
12022 					    "synchronous", 0, 1) == 1) {
12023 					SD_TRACE(SD_LOG_IO, un,
12024 					"sd_send_scsi_cmd: forced async ok\n");
12025 				} else {
12026 					SD_TRACE(SD_LOG_IO, un,
12027 					"sd_send_scsi_cmd:\
12028 					    "sd_send_scsi_cmd: "
12029 					    "forced async failed\n");
12030 					goto done;
12031 				}
12032 			}
12033 		}
12034 
12035 		/*
12036 		 * Re-enable synchronous mode, if requested
12037 		 */
12038 		if (uscmd->uscsi_flags & USCSI_SYNC) {
12039 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12040 				== 0) {
12041 				int i = scsi_ifsetcap(SD_ADDRESS(un),
12042 						"synchronous", 1, 1);
12043 				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12044 					"re-enabled sync %s\n",
12045 					(i == 1) ? "ok" : "failed");
12046 			}
12047 		}
12048 	}
12049 
12050 	/*
12051 	 * Commands sent with priority are intended for error recovery
12052 	 * situations, and are not retried.
12053 	 */
12054 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12055 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12056 	}
12057 
12058 	/*
12059 	 * If we're going to do actual I/O, let physio do all the right things
12060 	 */
12061 	if (uscmd->uscsi_buflen != 0) {
12062 		struct iovec	aiov;
12063 		struct uio	auio;
12064 		struct uio	*uio = &auio;
12065 
12066 		bzero(&auio, sizeof (struct uio));
12067 		bzero(&aiov, sizeof (struct iovec));
12068 		aiov.iov_base = uscmd->uscsi_bufaddr;
12069 		aiov.iov_len  = uscmd->uscsi_buflen;
12070 		uio->uio_iov  = &aiov;
12071 
12072 		uio->uio_iovcnt  = 1;
12073 		uio->uio_resid   = uscmd->uscsi_buflen;
12074 		uio->uio_segflg  = dataspace;
12075 
12076 		/*
12077 		 * physio() will block here until the command completes....
12078 		 */
12079 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
12080 
12081 		rval = physio(sd_uscsi_strategy, bp, dev,
12082 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
12083 		    sduscsimin, uio);
12084 
12085 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12086 		    "returned from physio with 0x%x\n", rval);
12087 
12088 	} else {
12089 		/*
12090 		 * We have to mimic what physio would do here! Argh!
12091 		 */
12092 		bp->b_flags  = B_BUSY |
12093 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
12094 		bp->b_edev   = dev;
12095 		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
12096 		bp->b_bcount = 0;
12097 		bp->b_blkno  = 0;
12098 
12099 		SD_TRACE(SD_LOG_IO, un,
12100 		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
12101 
12102 		(void) sd_uscsi_strategy(bp);
12103 
12104 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
12105 
12106 		rval = biowait(bp);
12107 
12108 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12109 		    "returned from  biowait with 0x%x\n", rval);
12110 	}
12111 
12112 done:
12113 
12114 #ifdef SDDEBUG
12115 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12116 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12117 	    uscmd->uscsi_status, uscmd->uscsi_resid);
12118 	if (uscmd->uscsi_bufaddr != NULL) {
12119 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12120 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12121 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12122 		if (dataspace == UIO_SYSSPACE) {
12123 			SD_DUMP_MEMORY(un, SD_LOG_IO,
12124 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12125 			    uscmd->uscsi_buflen, SD_LOG_HEX);
12126 		}
12127 	}
12128 #endif
12129 
12130 	/*
12131 	 * Get the status and residual to return to the caller.
12132 	 */
12133 	incmd->uscsi_status = uscmd->uscsi_status;
12134 	incmd->uscsi_resid  = uscmd->uscsi_resid;
12135 
12136 	/*
12137 	 * If the caller wants sense data, copy back whatever sense data
12138 	 * we may have gotten, and update the relevant rqsense info.
12139 	 */
12140 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12141 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12142 
12143 		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
12144 		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
12145 
12146 		/* Update the Request Sense status and resid */
12147 		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
12148 		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
12149 
12150 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12151 		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
12152 		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
12153 
12154 		/* Copy out the sense data for user processes */
12155 		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
12156 			int flags =
12157 			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
12158 			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
12159 			    rqlen, flags) != 0) {
12160 				rval = EFAULT;
12161 			}
12162 			/*
12163 			 * Note: Can't touch incmd->uscsi_rqbuf so use
12164 			 * uscmd->uscsi_rqbuf instead. They're the same.
12165 			 */
12166 			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12167 			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
12168 			    incmd->uscsi_rqbuf, rqlen);
12169 			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
12170 			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
12171 		}
12172 	}
12173 
12174 	/*
12175 	 * Free allocated resources and return; mapout the buf in case it was
12176 	 * mapped in by a lower layer.
12177 	 */
12178 	bp_mapout(bp);
12179 	freerbuf(bp);
12180 	kmem_free(uip, sizeof (struct sd_uscsi_info));
12181 	if (uscmd->uscsi_rqbuf != NULL) {
12182 		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
12183 	}
12184 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
12185 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
12186 
12187 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
12188 
12189 	return (rval);
12190 }
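
/*
 * For illustration, a minimal sketch of a kernel-internal caller of
 * sd_send_scsi_cmd().  This exact helper does not exist in the driver;
 * it simply shows the calling convention: a TEST UNIT READY (6-byte CDB,
 * opcode SCMD_TEST_UNIT_READY) with all three address-space arguments
 * set to UIO_SYSSPACE, using the "direct" chain described above.
 *
 *	static int
 *	example_test_unit_ready(dev_t dev)
 *	{
 *		struct uscsi_cmd	ucmd;
 *		union scsi_cdb		cdb;
 *		char			sense[SENSE_LENGTH];
 *
 *		bzero(&ucmd, sizeof (ucmd));
 *		bzero(&cdb, sizeof (cdb));
 *		cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *
 *		ucmd.uscsi_cdb		= (caddr_t)&cdb;
 *		ucmd.uscsi_cdblen	= CDB_GROUP0;
 *		ucmd.uscsi_rqbuf	= sense;
 *		ucmd.uscsi_rqlen	= SENSE_LENGTH;
 *		ucmd.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
 *		ucmd.uscsi_timeout	= 60;
 *
 *		return (sd_send_scsi_cmd(dev, &ucmd, UIO_SYSSPACE,
 *		    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT));
 *	}
 */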
12191 
12192 
12193 /*
12194  *    Function: sd_buf_iodone
12195  *
12196  * Description: Frees the sd_xbuf & returns the buf to its originator.
12197  *
12198  *     Context: May be called from interrupt context.
12199  */
12200 /* ARGSUSED */
12201 static void
12202 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12203 {
12204 	struct sd_xbuf *xp;
12205 
12206 	ASSERT(un != NULL);
12207 	ASSERT(bp != NULL);
12208 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12209 
12210 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12211 
12212 	xp = SD_GET_XBUF(bp);
12213 	ASSERT(xp != NULL);
12214 
12215 	mutex_enter(SD_MUTEX(un));
12216 
12217 	/*
12218 	 * Record the time at which the command completed.
12219 	 * This is used to determine whether the device has been
12220 	 * idle long enough to be marked idle to the PM framework.
12221 	 * Doing it here keeps the overhead low, and therefore improves
12222 	 * performance per I/O operation.
12223 	 */
12224 	un->un_pm_idle_time = ddi_get_time();
12225 
12226 	un->un_ncmds_in_driver--;
12227 	ASSERT(un->un_ncmds_in_driver >= 0);
12228 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12229 	    un->un_ncmds_in_driver);
12230 
12231 	mutex_exit(SD_MUTEX(un));
12232 
12233 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
12234 	biodone(bp);				/* bp is gone after this */
12235 
12236 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12237 }
12238 
12239 
12240 /*
12241  *    Function: sd_uscsi_iodone
12242  *
12243  * Description: Frees the sd_xbuf & returns the buf to its originator.
12244  *
12245  *     Context: May be called from interrupt context.
12246  */
12247 /* ARGSUSED */
12248 static void
12249 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12250 {
12251 	struct sd_xbuf *xp;
12252 
12253 	ASSERT(un != NULL);
12254 	ASSERT(bp != NULL);
12255 
12256 	xp = SD_GET_XBUF(bp);
12257 	ASSERT(xp != NULL);
12258 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12259 
12260 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12261 
12262 	bp->b_private = xp->xb_private;
12263 
12264 	mutex_enter(SD_MUTEX(un));
12265 
12266 	/*
12267 	 * Record the time at which the command completed.
12268 	 * This is used to determine whether the device has been
12269 	 * idle long enough to be marked idle to the PM framework.
12270 	 * Doing it here keeps the overhead low, and therefore improves
12271 	 * performance per I/O operation.
12272 	 */
12273 	un->un_pm_idle_time = ddi_get_time();
12274 
12275 	un->un_ncmds_in_driver--;
12276 	ASSERT(un->un_ncmds_in_driver >= 0);
12277 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12278 	    un->un_ncmds_in_driver);
12279 
12280 	mutex_exit(SD_MUTEX(un));
12281 
12282 	kmem_free(xp, sizeof (struct sd_xbuf));
12283 	biodone(bp);
12284 
12285 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12286 }
12287 
12288 
12289 /*
12290  *    Function: sd_mapblockaddr_iostart
12291  *
12292  * Description: Verify the request lies within the partition limits for
12293  *		the indicated minor device.  Issue "overrun" buf if
12294  *		request would exceed partition range.  Converts
12295  *		partition-relative block address to absolute.
12296  *
12297  *     Context: Can sleep
12298  *
12299  *      Issues: This follows what the old code did, in terms of accessing
12300  *		some of the partition info in the unit struct without holding
12301  *		the mutex.  This is a general issue: if the partition info
12302  *		can be altered while IO is in progress... as soon as we send
12303  *		a buf, its partitioning can be invalid before it gets to the
12304  *		device.  Probably the right fix is to move partitioning out
12305  *		of the driver entirely.
12306  */
12307 
12308 static void
12309 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12310 {
12311 	daddr_t	nblocks;	/* #blocks in the given partition */
12312 	daddr_t	blocknum;	/* Block number specified by the buf */
12313 	size_t	requested_nblocks;
12314 	size_t	available_nblocks;
12315 	int	partition;
12316 	diskaddr_t	partition_offset;
12317 	struct sd_xbuf *xp;
12318 
12319 
12320 	ASSERT(un != NULL);
12321 	ASSERT(bp != NULL);
12322 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12323 
12324 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12325 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12326 
12327 	xp = SD_GET_XBUF(bp);
12328 	ASSERT(xp != NULL);
12329 
12330 	/*
12331 	 * If the geometry is not indicated as valid, attempt to access
12332 	 * the unit & verify the geometry/label. This can be the case for
12333 	 * removable-media devices, or if the device was opened in
12334 	 * NDELAY/NONBLOCK mode.
12335 	 */
12336 	if ((un->un_f_geometry_is_valid != TRUE) &&
12337 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12338 		/*
12339 		 * For removable devices it is possible to start an I/O
12340 		 * without media by opening the device in nodelay mode.
12341 		 * Also, for writable CDs there can be many scenarios where
12342 		 * there is no geometry yet but the volume manager tries to
12343 		 * issue a read() just because it can see the TOC on the CD. So
12344 		 * do not print a message for removables.
12345 		 */
12346 		if (!un->un_f_has_removable_media) {
12347 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12348 			    "i/o to invalid geometry\n");
12349 		}
12350 		bioerror(bp, EIO);
12351 		bp->b_resid = bp->b_bcount;
12352 		SD_BEGIN_IODONE(index, un, bp);
12353 		return;
12354 	}
12355 
12356 	partition = SDPART(bp->b_edev);
12357 
12359 	nblocks = un->un_map[partition].dkl_nblk;    /* #blocks in partition */
12360 
12361 	/* Use of a local variable potentially improves performance slightly */
12362 	partition_offset = un->un_offset[partition];
12363 
12364 	/*
12365 	 * blocknum is the starting block number of the request. At this
12366 	 * point it is still relative to the start of the minor device.
12367 	 */
12368 	blocknum = xp->xb_blkno;
12369 
12370 	/*
12371 	 * Legacy: If the starting block number is one past the last block
12372 	 * in the partition, do not set B_ERROR in the buf.
12373 	 */
12374 	if (blocknum == nblocks)  {
12375 		goto error_exit;
12376 	}
12377 
12378 	/*
12379 	 * Confirm that the first block of the request lies within the
12380 	 * partition limits. Also the requested number of bytes must be
12381 	 * a multiple of the system block size.
12382 	 */
12383 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12384 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12385 		bp->b_flags |= B_ERROR;
12386 		goto error_exit;
12387 	}
12388 
12389 	/*
12390 	 * If the requested # blocks exceeds the available # blocks, that
12391 	 * is an overrun of the partition.
12392 	 */
12393 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12394 	available_nblocks = (size_t)(nblocks - blocknum);
12395 	ASSERT(nblocks >= blocknum);
12396 
12397 	if (requested_nblocks > available_nblocks) {
12398 		/*
12399 		 * Allocate an "overrun" buf to allow the request to proceed
12400 		 * for the amount of space available in the partition. The
12401 		 * amount not transferred will be added into the b_resid
12402 		 * when the operation is complete. The overrun buf
12403 		 * replaces the original buf here, and the original buf
12404 		 * is saved inside the overrun buf, for later use.
12405 		 */
12406 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12407 		    (offset_t)(requested_nblocks - available_nblocks));
12408 		size_t count = bp->b_bcount - resid;
12409 		/*
12410 		 * Note: count is unsigned and thus can NEVER be
12411 		 * negative, so ASSERT that the original values are
12412 		 * consistent.
12413 		 */
12414 		ASSERT(bp->b_bcount >= resid);
12415 
12416 		bp = sd_bioclone_alloc(bp, count, blocknum,
12417 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12418 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12419 		ASSERT(xp != NULL);
12420 	}
12421 
12422 	/* At this point there should be no residual for this buf. */
12423 	ASSERT(bp->b_resid == 0);
12424 
12425 	/* Convert the block number to an absolute address. */
12426 	xp->xb_blkno += partition_offset;
12427 
12428 	SD_NEXT_IOSTART(index, un, bp);
12429 
12430 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12431 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12432 
12433 	return;
12434 
12435 error_exit:
12436 	bp->b_resid = bp->b_bcount;
12437 	SD_BEGIN_IODONE(index, un, bp);
12438 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12439 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12440 }
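
/*
 * A worked example of the overrun arithmetic above (assuming a 512-byte
 * system blocksize): suppose the partition holds nblocks = 100 blocks
 * and a request arrives at blocknum = 90 with b_bcount = 20 * 512 bytes.
 * Then requested_nblocks = 20 but available_nblocks = 100 - 90 = 10, so
 * an overrun buf is cloned with count = b_bcount - resid = 10 * 512
 * bytes, and the remaining resid = 10 * 512 bytes is folded back into
 * the original buf's b_resid by sd_mapblockaddr_iodone() when the clone
 * completes.
 */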
12441 
12442 
12443 /*
12444  *    Function: sd_mapblockaddr_iodone
12445  *
12446  * Description: Completion-side processing for partition management.
12447  *
12448  *     Context: May be called under interrupt context
12449  */
12450 
12451 static void
12452 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12453 {
12454 	/* int	partition; */	/* Not used, see below. */
12455 	ASSERT(un != NULL);
12456 	ASSERT(bp != NULL);
12457 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12458 
12459 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12460 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12461 
12462 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12463 		/*
12464 		 * We have an "overrun" buf to deal with...
12465 		 */
12466 		struct sd_xbuf	*xp;
12467 		struct buf	*obp;	/* ptr to the original buf */
12468 
12469 		xp = SD_GET_XBUF(bp);
12470 		ASSERT(xp != NULL);
12471 
12472 		/* Retrieve the pointer to the original buf */
12473 		obp = (struct buf *)xp->xb_private;
12474 		ASSERT(obp != NULL);
12475 
12476 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12477 		bioerror(obp, bp->b_error);
12478 
12479 		sd_bioclone_free(bp);
12480 
12481 		/*
12482 		 * Get back the original buf.
12483 		 * Note that since the restoration of xb_blkno below
12484 		 * was removed, the sd_xbuf is not needed.
12485 		 */
12486 		bp = obp;
12487 		/*
12488 		 * xp = SD_GET_XBUF(bp);
12489 		 * ASSERT(xp != NULL);
12490 		 */
12491 	}
12492 
12493 	/*
12494 	 * Convert xp->xb_blkno back to a minor-device relative value.
12495 	 * Note: this has been commented out, as it is not needed in the
12496 	 * current implementation of the driver (ie, since this function
12497 	 * is at the top of the layering chains, so the info will be
12498 	 * discarded) and it is in the "hot" IO path.
12499 	 *
12500 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12501 	 * xp->xb_blkno -= un->un_offset[partition];
12502 	 */
12503 
12504 	SD_NEXT_IODONE(index, un, bp);
12505 
12506 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12507 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12508 }
12509 
12510 
12511 /*
12512  *    Function: sd_mapblocksize_iostart
12513  *
12514  * Description: Convert between system block size (un->un_sys_blocksize)
12515  *		and target block size (un->un_tgt_blocksize).
12516  *
12517  *     Context: Can sleep to allocate resources.
12518  *
12519  * Assumptions: A higher layer has already performed any partition validation,
12520  *		and converted the xp->xb_blkno to an absolute value relative
12521  *		to the start of the device.
12522  *
12523  *		It is also assumed that the higher layer has implemented
12524  *		an "overrun" mechanism for the case where the request would
12525  *		read/write beyond the end of a partition.  In this case we
12526  *		assume (and ASSERT) that bp->b_resid == 0.
12527  *
12528  *		Note: The implementation for this routine assumes the target
12529  *		block size remains constant between allocation and transport.
12530  */
12531 
12532 static void
12533 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12534 {
12535 	struct sd_mapblocksize_info	*bsp;
12536 	struct sd_xbuf			*xp;
12537 	offset_t first_byte;
12538 	daddr_t	start_block, end_block;
12539 	daddr_t	request_bytes;
12540 	ushort_t is_aligned = FALSE;
12541 
12542 	ASSERT(un != NULL);
12543 	ASSERT(bp != NULL);
12544 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12545 	ASSERT(bp->b_resid == 0);
12546 
12547 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12548 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12549 
12550 	/*
12551 	 * For a non-writable CD, a write request is an error
12552 	 */
12553 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12554 	    (un->un_f_mmc_writable_media == FALSE)) {
12555 		bioerror(bp, EIO);
12556 		bp->b_resid = bp->b_bcount;
12557 		SD_BEGIN_IODONE(index, un, bp);
12558 		return;
12559 	}
12560 
12561 	/*
12562 	 * We do not need a shadow buf if the device is using
12563 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12564 	 * In this case there is no layer-private data block allocated.
12565 	 */
12566 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12567 	    (bp->b_bcount == 0)) {
12568 		goto done;
12569 	}
12570 
12571 #if defined(__i386) || defined(__amd64)
12572 	/* We do not support non-block-aligned transfers for ROD devices */
12573 	ASSERT(!ISROD(un));
12574 #endif
12575 
12576 	xp = SD_GET_XBUF(bp);
12577 	ASSERT(xp != NULL);
12578 
12579 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12580 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12581 	    un->un_tgt_blocksize, un->un_sys_blocksize);
12582 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12583 	    "request start block:0x%x\n", xp->xb_blkno);
12584 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12585 	    "request len:0x%x\n", bp->b_bcount);
12586 
12587 	/*
12588 	 * Allocate the layer-private data area for the mapblocksize layer.
12589 	 * Layers are allowed to use the xp_private member of the sd_xbuf
12590 	 * struct to store the pointer to their layer-private data block, but
12591 	 * each layer also has the responsibility of restoring the prior
12592 	 * contents of xb_private before returning the buf/xbuf to the
12593 	 * higher layer that sent it.
12594 	 *
12595 	 * Here we save the prior contents of xp->xb_private into the
12596 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12597 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12598 	 * the layer-private area and returning the buf/xbuf to the layer
12599 	 * that sent it.
12600 	 *
12601 	 * Note that here we use kmem_zalloc for the allocation as there are
12602 	 * parts of the mapblocksize code that expect certain fields to be
12603 	 * zero unless explicitly set to a required value.
12604 	 */
12605 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12606 	bsp->mbs_oprivate = xp->xb_private;
12607 	xp->xb_private = bsp;
12608 
12609 	/*
12610 	 * This treats the data on the disk (target) as an array of bytes.
12611 	 * first_byte is the byte offset, from the beginning of the device,
12612 	 * to the location of the request. This is converted from a
12613 	 * un->un_sys_blocksize block address to a byte offset, and then back
12614 	 * to a block address based upon a un->un_tgt_blocksize block size.
12615 	 *
12616 	 * xp->xb_blkno should be absolute upon entry into this function,
12617 	 * but it is based upon partitions that use the "system"
12618 	 * block size. It must be adjusted to reflect the block size of
12619 	 * the target.
12620 	 *
12621 	 * Note that end_block is actually the block that follows the last
12622 	 * block of the request, but that's what is needed for the computation.
12623 	 */
12624 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12625 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12626 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12627 	    un->un_tgt_blocksize;
12628 
12629 	/* request_bytes is rounded up to a multiple of the target block size */
12630 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
12631 
12632 	/*
12633 	 * See if the starting address of the request and the request
12634 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12635 	 * then we do not need to allocate a shadow buf to handle the request.
12636 	 */
12637 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12638 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12639 		is_aligned = TRUE;
12640 	}
12641 
12642 	if ((bp->b_flags & B_READ) == 0) {
12643 		/*
12644 		 * Lock the range for a write operation. An aligned request is
12645 		 * considered a simple write; otherwise the request must be a
12646 		 * read-modify-write.
12647 		 */
12648 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12649 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12650 	}
12651 
12652 	/*
12653 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12654 	 * where the READ command is generated for a read-modify-write. (The
12655 	 * write phase is deferred until after the read completes.)
12656 	 */
12657 	if (is_aligned == FALSE) {
12658 
12659 		struct sd_mapblocksize_info	*shadow_bsp;
12660 		struct sd_xbuf	*shadow_xp;
12661 		struct buf	*shadow_bp;
12662 
12663 		/*
12664 		 * Allocate the shadow buf and its associated xbuf. Note that
12665 		 * after this call the xb_blkno value in both the original
12666 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12667 		 * same: absolute (relative to the start of the device), and
12668 		 * adjusted for the target block size. The b_blkno in the
12669 		 * shadow buf will also be set to this value. We should never
12670 		 * change b_blkno in the original bp however.
12671 		 *
12672 		 * Note also that the shadow buf will always need to be a
12673 		 * READ command, regardless of whether the incoming command
12674 		 * is a READ or a WRITE.
12675 		 */
12676 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12677 		    xp->xb_blkno,
12678 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12679 
12680 		shadow_xp = SD_GET_XBUF(shadow_bp);
12681 
12682 		/*
12683 		 * Allocate the layer-private data for the shadow buf.
12684 		 * (No need to preserve xb_private in the shadow xbuf.)
12685 		 */
12686 		shadow_xp->xb_private = shadow_bsp =
12687 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12688 
12689 		/*
12690 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12691 		 * to figure out where the start of the user data is (based upon
12692 		 * the system block size) in the data returned by the READ
12693 		 * command (which will be based upon the target blocksize). Note
12694 		 * that this is only really used if the request is unaligned.
12695 		 */
12696 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12697 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12698 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12699 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12700 
12701 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12702 
12703 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12704 
12705 		/* Transfer the wmap (if any) to the shadow buf */
12706 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12707 		bsp->mbs_wmp = NULL;
12708 
12709 		/*
12710 		 * The shadow buf goes on from here in place of the
12711 		 * original buf.
12712 		 */
12713 		shadow_bsp->mbs_orig_bp = bp;
12714 		bp = shadow_bp;
12715 	}
12716 
12717 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12718 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12719 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12720 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12721 	    request_bytes);
12722 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12723 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
12724 
12725 done:
12726 	SD_NEXT_IOSTART(index, un, bp);
12727 
12728 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12729 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12730 }
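
/*
 * A worked example of the conversion above: assume un->un_sys_blocksize
 * is 512, un->un_tgt_blocksize is 2048, and a request arrives for
 * b_bcount = 1024 bytes at system block xb_blkno = 3.  Then
 * first_byte = 3 * 512 = 1536, start_block = 1536 / 2048 = 0, and
 * end_block = (1536 + 1024 + 2047) / 2048 = 2, giving request_bytes =
 * 2 * 2048 = 4096.  Since 1536 is not a multiple of 2048 the request is
 * unaligned, so a 4096-byte shadow READ is issued at target block 0 with
 * mbs_copy_offset = 1536 - (0 * 2048) = 1536 marking where the caller's
 * data begins within the shadow buffer.
 */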
12731 
12732 
12733 /*
12734  *    Function: sd_mapblocksize_iodone
12735  *
12736  * Description: Completion side processing for block-size mapping.
12737  *
12738  *     Context: May be called under interrupt context
12739  */
12740 
12741 static void
12742 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12743 {
12744 	struct sd_mapblocksize_info	*bsp;
12745 	struct sd_xbuf	*xp;
12746 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12747 	struct buf	*orig_bp;	/* ptr to the original buf */
12748 	offset_t	shadow_end;
12749 	offset_t	request_end;
12750 	offset_t	shadow_start;
12751 	ssize_t		copy_offset;
12752 	size_t		copy_length;
12753 	size_t		shortfall;
12754 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12755 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
12756 
12757 	ASSERT(un != NULL);
12758 	ASSERT(bp != NULL);
12759 
12760 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12761 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12762 
12763 	/*
12764 	 * There is no shadow buf or layer-private data if the target is
12765 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12766 	 */
12767 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12768 	    (bp->b_bcount == 0)) {
12769 		goto exit;
12770 	}
12771 
12772 	xp = SD_GET_XBUF(bp);
12773 	ASSERT(xp != NULL);
12774 
12775 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12776 	bsp = xp->xb_private;
12777 
12778 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12779 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12780 
12781 	if (is_write) {
12782 		/*
12783 		 * For a WRITE request we must free up the block range that
12784 		 * we have locked up.  This holds regardless of whether this is
12785 		 * an aligned write request or a read-modify-write request.
12786 		 */
12787 		sd_range_unlock(un, bsp->mbs_wmp);
12788 		bsp->mbs_wmp = NULL;
12789 	}
12790 
12791 	if (bp->b_iodone != (int (*)(struct buf *))sd_mapblocksize_iodone) {
12792 		/*
12793 		 * An aligned read or write command will have no shadow buf;
12794 		 * there is not much else to do with it.
12795 		 */
12796 		goto done;
12797 	}
12798 
12799 	orig_bp = bsp->mbs_orig_bp;
12800 	ASSERT(orig_bp != NULL);
12801 	orig_xp = SD_GET_XBUF(orig_bp);
12802 	ASSERT(orig_xp != NULL);
12803 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12804 
12805 	if (!is_write && has_wmap) {
12806 		/*
12807 		 * A READ with a wmap means this is the READ phase of a
12808 		 * read-modify-write. If an error occurred on the READ then
12809 		 * we do not proceed with the WRITE phase or copy any data.
12810 		 * Just release the write maps and return with an error.
12811 		 */
12812 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12813 			orig_bp->b_resid = orig_bp->b_bcount;
12814 			bioerror(orig_bp, bp->b_error);
12815 			sd_range_unlock(un, bsp->mbs_wmp);
12816 			goto freebuf_done;
12817 		}
12818 	}
12819 
12820 	/*
12821 	 * Here is where we set up to copy the data from the shadow buf
12822 	 * into the space associated with the original buf.
12823 	 *
12824 	 * To deal with the conversion between block sizes, these
12825 	 * computations treat the data as an array of bytes, with the
12826 	 * first byte (byte 0) corresponding to the first byte in the
12827 	 * first block on the disk.
12828 	 */
12829 
12830 	/*
12831 	 * shadow_start and shadow_end indicate the location and extent of
12832 	 * the data returned with the shadow IO request.
12833 	 */
12834 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12835 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12836 
12837 	/*
12838 	 * copy_offset gives the offset (in bytes) from the start of the first
12839 	 * block of the READ request to the beginning of the data.  We retrieve
12840 	 * this value from mbs_copy_offset in the layer-private data, as it
12841 	 * was saved there by sd_mapblocksize_iostart(). copy_length gives
12842 	 * the amount of data to be copied (in bytes).
12843 	 */
12844 	copy_offset  = bsp->mbs_copy_offset;
12845 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12846 	copy_length  = orig_bp->b_bcount;
12847 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
12848 
12849 	/*
12850 	 * Set up the resid and error fields of orig_bp as appropriate.
12851 	 */
12852 	if (shadow_end >= request_end) {
12853 		/* We got all the requested data; set resid to zero */
12854 		orig_bp->b_resid = 0;
12855 	} else {
12856 		/*
12857 		 * We failed to get enough data to fully satisfy the original
12858 		 * request. Just copy back whatever data we got and set
12859 		 * up the residual and error code as required.
12860 		 *
12861 		 * 'shortfall' is the amount by which the data received with the
12862 		 * shadow buf has "fallen short" of the requested amount.
12863 		 */
12864 		shortfall = (size_t)(request_end - shadow_end);
12865 
12866 		if (shortfall > orig_bp->b_bcount) {
12867 			/*
12868 			 * We did not get enough data to even partially
12869 			 * fulfill the original request.  The residual is
12870 			 * equal to the amount requested.
12871 			 */
12872 			orig_bp->b_resid = orig_bp->b_bcount;
12873 		} else {
12874 			/*
12875 			 * We did not get all the data that we requested
12876 			 * from the device, but we will try to return what
12877 			 * portion we did get.
12878 			 */
12879 			orig_bp->b_resid = shortfall;
12880 		}
12881 		ASSERT(copy_length >= orig_bp->b_resid);
12882 		copy_length  -= orig_bp->b_resid;
12883 	}
12884 
12885 	/* Propagate the error code from the shadow buf to the original buf */
12886 	bioerror(orig_bp, bp->b_error);
12887 
12888 	if (is_write) {
12889 		goto freebuf_done;	/* No data copying for a WRITE */
12890 	}
12891 
12892 	if (has_wmap) {
12893 		/*
12894 		 * This is a READ command from the READ phase of a
12895 		 * read-modify-write request. We have to copy the data given
12896 		 * by the user OVER the data returned by the READ command,
12897 		 * then convert the command from a READ to a WRITE and send
12898 		 * it back to the target.
12899 		 */
12900 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12901 		    copy_length);
12902 
12903 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12904 
12905 		/*
12906 		 * Dispatch the WRITE command to the taskq thread, which
12907 		 * will in turn send the command to the target. When the
12908 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12909 		 * will get called again as part of the iodone chain
12910 		 * processing for it. Note that we will still be dealing
12911 		 * with the shadow buf at that point.
12912 		 */
12913 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12914 		    KM_NOSLEEP) != 0) {
12915 			/*
12916 			 * Dispatch was successful so we are done. Return
12917 			 * without going any higher up the iodone chain. Do
12918 			 * not free up any layer-private data until after the
12919 			 * WRITE completes.
12920 			 */
12921 			return;
12922 		}
12923 
12924 		/*
12925 		 * Dispatch of the WRITE command failed; set up the error
12926 		 * condition and send this IO back up the iodone chain.
12927 		 */
12928 		bioerror(orig_bp, EIO);
12929 		orig_bp->b_resid = orig_bp->b_bcount;
12930 
12931 	} else {
12932 		/*
12933 		 * This is a regular READ request (ie, not a RMW). Copy the
12934 		 * data from the shadow buf into the original buf. The
12935 		 * copy_offset compensates for any "misalignment" between the
12936 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12937 		 * original buf (with its un->un_sys_blocksize blocks).
12938 		 */
12939 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12940 		    copy_length);
12941 	}
12942 
12943 freebuf_done:
12944 
12945 	/*
12946 	 * At this point we still have both the shadow buf AND the original
12947 	 * buf to deal with, as well as the layer-private data area in each.
12948 	 * Local variables are as follows:
12949 	 *
12950 	 * bp -- points to shadow buf
12951 	 * xp -- points to xbuf of shadow buf
12952 	 * bsp -- points to layer-private data area of shadow buf
12953 	 * orig_bp -- points to original buf
12954 	 *
12955 	 * First free the shadow buf and its associated xbuf, then free the
12956 	 * layer-private data area from the shadow buf. There is no need to
12957 	 * restore xb_private in the shadow xbuf.
12958 	 */
12959 	sd_shadow_buf_free(bp);
12960 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12961 
12962 	/*
12963 	 * Now update the local variables to point to the original buf, xbuf,
12964 	 * and layer-private area.
12965 	 */
12966 	bp = orig_bp;
12967 	xp = SD_GET_XBUF(bp);
12968 	ASSERT(xp != NULL);
12969 	ASSERT(xp == orig_xp);
12970 	bsp = xp->xb_private;
12971 	ASSERT(bsp != NULL);
12972 
12973 done:
12974 	/*
12975 	 * Restore xb_private to whatever it was set to by the next higher
12976 	 * layer in the chain, then free the layer-private data area.
12977 	 */
12978 	xp->xb_private = bsp->mbs_oprivate;
12979 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12980 
12981 exit:
12982 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
12983 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
12984 
12985 	SD_NEXT_IODONE(index, un, bp);
12986 }
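
/*
 * Continuing the numeric example from sd_mapblocksize_iostart(): the
 * shadow READ covers target bytes [0, 4096) and the original request is
 * the 1024 bytes at [1536, 2560), so request_end = 0 + 1536 + 1024 =
 * 2560.  If the shadow transfer completes in full, shadow_end = 4096 >=
 * request_end and orig_bp->b_resid is zero.  If instead the device
 * returns only 2048 bytes (bp->b_resid = 2048), then shadow_end = 2048
 * and shortfall = 2560 - 2048 = 512, so orig_bp->b_resid = 512 and only
 * 1024 - 512 = 512 bytes are copied back to the caller.
 */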
12987 
12988 
12989 /*
12990  *    Function: sd_checksum_iostart
12991  *
12992  * Description: A stub function for a layer that's currently not used.
12993  *		For now just a placeholder.
12994  *
12995  *     Context: Kernel thread context
12996  */
12997 
12998 static void
12999 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13000 {
13001 	ASSERT(un != NULL);
13002 	ASSERT(bp != NULL);
13003 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13004 	SD_NEXT_IOSTART(index, un, bp);
13005 }
13006 
13007 
13008 /*
13009  *    Function: sd_checksum_iodone
13010  *
13011  * Description: A stub function for a layer that's currently not used.
13012  *		For now just a placeholder.
13013  *
13014  *     Context: May be called under interrupt context
13015  */
13016 
13017 static void
13018 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13019 {
13020 	ASSERT(un != NULL);
13021 	ASSERT(bp != NULL);
13022 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13023 	SD_NEXT_IODONE(index, un, bp);
13024 }
13025 
13026 
13027 /*
13028  *    Function: sd_checksum_uscsi_iostart
13029  *
13030  * Description: A stub function for a layer that's currently not used.
13031  *		For now just a placeholder.
13032  *
13033  *     Context: Kernel thread context
13034  */
13035 
13036 static void
13037 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13038 {
13039 	ASSERT(un != NULL);
13040 	ASSERT(bp != NULL);
13041 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13042 	SD_NEXT_IOSTART(index, un, bp);
13043 }
13044 
13045 
13046 /*
13047  *    Function: sd_checksum_uscsi_iodone
13048  *
13049  * Description: A stub function for a layer that's currently not used.
13050  *		For now just a placeholder.
13051  *
13052  *     Context: May be called under interrupt context
13053  */
13054 
13055 static void
13056 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13057 {
13058 	ASSERT(un != NULL);
13059 	ASSERT(bp != NULL);
13060 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13061 	SD_NEXT_IODONE(index, un, bp);
13062 }
13063 
13064 
13065 /*
13066  *    Function: sd_pm_iostart
13067  *
13068  * Description: iostart-side routine for power management.
13069  *
13070  *     Context: Kernel thread context
13071  */
13072 
13073 static void
13074 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13075 {
13076 	ASSERT(un != NULL);
13077 	ASSERT(bp != NULL);
13078 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13079 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13080 
13081 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13082 
13083 	if (sd_pm_entry(un) != DDI_SUCCESS) {
13084 		/*
13085 		 * Set up to return the failed buf back up the 'iodone'
13086 		 * side of the calling chain.
13087 		 */
13088 		bioerror(bp, EIO);
13089 		bp->b_resid = bp->b_bcount;
13090 
13091 		SD_BEGIN_IODONE(index, un, bp);
13092 
13093 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13094 		return;
13095 	}
13096 
13097 	SD_NEXT_IOSTART(index, un, bp);
13098 
13099 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13100 }
13101 
13102 
13103 /*
13104  *    Function: sd_pm_iodone
13105  *
13106  * Description: iodone-side routine for power management.
13107  *
13108  *     Context: May be called from interrupt context
13109  */
13110 
13111 static void
13112 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13113 {
13114 	ASSERT(un != NULL);
13115 	ASSERT(bp != NULL);
13116 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13117 
13118 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13119 
13120 	/*
13121 	 * After attach the following flag is only read, so don't
13122 	 * take the penalty of acquiring a mutex for it.
13123 	 */
13124 	if (un->un_f_pm_is_enabled == TRUE) {
13125 		sd_pm_exit(un);
13126 	}
13127 
13128 	SD_NEXT_IODONE(index, un, bp);
13129 
13130 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13131 }
13132 
13133 
13134 /*
13135  *    Function: sd_core_iostart
13136  *
13137  * Description: Primary driver function for enqueuing buf(9S) structs from
13138  *		the system and initiating IO to the target device
13139  *
13140  *     Context: Kernel thread context. Can sleep.
13141  *
13142  * Assumptions:  - The given xp->xb_blkno is absolute
13143  *		   (ie, relative to the start of the device).
13144  *		 - The IO is to be done using the native blocksize of
13145  *		   the device, as specified in un->un_tgt_blocksize.
13146  */
13147 /* ARGSUSED */
13148 static void
13149 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13150 {
13151 	struct sd_xbuf *xp;
13152 
13153 	ASSERT(un != NULL);
13154 	ASSERT(bp != NULL);
13155 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13156 	ASSERT(bp->b_resid == 0);
13157 
13158 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13159 
13160 	xp = SD_GET_XBUF(bp);
13161 	ASSERT(xp != NULL);
13162 
13163 	mutex_enter(SD_MUTEX(un));
13164 
13165 	/*
13166 	 * If we are currently in the failfast state, fail any new IO
13167 	 * that has B_FAILFAST set, then return.
13168 	 */
13169 	if ((bp->b_flags & B_FAILFAST) &&
13170 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13171 		mutex_exit(SD_MUTEX(un));
13172 		bioerror(bp, EIO);
13173 		bp->b_resid = bp->b_bcount;
13174 		SD_BEGIN_IODONE(index, un, bp);
13175 		return;
13176 	}
13177 
13178 	if (SD_IS_DIRECT_PRIORITY(xp)) {
13179 		/*
13180 		 * Priority command -- transport it immediately.
13181 		 *
13182 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13183 		 * because all direct priority commands should be associated
13184 		 * with error recovery actions which we don't want to retry.
13185 		 */
13186 		sd_start_cmds(un, bp);
13187 	} else {
13188 		/*
13189 		 * Normal command -- add it to the wait queue, then start
13190 		 * transporting commands from the wait queue.
13191 		 */
13192 		sd_add_buf_to_waitq(un, bp);
13193 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13194 		sd_start_cmds(un, NULL);
13195 	}
13196 
13197 	mutex_exit(SD_MUTEX(un));
13198 
13199 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13200 }
13201 
13202 
13203 /*
13204  *    Function: sd_init_cdb_limits
13205  *
13206  * Description: Handles scsi_pkt initialization differences across the
13207  *		platforms this driver supports.
13208  *
13209  *		Legacy behaviors:
13210  *
13211  *		If the block number or the sector count exceeds the
13212  *		capabilities of a Group 0 command, shift over to a
13213  *		Group 1 command. We don't blindly use Group 1
13214  *		commands because a) some drives (CDC Wren IVs) get a
13215  *		bit confused, and b) there is probably a fair amount
13216  *		of speed difference for a target to receive and decode
13217  *		a 10 byte command instead of a 6 byte command.
13218  *
13219  *		The xfer time difference of 6 vs 10 byte CDBs is
13220  *		still significant so this code is still worthwhile.
13221  *		10 byte CDBs are very inefficient with the fas HBA driver
13222  *		and older disks. Each CDB byte took 1 usec with some
13223  *		popular disks.
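 *
 *		Illustrative arithmetic based on the figure above: at
 *		~1 usec per CDB byte, a 10 byte CDB costs ~4 usec more
 *		per command than a 6 byte CDB; at 10,000 commands per
 *		second that is ~40 ms of bus time per second spent on
 *		the larger CDBs alone.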
13224  *
13225  *     Context: Must be called at attach time
13226  */
13227 
13228 static void
13229 sd_init_cdb_limits(struct sd_lun *un)
13230 {
13231 	int hba_cdb_limit;
13232 
13233 	/*
13234 	 * Use CDB_GROUP1 commands for most devices except for
13235 	 * parallel SCSI fixed drives in which case we get better
13236 	 * performance using CDB_GROUP0 commands (where applicable).
13237 	 */
13238 	un->un_mincdb = SD_CDB_GROUP1;
13239 #if !defined(__fibre)
13240 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13241 	    !un->un_f_has_removable_media) {
13242 		un->un_mincdb = SD_CDB_GROUP0;
13243 	}
13244 #endif
13245 
13246 	/*
13247 	 * Try to read the max-cdb-length supported by HBA.
13248 	 */
13249 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13250 	if (0 >= un->un_max_hba_cdb) {
13251 		un->un_max_hba_cdb = CDB_GROUP4;
13252 		hba_cdb_limit = SD_CDB_GROUP4;
13253 	} else if (0 < un->un_max_hba_cdb &&
13254 	    un->un_max_hba_cdb < CDB_GROUP1) {
13255 		hba_cdb_limit = SD_CDB_GROUP0;
13256 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13257 	    un->un_max_hba_cdb < CDB_GROUP5) {
13258 		hba_cdb_limit = SD_CDB_GROUP1;
13259 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13260 	    un->un_max_hba_cdb < CDB_GROUP4) {
13261 		hba_cdb_limit = SD_CDB_GROUP5;
13262 	} else {
13263 		hba_cdb_limit = SD_CDB_GROUP4;
13264 	}
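
	/*
	 * For reference (assuming the standard CDB group sizes of
	 * CDB_GROUP0 = 6, CDB_GROUP1 = 10, CDB_GROUP5 = 12 and
	 * CDB_GROUP4 = 16 bytes), the ladder above resolves as:
	 *
	 *	max-cdb-length		hba_cdb_limit
	 *	<= 0 (cap unknown)	SD_CDB_GROUP4
	 *	 1 ..  9		SD_CDB_GROUP0
	 *	10 .. 11		SD_CDB_GROUP1
	 *	12 .. 15		SD_CDB_GROUP5
	 *	>= 16			SD_CDB_GROUP4
	 */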
13265 
13266 	/*
13267 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13268 	 * commands for fixed disks unless we are building for a 32 bit
13269 	 * kernel.
13270 	 */
13271 #ifdef _LP64
13272 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13273 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13274 #else
13275 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13276 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13277 #endif
13278 
13279 	/*
13280 	 * x86 systems require the PKT_DMA_PARTIAL flag
13281 	 */
13282 #if defined(__x86)
13283 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13284 #else
13285 	un->un_pkt_flags = 0;
13286 #endif
13287 
13288 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13289 	    ? sizeof (struct scsi_arq_status) : 1);
13290 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13291 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13292 }
13293 
13294 
13295 /*
13296  *    Function: sd_initpkt_for_buf
13297  *
13298  * Description: Allocate and initialize for transport a scsi_pkt struct,
13299  *		based upon the info specified in the given buf struct.
13300  *
13301  *		Assumes the xb_blkno in the request is absolute (ie,
13302  *		relative to the start of the device, NOT the partition).
13303  *		Also assumes that the request is using the native block
13304  *		size of the device (as returned by the READ CAPACITY
13305  *		command).
13306  *
13307  * Return Code: SD_PKT_ALLOC_SUCCESS
13308  *		SD_PKT_ALLOC_FAILURE
13309  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13310  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13311  *
13312  *     Context: Kernel thread and may be called from software interrupt context
13313  *		as part of a sdrunout callback. This function may not block or
13314  *		call routines that block
13315  */
13316 
13317 static int
13318 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13319 {
13320 	struct sd_xbuf	*xp;
13321 	struct scsi_pkt *pktp = NULL;
13322 	struct sd_lun	*un;
13323 	size_t		blockcount;
13324 	daddr_t		startblock;
13325 	int		rval;
13326 	int		cmd_flags;
13327 
13328 	ASSERT(bp != NULL);
13329 	ASSERT(pktpp != NULL);
13330 	xp = SD_GET_XBUF(bp);
13331 	ASSERT(xp != NULL);
13332 	un = SD_GET_UN(bp);
13333 	ASSERT(un != NULL);
13334 	ASSERT(mutex_owned(SD_MUTEX(un)));
13335 	ASSERT(bp->b_resid == 0);
13336 
13337 	SD_TRACE(SD_LOG_IO_CORE, un,
13338 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13339 
13340 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13341 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13342 		/*
13343 		 * Already have a scsi_pkt -- just need DMA resources.
13344 		 * We must recompute the CDB in case the mapping returns
13345 		 * a nonzero pkt_resid.
13346 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13347 		 * that is being retried, the unmap/remap of the DMA resources
13348 		 * will result in the entire transfer starting over again
13349 		 * from the very first block.
13350 		 */
13351 		ASSERT(xp->xb_pktp != NULL);
13352 		pktp = xp->xb_pktp;
13353 	} else {
13354 		pktp = NULL;
13355 	}
13356 #endif /* __i386 || __amd64 */
13357 
13358 	startblock = xp->xb_blkno;	/* Absolute block num. */
13359 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
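	/*
	 * Illustrative example: assuming SD_BYTES2TGTBLOCKS() simply
	 * divides the byte count by un->un_tgt_blocksize, a 64KB
	 * b_bcount on a device with 2048-byte target blocks yields a
	 * blockcount of 32.
	 */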
13360 
13361 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13362 
13363 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13364 
13365 #else
13366 
13367 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13368 
13369 #endif
13370 
13371 	/*
13372 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13373 	 * call scsi_init_pkt, and build the CDB.
13374 	 */
13375 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13376 	    cmd_flags, sdrunout, (caddr_t)un,
13377 	    startblock, blockcount);
13378 
13379 	if (rval == 0) {
13380 		/*
13381 		 * Success.
13382 		 *
13383 		 * If partial DMA is being used and required for this transfer,
13384 		 * set it up here.
13385 		 */
13386 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13387 		    (pktp->pkt_resid != 0)) {
13388 
13389 			/*
13390 			 * Save the pkt_resid for the next transfer (the CDB
13391 			 * size must remain constant across the transfers)
13392 			 */
13393 			xp->xb_dma_resid = pktp->pkt_resid;
13394 
13395 			/* rezero resid */
13396 			pktp->pkt_resid = 0;
13397 
13398 		} else {
13399 			xp->xb_dma_resid = 0;
13400 		}
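
		/*
		 * Illustrative example: if the request is 1MB but the HBA
		 * could bind only 256KB of DMA resources, pkt_resid comes
		 * back as 768KB; that residual is parked in xb_dma_resid
		 * so the remainder can be set up later via
		 * sd_setup_next_rw_pkt().
		 */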
13401 
13402 		pktp->pkt_flags = un->un_tagflags;
13403 		pktp->pkt_time  = un->un_cmd_timeout;
13404 		pktp->pkt_comp  = sdintr;
13405 
13406 		pktp->pkt_private = bp;
13407 		*pktpp = pktp;
13408 
13409 		SD_TRACE(SD_LOG_IO_CORE, un,
13410 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13411 
13412 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13413 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13414 #endif
13415 
13416 		return (SD_PKT_ALLOC_SUCCESS);
13417 
13418 	}
13419 
13420 	/*
13421 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13422 	 * from sd_setup_rw_pkt.
13423 	 */
13424 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13425 
13426 	if (rval == SD_PKT_ALLOC_FAILURE) {
13427 		*pktpp = NULL;
13428 		/*
13429 		 * Set the driver state to RWAIT to indicate the driver
13430 		 * is waiting on resource allocations. The driver will not
13431 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13432 		 */
13433 		New_state(un, SD_STATE_RWAIT);
13434 
13435 		SD_ERROR(SD_LOG_IO_CORE, un,
13436 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13437 
13438 		if ((bp->b_flags & B_ERROR) != 0) {
13439 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13440 		}
13441 		return (SD_PKT_ALLOC_FAILURE);
13442 	} else {
13443 		/*
13444 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13445 		 *
13446 		 * This should never happen.  Maybe someone messed with the
13447 		 * kernel's minphys?
13448 		 */
13449 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13450 		    "Request rejected: too large for CDB: "
13451 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13452 		SD_ERROR(SD_LOG_IO_CORE, un,
13453 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13454 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13455 
13456 	}
13457 }
13458 
13459 
13460 /*
13461  *    Function: sd_destroypkt_for_buf
13462  *
13463  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13464  *
13465  *     Context: Kernel thread or interrupt context
13466  */
13467 
13468 static void
13469 sd_destroypkt_for_buf(struct buf *bp)
13470 {
13471 	ASSERT(bp != NULL);
13472 	ASSERT(SD_GET_UN(bp) != NULL);
13473 
13474 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13475 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13476 
13477 	ASSERT(SD_GET_PKTP(bp) != NULL);
13478 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13479 
13480 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13481 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13482 }
13483 
13484 /*
13485  *    Function: sd_setup_rw_pkt
13486  *
13487  * Description: Determines appropriate CDB group for the requested LBA
13488  *		and transfer length, calls scsi_init_pkt, and builds
13489  *		the CDB.  Do not use for partial DMA transfers except
13490  *		for the initial transfer since the CDB size must
13491  *		remain constant.
13492  *
13493  *     Context: Kernel thread and may be called from software interrupt
13494  *		context as part of a sdrunout callback. This function may not
13495  *		block or call routines that block
13496  */
13497 
13498 
13499 int
13500 sd_setup_rw_pkt(struct sd_lun *un,
13501     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13502     int (*callback)(caddr_t), caddr_t callback_arg,
13503     diskaddr_t lba, uint32_t blockcount)
13504 {
13505 	struct scsi_pkt *return_pktp;
13506 	union scsi_cdb *cdbp;
13507 	struct sd_cdbinfo *cp = NULL;
13508 	int i;
13509 
13510 	/*
13511 	 * See which size CDB to use, based upon the request.
13512 	 */
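	/*
	 * For reference, the limits presumably encoded in sd_cdbtab
	 * mirror the standard READ/WRITE CDB formats:
	 *
	 *	Group 0 (6 byte):	21-bit LBA, up to 256 blocks
	 *	Group 1 (10 byte):	32-bit LBA, 16-bit block count
	 *	Group 5 (12 byte):	32-bit LBA, 32-bit block count
	 *	Group 4 (16 byte):	64-bit LBA, 32-bit block count
	 */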
13513 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13514 
13515 		/*
13516 		 * Check lba and block count against sd_cdbtab limits.
13517 		 * In the partial DMA case, we have to use the same size
13518 		 * CDB for all the transfers.  Check lba + blockcount
13519 		 * against the max LBA so we know that segment of the
13520 		 * transfer can use the CDB we select.
13521 		 */
13522 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13523 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13524 
13525 			/*
13526 			 * The command will fit into the CDB type
13527 			 * specified by sd_cdbtab[i].
13528 			 */
13529 			cp = sd_cdbtab + i;
13530 
13531 			/*
13532 			 * Call scsi_init_pkt so we can fill in the
13533 			 * CDB.
13534 			 */
13535 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13536 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13537 			    flags, callback, callback_arg);
13538 
13539 			if (return_pktp != NULL) {
13540 
13541 				/*
13542 				 * Return new value of pkt
13543 				 */
13544 				*pktpp = return_pktp;
13545 
13546 				/*
13547 				 * To be safe, zero the CDB ensuring there is
13548 				 * no leftover data from a previous command.
13549 				 */
13550 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13551 
13552 				/*
13553 				 * Handle partial DMA mapping
13554 				 */
13555 				if (return_pktp->pkt_resid != 0) {
13556 
13557 					/*
13558 					 * Not going to xfer as many blocks as
13559 					 * originally expected
13560 					 */
13561 					blockcount -=
13562 					    SD_BYTES2TGTBLOCKS(un,
13563 						return_pktp->pkt_resid);
13564 				}
13565 
13566 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13567 
13568 				/*
13569 				 * Set command byte based on the CDB
13570 				 * type we matched.
13571 				 */
13572 				cdbp->scc_cmd = cp->sc_grpmask |
13573 				    ((bp->b_flags & B_READ) ?
13574 					SCMD_READ : SCMD_WRITE);
13575 
13576 				SD_FILL_SCSI1_LUN(un, return_pktp);
13577 
13578 				/*
13579 				 * Fill in LBA and length
13580 				 */
13581 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13582 				    (cp->sc_grpcode == CDB_GROUP4) ||
13583 				    (cp->sc_grpcode == CDB_GROUP0) ||
13584 				    (cp->sc_grpcode == CDB_GROUP5));
13585 
13586 				if (cp->sc_grpcode == CDB_GROUP1) {
13587 					FORMG1ADDR(cdbp, lba);
13588 					FORMG1COUNT(cdbp, blockcount);
13589 					return (0);
13590 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13591 					FORMG4LONGADDR(cdbp, lba);
13592 					FORMG4COUNT(cdbp, blockcount);
13593 					return (0);
13594 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13595 					FORMG0ADDR(cdbp, lba);
13596 					FORMG0COUNT(cdbp, blockcount);
13597 					return (0);
13598 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13599 					FORMG5ADDR(cdbp, lba);
13600 					FORMG5COUNT(cdbp, blockcount);
13601 					return (0);
13602 				}
13603 
13604 				/*
13605 				 * It should be impossible to not match one
13606 				 * of the CDB types above, so we should never
13607 				 * reach this point.  Set the CDB command byte
13608 				 * to test-unit-ready to avoid writing
13609 				 * to somewhere we don't intend.
13610 				 */
13611 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13612 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13613 			} else {
13614 				/*
13615 				 * Couldn't get scsi_pkt
13616 				 */
13617 				return (SD_PKT_ALLOC_FAILURE);
13618 			}
13619 		}
13620 	}
13621 
13622 	/*
13623 	 * None of the available CDB types were suitable.  This really
13624 	 * should never happen:  on a 64 bit system we support
13625 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13626 	 * and on a 32 bit system we will refuse to bind to a device
13627 	 * larger than 2TB so addresses will never be larger than 32 bits.
13628 	 */
13629 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13630 }
13631 
13632 #if defined(__i386) || defined(__amd64)
13633 /*
13634  *    Function: sd_setup_next_rw_pkt
13635  *
13636  * Description: Setup packet for partial DMA transfers, except for the
13637  * 		initial transfer.  sd_setup_rw_pkt should be used for
13638  *		the initial transfer.
13639  *
13640  *     Context: Kernel thread and may be called from interrupt context.
13641  */
13642 
13643 int
13644 sd_setup_next_rw_pkt(struct sd_lun *un,
13645     struct scsi_pkt *pktp, struct buf *bp,
13646     diskaddr_t lba, uint32_t blockcount)
13647 {
13648 	uchar_t com;
13649 	union scsi_cdb *cdbp;
13650 	uchar_t cdb_group_id;
13651 
13652 	ASSERT(pktp != NULL);
13653 	ASSERT(pktp->pkt_cdbp != NULL);
13654 
13655 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13656 	com = cdbp->scc_cmd;
13657 	cdb_group_id = CDB_GROUPID(com);
13658 
13659 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13660 	    (cdb_group_id == CDB_GROUPID_1) ||
13661 	    (cdb_group_id == CDB_GROUPID_4) ||
13662 	    (cdb_group_id == CDB_GROUPID_5));
13663 
13664 	/*
13665 	 * Move pkt to the next portion of the xfer.
13666 	 * func is NULL_FUNC so we do not have to release
13667 	 * the disk mutex here.
13668 	 */
13669 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13670 	    NULL_FUNC, NULL) == pktp) {
13671 		/* Success.  Handle partial DMA */
13672 		if (pktp->pkt_resid != 0) {
13673 			blockcount -=
13674 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13675 		}
13676 
13677 		cdbp->scc_cmd = com;
13678 		SD_FILL_SCSI1_LUN(un, pktp);
13679 		if (cdb_group_id == CDB_GROUPID_1) {
13680 			FORMG1ADDR(cdbp, lba);
13681 			FORMG1COUNT(cdbp, blockcount);
13682 			return (0);
13683 		} else if (cdb_group_id == CDB_GROUPID_4) {
13684 			FORMG4LONGADDR(cdbp, lba);
13685 			FORMG4COUNT(cdbp, blockcount);
13686 			return (0);
13687 		} else if (cdb_group_id == CDB_GROUPID_0) {
13688 			FORMG0ADDR(cdbp, lba);
13689 			FORMG0COUNT(cdbp, blockcount);
13690 			return (0);
13691 		} else if (cdb_group_id == CDB_GROUPID_5) {
13692 			FORMG5ADDR(cdbp, lba);
13693 			FORMG5COUNT(cdbp, blockcount);
13694 			return (0);
13695 		}
13696 
13697 		/* Unreachable */
13698 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13699 	}
13700 
13701 	/*
13702 	 * Error setting up next portion of cmd transfer.
13703 	 * Something is definitely very wrong and this
13704 	 * should not happen.
13705 	 */
13706 	return (SD_PKT_ALLOC_FAILURE);
13707 }
13708 #endif /* defined(__i386) || defined(__amd64) */
13709 
13710 /*
13711  *    Function: sd_initpkt_for_uscsi
13712  *
13713  * Description: Allocate and initialize for transport a scsi_pkt struct,
13714  *		based upon the info specified in the given uscsi_cmd struct.
13715  *
13716  * Return Code: SD_PKT_ALLOC_SUCCESS
13717  *		SD_PKT_ALLOC_FAILURE
13718  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13719  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13720  *
13721  *     Context: Kernel thread and may be called from software interrupt context
13722  *		as part of a sdrunout callback. This function may not block or
13723  *		call routines that block
13724  */
13725 
13726 static int
13727 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13728 {
13729 	struct uscsi_cmd *uscmd;
13730 	struct sd_xbuf	*xp;
13731 	struct scsi_pkt	*pktp;
13732 	struct sd_lun	*un;
13733 	uint32_t	flags = 0;
13734 
13735 	ASSERT(bp != NULL);
13736 	ASSERT(pktpp != NULL);
13737 	xp = SD_GET_XBUF(bp);
13738 	ASSERT(xp != NULL);
13739 	un = SD_GET_UN(bp);
13740 	ASSERT(un != NULL);
13741 	ASSERT(mutex_owned(SD_MUTEX(un)));
13742 
13743 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13744 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13745 	ASSERT(uscmd != NULL);
13746 
13747 	SD_TRACE(SD_LOG_IO_CORE, un,
13748 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13749 
13750 	/*
13751 	 * Allocate the scsi_pkt for the command.
13752 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13753 	 *	 during scsi_init_pkt time and will continue to use the
13754 	 *	 same path as long as the same scsi_pkt is used without
13755 	 *	 intervening scsi_dma_free(). Since uscsi commands do
13756 	 *	 not call scsi_dmafree() before retrying a failed command,
13757 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
13758 	 *	 NOT set, so that scsi_vhci can use another available path
13759 	 *	 for the retry. Besides, uscsi commands do not allow DMA
13760 	 *	 breakup, so there is no need to set PKT_DMA_PARTIAL anyway.
13761 	 */
13762 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13763 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13764 	    sizeof (struct scsi_arq_status), 0,
13765 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13766 	    sdrunout, (caddr_t)un);
13767 
13768 	if (pktp == NULL) {
13769 		*pktpp = NULL;
13770 		/*
13771 		 * Set the driver state to RWAIT to indicate the driver
13772 		 * is waiting on resource allocations. The driver will not
13773 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13774 		 */
13775 		New_state(un, SD_STATE_RWAIT);
13776 
13777 		SD_ERROR(SD_LOG_IO_CORE, un,
13778 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13779 
13780 		if ((bp->b_flags & B_ERROR) != 0) {
13781 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13782 		}
13783 		return (SD_PKT_ALLOC_FAILURE);
13784 	}
13785 
13786 	/*
13787 	 * We do not do DMA breakup for USCSI commands, so return failure
13788 	 * here if all the needed DMA resources were not allocated.
13789 	 */
13790 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13791 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13792 		scsi_destroy_pkt(pktp);
13793 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13794 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13795 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13796 	}
13797 
13798 	/* Init the cdb from the given uscsi struct */
13799 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13800 	    uscmd->uscsi_cdb[0], 0, 0, 0);
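
	/*
	 * Note that only the opcode is set up here; the caller's complete
	 * CDB is bcopy'd over pkt_cdbp below, once the pkt flags have
	 * been computed.
	 */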
13801 
13802 	SD_FILL_SCSI1_LUN(un, pktp);
13803 
13804 	/*
13805 	 * Set up the optional USCSI flags. See the uscsi (7I) man page
13806 	 * for listing of the supported flags.
13807 	 */
13808 
13809 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13810 		flags |= FLAG_SILENT;
13811 	}
13812 
13813 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13814 		flags |= FLAG_DIAGNOSE;
13815 	}
13816 
13817 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13818 		flags |= FLAG_ISOLATE;
13819 	}
13820 
13821 	if (un->un_f_is_fibre == FALSE) {
13822 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13823 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13824 		}
13825 	}
13826 
13827 	/*
13828 	 * Set the pkt flags here so we save time later.
13829 	 * Note: These flags are NOT documented in the uscsi(7I) man page.
13830 	 */
13831 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13832 		flags |= FLAG_HEAD;
13833 	}
13834 
13835 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13836 		flags |= FLAG_NOINTR;
13837 	}
13838 
13839 	/*
13840 	 * For tagged queueing, things get a bit complicated.
13841 	 * Check first for head of queue and last for ordered queue.
13842 	 * If neither head nor order, use the default driver tag flags.
13843 	 */
13844 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13845 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13846 			flags |= FLAG_HTAG;
13847 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13848 			flags |= FLAG_OTAG;
13849 		} else {
13850 			flags |= un->un_tagflags & FLAG_TAGMASK;
13851 		}
13852 	}
13853 
13854 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13855 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13856 	}
13857 
13858 	pktp->pkt_flags = flags;
13859 
13860 	/* Copy the caller's CDB into the pkt... */
13861 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13862 
13863 	if (uscmd->uscsi_timeout == 0) {
13864 		pktp->pkt_time = un->un_uscsi_timeout;
13865 	} else {
13866 		pktp->pkt_time = uscmd->uscsi_timeout;
13867 	}
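
	/*
	 * Note: a zero uscsi_timeout selects the driver default computed
	 * in sd_init_cdb_limits() (twice un_cmd_timeout for CD devices).
	 */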
13868 
13869 	/* need it later to identify USCSI request in sdintr */
13870 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13871 
13872 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13873 
13874 	pktp->pkt_private = bp;
13875 	pktp->pkt_comp = sdintr;
13876 	*pktpp = pktp;
13877 
13878 	SD_TRACE(SD_LOG_IO_CORE, un,
13879 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13880 
13881 	return (SD_PKT_ALLOC_SUCCESS);
13882 }
13883 
13884 
13885 /*
13886  *    Function: sd_destroypkt_for_uscsi
13887  *
13888  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13889  *		IOs. Also saves relevant info into the associated uscsi_cmd
13890  *		struct.
13891  *
13892  *     Context: May be called under interrupt context
13893  */
13894 
13895 static void
13896 sd_destroypkt_for_uscsi(struct buf *bp)
13897 {
13898 	struct uscsi_cmd *uscmd;
13899 	struct sd_xbuf	*xp;
13900 	struct scsi_pkt	*pktp;
13901 	struct sd_lun	*un;
13902 
13903 	ASSERT(bp != NULL);
13904 	xp = SD_GET_XBUF(bp);
13905 	ASSERT(xp != NULL);
13906 	un = SD_GET_UN(bp);
13907 	ASSERT(un != NULL);
13908 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13909 	pktp = SD_GET_PKTP(bp);
13910 	ASSERT(pktp != NULL);
13911 
13912 	SD_TRACE(SD_LOG_IO_CORE, un,
13913 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13914 
13915 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13916 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13917 	ASSERT(uscmd != NULL);
13918 
13919 	/* Save the status and the residual into the uscsi_cmd struct */
13920 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13921 	uscmd->uscsi_resid  = bp->b_resid;
13922 
13923 	/*
13924 	 * If enabled, copy any saved sense data into the area specified
13925 	 * by the uscsi command.
13926 	 */
13927 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13928 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13929 		/*
13930 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13931 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13932 		 */
13933 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13934 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13935 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13936 	}
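
	/*
	 * Note that SENSE_LENGTH bytes are copied unconditionally above;
	 * as noted, uscsi_rqbuf is expected to be at least that large.
	 */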
13937 
13938 	/* We are done with the scsi_pkt; free it now */
13939 	ASSERT(SD_GET_PKTP(bp) != NULL);
13940 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13941 
13942 	SD_TRACE(SD_LOG_IO_CORE, un,
13943 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13944 }
13945 
13946 
13947 /*
13948  *    Function: sd_bioclone_alloc
13949  *
13950  * Description: Allocate a buf(9S) and init it as per the given buf
13951  *		and the various arguments.  The associated sd_xbuf
13952  *		struct is (nearly) duplicated.  The struct buf *bp
13953  *		argument is saved in new_xp->xb_private.
13954  *
13955  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13956  *		datalen - size of data area for the shadow bp
13957  *		blkno - starting LBA
13958  *		func - function pointer for b_iodone in the shadow buf. (May
13959  *			be NULL if none.)
13960  *
13961  * Return Code: Pointer to the allocated buf(9S) struct
13962  *
13963  *     Context: Can sleep.
13964  */
13965 
13966 static struct buf *
13967 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13968 	daddr_t blkno, int (*func)(struct buf *))
13969 {
13970 	struct	sd_lun	*un;
13971 	struct	sd_xbuf	*xp;
13972 	struct	sd_xbuf	*new_xp;
13973 	struct	buf	*new_bp;
13974 
13975 	ASSERT(bp != NULL);
13976 	xp = SD_GET_XBUF(bp);
13977 	ASSERT(xp != NULL);
13978 	un = SD_GET_UN(bp);
13979 	ASSERT(un != NULL);
13980 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13981 
13982 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13983 	    NULL, KM_SLEEP);
13984 
13985 	new_bp->b_lblkno	= blkno;
13986 
13987 	/*
13988 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13989 	 * original xbuf into it.
13990 	 */
13991 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13992 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13993 
13994 	/*
13995 	 * The given bp is automatically saved in the xb_private member
13996 	 * of the new xbuf.  Callers are allowed to depend on this.
13997 	 */
13998 	new_xp->xb_private = bp;
13999 
14000 	new_bp->b_private  = new_xp;
14001 
14002 	return (new_bp);
14003 }
14004 
14005 /*
14006  *    Function: sd_shadow_buf_alloc
14007  *
14008  * Description: Allocate a buf(9S) and init it as per the given buf
14009  *		and the various arguments.  The associated sd_xbuf
14010  *		struct is (nearly) duplicated.  The struct buf *bp
14011  *		argument is saved in new_xp->xb_private.
14012  *
14013  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14014  *		datalen - size of data area for the shadow bp
14015  *		bflags - B_READ or B_WRITE (pseudo flag)
14016  *		blkno - starting LBA
14017  *		func - function pointer for b_iodone in the shadow buf. (May
14018  *			be NULL if none.)
14019  *
14020  * Return Code: Pointer to the allocated buf(9S) struct
14021  *
14022  *     Context: Can sleep.
14023  */
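
/*
 * Note: unlike sd_bioclone_alloc(), which clones the original buf's own
 * data pages, this routine allocates a separate data area for the shadow
 * buf. It is therefore suited to transfers whose size differs from the
 * original request (e.g. the sd_mapblocksize read-modify-write path),
 * at the cost of having to copy data between the two bufs later.
 */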
14024 
14025 static struct buf *
14026 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
14027 	daddr_t blkno, int (*func)(struct buf *))
14028 {
14029 	struct	sd_lun	*un;
14030 	struct	sd_xbuf	*xp;
14031 	struct	sd_xbuf	*new_xp;
14032 	struct	buf	*new_bp;
14033 
14034 	ASSERT(bp != NULL);
14035 	xp = SD_GET_XBUF(bp);
14036 	ASSERT(xp != NULL);
14037 	un = SD_GET_UN(bp);
14038 	ASSERT(un != NULL);
14039 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14040 
14041 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
14042 		bp_mapin(bp);
14043 	}
14044 
14045 	bflags &= (B_READ | B_WRITE);
14046 #if defined(__i386) || defined(__amd64)
14047 	new_bp = getrbuf(KM_SLEEP);
14048 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
14049 	new_bp->b_bcount = datalen;
14050 	new_bp->b_flags = bflags |
14051 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
14052 #else
14053 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
14054 	    datalen, bflags, SLEEP_FUNC, NULL);
14055 #endif
14056 	new_bp->av_forw	= NULL;
14057 	new_bp->av_back	= NULL;
14058 	new_bp->b_dev	= bp->b_dev;
14059 	new_bp->b_blkno	= blkno;
14060 	new_bp->b_iodone = func;
14061 	new_bp->b_edev	= bp->b_edev;
14062 	new_bp->b_resid	= 0;
14063 
14064 	/* We need to preserve the B_FAILFAST flag */
14065 	if (bp->b_flags & B_FAILFAST) {
14066 		new_bp->b_flags |= B_FAILFAST;
14067 	}
14068 
14069 	/*
14070 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14071 	 * original xbuf into it.
14072 	 */
14073 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14074 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14075 
14076 	/* We will later need to copy data between the shadow & original bufs */
14077 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
14078 
14079 	/*
14080 	 * The given bp is automatically saved in the xb_private member
14081 	 * of the new xbuf.  Callers are allowed to depend on this.
14082 	 */
14083 	new_xp->xb_private = bp;
14084 
14085 	new_bp->b_private  = new_xp;
14086 
14087 	return (new_bp);
14088 }
14089 
14090 /*
14091  *    Function: sd_bioclone_free
14092  *
14093  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14094  *		in the larger than partition operation.
14095  *
14096  *     Context: May be called under interrupt context
14097  */
14098 
14099 static void
14100 sd_bioclone_free(struct buf *bp)
14101 {
14102 	struct sd_xbuf	*xp;
14103 
14104 	ASSERT(bp != NULL);
14105 	xp = SD_GET_XBUF(bp);
14106 	ASSERT(xp != NULL);
14107 
14108 	/*
14109 	 * Call bp_mapout() before freeing the buf, in case a lower
14110 	 * layer or HBA had done a bp_mapin(). We must do this here
14111 	 * as we are the "originator" of the shadow buf.
14112 	 */
14113 	bp_mapout(bp);
14114 
14115 	/*
14116 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14117 	 * never gets confused by a stale value in this field. (Just a little
14118 	 * extra defensiveness here.)
14119 	 */
14120 	bp->b_iodone = NULL;
14121 
14122 	freerbuf(bp);
14123 
14124 	kmem_free(xp, sizeof (struct sd_xbuf));
14125 }
14126 
14127 /*
14128  *    Function: sd_shadow_buf_free
14129  *
14130  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14131  *
14132  *     Context: May be called under interrupt context
14133  */
14134 
14135 static void
14136 sd_shadow_buf_free(struct buf *bp)
14137 {
14138 	struct sd_xbuf	*xp;
14139 
14140 	ASSERT(bp != NULL);
14141 	xp = SD_GET_XBUF(bp);
14142 	ASSERT(xp != NULL);
14143 
14144 #if defined(__sparc)
14145 	/*
14146 	 * Call bp_mapout() before freeing the buf, in case a lower
14147 	 * layer or HBA had done a bp_mapin(). We must do this here
14148 	 * as we are the "originator" of the shadow buf.
14149 	 */
14150 	bp_mapout(bp);
14151 #endif
14152 
14153 	/*
14154 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14155 	 * never gets confused by a stale value in this field. (Just a little
14156 	 * extra defensiveness here.)
14157 	 */
14158 	bp->b_iodone = NULL;
14159 
14160 #if defined(__i386) || defined(__amd64)
14161 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
14162 	freerbuf(bp);
14163 #else
14164 	scsi_free_consistent_buf(bp);
14165 #endif
14166 
14167 	kmem_free(xp, sizeof (struct sd_xbuf));
14168 }
14169 
14170 
14171 /*
14172  *    Function: sd_print_transport_rejected_message
14173  *
14174  * Description: This implements the ludicrously complex rules for printing
14175  *		a "transport rejected" message.  This is to address the
14176  *		specific problem of having a flood of this error message
14177  *		produced when a failover occurs.
14178  *
14179  *     Context: Any.
14180  */
14181 
14182 static void
14183 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14184 	int code)
14185 {
14186 	ASSERT(un != NULL);
14187 	ASSERT(mutex_owned(SD_MUTEX(un)));
14188 	ASSERT(xp != NULL);
14189 
14190 	/*
14191 	 * Print the "transport rejected" message under the following
14192 	 * conditions:
14193 	 *
14194 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14195 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14196 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14197 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14198 	 *   scsi_transport(9F) (which indicates that the target might have
14199 	 *   gone off-line).  This uses the un->un_tran_fatal_count
14200 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14201 	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
14202 	 *   from scsi_transport().
14203 	 *
14204 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14205 	 * the preceding cases in order for the message to be printed.
14206 	 */
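
	/*
	 * Restating the rules above, the message is emitted iff:
	 *
	 *	!FLAG_SILENT && ((sd_level_mask & SD_LOGMASK_DIAG) ||
	 *	    (code != TRAN_FATAL_ERROR) || first TRAN_FATAL_ERROR)
	 */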
14207 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
14208 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14209 		    (code != TRAN_FATAL_ERROR) ||
14210 		    (un->un_tran_fatal_count == 1)) {
14211 			switch (code) {
14212 			case TRAN_BADPKT:
14213 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14214 				    "transport rejected bad packet\n");
14215 				break;
14216 			case TRAN_FATAL_ERROR:
14217 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14218 				    "transport rejected fatal error\n");
14219 				break;
14220 			default:
14221 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14222 				    "transport rejected (%d)\n", code);
14223 				break;
14224 			}
14225 		}
14226 	}
14227 }
14228 
14229 
14230 /*
14231  *    Function: sd_add_buf_to_waitq
14232  *
14233  * Description: Add the given buf(9S) struct to the wait queue for the
14234  *		instance.  If sorting is enabled, then the buf is added
14235  *		to the queue via an elevator sort algorithm (a la
14236  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14237  *		If sorting is not enabled, then the buf is just added
14238  *		to the end of the wait queue.
14239  *
14240  * Return Code: void
14241  *
14242  *     Context: Does not sleep/block, therefore technically can be called
14243  *		from any context.  However, if sorting is enabled, the
14244  *		execution time is indeterminate and may be long if
14245  *		the wait queue grows large.
14246  */
14247 
14248 static void
14249 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14250 {
14251 	struct buf *ap;
14252 
14253 	ASSERT(bp != NULL);
14254 	ASSERT(un != NULL);
14255 	ASSERT(mutex_owned(SD_MUTEX(un)));
14256 
14257 	/* If the queue is empty, add the buf as the only entry & return. */
14258 	if (un->un_waitq_headp == NULL) {
14259 		ASSERT(un->un_waitq_tailp == NULL);
14260 		un->un_waitq_headp = un->un_waitq_tailp = bp;
14261 		bp->av_forw = NULL;
14262 		return;
14263 	}
14264 
14265 	ASSERT(un->un_waitq_tailp != NULL);
14266 
14267 	/*
14268 	 * If sorting is disabled, just add the buf to the tail end of
14269 	 * the wait queue and return.
14270 	 */
14271 	if (un->un_f_disksort_disabled) {
14272 		un->un_waitq_tailp->av_forw = bp;
14273 		un->un_waitq_tailp = bp;
14274 		bp->av_forw = NULL;
14275 		return;
14276 	}
14277 
14278 	/*
14279 	 * Sort through the list of requests currently on the wait queue
14280 	 * and add the new buf request at the appropriate position.
14281 	 *
14282 	 * The un->un_waitq_headp is an activity chain pointer on which
14283 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14284 	 * first queue holds requests positioned after the current
14285 	 * SD_GET_BLKNO() (that of the head request); the second holds
14286 	 * requests which came in after their SD_GET_BLKNO() was passed.
14287 	 * Thus we implement a one way scan, retracting after reaching
14288 	 * the end of the drive to the first request on the second
14289 	 * queue, at which time it becomes the first queue.
14290 	 * A one-way scan is natural because of the way UNIX read-ahead
14291 	 * blocks are allocated.
14292 	 *
14293 	 * If we lie after the first request, then we must locate the
14294 	 * second request list and add ourselves to it.
14295 	 */
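	/*
	 * Worked example (illustrative): suppose the waitq holds blocks
	 * 100 -> 300 -> 700 -> 50 -> 80 (first list 100..700, wrapped
	 * second list 50..80).  A new request for block 400 is inserted
	 * between 300 and 700; block 60 sorts into the second list
	 * between 50 and 80; and block 90 goes at the very end, after 80.
	 */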
14296 	ap = un->un_waitq_headp;
14297 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14298 		while (ap->av_forw != NULL) {
14299 			/*
14300 			 * Look for an "inversion" in the (normally
14301 			 * ascending) block numbers. This indicates
14302 			 * the start of the second request list.
14303 			 */
14304 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14305 				/*
14306 				 * Search the second request list for the
14307 				 * first request at a larger block number.
14308 				 * We go before that; however if there is
14309 				 * no such request, we go at the end.
14310 				 */
14311 				do {
14312 					if (SD_GET_BLKNO(bp) <
14313 					    SD_GET_BLKNO(ap->av_forw)) {
14314 						goto insert;
14315 					}
14316 					ap = ap->av_forw;
14317 				} while (ap->av_forw != NULL);
14318 				goto insert;		/* after last */
14319 			}
14320 			ap = ap->av_forw;
14321 		}
14322 
14323 		/*
14324 		 * No inversions... we will go after the last, and
14325 		 * be the first request in the second request list.
14326 		 */
14327 		goto insert;
14328 	}
14329 
14330 	/*
14331 	 * Request is at/after the current request...
14332 	 * sort in the first request list.
14333 	 */
14334 	while (ap->av_forw != NULL) {
14335 		/*
14336 		 * We want to go after the current request (1) if
14337 		 * there is an inversion after it (i.e. it is the end
14338 		 * of the first request list), or (2) if the next
14339 		 * request is a larger block no. than our request.
14340 		 */
14341 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14342 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14343 			goto insert;
14344 		}
14345 		ap = ap->av_forw;
14346 	}
14347 
14348 	/*
14349 	 * Neither a second list nor a larger request, therefore
14350 	 * we go at the end of the first list (which is the same
14351 	 * as the end of the whole shebang).
14352 	 */
14353 insert:
14354 	bp->av_forw = ap->av_forw;
14355 	ap->av_forw = bp;
14356 
14357 	/*
14358 	 * If we inserted onto the tail end of the waitq, make sure the
14359 	 * tail pointer is updated.
14360 	 */
14361 	if (ap == un->un_waitq_tailp) {
14362 		un->un_waitq_tailp = bp;
14363 	}
14364 }
14365 
14366 
14367 /*
14368  *    Function: sd_start_cmds
14369  *
14370  * Description: Remove and transport cmds from the driver queues.
14371  *
14372  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14373  *
14374  *		immed_bp - ptr to a buf to be transported immediately. Only
14375  *		the immed_bp is transported; bufs on the waitq are not
14376  *		processed and the un_retry_bp is not checked.  If immed_bp is
14377  *		NULL, then normal queue processing is performed.
14378  *
14379  *     Context: May be called from kernel thread context, interrupt context,
14380  *		or runout callback context. This function may not block or
14381  *		call routines that block.
14382  */
14383 
14384 static void
14385 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14386 {
14387 	struct	sd_xbuf	*xp;
14388 	struct	buf	*bp;
14389 	void	(*statp)(kstat_io_t *);
14390 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14391 	void	(*saved_statp)(kstat_io_t *);
14392 #endif
14393 	int	rval;
14394 
14395 	ASSERT(un != NULL);
14396 	ASSERT(mutex_owned(SD_MUTEX(un)));
14397 	ASSERT(un->un_ncmds_in_transport >= 0);
14398 	ASSERT(un->un_throttle >= 0);
14399 
14400 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14401 
14402 	do {
14403 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14404 		saved_statp = NULL;
14405 #endif
14406 
14407 		/*
14408 		 * If we are syncing or dumping, fail the command to
14409 		 * avoid recursively calling back into scsi_transport().
14410 		 * The dump I/O itself uses a separate code path so this
14411 		 * only prevents non-dump I/O from being sent while dumping.
14412 		 * File system sync takes place before dumping begins.
14413 		 * During panic, filesystem I/O is allowed provided
14414 		 * un_in_callback is <= 1.  This is to prevent recursion
14415 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14416 		 * sd_start_cmds and so on.  See panic.c for more information
14417 		 * about the states the system can be in during panic.
14418 		 */
14419 		if ((un->un_state == SD_STATE_DUMPING) ||
14420 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14421 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14422 			    "sd_start_cmds: panicking\n");
14423 			goto exit;
14424 		}
14425 
14426 		if ((bp = immed_bp) != NULL) {
14427 			/*
14428 			 * We have a bp that must be transported immediately.
14429 			 * It's OK to transport the immed_bp here without doing
14430 			 * the throttle limit check because the immed_bp is
14431 			 * always used in a retry/recovery case. This means
14432 			 * that we know we are not at the throttle limit by
14433 			 * virtue of the fact that to get here we must have
14434 			 * already gotten a command back via sdintr(). This also
14435 			 * relies on (1) the command on un_retry_bp preventing
14436 			 * further commands from the waitq from being issued;
14437 			 * and (2) the code in sd_retry_command checking the
14438 			 * throttle limit before issuing a delayed or immediate
14439 			 * retry. This holds even if the throttle limit is
14440 			 * currently ratcheted down from its maximum value.
14441 			 */
14442 			statp = kstat_runq_enter;
14443 			if (bp == un->un_retry_bp) {
14444 				ASSERT((un->un_retry_statp == NULL) ||
14445 				    (un->un_retry_statp == kstat_waitq_enter) ||
14446 				    (un->un_retry_statp ==
14447 				    kstat_runq_back_to_waitq));
14448 				/*
14449 				 * If the waitq kstat was incremented when
14450 				 * sd_set_retry_bp() queued this bp for a retry,
14451 				 * then we must set up statp so that the waitq
14452 				 * count will get decremented correctly below.
14453 				 * Also we must clear un->un_retry_statp to
14454 				 * ensure that we do not act on a stale value
14455 				 * in this field.
14456 				 */
14457 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14458 				    (un->un_retry_statp ==
14459 				    kstat_runq_back_to_waitq)) {
14460 					statp = kstat_waitq_to_runq;
14461 				}
14462 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14463 				saved_statp = un->un_retry_statp;
14464 #endif
14465 				un->un_retry_statp = NULL;
14466 
14467 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14468 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14469 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14470 				    un, un->un_retry_bp, un->un_throttle,
14471 				    un->un_ncmds_in_transport);
14472 			} else {
14473 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14474 				    "processing priority bp:0x%p\n", bp);
14475 			}
14476 
14477 		} else if ((bp = un->un_waitq_headp) != NULL) {
14478 			/*
14479 			 * A command on the waitq is ready to go, but do not
14480 			 * send it if:
14481 			 *
14482 			 * (1) the throttle limit has been reached, or
14483 			 * (2) a retry is pending, or
14484 			 * (3) a START_STOP_UNIT callback is pending, or
14485 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14486 			 *	command is pending.
14487 			 *
14488 			 * For all of these conditions, IO processing will
14489 			 * restart after the condition is cleared.
14490 			 */
14491 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14492 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14493 				    "sd_start_cmds: exiting, "
14494 				    "throttle limit reached!\n");
14495 				goto exit;
14496 			}
14497 			if (un->un_retry_bp != NULL) {
14498 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14499 				    "sd_start_cmds: exiting, retry pending!\n");
14500 				goto exit;
14501 			}
14502 			if (un->un_startstop_timeid != NULL) {
14503 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14504 				    "sd_start_cmds: exiting, "
14505 				    "START_STOP pending!\n");
14506 				goto exit;
14507 			}
14508 			if (un->un_direct_priority_timeid != NULL) {
14509 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14510 				    "sd_start_cmds: exiting, "
14511 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14512 				goto exit;
14513 			}
14514 
14515 			/* Dequeue the command */
14516 			un->un_waitq_headp = bp->av_forw;
14517 			if (un->un_waitq_headp == NULL) {
14518 				un->un_waitq_tailp = NULL;
14519 			}
14520 			bp->av_forw = NULL;
14521 			statp = kstat_waitq_to_runq;
14522 			SD_TRACE(SD_LOG_IO_CORE, un,
14523 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14524 
14525 		} else {
14526 			/* No work to do so bail out now */
14527 			SD_TRACE(SD_LOG_IO_CORE, un,
14528 			    "sd_start_cmds: no more work, exiting!\n");
14529 			goto exit;
14530 		}
14531 
14532 		/*
14533 		 * Reset the state to normal. This is the mechanism by which
14534 		 * the state transitions from either SD_STATE_RWAIT or
14535 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14536 		 * If state is SD_STATE_PM_CHANGING then this command is
14537 		 * part of the device power control and the state must
14538 		 * not be put back to normal. Doing so would allow new
14539 		 * commands to proceed when they shouldn't; the device
14540 		 * may be going off.
14541 		 */
14542 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14543 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14544 			New_state(un, SD_STATE_NORMAL);
14545 		}
14546 
14547 		xp = SD_GET_XBUF(bp);
14548 		ASSERT(xp != NULL);
14549 
14550 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14551 		/*
14552 		 * Allocate the scsi_pkt if we need one, or attach DMA
14553 		 * resources if we have a scsi_pkt that needs them. The
14554 		 * latter should only occur for commands that are being
14555 		 * retried.
14556 		 */
14557 		if ((xp->xb_pktp == NULL) ||
14558 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14559 #else
14560 		if (xp->xb_pktp == NULL) {
14561 #endif
14562 			/*
14563 			 * There is no scsi_pkt allocated for this buf. Call
14564 			 * the initpkt function to allocate & init one.
14565 			 *
14566 			 * The scsi_init_pkt runout callback functionality is
14567 			 * implemented as follows:
14568 			 *
14569 			 * 1) The initpkt function always calls
14570 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14571 			 *    callback routine.
14572 			 * 2) A successful packet allocation is initialized and
14573 			 *    the I/O is transported.
14574 			 * 3) The I/O associated with an allocation resource
14575 			 *    failure is left on its queue to be retried via
14576 			 *    runout or the next I/O.
14577 			 * 4) The I/O associated with a DMA error is removed
14578 			 *    from the queue and failed with EIO. Processing of
14579 			 *    the transport queues is also halted to be
14580 			 *    restarted via runout or the next I/O.
14581 			 * 5) The I/O associated with a CDB size or packet
14582 			 *    size error is removed from the queue and failed
14583 			 *    with EIO. Processing of the transport queues is
14584 			 *    continued.
14585 			 *
14586 			 * Note: there is no interface for canceling a runout
14587 			 * callback. To prevent the driver from detaching or
14588 			 * suspending while a runout is pending the driver
14589 			 * state is set to SD_STATE_RWAIT
14590 			 *
14591 			 * Note: using the scsi_init_pkt callback facility can
14592 			 * result in an I/O request persisting at the head of
14593 			 * the list which cannot be satisfied even after
14594 			 * multiple retries. In the future the driver may
14595 			 * implement some kind of maximum runout count before
14596 			 * failing an I/O.
14597 			 *
14598 			 * Note: the use of funcp below may seem superfluous,
14599 			 * but it helps warlock figure out the correct
14600 			 * initpkt function calls (see [s]sd.wlcmd).
14601 			 */
14602 			struct scsi_pkt	*pktp;
14603 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14604 
14605 			ASSERT(bp != un->un_rqs_bp);
14606 
14607 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14608 			switch ((*funcp)(bp, &pktp)) {
14609 			case  SD_PKT_ALLOC_SUCCESS:
14610 				xp->xb_pktp = pktp;
14611 				SD_TRACE(SD_LOG_IO_CORE, un,
14612 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14613 				    pktp);
14614 				goto got_pkt;
14615 
14616 			case SD_PKT_ALLOC_FAILURE:
14617 				/*
14618 				 * Temporary (hopefully) resource depletion.
14619 				 * Since retries and RQS commands always have a
14620 				 * scsi_pkt allocated, these cases should never
14621 				 * get here. So the only cases this needs to
14622 				 * handle is a bp from the waitq (which we put
14623 				 * back onto the waitq for sdrunout), or a bp
14624 				 * sent as an immed_bp (which we just fail).
14625 				 */
14626 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14627 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14628 
14629 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14630 
14631 				if (bp == immed_bp) {
14632 					/*
14633 					 * If SD_XB_DMA_FREED is clear, then
14634 					 * this is a failure to allocate a
14635 					 * scsi_pkt, and we must fail the
14636 					 * command.
14637 					 */
14638 					if ((xp->xb_pkt_flags &
14639 					    SD_XB_DMA_FREED) == 0) {
14640 						break;
14641 					}
14642 
14643 					/*
14644 					 * If this immediate command is NOT our
14645 					 * un_retry_bp, then we must fail it.
14646 					 */
14647 					if (bp != un->un_retry_bp) {
14648 						break;
14649 					}
14650 
14651 					/*
14652 					 * We get here if this cmd is our
14653 					 * un_retry_bp that was DMAFREED, but
14654 					 * scsi_init_pkt() failed to reallocate
14655 					 * DMA resources when we attempted to
14656 					 * retry it. This can happen when an
14657 					 * mpxio failover is in progress, but
14658 					 * we don't want to just fail the
14659 					 * command in this case.
14660 					 *
14661 					 * Use timeout(9F) to restart it after
14662 					 * a 100ms delay.  We don't want to
14663 					 * let sdrunout() restart it, because
14664 					 * sdrunout() is just supposed to start
14665 					 * commands that are sitting on the
14666 					 * wait queue.  The un_retry_bp stays
14667 					 * set until the command completes, but
14668 					 * sdrunout can be called many times
14669 					 * before that happens.  Since sdrunout
14670 					 * cannot tell if the un_retry_bp is
14671 					 * already in the transport, it could
14672 					 * end up calling scsi_transport() for
14673 					 * the un_retry_bp multiple times.
14674 					 *
14675 					 * Also: don't schedule the callback
14676 					 * if some other callback is already
14677 					 * pending.
14678 					 */
14679 					if (un->un_retry_statp == NULL) {
14680 						/*
14681 						 * restore the kstat pointer to
14682 						 * keep kstat counts coherent
14683 						 * when we do retry the command.
14684 						 */
14685 						un->un_retry_statp =
14686 						    saved_statp;
14687 					}
14688 
14689 					if ((un->un_startstop_timeid == NULL) &&
14690 					    (un->un_retry_timeid == NULL) &&
14691 					    (un->un_direct_priority_timeid ==
14692 					    NULL)) {
14693 
14694 						un->un_retry_timeid =
14695 						    timeout(
14696 						    sd_start_retry_command,
14697 						    un, SD_RESTART_TIMEOUT);
14698 					}
14699 					goto exit;
14700 				}
14701 
14702 #else
14703 				if (bp == immed_bp) {
14704 					break;	/* Just fail the command */
14705 				}
14706 #endif
14707 
14708 				/* Add the buf back to the head of the waitq */
14709 				bp->av_forw = un->un_waitq_headp;
14710 				un->un_waitq_headp = bp;
14711 				if (un->un_waitq_tailp == NULL) {
14712 					un->un_waitq_tailp = bp;
14713 				}
14714 				goto exit;
14715 
14716 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14717 				/*
14718 				 * HBA DMA resource failure. Fail the command
14719 				 * and continue processing of the queues.
14720 				 */
14721 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14722 				    "sd_start_cmds: "
14723 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14724 				break;
14725 
14726 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14727 				/*
14728 				 * Note:x86: Partial DMA mapping not supported
14729 				 * for USCSI commands, and all the needed DMA
14730 				 * resources were not allocated.
14731 				 */
14732 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14733 				    "sd_start_cmds: "
14734 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14735 				break;
14736 
14737 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14738 				/*
14739 				 * Note:x86: Request cannot fit into CDB based
14740 				 * on lba and len.
14741 				 */
14742 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14743 				    "sd_start_cmds: "
14744 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14745 				break;
14746 
14747 			default:
14748 				/* Should NEVER get here! */
14749 				panic("scsi_initpkt error");
14750 				/*NOTREACHED*/
14751 			}
14752 
14753 			/*
14754 			 * Fatal error in allocating a scsi_pkt for this buf.
14755 			 * Update kstats & return the buf with an error code.
14756 			 * We must use sd_return_failed_command_no_restart() to
14757 			 * avoid a recursive call back into sd_start_cmds().
14758 			 * However this also means that we must keep processing
14759 			 * the waitq here in order to avoid stalling.
14760 			 */
14761 			if (statp == kstat_waitq_to_runq) {
14762 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14763 			}
14764 			sd_return_failed_command_no_restart(un, bp, EIO);
14765 			if (bp == immed_bp) {
14766 				/* immed_bp is gone by now, so clear this */
14767 				immed_bp = NULL;
14768 			}
14769 			continue;
14770 		}
14771 got_pkt:
14772 		if (bp == immed_bp) {
14773 			/* goto the head of the class.... */
14774 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14775 		}
14776 
14777 		un->un_ncmds_in_transport++;
14778 		SD_UPDATE_KSTATS(un, statp, bp);
14779 
14780 		/*
14781 		 * Call scsi_transport() to send the command to the target.
14782 		 * According to SCSA architecture, we must drop the mutex here
14783 		 * before calling scsi_transport() in order to avoid deadlock.
14784 		 * Note that the scsi_pkt's completion routine can be executed
14785 		 * (from interrupt context) even before the call to
14786 		 * scsi_transport() returns.
14787 		 */
14788 		SD_TRACE(SD_LOG_IO_CORE, un,
14789 		    "sd_start_cmds: calling scsi_transport()\n");
14790 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14791 
14792 		mutex_exit(SD_MUTEX(un));
14793 		rval = scsi_transport(xp->xb_pktp);
14794 		mutex_enter(SD_MUTEX(un));
14795 
14796 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14797 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14798 
14799 		switch (rval) {
14800 		case TRAN_ACCEPT:
14801 			/* Clear this with every pkt accepted by the HBA */
14802 			un->un_tran_fatal_count = 0;
14803 			break;	/* Success; try the next cmd (if any) */
14804 
14805 		case TRAN_BUSY:
14806 			un->un_ncmds_in_transport--;
14807 			ASSERT(un->un_ncmds_in_transport >= 0);
14808 
14809 			/*
14810 			 * Don't retry request sense, the sense data
14811 			 * is lost when another request is sent.
14812 			 * Free up the rqs buf and retry
14813 			 * the original failed cmd.  Update kstat.
14814 			 */
14815 			if (bp == un->un_rqs_bp) {
14816 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14817 				bp = sd_mark_rqs_idle(un, xp);
14818 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14819 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
14820 					kstat_waitq_enter);
14821 				goto exit;
14822 			}
14823 
14824 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14825 			/*
14826 			 * Free the DMA resources for the scsi_pkt. This will
14827 			 * allow mpxio to select another path the next time
14828 			 * we call scsi_transport() with this scsi_pkt.
14829 			 * See sdintr() for the rationale behind this.
14830 			 */
14831 			if ((un->un_f_is_fibre == TRUE) &&
14832 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14833 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14834 				scsi_dmafree(xp->xb_pktp);
14835 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14836 			}
14837 #endif
14838 
14839 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14840 				/*
14841 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14842 				 * are for error recovery situations. These do
14843 				 * not use the normal command waitq, so if they
14844 				 * get a TRAN_BUSY we cannot put them back onto
14845 				 * the waitq for later retry. One possible
14846 				 * problem is that there could already be some
14847 				 * other command on un_retry_bp that is waiting
14848 				 * for this one to complete, so we would be
14849 				 * deadlocked if we put this command back onto
14850 				 * the waitq for later retry (since un_retry_bp
14851 				 * must complete before the driver gets back to
14852 				 * commands on the waitq).
14853 				 *
14854 				 * To avoid deadlock we must schedule a callback
14855 				 * that will restart this command after a set
14856 				 * interval.  This should keep retrying for as
14857 				 * long as the underlying transport keeps
14858 				 * returning TRAN_BUSY (just like for other
14859 				 * commands).  Use the same timeout interval as
14860 				 * for the ordinary TRAN_BUSY retry.
14861 				 */
14862 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14863 				    "sd_start_cmds: scsi_transport() returned "
14864 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14865 
14866 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14867 				un->un_direct_priority_timeid =
14868 				    timeout(sd_start_direct_priority_command,
14869 				    bp, SD_BSY_TIMEOUT / 500);
14870 
14871 				goto exit;
14872 			}
14873 
14874 			/*
14875 			 * For TRAN_BUSY, we want to reduce the throttle value,
14876 			 * unless we are retrying a command.
14877 			 */
14878 			if (bp != un->un_retry_bp) {
14879 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14880 			}
14881 
14882 			/*
14883 			 * Set up the bp to be tried again 10 ms later.
14884 			 * Note:x86: Is there a timeout value in the sd_lun
14885 			 * for this condition?
14886 			 */
14887 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
14888 				kstat_runq_back_to_waitq);
14889 			goto exit;
14890 
14891 		case TRAN_FATAL_ERROR:
14892 			un->un_tran_fatal_count++;
14893 			/* FALLTHRU */
14894 
14895 		case TRAN_BADPKT:
14896 		default:
14897 			un->un_ncmds_in_transport--;
14898 			ASSERT(un->un_ncmds_in_transport >= 0);
14899 
14900 			/*
14901 			 * If this is our REQUEST SENSE command with a
14902 			 * transport error, we must get back the pointers
14903 			 * to the original buf, and mark the REQUEST
14904 			 * SENSE command as "available".
14905 			 */
14906 			if (bp == un->un_rqs_bp) {
14907 				bp = sd_mark_rqs_idle(un, xp);
14908 				xp = SD_GET_XBUF(bp);
14909 			} else {
14910 				/*
14911 				 * Legacy behavior: do not update transport
14912 				 * error count for request sense commands.
14913 				 */
14914 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14915 			}
14916 
14917 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14918 			sd_print_transport_rejected_message(un, xp, rval);
14919 
14920 			/*
14921 			 * We must use sd_return_failed_command_no_restart() to
14922 			 * avoid a recursive call back into sd_start_cmds().
14923 			 * However this also means that we must keep processing
14924 			 * the waitq here in order to avoid stalling.
14925 			 */
14926 			sd_return_failed_command_no_restart(un, bp, EIO);
14927 
14928 			/*
14929 			 * Notify any threads waiting in sd_ddi_suspend() that
14930 			 * a command completion has occurred.
14931 			 */
14932 			if (un->un_state == SD_STATE_SUSPENDED) {
14933 				cv_broadcast(&un->un_disk_busy_cv);
14934 			}
14935 
14936 			if (bp == immed_bp) {
14937 				/* immed_bp is gone by now, so clear this */
14938 				immed_bp = NULL;
14939 			}
14940 			break;
14941 		}
14942 
14943 	} while (immed_bp == NULL);
14944 
14945 exit:
14946 	ASSERT(mutex_owned(SD_MUTEX(un)));
14947 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14948 }
14949 
14950 
14951 /*
14952  *    Function: sd_return_command
14953  *
14954  * Description: Returns a command to its originator (with or without an
14955  *		error).  Also starts commands waiting to be transported
14956  *		to the target.
14957  *
14958  *     Context: May be called from interrupt, kernel, or timeout context
14959  */
14960 
14961 static void
14962 sd_return_command(struct sd_lun *un, struct buf *bp)
14963 {
14964 	struct sd_xbuf *xp;
14965 #if defined(__i386) || defined(__amd64)
14966 	struct scsi_pkt *pktp;
14967 #endif
14968 
14969 	ASSERT(bp != NULL);
14970 	ASSERT(un != NULL);
14971 	ASSERT(mutex_owned(SD_MUTEX(un)));
14972 	ASSERT(bp != un->un_rqs_bp);
14973 	xp = SD_GET_XBUF(bp);
14974 	ASSERT(xp != NULL);
14975 
14976 #if defined(__i386) || defined(__amd64)
14977 	pktp = SD_GET_PKTP(bp);
14978 #endif
14979 
14980 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14981 
14982 #if defined(__i386) || defined(__amd64)
14983 	/*
14984 	 * Note:x86: check for the "sdrestart failed" case.
14985 	 */
14986 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
14987 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
14988 		(xp->xb_pktp->pkt_resid == 0)) {
14989 
14990 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
14991 			/*
14992 			 * Successfully set up next portion of cmd
14993 			 * transfer, try sending it
14994 			 */
14995 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14996 			    NULL, NULL, 0, (clock_t)0, NULL);
14997 			sd_start_cmds(un, NULL);
14998 			return;	/* Note:x86: need a return here? */
14999 		}
15000 	}
15001 #endif
15002 
15003 	/*
15004 	 * If this is the failfast bp, clear it from un_failfast_bp. This
15005 	 * can happen if upon being re-tried the failfast bp either
15006 	 * succeeded or encountered another error (possibly even a different
15007 	 * error than the one that precipitated the failfast state, but in
15008 	 * that case it would have had to exhaust retries as well). Regardless,
15009 	 * this should not occur while the instance is in the active
15010 	 * failfast state.
15011 	 */
15012 	if (bp == un->un_failfast_bp) {
15013 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15014 		un->un_failfast_bp = NULL;
15015 	}
15016 
15017 	/*
15018 	 * Clear the failfast state upon successful completion of ANY cmd.
15019 	 */
15020 	if (bp->b_error == 0) {
15021 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15022 	}
15023 
15024 	/*
15025 	 * This is used if the command was retried one or more times. Show that
15026 	 * we are done with it, and allow processing of the waitq to resume.
15027 	 */
15028 	if (bp == un->un_retry_bp) {
15029 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15030 		    "sd_return_command: un:0x%p: "
15031 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15032 		un->un_retry_bp = NULL;
15033 		un->un_retry_statp = NULL;
15034 	}
15035 
15036 	SD_UPDATE_RDWR_STATS(un, bp);
15037 	SD_UPDATE_PARTITION_STATS(un, bp);
15038 
15039 	switch (un->un_state) {
15040 	case SD_STATE_SUSPENDED:
15041 		/*
15042 		 * Notify any threads waiting in sd_ddi_suspend() that
15043 		 * a command completion has occurred.
15044 		 */
15045 		cv_broadcast(&un->un_disk_busy_cv);
15046 		break;
15047 	default:
15048 		sd_start_cmds(un, NULL);
15049 		break;
15050 	}
15051 
15052 	/* Return this command up the iodone chain to its originator. */
15053 	mutex_exit(SD_MUTEX(un));
15054 
15055 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15056 	xp->xb_pktp = NULL;
15057 
15058 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15059 
15060 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15061 	mutex_enter(SD_MUTEX(un));
15062 
15063 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
15064 }
15065 
15066 
15067 /*
15068  *    Function: sd_return_failed_command
15069  *
15070  * Description: Command completion when an error occurred.
15071  *
15072  *     Context: May be called from interrupt context
15073  */
15074 
15075 static void
15076 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15077 {
15078 	ASSERT(bp != NULL);
15079 	ASSERT(un != NULL);
15080 	ASSERT(mutex_owned(SD_MUTEX(un)));
15081 
15082 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15083 	    "sd_return_failed_command: entry\n");
15084 
15085 	/*
15086 	 * b_resid could already be nonzero due to a partial data
15087 	 * transfer, so do not change it here.
15088 	 */
15089 	SD_BIOERROR(bp, errcode);
15090 
15091 	sd_return_command(un, bp);
15092 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15093 	    "sd_return_failed_command: exit\n");
15094 }
15095 
15096 
15097 /*
15098  *    Function: sd_return_failed_command_no_restart
15099  *
15100  * Description: Same as sd_return_failed_command, but ensures that no
15101  *		call back into sd_start_cmds will be issued.
15102  *
15103  *     Context: May be called from interrupt context
15104  */
15105 
15106 static void
15107 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
15108 	int errcode)
15109 {
15110 	struct sd_xbuf *xp;
15111 
15112 	ASSERT(bp != NULL);
15113 	ASSERT(un != NULL);
15114 	ASSERT(mutex_owned(SD_MUTEX(un)));
15115 	xp = SD_GET_XBUF(bp);
15116 	ASSERT(xp != NULL);
15117 	ASSERT(errcode != 0);
15118 
15119 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15120 	    "sd_return_failed_command_no_restart: entry\n");
15121 
15122 	/*
15123 	 * b_resid could already be nonzero due to a partial data
15124 	 * transfer, so do not change it here.
15125 	 */
15126 	SD_BIOERROR(bp, errcode);
15127 
15128 	/*
15129 	 * If this is the failfast bp, clear it. This can happen if the
15130 	 * failfast bp encountered a fatal error when we attempted to
15131 	 * re-try it (such as a scsi_transport(9F) failure).  However
15132 	 * we should NOT be in an active failfast state if the failfast
15133 	 * bp is not NULL.
15134 	 */
15135 	if (bp == un->un_failfast_bp) {
15136 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15137 		un->un_failfast_bp = NULL;
15138 	}
15139 
15140 	if (bp == un->un_retry_bp) {
15141 		/*
15142 		 * This command was retried one or more times. Show that we are
15143 		 * done with it, and allow processing of the waitq to resume.
15144 		 */
15145 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15146 		    "sd_return_failed_command_no_restart: "
15147 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15148 		un->un_retry_bp = NULL;
15149 		un->un_retry_statp = NULL;
15150 	}
15151 
15152 	SD_UPDATE_RDWR_STATS(un, bp);
15153 	SD_UPDATE_PARTITION_STATS(un, bp);
15154 
15155 	mutex_exit(SD_MUTEX(un));
15156 
15157 	if (xp->xb_pktp != NULL) {
15158 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15159 		xp->xb_pktp = NULL;
15160 	}
15161 
15162 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15163 
15164 	mutex_enter(SD_MUTEX(un));
15165 
15166 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15167 	    "sd_return_failed_command_no_restart: exit\n");
15168 }
15169 
15170 
15171 /*
15172  *    Function: sd_retry_command
15173  *
15174  * Description: queue up a command for retry, or (optionally) fail it
15175  *		if retry counts are exhausted.
15176  *
15177  *   Arguments: un - Pointer to the sd_lun struct for the target.
15178  *
15179  *		bp - Pointer to the buf for the command to be retried.
15180  *
15181  *		retry_check_flag - Flag to see which (if any) of the retry
15182  *		   counts should be decremented/checked. If the indicated
15183  *		   retry count is exhausted, then the command will not be
15184  *		   retried; it will be failed instead. This should use a
15185  *		   value equal to one of the following:
15186  *
15187  *			SD_RETRIES_NOCHECK
15188  *			SD_RETRIES_STANDARD
15189  *			SD_RETRIES_VICTIM
15190  *
15191  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15192  *		   if the check should be made to see if FLAG_ISOLATE is set
15193  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15194  *		   not retried, it is simply failed.
15195  *
15196  *		user_funcp - Ptr to function to call before dispatching the
15197  *		   command. May be NULL if no action needs to be performed.
15198  *		   (Primarily intended for printing messages.)
15199  *
15200  *		user_arg - Optional argument to be passed along to
15201  *		   the user_funcp call.
15202  *
15203  *		failure_code - errno return code to set in the bp if the
15204  *		   command is going to be failed.
15205  *
15206  *		retry_delay - Retry delay interval in (clock_t) units. May
15207  *		   be zero, which indicates that the command should be
15208  *		   retried immediately (ie, without an intervening delay).
15209  *
15210  *		statp - Ptr to kstat function to be updated if the command
15211  *		   is queued for a delayed retry. May be NULL if no kstat
15212  *		   update is desired.
15213  *
15214  *     Context: May be called from interrupt context.
15215  */
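/*
 * Illustrative call (a sketch mirroring the TRAN_BUSY handling in
 * sd_start_cmds() above): retry bp against the standard retry count,
 * failing it with EIO once retries are exhausted, after a short delay:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
 *	    SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
 */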
15216 
15217 static void
15218 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15219 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15220 	code), void *user_arg, int failure_code,  clock_t retry_delay,
15221 	void (*statp)(kstat_io_t *))
15222 {
15223 	struct sd_xbuf	*xp;
15224 	struct scsi_pkt	*pktp;
15225 
15226 	ASSERT(un != NULL);
15227 	ASSERT(mutex_owned(SD_MUTEX(un)));
15228 	ASSERT(bp != NULL);
15229 	xp = SD_GET_XBUF(bp);
15230 	ASSERT(xp != NULL);
15231 	pktp = SD_GET_PKTP(bp);
15232 	ASSERT(pktp != NULL);
15233 
15234 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15235 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15236 
15237 	/*
15238 	 * If we are syncing or dumping, fail the command to avoid
15239 	 * recursively calling back into scsi_transport().
15240 	 */
15241 	if (ddi_in_panic()) {
15242 		goto fail_command_no_log;
15243 	}
15244 
15245 	/*
15246 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15247 	 * log an error and fail the command.
15248 	 */
15249 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15250 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15251 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15252 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15253 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15254 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15255 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15256 		goto fail_command;
15257 	}
15258 
15259 	/*
15260 	 * If we are suspended, then put the command onto the head of the
15261 	 * wait queue since we don't want to start more commands.
15262 	 */
15263 	switch (un->un_state) {
15264 	case SD_STATE_SUSPENDED:
15265 	case SD_STATE_DUMPING:
15266 		bp->av_forw = un->un_waitq_headp;
15267 		un->un_waitq_headp = bp;
15268 		if (un->un_waitq_tailp == NULL) {
15269 			un->un_waitq_tailp = bp;
15270 		}
15271 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15272 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15273 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15274 		return;
15275 	default:
15276 		break;
15277 	}
15278 
15279 	/*
15280 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15281 	 * is set; if it is then we do not want to retry the command.
15282 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15283 	 */
15284 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15285 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15286 			goto fail_command;
15287 		}
15288 	}
15289 
15290 
15291 	/*
15292 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15293 	 * command timeout or a selection timeout has occurred. This means
15294 	 * that we were unable to establish any kind of communication with
15295 	 * the target, and subsequent retries and/or commands are likely
15296 	 * to encounter similar results and take a long time to complete.
15297 	 *
15298 	 * If this is a failfast error condition, we need to update the
15299 	 * failfast state, even if this bp does not have B_FAILFAST set.
15300 	 */
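	/*
	 * Summary of the state transitions implemented below:
	 *   INACTIVE -- 1st failfast error --> pending (un_failfast_bp set)
	 *   pending  -- same bp fails again --> ACTIVE (queues flushed)
	 *   non-failfast error --> INACTIVE (a pending bp, if any, is kept)
	 */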
15301 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15302 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15303 			ASSERT(un->un_failfast_bp == NULL);
15304 			/*
15305 			 * If we are already in the active failfast state, and
15306 			 * another failfast error condition has been detected,
15307 			 * then fail this command if it has B_FAILFAST set.
15308 			 * If B_FAILFAST is clear, then maintain the legacy
15309 			 * behavior of retrying heroically, even though this
15310 			 * will take a lot more time to fail the command.
15311 			 */
15312 			if (bp->b_flags & B_FAILFAST) {
15313 				goto fail_command;
15314 			}
15315 		} else {
15316 			/*
15317 			 * We're not in the active failfast state, but we
15318 			 * have a failfast error condition, so we must begin
15319 			 * transition to the next state. We do this regardless
15320 			 * of whether or not this bp has B_FAILFAST set.
15321 			 */
15322 			if (un->un_failfast_bp == NULL) {
15323 				/*
15324 				 * This is the first bp to meet a failfast
15325 				 * condition so save it on un_failfast_bp &
15326 				 * do normal retry processing. Do not enter
15327 				 * active failfast state yet. This marks
15328 				 * entry into the "failfast pending" state.
15329 				 */
15330 				un->un_failfast_bp = bp;
15331 
15332 			} else if (un->un_failfast_bp == bp) {
15333 				/*
15334 				 * This is the second time *this* bp has
15335 				 * encountered a failfast error condition,
15336 				 * so enter active failfast state & flush
15337 				 * queues as appropriate.
15338 				 */
15339 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15340 				un->un_failfast_bp = NULL;
15341 				sd_failfast_flushq(un);
15342 
15343 				/*
15344 				 * Fail this bp now if B_FAILFAST set;
15345 				 * otherwise continue with retries. (It would
15346 				 * be pretty ironic if this bp succeeded on a
15347 				 * subsequent retry after we just flushed all
15348 				 * the queues).
15349 				 */
15350 				if (bp->b_flags & B_FAILFAST) {
15351 					goto fail_command;
15352 				}
15353 
15354 #if !defined(lint) && !defined(__lint)
15355 			} else {
15356 				/*
15357 				 * If neither of the preceding conditionals
15358 				 * was true, it means that there is some
15359 				 * *other* bp that has met an initial failfast
15360 				 * condition and is currently either being
15361 				 * retried or is waiting to be retried. In
15362 				 * that case we should perform normal retry
15363 				 * processing on *this* bp, since there is a
15364 				 * chance that the current failfast condition
15365 				 * is transient and recoverable. If that does
15366 				 * not turn out to be the case, then retries
15367 				 * will be cleared when the wait queue is
15368 				 * flushed anyway.
15369 				 */
15370 #endif
15371 			}
15372 		}
15373 	} else {
15374 		/*
15375 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15376 		 * likely were able to at least establish some level of
15377 		 * communication with the target and subsequent commands
15378 		 * and/or retries are likely to get through to the target.
15379 		 * In this case we want to be aggressive about clearing
15380 		 * the failfast state. Note that this does not affect
15381 		 * the "failfast pending" condition.
15382 		 */
15383 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15384 	}
15385 
15386 
15387 	/*
15388 	 * Check the specified retry count to see if we can still do
15389 	 * any retries with this pkt before we should fail it.
15390 	 */
15391 	switch (retry_check_flag & SD_RETRIES_MASK) {
15392 	case SD_RETRIES_VICTIM:
15393 		/*
15394 		 * Check the victim retry count. If exhausted, then fall
15395 		 * thru & check against the standard retry count.
15396 		 */
15397 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15398 			/* Increment count & proceed with the retry */
15399 			xp->xb_victim_retry_count++;
15400 			break;
15401 		}
15402 		/* Victim retries exhausted, fall back to std. retries... */
15403 		/* FALLTHRU */
15404 
15405 	case SD_RETRIES_STANDARD:
15406 		if (xp->xb_retry_count >= un->un_retry_count) {
15407 			/* Retries exhausted, fail the command */
15408 			SD_TRACE(SD_LOG_IO_CORE, un,
15409 			    "sd_retry_command: retries exhausted!\n");
15410 			/*
15411 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15412 			 * commands with nonzero pkt_resid.
15413 			 */
15414 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15415 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15416 			    (pktp->pkt_resid != 0)) {
15417 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15418 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15419 					SD_UPDATE_B_RESID(bp, pktp);
15420 				}
15421 			}
15422 			goto fail_command;
15423 		}
15424 		xp->xb_retry_count++;
15425 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15426 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15427 		break;
15428 
15429 	case SD_RETRIES_UA:
15430 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15431 			/* Retries exhausted, fail the command */
15432 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15433 			    "Unit Attention retries exhausted. "
15434 			    "Check the target.\n");
15435 			goto fail_command;
15436 		}
15437 		xp->xb_ua_retry_count++;
15438 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15439 		    "sd_retry_command: retry count:%d\n",
15440 			xp->xb_ua_retry_count);
15441 		break;
15442 
15443 	case SD_RETRIES_BUSY:
15444 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15445 			/* Retries exhausted, fail the command */
15446 			SD_TRACE(SD_LOG_IO_CORE, un,
15447 			    "sd_retry_command: retries exhausted!\n");
15448 			goto fail_command;
15449 		}
15450 		xp->xb_retry_count++;
15451 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15452 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15453 		break;
15454 
15455 	case SD_RETRIES_NOCHECK:
15456 	default:
15457 		/* No retry count to check. Just proceed with the retry */
15458 		break;
15459 	}
15460 
15461 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15462 
15463 	/*
15464 	 * If we were given a zero timeout, we must attempt to retry the
15465 	 * command immediately (ie, without a delay).
15466 	 */
15467 	if (retry_delay == 0) {
15468 		/*
15469 		 * Check some limiting conditions to see if we can actually
15470 		 * do the immediate retry.  If we cannot, then we must
15471 		 * fall back to queueing up a delayed retry.
15472 		 */
15473 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15474 			/*
15475 			 * We are at the throttle limit for the target,
15476 			 * fall back to delayed retry.
15477 			 */
15478 			retry_delay = SD_BSY_TIMEOUT;
15479 			statp = kstat_waitq_enter;
15480 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15481 			    "sd_retry_command: immed. retry hit "
15482 			    "throttle!\n");
15483 		} else {
15484 			/*
15485 			 * We're clear to proceed with the immediate retry.
15486 			 * First call the user-provided function (if any)
15487 			 */
15488 			if (user_funcp != NULL) {
15489 				(*user_funcp)(un, bp, user_arg,
15490 				    SD_IMMEDIATE_RETRY_ISSUED);
15491 #ifdef __lock_lint
15492 				sd_print_incomplete_msg(un, bp, user_arg,
15493 				    SD_IMMEDIATE_RETRY_ISSUED);
15494 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15495 				    SD_IMMEDIATE_RETRY_ISSUED);
15496 				sd_print_sense_failed_msg(un, bp, user_arg,
15497 				    SD_IMMEDIATE_RETRY_ISSUED);
15498 #endif
15499 			}
15500 
15501 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15502 			    "sd_retry_command: issuing immediate retry\n");
15503 
15504 			/*
15505 			 * Call sd_start_cmds() to transport the command to
15506 			 * the target.
15507 			 */
15508 			sd_start_cmds(un, bp);
15509 
15510 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15511 			    "sd_retry_command exit\n");
15512 			return;
15513 		}
15514 	}
15515 
15516 	/*
15517 	 * Set up to retry the command after a delay.
15518 	 * First call the user-provided function (if any)
15519 	 */
15520 	if (user_funcp != NULL) {
15521 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15522 	}
15523 
15524 	sd_set_retry_bp(un, bp, retry_delay, statp);
15525 
15526 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15527 	return;
15528 
15529 fail_command:
15530 
15531 	if (user_funcp != NULL) {
15532 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15533 	}
15534 
15535 fail_command_no_log:
15536 
15537 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15538 	    "sd_retry_command: returning failed command\n");
15539 
15540 	sd_return_failed_command(un, bp, failure_code);
15541 
15542 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15543 }
15544 
15545 
15546 /*
15547  *    Function: sd_set_retry_bp
15548  *
15549  * Description: Set up the given bp for retry.
15550  *
15551  *   Arguments: un - ptr to associated softstate
15552  *		bp - ptr to buf(9S) for the command
15553  *		retry_delay - time interval before issuing retry (may be 0)
15554  *		statp - optional pointer to kstat function
15555  *
15556  *     Context: May be called under interrupt context
15557  */
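/*
 * Illustrative call (a sketch; this mirrors how the TRAN_BUSY path in
 * sd_start_cmds() above queues a delayed retry, with the kstat function
 * moving the command from the run queue back to the wait queue):
 *
 *	sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
 *	    kstat_runq_back_to_waitq);
 */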
15558 
15559 static void
15560 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15561 	void (*statp)(kstat_io_t *))
15562 {
15563 	ASSERT(un != NULL);
15564 	ASSERT(mutex_owned(SD_MUTEX(un)));
15565 	ASSERT(bp != NULL);
15566 
15567 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15568 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15569 
15570 	/*
15571 	 * Indicate that the command is being retried. This will not allow any
15572 	 * other commands on the wait queue to be transported to the target
15573 	 * until this command has been completed (success or failure). The
15574 	 * "retry command" is not transported to the target until the given
15575 	 * time delay expires, unless the user specified a 0 retry_delay.
15576 	 *
15577 	 * Note: the timeout(9F) callback routine is what actually calls
15578 	 * sd_start_cmds() to transport the command, with the exception of a
15579 	 * zero retry_delay. The only current use of a zero retry delay is
15580 	 * the case where a START_STOP_UNIT is sent to spin-up a device.
15581 	 */
15582 	if (un->un_retry_bp == NULL) {
15583 		ASSERT(un->un_retry_statp == NULL);
15584 		un->un_retry_bp = bp;
15585 
15586 		/*
15587 		 * If the user has not specified a delay, the command should
15588 		 * be queued and no timeout should be scheduled.
15589 		 */
15590 		if (retry_delay == 0) {
15591 			/*
15592 			 * Save the kstat pointer that will be used in the
15593 			 * call to SD_UPDATE_KSTATS() below, so that
15594 			 * sd_start_cmds() can correctly decrement the waitq
15595 			 * count when it is time to transport this command.
15596 			 */
15597 			un->un_retry_statp = statp;
15598 			goto done;
15599 		}
15600 	}
15601 
15602 	if (un->un_retry_bp == bp) {
15603 		/*
15604 		 * Save the kstat pointer that will be used in the call to
15605 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15606 		 * correctly decrement the waitq count when it is time to
15607 		 * transport this command.
15608 		 */
15609 		un->un_retry_statp = statp;
15610 
15611 		/*
15612 		 * Schedule a timeout if:
15613 		 *   1) The user has specified a delay.
15614 		 *   2) There is not a START_STOP_UNIT callback pending.
15615 		 *
15616 		 * If no delay has been specified, then it is up to the caller
15617 		 * to ensure that IO processing continues without stalling.
15618 		 * Effectively, this means that the caller will issue the
15619 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15620 		 * callback does this after the START STOP UNIT command has
15621 		 * completed. In either of these cases we should not schedule
15622 		 * a timeout callback here.  Also don't schedule the timeout if
15623 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15624 		 */
15625 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15626 		    (un->un_direct_priority_timeid == NULL)) {
15627 			un->un_retry_timeid =
15628 			    timeout(sd_start_retry_command, un, retry_delay);
15629 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15630 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15631 			    " bp:0x%p un_retry_timeid:0x%p\n",
15632 			    un, bp, un->un_retry_timeid);
15633 		}
15634 	} else {
15635 		/*
15636 		 * We only get in here if there is already another command
15637 		 * waiting to be retried.  In this case, we just put the
15638 		 * given command onto the wait queue, so it can be transported
15639 		 * after the current retry command has completed.
15640 		 *
15641 		 * Also we have to make sure that if the command at the head
15642 		 * of the wait queue is the un_failfast_bp, that we do not
15643 		 * put ahead of it any other commands that are to be retried.
15644 		 */
15645 		if ((un->un_failfast_bp != NULL) &&
15646 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15647 			/*
15648 			 * Enqueue this command AFTER the first command on
15649 			 * the wait queue (which is also un_failfast_bp).
15650 			 */
15651 			bp->av_forw = un->un_waitq_headp->av_forw;
15652 			un->un_waitq_headp->av_forw = bp;
15653 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15654 				un->un_waitq_tailp = bp;
15655 			}
15656 		} else {
15657 			/* Enqueue this command at the head of the waitq. */
15658 			bp->av_forw = un->un_waitq_headp;
15659 			un->un_waitq_headp = bp;
15660 			if (un->un_waitq_tailp == NULL) {
15661 				un->un_waitq_tailp = bp;
15662 			}
15663 		}
15664 
15665 		if (statp == NULL) {
15666 			statp = kstat_waitq_enter;
15667 		}
15668 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15669 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15670 	}
15671 
15672 done:
15673 	if (statp != NULL) {
15674 		SD_UPDATE_KSTATS(un, statp, bp);
15675 	}
15676 
15677 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15678 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15679 }
15680 
15681 
15682 /*
15683  *    Function: sd_start_retry_command
15684  *
15685  * Description: Start the command that has been waiting on the target's
15686  *		retry queue.  Called from timeout(9F) context after the
15687  *		retry delay interval has expired.
15688  *
15689  *   Arguments: arg - pointer to associated softstate for the device.
15690  *
15691  *     Context: timeout(9F) thread context.  May not sleep.
15692  */
15693 
15694 static void
15695 sd_start_retry_command(void *arg)
15696 {
15697 	struct sd_lun *un = arg;
15698 
15699 	ASSERT(un != NULL);
15700 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15701 
15702 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15703 	    "sd_start_retry_command: entry\n");
15704 
15705 	mutex_enter(SD_MUTEX(un));
15706 
15707 	un->un_retry_timeid = NULL;
15708 
15709 	if (un->un_retry_bp != NULL) {
15710 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15711 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15712 		    un, un->un_retry_bp);
15713 		sd_start_cmds(un, un->un_retry_bp);
15714 	}
15715 
15716 	mutex_exit(SD_MUTEX(un));
15717 
15718 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15719 	    "sd_start_retry_command: exit\n");
15720 }
15721 
15722 
15723 /*
15724  *    Function: sd_start_direct_priority_command
15725  *
15726  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15727  *		received TRAN_BUSY when we called scsi_transport() to send it
15728  *		to the underlying HBA. This function is called from timeout(9F)
15729  *		context after the delay interval has expired.
15730  *
15731  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15732  *
15733  *     Context: timeout(9F) thread context.  May not sleep.
15734  */
15735 
15736 static void
15737 sd_start_direct_priority_command(void *arg)
15738 {
15739 	struct buf	*priority_bp = arg;
15740 	struct sd_lun	*un;
15741 
15742 	ASSERT(priority_bp != NULL);
15743 	un = SD_GET_UN(priority_bp);
15744 	ASSERT(un != NULL);
15745 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15746 
15747 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15748 	    "sd_start_direct_priority_command: entry\n");
15749 
15750 	mutex_enter(SD_MUTEX(un));
15751 	un->un_direct_priority_timeid = NULL;
15752 	sd_start_cmds(un, priority_bp);
15753 	mutex_exit(SD_MUTEX(un));
15754 
15755 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15756 	    "sd_start_direct_priority_command: exit\n");
15757 }
15758 
15759 
15760 /*
15761  *    Function: sd_send_request_sense_command
15762  *
15763  * Description: Sends a REQUEST SENSE command to the target
15764  *
15765  *     Context: May be called from interrupt context.
15766  */
15767 
15768 static void
15769 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15770 	struct scsi_pkt *pktp)
15771 {
15772 	ASSERT(bp != NULL);
15773 	ASSERT(un != NULL);
15774 	ASSERT(mutex_owned(SD_MUTEX(un)));
15775 
15776 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15777 	    "entry: buf:0x%p\n", bp);
15778 
15779 	/*
15780 	 * If we are syncing or dumping, then fail the command to avoid a
15781 	 * recursive callback into scsi_transport(). Also fail the command
15782 	 * if we are suspended (legacy behavior).
15783 	 */
15784 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15785 	    (un->un_state == SD_STATE_DUMPING)) {
15786 		sd_return_failed_command(un, bp, EIO);
15787 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15788 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15789 		return;
15790 	}
15791 
15792 	/*
15793 	 * Retry the failed command and don't issue the request sense if:
15794 	 *    1) the sense buf is busy
15795 	 *    2) we have 1 or more outstanding commands on the target
15796 	 *    (the sense data will be cleared or invalidated anyway)
15797 	 *
15798 	 * Note: There could be an issue with not checking a retry limit here;
15799 	 * the problem is determining which retry limit to check.
15800 	 */
15801 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15802 		/* Don't retry if the command is flagged as non-retryable */
15803 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15804 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15805 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15806 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15807 			    "sd_send_request_sense_command: "
15808 			    "at full throttle, retrying exit\n");
15809 		} else {
15810 			sd_return_failed_command(un, bp, EIO);
15811 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15812 			    "sd_send_request_sense_command: "
15813 			    "at full throttle, non-retryable exit\n");
15814 		}
15815 		return;
15816 	}
15817 
15818 	sd_mark_rqs_busy(un, bp);
15819 	sd_start_cmds(un, un->un_rqs_bp);
15820 
15821 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15822 	    "sd_send_request_sense_command: exit\n");
15823 }
15824 
15825 
15826 /*
15827  *    Function: sd_mark_rqs_busy
15828  *
15829  * Description: Indicate that the request sense bp for this instance is
15830  *		in use.
15831  *
15832  *     Context: May be called under interrupt context
15833  */
15834 
15835 static void
15836 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15837 {
15838 	struct sd_xbuf	*sense_xp;
15839 
15840 	ASSERT(un != NULL);
15841 	ASSERT(bp != NULL);
15842 	ASSERT(mutex_owned(SD_MUTEX(un)));
15843 	ASSERT(un->un_sense_isbusy == 0);
15844 
15845 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15846 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15847 
15848 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15849 	ASSERT(sense_xp != NULL);
15850 
15851 	SD_INFO(SD_LOG_IO, un,
15852 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15853 
15854 	ASSERT(sense_xp->xb_pktp != NULL);
15855 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15856 	    == (FLAG_SENSING | FLAG_HEAD));
15857 
15858 	un->un_sense_isbusy = 1;
15859 	un->un_rqs_bp->b_resid = 0;
15860 	sense_xp->xb_pktp->pkt_resid  = 0;
15861 	sense_xp->xb_pktp->pkt_reason = 0;
15862 
15863 	/* So we can get back the bp at interrupt time! */
15864 	sense_xp->xb_sense_bp = bp;
15865 
15866 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15867 
15868 	/*
15869 	 * Mark this buf as awaiting sense data. (This is already set in
15870 	 * the pkt_flags for the RQS packet.)
15871 	 */
15872 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15873 
15874 	sense_xp->xb_retry_count	= 0;
15875 	sense_xp->xb_victim_retry_count = 0;
15876 	sense_xp->xb_ua_retry_count	= 0;
15877 	sense_xp->xb_dma_resid  = 0;
15878 
15879 	/* Clean up the fields for auto-request sense */
15880 	sense_xp->xb_sense_status = 0;
15881 	sense_xp->xb_sense_state  = 0;
15882 	sense_xp->xb_sense_resid  = 0;
15883 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15884 
15885 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15886 }
15887 
15888 
15889 /*
15890  *    Function: sd_mark_rqs_idle
15891  *
15892  * Description: SD_MUTEX must be held continuously through this routine
15893  *		to prevent reuse of the rqs struct before the caller can
15894  *		complete its processing.
15895  *
15896  * Return Code: Pointer to the RQS buf
15897  *
15898  *     Context: May be called under interrupt context
15899  */
15900 
15901 static struct buf *
15902 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15903 {
15904 	struct buf *bp;
15905 	ASSERT(un != NULL);
15906 	ASSERT(sense_xp != NULL);
15907 	ASSERT(mutex_owned(SD_MUTEX(un)));
15908 	ASSERT(un->un_sense_isbusy != 0);
15909 
15910 	un->un_sense_isbusy = 0;
15911 	bp = sense_xp->xb_sense_bp;
15912 	sense_xp->xb_sense_bp = NULL;
15913 
15914 	/* This pkt is no longer interested in getting sense data */
15915 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15916 
15917 	return (bp);
15918 }
15919 
15920 
15921 
15922 /*
15923  *    Function: sd_alloc_rqs
15924  *
15925  * Description: Set up the unit to receive auto request sense data
15926  *
15927  * Return Code: DDI_SUCCESS or DDI_FAILURE
15928  *
15929  *     Context: Called under attach(9E) context
15930  */
15931 
15932 static int
15933 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15934 {
15935 	struct sd_xbuf *xp;
15936 
15937 	ASSERT(un != NULL);
15938 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15939 	ASSERT(un->un_rqs_bp == NULL);
15940 	ASSERT(un->un_rqs_pktp == NULL);
15941 
15942 	/*
15943 	 * First allocate the required buf and scsi_pkt structs, then set up
15944 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15945 	 */
15946 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15947 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15948 	if (un->un_rqs_bp == NULL) {
15949 		return (DDI_FAILURE);
15950 	}
15951 
15952 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15953 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15954 
15955 	if (un->un_rqs_pktp == NULL) {
15956 		sd_free_rqs(un);
15957 		return (DDI_FAILURE);
15958 	}
15959 
15960 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15961 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15962 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
15963 
15964 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15965 
15966 	/* Set up the other needed members in the ARQ scsi_pkt. */
15967 	un->un_rqs_pktp->pkt_comp   = sdintr;
15968 	un->un_rqs_pktp->pkt_time   = sd_io_time;
15969 	un->un_rqs_pktp->pkt_flags |=
15970 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15971 
15972 	/*
15973 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
15974 	 * provide any initpkt, destroypkt routines as we take care of
15975 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15976 	 */
15977 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15978 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15979 	xp->xb_pktp = un->un_rqs_pktp;
15980 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15981 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15982 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15983 
15984 	/*
15985 	 * Save the pointer to the request sense private bp so it can
15986 	 * be retrieved in sdintr.
15987 	 */
15988 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
15989 	ASSERT(un->un_rqs_bp->b_private == xp);
15990 
15991 	/*
15992 	 * See if the HBA supports auto-request sense for the specified
15993 	 * target/lun. If it does, then try to enable it (if not already
15994 	 * enabled).
15995 	 *
15996 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
15997 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
15998 	 * return success.  However, in both of these cases ARQ is always
15999 	 * enabled and scsi_ifgetcap will always return true. The best approach
16000 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
16001 	 *
16002 	 * The 3rd case is an HBA (adp) that always returns enabled on
16003 	 * scsi_ifgetcap even when it is not enabled. The best approach
16004 	 * is to issue a scsi_ifsetcap followed by a scsi_ifgetcap.
16005 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
16006 	 */
16007 
16008 	if (un->un_f_is_fibre == TRUE) {
16009 		un->un_f_arq_enabled = TRUE;
16010 	} else {
16011 #if defined(__i386) || defined(__amd64)
16012 		/*
16013 		 * Circumvent the Adaptec bug, remove this code when
16014 		 * the bug is fixed
16015 		 */
16016 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
16017 #endif
16018 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
16019 		case 0:
16020 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16021 				"sd_alloc_rqs: HBA supports ARQ\n");
16022 			/*
16023 			 * ARQ is supported by this HBA but currently is not
16024 			 * enabled. Attempt to enable it and if successful then
16025 			 * mark this instance as ARQ enabled.
16026 			 */
16027 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
16028 				== 1) {
16029 				/* Successfully enabled ARQ in the HBA */
16030 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16031 					"sd_alloc_rqs: ARQ enabled\n");
16032 				un->un_f_arq_enabled = TRUE;
16033 			} else {
16034 				/* Could not enable ARQ in the HBA */
16035 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16036 				"sd_alloc_rqs: failed ARQ enable\n");
16037 				un->un_f_arq_enabled = FALSE;
16038 			}
16039 			break;
16040 		case 1:
16041 			/*
16042 			 * ARQ is supported by this HBA and is already enabled.
16043 			 * Just mark ARQ as enabled for this instance.
16044 			 */
16045 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16046 				"sd_alloc_rqs: ARQ already enabled\n");
16047 			un->un_f_arq_enabled = TRUE;
16048 			break;
16049 		default:
16050 			/*
16051 			 * ARQ is not supported by this HBA; disable it for this
16052 			 * instance.
16053 			 */
16054 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16055 				"sd_alloc_rqs: HBA does not support ARQ\n");
16056 			un->un_f_arq_enabled = FALSE;
16057 			break;
16058 		}
16059 	}
16060 
16061 	return (DDI_SUCCESS);
16062 }
16063 
16064 
16065 /*
16066  *    Function: sd_free_rqs
16067  *
16068  * Description: Cleanup for the per-instance RQS command.
16069  *
16070  *     Context: Kernel thread context
16071  */
16072 
16073 static void
16074 sd_free_rqs(struct sd_lun *un)
16075 {
16076 	ASSERT(un != NULL);
16077 
16078 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16079 
16080 	/*
16081 	 * If consistent memory is bound to a scsi_pkt, the pkt
16082 	 * has to be destroyed *before* freeing the consistent memory.
16083 	 * Don't change the sequence of these operations.
16084 	 * scsi_destroy_pkt() might access memory that is no longer
16085 	 * valid once it has been freed by scsi_free_consistent_buf().
16086 	 */
16087 	if (un->un_rqs_pktp != NULL) {
16088 		scsi_destroy_pkt(un->un_rqs_pktp);
16089 		un->un_rqs_pktp = NULL;
16090 	}
16091 
16092 	if (un->un_rqs_bp != NULL) {
16093 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
16094 		scsi_free_consistent_buf(un->un_rqs_bp);
16095 		un->un_rqs_bp = NULL;
16096 	}
16097 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16098 }
16099 
16100 
16101 
16102 /*
16103  *    Function: sd_reduce_throttle
16104  *
16105  * Description: Reduces the maximum # of outstanding commands on a
16106  *		target to the current number of outstanding commands.
16107  *		Queues a timeout(9F) callback to restore the limit
16108  *		after a specified interval has elapsed.
16109  *		Typically used when we get a TRAN_BUSY return code
16110  *		back from scsi_transport().
16111  *
16112  *   Arguments: un - ptr to the sd_lun softstate struct
16113  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16114  *
16115  *     Context: May be called from interrupt context
16116  */
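/*
 * Worked example (adaptive throttling): with un_throttle == 256 and three
 * commands in transport when a TRAN_BUSY arrives, the code below saves 256
 * in un_busy_throttle and drops un_throttle to 3; the sd_restore_throttle()
 * timeout later restores the saved value.
 */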
16117 
16118 static void
16119 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16120 {
16121 	ASSERT(un != NULL);
16122 	ASSERT(mutex_owned(SD_MUTEX(un)));
16123 	ASSERT(un->un_ncmds_in_transport >= 0);
16124 
16125 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16126 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16127 	    un, un->un_throttle, un->un_ncmds_in_transport);
16128 
16129 	if (un->un_throttle > 1) {
16130 		if (un->un_f_use_adaptive_throttle == TRUE) {
16131 			switch (throttle_type) {
16132 			case SD_THROTTLE_TRAN_BUSY:
16133 				if (un->un_busy_throttle == 0) {
16134 					un->un_busy_throttle = un->un_throttle;
16135 				}
16136 				break;
16137 			case SD_THROTTLE_QFULL:
16138 				un->un_busy_throttle = 0;
16139 				break;
16140 			default:
16141 				ASSERT(FALSE);
16142 			}
16143 
16144 			if (un->un_ncmds_in_transport > 0) {
16145 				un->un_throttle = un->un_ncmds_in_transport;
16146 			}
16147 
16148 		} else {
16149 			if (un->un_ncmds_in_transport == 0) {
16150 				un->un_throttle = 1;
16151 			} else {
16152 				un->un_throttle = un->un_ncmds_in_transport;
16153 			}
16154 		}
16155 	}
16156 
16157 	/* Reschedule the timeout if none is currently active */
16158 	if (un->un_reset_throttle_timeid == NULL) {
16159 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16160 		    un, SD_THROTTLE_RESET_INTERVAL);
16161 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16162 		    "sd_reduce_throttle: timeout scheduled!\n");
16163 	}
16164 
16165 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16166 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16167 }
16168 
16169 
16170 
16171 /*
16172  *    Function: sd_restore_throttle
16173  *
16174  * Description: Callback function for timeout(9F).  Resets the current
16175  *		value of un->un_throttle to its default.
16176  *
16177  *   Arguments: arg - pointer to associated softstate for the device.
16178  *
16179  *     Context: May be called from interrupt context
16180  */
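/*
 * Worked example (QFULL ramp-up, with an assumed un_saved_throttle of 64):
 * when sd_qfull_throttle_enable is set, a throttle of 10 steps to 11,
 * 12, ... (each step adds max(un_throttle / 10, 1)), and this callback is
 * rescheduled until the saved value is reached.
 */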
16181 
16182 static void
16183 sd_restore_throttle(void *arg)
16184 {
16185 	struct sd_lun	*un = arg;
16186 
16187 	ASSERT(un != NULL);
16188 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16189 
16190 	mutex_enter(SD_MUTEX(un));
16191 
16192 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16193 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16194 
16195 	un->un_reset_throttle_timeid = NULL;
16196 
16197 	if (un->un_f_use_adaptive_throttle == TRUE) {
16198 		/*
16199 		 * If un_busy_throttle is nonzero, then it contains the
16200 		 * value that un_throttle had when we got a TRAN_BUSY
16201 		 * back from scsi_transport(). We want to revert to this
16202 		 * value.
16203 		 *
16204 		 * In the QFULL case, the throttle limit will incrementally
16205 		 * increase until it reaches max throttle.
16206 		 */
16207 		if (un->un_busy_throttle > 0) {
16208 			un->un_throttle = un->un_busy_throttle;
16209 			un->un_busy_throttle = 0;
16210 		} else {
16211 			/*
16212 			 * Increase the throttle by 10% to open the gate
16213 			 * slowly; schedule another restore if the saved
16214 			 * throttle has not yet been reached.
16215 			 */
16216 			short throttle;
16217 			if (sd_qfull_throttle_enable) {
16218 				throttle = un->un_throttle +
16219 				    max((un->un_throttle / 10), 1);
16220 				un->un_throttle =
16221 				    (throttle < un->un_saved_throttle) ?
16222 				    throttle : un->un_saved_throttle;
16223 				if (un->un_throttle < un->un_saved_throttle) {
16224 					un->un_reset_throttle_timeid =
16225 					    timeout(sd_restore_throttle, un,
16226 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16227 				}
16228 			}
16229 		}
16230 
16231 		/*
16232 		 * If un_throttle has fallen below the low-water mark, we
16233 		 * restore the maximum value here (and allow it to ratchet
16234 		 * down again if necessary).
16235 		 */
16236 		if (un->un_throttle < un->un_min_throttle) {
16237 			un->un_throttle = un->un_saved_throttle;
16238 		}
16239 	} else {
16240 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16241 		    "restoring limit from 0x%x to 0x%x\n",
16242 		    un->un_throttle, un->un_saved_throttle);
16243 		un->un_throttle = un->un_saved_throttle;
16244 	}
16245 
16246 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16247 	    "sd_restore_throttle: calling sd_start_cmds!\n");
16248 
16249 	sd_start_cmds(un, NULL);
16250 
16251 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16252 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16253 	    un, un->un_throttle);
16254 
16255 	mutex_exit(SD_MUTEX(un));
16256 
16257 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16258 }
16259 
16260 /*
16261  *    Function: sdrunout
16262  *
16263  * Description: Callback routine for scsi_init_pkt when a resource allocation
16264  *		fails.
16265  *
16266  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16267  *		soft state instance.
16268  *
16269  * Return Code: The scsi_init_pkt routine allows for the callback function to
16270  *		return a 0 indicating the callback should be rescheduled or a 1
16271  *		indicating not to reschedule. This routine always returns 1
16272  *		because the driver always provides a callback function to
16273  *		scsi_init_pkt. This results in a callback always being scheduled
16274  *		(via the scsi_init_pkt callback implementation) if a resource
16275  *		failure occurs.
16276  *
16277  *     Context: This callback function may not block or call routines that block
16278  *
16279  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16280  *		request persisting at the head of the list which cannot be
16281  *		satisfied even after multiple retries. In the future the driver
16282  *		may implement some type of maximum runout count before failing
16283  *		an I/O.
16284  */
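/*
 * A minimal sketch of how this callback is registered; the argument
 * details (cmdlen, statuslen, flags) are illustrative, per the
 * scsi_init_pkt(9F) interface:
 *
 *	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, bp, CDB_GROUP1,
 *	    statuslen, 0, flags, sdrunout, (caddr_t)un);
 */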
16285 
16286 static int
16287 sdrunout(caddr_t arg)
16288 {
16289 	struct sd_lun	*un = (struct sd_lun *)arg;
16290 
16291 	ASSERT(un != NULL);
16292 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16293 
16294 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16295 
16296 	mutex_enter(SD_MUTEX(un));
16297 	sd_start_cmds(un, NULL);
16298 	mutex_exit(SD_MUTEX(un));
16299 	/*
16300 	 * This callback routine always returns 1 (i.e. do not reschedule)
16301 	 * because we always specify sdrunout as the callback handler for
16302 	 * scsi_init_pkt inside the call to sd_start_cmds.
16303 	 */
16304 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16305 	return (1);
16306 }
16307 
16308 
16309 /*
16310  *    Function: sdintr
16311  *
16312  * Description: Completion callback routine for scsi_pkt(9S) structs
16313  *		sent to the HBA driver via scsi_transport(9F).
16314  *
16315  *     Context: Interrupt context
16316  */
16317 
16318 static void
16319 sdintr(struct scsi_pkt *pktp)
16320 {
16321 	struct buf	*bp;
16322 	struct sd_xbuf	*xp;
16323 	struct sd_lun	*un;
16324 
16325 	ASSERT(pktp != NULL);
16326 	bp = (struct buf *)pktp->pkt_private;
16327 	ASSERT(bp != NULL);
16328 	xp = SD_GET_XBUF(bp);
16329 	ASSERT(xp != NULL);
16330 	ASSERT(xp->xb_pktp != NULL);
16331 	un = SD_GET_UN(bp);
16332 	ASSERT(un != NULL);
16333 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16334 
16335 #ifdef SD_FAULT_INJECTION
16336 
16337 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16338 	/* SD FaultInjection */
16339 	sd_faultinjection(pktp);
16340 
16341 #endif /* SD_FAULT_INJECTION */
16342 
16343 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16344 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16345 
16346 	mutex_enter(SD_MUTEX(un));
16347 
16348 	/* Reduce the count of commands currently in transport */
16349 	un->un_ncmds_in_transport--;
16350 	ASSERT(un->un_ncmds_in_transport >= 0);
16351 
16352 	/* Increment counter to indicate that the callback routine is active */
16353 	un->un_in_callback++;
16354 
16355 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16356 
16357 #ifdef	SDDEBUG
16358 	if (bp == un->un_retry_bp) {
16359 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16360 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16361 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16362 	}
16363 #endif
16364 
16365 	/*
16366 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16367 	 */
16368 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16369 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16370 		    "Device is gone\n");
16371 		sd_return_failed_command(un, bp, EIO);
16372 		goto exit;
16373 	}
16374 
16375 	/*
16376 	 * First see if the pkt has auto-request sense data with it....
16377 	 * Look at the packet state first so we don't take a performance
16378 	 * hit looking at the arq enabled flag unless absolutely necessary.
16379 	 */
16380 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16381 	    (un->un_f_arq_enabled == TRUE)) {
16382 		/*
16383 		 * The HBA did an auto request sense for this command so check
16384 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16385 		 * driver command that should not be retried.
16386 		 */
16387 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16388 			/*
16389 			 * Save the relevant sense info into the xp for the
16390 			 * original cmd.
16391 			 */
16392 			struct scsi_arq_status *asp;
16393 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16394 			xp->xb_sense_status =
16395 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16396 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16397 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16398 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16399 			    min(sizeof (struct scsi_extended_sense),
16400 			    SENSE_LENGTH));
16401 
16402 			/* fail the command */
16403 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16404 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16405 			sd_return_failed_command(un, bp, EIO);
16406 			goto exit;
16407 		}
16408 
16409 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16410 		/*
16411 		 * We want to either retry or fail this command, so free
16412 		 * the DMA resources here.  If we retry the command then
16413 		 * the DMA resources will be reallocated in sd_start_cmds().
16414 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16415 		 * causes the *entire* transfer to start over again from the
16416 		 * beginning of the request, even for PARTIAL chunks that
16417 		 * have already transferred successfully.
16418 		 */
16419 		if ((un->un_f_is_fibre == TRUE) &&
16420 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16421 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16422 			scsi_dmafree(pktp);
16423 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16424 		}
16425 #endif
16426 
16427 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16428 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16429 
16430 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16431 		goto exit;
16432 	}
16433 
16434 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16435 	if (pktp->pkt_flags & FLAG_SENSING)  {
16436 		/* This pktp is from the unit's REQUEST_SENSE command */
16437 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16438 		    "sdintr: sd_handle_request_sense\n");
16439 		sd_handle_request_sense(un, bp, xp, pktp);
16440 		goto exit;
16441 	}
16442 
16443 	/*
16444 	 * Check to see if the command successfully completed as requested;
16445 	 * this is the most common case (and also the hot performance path).
16446 	 *
16447 	 * Requirements for successful completion are:
16448 	 * pkt_reason is CMD_CMPLT and the packet status is STATUS_GOOD.
16449 	 * In addition:
16450 	 * - A residual of zero indicates successful completion no matter what
16451 	 *   the command is.
16452 	 * - If the residual is not zero and the command is not a read or
16453 	 *   write, it is still treated as successful completion. In other
16454 	 *   words, for a read or write the residual must normally be zero
16455 	 *   for successful completion.
16456 	 * - The one exception: if the residual is not zero and the command
16457 	 *   is a read or write issued as a USCSICMD, it is still treated
16458 	 *   as successful completion.
16459 	 */
16460 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16461 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16462 
16463 		/*
16464 		 * Since this command is returned with a good status, we
16465 		 * can reset the count for Sonoma failover.
16466 		 */
16467 		un->un_sonoma_failure_count = 0;
16468 
16469 		/*
16470 		 * Return all USCSI commands on good status
16471 		 */
16472 		if (pktp->pkt_resid == 0) {
16473 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16474 			    "sdintr: returning command for resid == 0\n");
16475 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16476 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16477 			SD_UPDATE_B_RESID(bp, pktp);
16478 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16479 			    "sdintr: returning command for resid != 0\n");
16480 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16481 			SD_UPDATE_B_RESID(bp, pktp);
16482 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16483 			    "sdintr: returning uscsi command\n");
16484 		} else {
16485 			goto not_successful;
16486 		}
16487 		sd_return_command(un, bp);
16488 
16489 		/*
16490 		 * Decrement counter to indicate that the callback routine
16491 		 * is done.
16492 		 */
16493 		un->un_in_callback--;
16494 		ASSERT(un->un_in_callback >= 0);
16495 		mutex_exit(SD_MUTEX(un));
16496 
16497 		return;
16498 	}
16499 
16500 not_successful:
16501 
16502 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16503 	/*
16504 	 * The following is based upon knowledge of the underlying transport
16505 	 * and its use of DMA resources.  This code should be removed when
16506 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16507 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16508 	 * and sd_start_cmds().
16509 	 *
16510 	 * Free any DMA resources associated with this command if there
16511 	 * is a chance it could be retried or enqueued for later retry.
16512 	 * If we keep the DMA binding then mpxio cannot reissue the
16513 	 * command on another path whenever a path failure occurs.
16514 	 *
16515 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16516 	 * causes the *entire* transfer to start over again from the
16517 	 * beginning of the request, even for PARTIAL chunks that
16518 	 * have already transferred successfully.
16519 	 *
16520 	 * This is only done for non-uscsi commands (and also skipped for the
16521 	 * driver's internal RQS command). Also just do this for Fibre Channel
16522 	 * devices as these are the only ones that support mpxio.
16523 	 */
16524 	if ((un->un_f_is_fibre == TRUE) &&
16525 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16526 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16527 		scsi_dmafree(pktp);
16528 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16529 	}
16530 #endif
16531 
16532 	/*
16533 	 * The command did not successfully complete as requested so check
16534 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16535 	 * driver command that should not be retried so just return. If
16536 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16537 	 */
16538 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16539 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16540 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16541 		/*
16542 		 * Issue a request sense if a check condition caused the error
16543 		 * (we handle the auto request sense case above), otherwise
16544 		 * just fail the command.
16545 		 */
16546 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16547 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16548 			sd_send_request_sense_command(un, bp, pktp);
16549 		} else {
16550 			sd_return_failed_command(un, bp, EIO);
16551 		}
16552 		goto exit;
16553 	}
16554 
16555 	/*
16556 	 * The command did not successfully complete as requested so process
16557 	 * the error, retry, and/or attempt recovery.
16558 	 */
16559 	switch (pktp->pkt_reason) {
16560 	case CMD_CMPLT:
16561 		switch (SD_GET_PKT_STATUS(pktp)) {
16562 		case STATUS_GOOD:
16563 			/*
16564 			 * The command completed successfully with a non-zero
16565 			 * residual
16566 			 */
16567 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16568 			    "sdintr: STATUS_GOOD \n");
16569 			sd_pkt_status_good(un, bp, xp, pktp);
16570 			break;
16571 
16572 		case STATUS_CHECK:
16573 		case STATUS_TERMINATED:
16574 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16575 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16576 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16577 			break;
16578 
16579 		case STATUS_BUSY:
16580 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16581 			    "sdintr: STATUS_BUSY\n");
16582 			sd_pkt_status_busy(un, bp, xp, pktp);
16583 			break;
16584 
16585 		case STATUS_RESERVATION_CONFLICT:
16586 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16587 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16588 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16589 			break;
16590 
16591 		case STATUS_QFULL:
16592 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16593 			    "sdintr: STATUS_QFULL\n");
16594 			sd_pkt_status_qfull(un, bp, xp, pktp);
16595 			break;
16596 
16597 		case STATUS_MET:
16598 		case STATUS_INTERMEDIATE:
16599 		case STATUS_SCSI2:
16600 		case STATUS_INTERMEDIATE_MET:
16601 		case STATUS_ACA_ACTIVE:
16602 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16603 			    "Unexpected SCSI status received: 0x%x\n",
16604 			    SD_GET_PKT_STATUS(pktp));
16605 			sd_return_failed_command(un, bp, EIO);
16606 			break;
16607 
16608 		default:
16609 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16610 			    "Invalid SCSI status received: 0x%x\n",
16611 			    SD_GET_PKT_STATUS(pktp));
16612 			sd_return_failed_command(un, bp, EIO);
16613 			break;
16614 
16615 		}
16616 		break;
16617 
16618 	case CMD_INCOMPLETE:
16619 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16620 		    "sdintr:  CMD_INCOMPLETE\n");
16621 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16622 		break;
16623 	case CMD_TRAN_ERR:
16624 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16625 		    "sdintr: CMD_TRAN_ERR\n");
16626 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16627 		break;
16628 	case CMD_RESET:
16629 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16630 		    "sdintr: CMD_RESET \n");
16631 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16632 		break;
16633 	case CMD_ABORTED:
16634 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16635 		    "sdintr: CMD_ABORTED \n");
16636 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16637 		break;
16638 	case CMD_TIMEOUT:
16639 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16640 		    "sdintr: CMD_TIMEOUT\n");
16641 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16642 		break;
16643 	case CMD_UNX_BUS_FREE:
16644 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16645 		    "sdintr: CMD_UNX_BUS_FREE \n");
16646 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16647 		break;
16648 	case CMD_TAG_REJECT:
16649 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16650 		    "sdintr: CMD_TAG_REJECT\n");
16651 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16652 		break;
16653 	default:
16654 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16655 		    "sdintr: default\n");
16656 		sd_pkt_reason_default(un, bp, xp, pktp);
16657 		break;
16658 	}
16659 
16660 exit:
16661 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16662 
16663 	/* Decrement counter to indicate that the callback routine is done. */
16664 	un->un_in_callback--;
16665 	ASSERT(un->un_in_callback >= 0);
16666 
16667 	/*
16668 	 * At this point, the pkt has been dispatched, ie, it is either
16669 	 * being re-tried or has been returned to its caller and should
16670 	 * not be referenced.
16671 	 */
16672 
16673 	mutex_exit(SD_MUTEX(un));
16674 }
16675 
16676 
16677 /*
16678  *    Function: sd_print_incomplete_msg
16679  *
16680  * Description: Prints the error message for a CMD_INCOMPLETE error.
16681  *
16682  *   Arguments: un - ptr to associated softstate for the device.
16683  *		bp - ptr to the buf(9S) for the command.
16684  *		arg - message string ptr
16685  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16686  *			or SD_NO_RETRY_ISSUED.
16687  *
16688  *     Context: May be called under interrupt context
16689  */
16690 
16691 static void
16692 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16693 {
16694 	struct scsi_pkt	*pktp;
16695 	char	*msgp;
16696 	char	*cmdp = arg;
16697 
16698 	ASSERT(un != NULL);
16699 	ASSERT(mutex_owned(SD_MUTEX(un)));
16700 	ASSERT(bp != NULL);
16701 	ASSERT(arg != NULL);
16702 	pktp = SD_GET_PKTP(bp);
16703 	ASSERT(pktp != NULL);
16704 
16705 	switch (code) {
16706 	case SD_DELAYED_RETRY_ISSUED:
16707 	case SD_IMMEDIATE_RETRY_ISSUED:
16708 		msgp = "retrying";
16709 		break;
16710 	case SD_NO_RETRY_ISSUED:
16711 	default:
16712 		msgp = "giving up";
16713 		break;
16714 	}
16715 
16716 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16717 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16718 		    "incomplete %s- %s\n", cmdp, msgp);
16719 	}
16720 }
16721 
16722 
16723 
16724 /*
16725  *    Function: sd_pkt_status_good
16726  *
16727  * Description: Processing for a STATUS_GOOD code in pkt_status.
16728  *
16729  *     Context: May be called under interrupt context
16730  */
16731 
16732 static void
16733 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16734 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16735 {
16736 	char	*cmdp;
16737 
16738 	ASSERT(un != NULL);
16739 	ASSERT(mutex_owned(SD_MUTEX(un)));
16740 	ASSERT(bp != NULL);
16741 	ASSERT(xp != NULL);
16742 	ASSERT(pktp != NULL);
16743 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16744 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16745 	ASSERT(pktp->pkt_resid != 0);
16746 
16747 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16748 
16749 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16750 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16751 	case SCMD_READ:
16752 		cmdp = "read";
16753 		break;
16754 	case SCMD_WRITE:
16755 		cmdp = "write";
16756 		break;
16757 	default:
16758 		SD_UPDATE_B_RESID(bp, pktp);
16759 		sd_return_command(un, bp);
16760 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16761 		return;
16762 	}
16763 
16764 	/*
16765 	 * See if we can retry the read/write, preferably immediately.
16766 	 * If retries are exhausted, then sd_retry_command() will update
16767 	 * the b_resid count.
16768 	 */
16769 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16770 	    cmdp, EIO, (clock_t)0, NULL);
16771 
16772 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16773 }
16774 
16775 
16776 
16777 
16778 
16779 /*
16780  *    Function: sd_handle_request_sense
16781  *
16782  * Description: Processing for non-auto Request Sense command.
16783  *
16784  *   Arguments: un - ptr to associated softstate
16785  *		sense_bp - ptr to buf(9S) for the RQS command
16786  *		sense_xp - ptr to the sd_xbuf for the RQS command
16787  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16788  *
16789  *     Context: May be called under interrupt context
16790  */
16791 
16792 static void
16793 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16794 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16795 {
16796 	struct buf	*cmd_bp;	/* buf for the original command */
16797 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16798 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16799 
16800 	ASSERT(un != NULL);
16801 	ASSERT(mutex_owned(SD_MUTEX(un)));
16802 	ASSERT(sense_bp != NULL);
16803 	ASSERT(sense_xp != NULL);
16804 	ASSERT(sense_pktp != NULL);
16805 
16806 	/*
16807 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16808 	 * RQS command and not the original command.
16809 	 */
16810 	ASSERT(sense_pktp == un->un_rqs_pktp);
16811 	ASSERT(sense_bp   == un->un_rqs_bp);
16812 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16813 	    (FLAG_SENSING | FLAG_HEAD));
16814 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16815 	    FLAG_SENSING) == FLAG_SENSING);
16816 
16817 	/* These are the bp, xp, and pktp for the original command */
16818 	cmd_bp = sense_xp->xb_sense_bp;
16819 	cmd_xp = SD_GET_XBUF(cmd_bp);
16820 	cmd_pktp = SD_GET_PKTP(cmd_bp);
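
	/*
	 * Illustrative summary of the linkage just established between
	 * the RQS command and the original command:
	 *
	 *	sense_bp (un->un_rqs_bp) --SD_GET_XBUF()--> sense_xp
	 *	sense_xp->xb_sense_bp   ------------------> cmd_bp
	 *	cmd_bp                  --SD_GET_XBUF()---> cmd_xp
	 *	cmd_bp                  --SD_GET_PKTP()---> cmd_pktp
	 */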
16821 
16822 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16823 		/*
16824 		 * The REQUEST SENSE command failed.  Release the REQUEST
16825 		 * SENSE command for re-use, get back the bp for the original
16826 		 * command, and attempt to re-try the original command if
16827 		 * FLAG_DIAGNOSE is not set in the original packet.
16828 		 */
16829 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16830 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16831 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16832 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16833 			    NULL, NULL, EIO, (clock_t)0, NULL);
16834 			return;
16835 		}
16836 	}
16837 
16838 	/*
16839 	 * Save the relevant sense info into the xp for the original cmd.
16840 	 *
16841 	 * Note: if the request sense failed the state info will be zero
16842 	 * as set in sd_mark_rqs_busy()
16843 	 */
16844 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16845 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
16846 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
16847 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
16848 
16849 	/*
16850 	 *  Free up the RQS command....
16851 	 *  NOTE:
16852 	 *	Must do this BEFORE calling sd_validate_sense_data!
16853 	 *	sd_validate_sense_data may return the original command in
16854 	 *	which case the pkt will be freed and the flags can no
16855 	 *	longer be touched.
16856 	 *	SD_MUTEX is held through this process until the command
16857 	 *	is dispatched based upon the sense data, so there are
16858 	 *	no race conditions.
16859 	 */
16860 	(void) sd_mark_rqs_idle(un, sense_xp);
16861 
16862 	/*
16863 	 * For a retryable command, see if we have valid sense data; if so,
16864 	 * turn it over to sd_decode_sense() to figure out the right course of
16865 	 * action. Just fail a non-retryable command.
16866 	 */
16867 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16868 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
16869 		    SD_SENSE_DATA_IS_VALID) {
16870 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16871 		}
16872 	} else {
16873 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16874 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16875 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16876 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16877 		sd_return_failed_command(un, cmd_bp, EIO);
16878 	}
16879 }
16880 
16881 
16882 
16883 
16884 /*
16885  *    Function: sd_handle_auto_request_sense
16886  *
16887  * Description: Processing for auto-request sense information.
16888  *
16889  *   Arguments: un - ptr to associated softstate
16890  *		bp - ptr to buf(9S) for the command
16891  *		xp - ptr to the sd_xbuf for the command
16892  *		pktp - ptr to the scsi_pkt(9S) for the command
16893  *
16894  *     Context: May be called under interrupt context
16895  */
16896 
16897 static void
16898 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16899 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16900 {
16901 	struct scsi_arq_status *asp;
16902 
16903 	ASSERT(un != NULL);
16904 	ASSERT(mutex_owned(SD_MUTEX(un)));
16905 	ASSERT(bp != NULL);
16906 	ASSERT(xp != NULL);
16907 	ASSERT(pktp != NULL);
16908 	ASSERT(pktp != un->un_rqs_pktp);
16909 	ASSERT(bp   != un->un_rqs_bp);
16910 
16911 	/*
16912 	 * For auto-request sense, we get a scsi_arq_status back from
16913 	 * the HBA, with the sense data in the sts_sensedata member.
16914 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16915 	 */
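	/*
	 * For reference (a sketch; see the scsi_arq_status definition for
	 * the authoritative layout), the fields consumed here are roughly:
	 *
	 *	sts_rqpkt_reason	pkt_reason for the implicit RQS pkt
	 *	sts_rqpkt_status	SCSI status of the implicit RQS pkt
	 *	sts_rqpkt_state		pkt_state for the implicit RQS pkt
	 *	sts_rqpkt_resid		residual for the implicit RQS pkt
	 *	sts_sensedata		the sense bytes themselves
	 */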
16916 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16917 
16918 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16919 		/*
16920 		 * The auto REQUEST SENSE failed; see if we can re-try
16921 		 * the original command.
16922 		 */
16923 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16924 		    "auto request sense failed (reason=%s)\n",
16925 		    scsi_rname(asp->sts_rqpkt_reason));
16926 
16927 		sd_reset_target(un, pktp);
16928 
16929 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16930 		    NULL, NULL, EIO, (clock_t)0, NULL);
16931 		return;
16932 	}
16933 
16934 	/* Save the relevant sense info into the xp for the original cmd. */
16935 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16936 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16937 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16938 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16939 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16940 
16941 	/*
16942 	 * See if we have valid sense data; if so, turn it over to
16943 	 * sd_decode_sense() to figure out the right course of action.
16944 	 */
16945 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16946 		sd_decode_sense(un, bp, xp, pktp);
16947 	}
16948 }
16949 
16950 
16951 /*
16952  *    Function: sd_print_sense_failed_msg
16953  *
16954  * Description: Print log message when RQS has failed.
16955  *
16956  *   Arguments: un - ptr to associated softstate
16957  *		bp - ptr to buf(9S) for the command
16958  *		arg - generic message string ptr
16959  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16960  *			or SD_NO_RETRY_ISSUED
16961  *
16962  *     Context: May be called from interrupt context
16963  */
16964 
16965 static void
16966 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16967 	int code)
16968 {
16969 	char	*msgp = arg;
16970 
16971 	ASSERT(un != NULL);
16972 	ASSERT(mutex_owned(SD_MUTEX(un)));
16973 	ASSERT(bp != NULL);
16974 
16975 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16976 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16977 	}
16978 }
16979 
16980 
16981 /*
16982  *    Function: sd_validate_sense_data
16983  *
16984  * Description: Check the given sense data for validity.
16985  *		If the sense data is not valid, the command will
16986  *		be either failed or retried!
16987  *
16988  * Return Code: SD_SENSE_DATA_IS_INVALID
16989  *		SD_SENSE_DATA_IS_VALID
16990  *
16991  *     Context: May be called from interrupt context
16992  */
16993 
16994 static int
16995 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
16996 {
16997 	struct scsi_extended_sense *esp;
16998 	struct	scsi_pkt *pktp;
16999 	size_t	actual_len;
17000 	char	*msgp = NULL;
17001 
17002 	ASSERT(un != NULL);
17003 	ASSERT(mutex_owned(SD_MUTEX(un)));
17004 	ASSERT(bp != NULL);
17005 	ASSERT(bp != un->un_rqs_bp);
17006 	ASSERT(xp != NULL);
17007 
17008 	pktp = SD_GET_PKTP(bp);
17009 	ASSERT(pktp != NULL);
17010 
17011 	/*
17012 	 * Check the status of the RQS command (auto or manual).
17013 	 */
17014 	switch (xp->xb_sense_status & STATUS_MASK) {
17015 	case STATUS_GOOD:
17016 		break;
17017 
17018 	case STATUS_RESERVATION_CONFLICT:
17019 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
17020 		return (SD_SENSE_DATA_IS_INVALID);
17021 
17022 	case STATUS_BUSY:
17023 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17024 		    "Busy Status on REQUEST SENSE\n");
17025 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
17026 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17027 		return (SD_SENSE_DATA_IS_INVALID);
17028 
17029 	case STATUS_QFULL:
17030 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17031 		    "QFULL Status on REQUEST SENSE\n");
17032 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
17033 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17034 		return (SD_SENSE_DATA_IS_INVALID);
17035 
17036 	case STATUS_CHECK:
17037 	case STATUS_TERMINATED:
17038 		msgp = "Check Condition on REQUEST SENSE\n";
17039 		goto sense_failed;
17040 
17041 	default:
17042 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
17043 		goto sense_failed;
17044 	}
17045 
17046 	/*
17047 	 * See if we got the minimum required amount of sense data.
17048 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
17049 	 * or less.
17050 	 */
17051 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
17052 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
17053 	    (actual_len == 0)) {
17054 		msgp = "Request Sense couldn't get sense data\n";
17055 		goto sense_failed;
17056 	}
17057 
17058 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
17059 		msgp = "Not enough sense information\n";
17060 		goto sense_failed;
17061 	}
17062 
17063 	/*
17064 	 * We require the extended sense data
17065 	 */
17066 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
17067 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
17068 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17069 			static char tmp[8];
17070 			static char buf[148];
17071 			char *p = (char *)(xp->xb_sense_data);
17072 			int i;
17073 
17074 			mutex_enter(&sd_sense_mutex);
17075 			(void) strcpy(buf, "undecodable sense information:");
17076 			for (i = 0; i < actual_len; i++) {
17077 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
17078 				(void) strcpy(&buf[strlen(buf)], tmp);
17079 			}
17080 			i = strlen(buf);
17081 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
17082 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
17083 			mutex_exit(&sd_sense_mutex);
17084 		}
17085 		/* Note: Legacy behavior, fail the command with no retry */
17086 		sd_return_failed_command(un, bp, EIO);
17087 		return (SD_SENSE_DATA_IS_INVALID);
17088 	}
17089 
17090 	/*
17091 	 * Check that es_code is valid (es_class concatenated with es_code
17092 	 * makes up the "response code" field).  es_class will always be 7, so
17093 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
17094 	 * sense data format.
17095 	 */
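	/*
	 * For reference: the low seven bits of sense byte 0 are
	 * (es_class << 4) | es_code, so with es_class == 7 the accepted
	 * response codes are 0x70/0x71 (fixed, current/deferred),
	 * 0x72/0x73 (descriptor, current/deferred) and 0x7F (vendor
	 * specific).
	 */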
17096 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
17097 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
17098 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
17099 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
17100 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
17101 		goto sense_failed;
17102 	}
17103 
17104 	return (SD_SENSE_DATA_IS_VALID);
17105 
17106 sense_failed:
17107 	/*
17108 	 * If the request sense failed (for whatever reason), attempt
17109 	 * to retry the original command.
17110 	 */
17111 #if defined(__i386) || defined(__amd64)
17112 	/*
17113 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
17114 	 * sddef.h for the SPARC platform, while x86 uses one binary
17115 	 * for both SCSI and FC.
17116 	 * The SD_RETRY_DELAY value needs to be adjusted here
17117 	 * whenever SD_RETRY_DELAY changes in sddef.h.
17118 	 */
17119 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17120 	    sd_print_sense_failed_msg, msgp, EIO,
17121 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
17122 #else
17123 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17124 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
17125 #endif
17126 
17127 	return (SD_SENSE_DATA_IS_INVALID);
17128 }
17129 
17130 
17131 
17132 /*
17133  *    Function: sd_decode_sense
17134  *
17135  * Description: Take recovery action(s) when SCSI Sense Data is received.
17136  *
17137  *     Context: Interrupt context.
17138  */
17139 
17140 static void
17141 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17142 	struct scsi_pkt *pktp)
17143 {
17144 	uint8_t sense_key;
17145 
17146 	ASSERT(un != NULL);
17147 	ASSERT(mutex_owned(SD_MUTEX(un)));
17148 	ASSERT(bp != NULL);
17149 	ASSERT(bp != un->un_rqs_bp);
17150 	ASSERT(xp != NULL);
17151 	ASSERT(pktp != NULL);
17152 
17153 	sense_key = scsi_sense_key(xp->xb_sense_data);
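	/*
	 * Note: scsi_sense_key(9F) extracts the sense key from either
	 * fixed or descriptor format sense data, so no format check is
	 * needed before dispatching on the key below.
	 */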
17154 
17155 	switch (sense_key) {
17156 	case KEY_NO_SENSE:
17157 		sd_sense_key_no_sense(un, bp, xp, pktp);
17158 		break;
17159 	case KEY_RECOVERABLE_ERROR:
17160 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17161 		    bp, xp, pktp);
17162 		break;
17163 	case KEY_NOT_READY:
17164 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17165 		    bp, xp, pktp);
17166 		break;
17167 	case KEY_MEDIUM_ERROR:
17168 	case KEY_HARDWARE_ERROR:
17169 		sd_sense_key_medium_or_hardware_error(un,
17170 		    xp->xb_sense_data, bp, xp, pktp);
17171 		break;
17172 	case KEY_ILLEGAL_REQUEST:
17173 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17174 		break;
17175 	case KEY_UNIT_ATTENTION:
17176 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17177 		    bp, xp, pktp);
17178 		break;
17179 	case KEY_WRITE_PROTECT:
17180 	case KEY_VOLUME_OVERFLOW:
17181 	case KEY_MISCOMPARE:
17182 		sd_sense_key_fail_command(un, bp, xp, pktp);
17183 		break;
17184 	case KEY_BLANK_CHECK:
17185 		sd_sense_key_blank_check(un, bp, xp, pktp);
17186 		break;
17187 	case KEY_ABORTED_COMMAND:
17188 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17189 		break;
17190 	case KEY_VENDOR_UNIQUE:
17191 	case KEY_COPY_ABORTED:
17192 	case KEY_EQUAL:
17193 	case KEY_RESERVED:
17194 	default:
17195 		sd_sense_key_default(un, xp->xb_sense_data,
17196 		    bp, xp, pktp);
17197 		break;
17198 	}
17199 }
17200 
17201 
17202 /*
17203  *    Function: sd_dump_memory
17204  *
17205  * Description: Debug logging routine to print the contents of a user provided
17206  *		buffer. The output of the buffer is broken up into 256 byte
17207  *		segments due to a size constraint of the scsi_log.
17208  *		segments due to a size constraint of the scsi_log
17209  *		implementation.
17210  *   Arguments: un - ptr to softstate
17211  *		comp - component mask
17212  *		title - "title" string to precede data when printed
17213  *		data - ptr to data block to be printed
17214  *		len - size of data block to be printed
17215  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17216  *
17217  *     Context: May be called from interrupt context
17218  */
17219 
17220 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17221 
17222 static char *sd_dump_format_string[] = {
17223 		" 0x%02x",
17224 		" %c"
17225 };
17226 
17227 static void
17228 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17229     int len, int fmt)
17230 {
17231 	int	i, j;
17232 	int	avail_count;
17233 	int	start_offset;
17234 	int	end_offset;
17235 	size_t	entry_len;
17236 	char	*bufp;
17237 	char	*local_buf;
17238 	char	*format_string;
17239 
17240 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17241 
17242 	/*
17243 	 * In the debug version of the driver, this function is called from a
17244 	 * number of places which are NOPs in the release driver.
17245 	 * The debug driver therefore has additional methods of filtering
17246 	 * debug output.
17247 	 */
17248 #ifdef SDDEBUG
17249 	/*
17250 	 * In the debug version of the driver we can reduce the amount of debug
17251 	 * messages by setting sd_error_level to something other than
17252 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17253 	 * sd_component_mask.
17254 	 */
17255 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17256 	    (sd_error_level != SCSI_ERR_ALL)) {
17257 		return;
17258 	}
17259 	if (((sd_component_mask & comp) == 0) ||
17260 	    (sd_error_level != SCSI_ERR_ALL)) {
17261 		return;
17262 	}
17263 #else
17264 	if (sd_error_level != SCSI_ERR_ALL) {
17265 		return;
17266 	}
17267 #endif
17268 
17269 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17270 	bufp = local_buf;
17271 	/*
17272 	 * Available length is the length of local_buf[], minus the
17273 	 * length of the title string, minus one for the ":", minus
17274 	 * one for the newline, minus one for the NULL terminator.
17275 	 * This gives the #bytes available for holding the printed
17276 	 * values from the given data buffer.
17277 	 */
17278 	if (fmt == SD_LOG_HEX) {
17279 		format_string = sd_dump_format_string[0];
17280 	} else /* SD_LOG_CHAR */ {
17281 		format_string = sd_dump_format_string[1];
17282 	}
17283 	/*
17284 	 * Available count is the number of elements from the given
17285 	 * data buffer that we can fit into the available length.
17286 	 * This is based upon the size of the format string used.
17287 	 * Make one entry and find its size.
17288 	 */
17289 	(void) sprintf(bufp, format_string, data[0]);
17290 	entry_len = strlen(bufp);
17291 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
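	/*
	 * Worked example (illustrative): with the hex format " 0x%02x"
	 * each entry is 5 characters, so for a title such as "Sense Data"
	 * (10 characters) avail_count = (256 - 10 - 3) / 5 = 48 data
	 * bytes per logged line.
	 */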
17292 
17293 	j = 0;
17294 	while (j < len) {
17295 		bufp = local_buf;
17296 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17297 		start_offset = j;
17298 
17299 		end_offset = start_offset + avail_count;
17300 
17301 		(void) sprintf(bufp, "%s:", title);
17302 		bufp += strlen(bufp);
17303 		for (i = start_offset; ((i < end_offset) && (j < len));
17304 		    i++, j++) {
17305 			(void) sprintf(bufp, format_string, data[i]);
17306 			bufp += entry_len;
17307 		}
17308 		(void) sprintf(bufp, "\n");
17309 
17310 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17311 	}
17312 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17313 }
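
/*
 * Typical usage, as in sd_print_sense_msg() below:
 *
 *	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
 *	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
 */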
17314 
17315 /*
17316  *    Function: sd_print_sense_msg
17317  *
17318  * Description: Log a message based upon the given sense data.
17319  *
17320  *   Arguments: un - ptr to associated softstate
17321  *		bp - ptr to buf(9S) for the command
17322  *		arg - ptr to associate sd_sense_info struct
17323  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17324  *			or SD_NO_RETRY_ISSUED
17325  *
17326  *     Context: May be called from interrupt context
17327  */
17328 
17329 static void
17330 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17331 {
17332 	struct sd_xbuf	*xp;
17333 	struct scsi_pkt	*pktp;
17334 	uint8_t *sensep;
17335 	daddr_t request_blkno;
17336 	diskaddr_t err_blkno;
17337 	int severity;
17338 	int pfa_flag;
17339 	extern struct scsi_key_strings scsi_cmds[];
17340 
17341 	ASSERT(un != NULL);
17342 	ASSERT(mutex_owned(SD_MUTEX(un)));
17343 	ASSERT(bp != NULL);
17344 	xp = SD_GET_XBUF(bp);
17345 	ASSERT(xp != NULL);
17346 	pktp = SD_GET_PKTP(bp);
17347 	ASSERT(pktp != NULL);
17348 	ASSERT(arg != NULL);
17349 
17350 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17351 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17352 
17353 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17354 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17355 		severity = SCSI_ERR_RETRYABLE;
17356 	}
17357 
17358 	/* Use absolute block number for the request block number */
17359 	request_blkno = xp->xb_blkno;
17360 
17361 	/*
17362 	 * Now try to get the error block number from the sense data
17363 	 */
17364 	sensep = xp->xb_sense_data;
17365 
17366 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
17367 	    (uint64_t *)&err_blkno)) {
17368 		/*
17369 		 * We retrieved the error block number from the information
17370 		 * portion of the sense data.
17371 		 *
17372 		 * For USCSI commands we are better off using the error
17373 		 * block no. as the requested block no. (This is the best
17374 		 * we can estimate.)
17375 		 */
17376 		if ((SD_IS_BUFIO(xp) == FALSE) &&
17377 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17378 			request_blkno = err_blkno;
17379 		}
17380 	} else {
17381 		/*
17382 		 * Without the es_valid bit set (for fixed format) or an
17383 		 * information descriptor (for descriptor format) we cannot
17384 		 * be certain of the error blkno, so just use the
17385 		 * request_blkno.
17386 		 */
17387 		err_blkno = (diskaddr_t)request_blkno;
17388 	}
17389 
17390 	/*
17391 	 * The following will log the buffer contents for the release driver
17392 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17393 	 * level is set to verbose.
17394 	 */
17395 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17396 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17397 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17398 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17399 
17400 	if (pfa_flag == FALSE) {
17401 		/* This is normally only set for USCSI */
17402 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17403 			return;
17404 		}
17405 
17406 		if ((SD_IS_BUFIO(xp) == TRUE) &&
17407 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17408 		    (severity < sd_error_level))) {
17409 			return;
17410 		}
17411 	}
17412 
17413 	/*
17414 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
17415 	 */
17416 	if ((SD_IS_LSI(un)) &&
17417 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
17418 	    (scsi_sense_asc(sensep) == 0x94) &&
17419 	    (scsi_sense_ascq(sensep) == 0x01)) {
17420 		un->un_sonoma_failure_count++;
17421 		if (un->un_sonoma_failure_count > 1) {
17422 			return;
17423 		}
17424 	}
17425 
17426 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17427 	    request_blkno, err_blkno, scsi_cmds,
17428 	    (struct scsi_extended_sense *)sensep,
17429 	    un->un_additional_codes, NULL);
17430 }
17431 
17432 /*
17433  *    Function: sd_sense_key_no_sense
17434  *
17435  * Description: Recovery action when sense data was not received.
17436  *
17437  *     Context: May be called from interrupt context
17438  */
17439 
17440 static void
17441 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17442 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17443 {
17444 	struct sd_sense_info	si;
17445 
17446 	ASSERT(un != NULL);
17447 	ASSERT(mutex_owned(SD_MUTEX(un)));
17448 	ASSERT(bp != NULL);
17449 	ASSERT(xp != NULL);
17450 	ASSERT(pktp != NULL);
17451 
17452 	si.ssi_severity = SCSI_ERR_FATAL;
17453 	si.ssi_pfa_flag = FALSE;
17454 
17455 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17456 
17457 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17458 	    &si, EIO, (clock_t)0, NULL);
17459 }
17460 
17461 
17462 /*
17463  *    Function: sd_sense_key_recoverable_error
17464  *
17465  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17466  *
17467  *     Context: May be called from interrupt context
17468  */
17469 
17470 static void
17471 sd_sense_key_recoverable_error(struct sd_lun *un,
17472 	uint8_t *sense_datap,
17473 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17474 {
17475 	struct sd_sense_info	si;
17476 	uint8_t asc = scsi_sense_asc(sense_datap);
17477 
17478 	ASSERT(un != NULL);
17479 	ASSERT(mutex_owned(SD_MUTEX(un)));
17480 	ASSERT(bp != NULL);
17481 	ASSERT(xp != NULL);
17482 	ASSERT(pktp != NULL);
17483 
17484 	/*
17485 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17486 	 */
17487 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17488 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17489 		si.ssi_severity = SCSI_ERR_INFO;
17490 		si.ssi_pfa_flag = TRUE;
17491 	} else {
17492 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17493 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17494 		si.ssi_severity = SCSI_ERR_RECOVERED;
17495 		si.ssi_pfa_flag = FALSE;
17496 	}
17497 
17498 	if (pktp->pkt_resid == 0) {
17499 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17500 		sd_return_command(un, bp);
17501 		return;
17502 	}
17503 
17504 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17505 	    &si, EIO, (clock_t)0, NULL);
17506 }
17507 
17508 
17509 
17510 
17511 /*
17512  *    Function: sd_sense_key_not_ready
17513  *
17514  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17515  *
17516  *     Context: May be called from interrupt context
17517  */
17518 
17519 static void
17520 sd_sense_key_not_ready(struct sd_lun *un,
17521 	uint8_t *sense_datap,
17522 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17523 {
17524 	struct sd_sense_info	si;
17525 	uint8_t asc = scsi_sense_asc(sense_datap);
17526 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17527 
17528 	ASSERT(un != NULL);
17529 	ASSERT(mutex_owned(SD_MUTEX(un)));
17530 	ASSERT(bp != NULL);
17531 	ASSERT(xp != NULL);
17532 	ASSERT(pktp != NULL);
17533 
17534 	si.ssi_severity = SCSI_ERR_FATAL;
17535 	si.ssi_pfa_flag = FALSE;
17536 
17537 	/*
17538 	 * Update error stats after first NOT READY error. Disks may have
17539 	 * been powered down and may need to be restarted.  For CDROMs,
17540 	 * report NOT READY errors only if media is present.
17541 	 */
17542 	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17543 	    (xp->xb_retry_count > 0)) {
17544 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17545 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17546 	}
17547 
17548 	/*
17549 	 * Just fail if the "not ready" retry limit has been reached.
17550 	 */
17551 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17552 		/* Special check for error message printing for removables. */
17553 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17554 		    (ascq >= 0x04)) {
17555 			si.ssi_severity = SCSI_ERR_ALL;
17556 		}
17557 		goto fail_command;
17558 	}
17559 
17560 	/*
17561 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17562 	 * what to do.
17563 	 */
17564 	switch (asc) {
17565 	case 0x04:	/* LOGICAL UNIT NOT READY */
17566 		/*
17567 		 * disk drives that don't spin up result in a very long delay
17568 		 * in format without warning messages. We will log a message
17569 		 * if the error level is set to verbose.
17570 		 */
17571 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17572 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17573 			    "logical unit not ready, resetting disk\n");
17574 		}
17575 
17576 		/*
17577 		 * There are different requirements for CDROMs and disks for
17578 		 * the number of retries.  If a CD-ROM is giving this, it is
17579 		 * probably reading TOC and is in the process of getting
17580 		 * ready, so we should keep on trying for a long time to make
17581 		 * sure that all types of media are taken into account (for
17582 		 * some media the drive takes a long time to read TOC).  For
17583 		 * disks we do not want to retry this too many times as this
17584 		 * can cause a long hang in format when the drive refuses to
17585 		 * spin up (a very common failure).
17586 		 */
17587 		switch (ascq) {
17588 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17589 			/*
17590 			 * Disk drives frequently refuse to spin up which
17591 			 * results in a very long hang in format without
17592 			 * warning messages.
17593 			 *
17594 			 * Note: This code preserves the legacy behavior of
17595 			 * comparing xb_retry_count against zero for fibre
17596 			 * channel targets instead of comparing against the
17597 			 * un_reset_retry_count value.  The reason for this
17598 			 * discrepancy has been so utterly lost beneath the
17599 			 * Sands of Time that even Indiana Jones could not
17600 			 * find it.
17601 			 */
17602 			if (un->un_f_is_fibre == TRUE) {
17603 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17604 				    (xp->xb_retry_count > 0)) &&
17605 				    (un->un_startstop_timeid == NULL)) {
17606 					scsi_log(SD_DEVINFO(un), sd_label,
17607 					    CE_WARN, "logical unit not ready, "
17608 					    "resetting disk\n");
17609 					sd_reset_target(un, pktp);
17610 				}
17611 			} else {
17612 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17613 				    (xp->xb_retry_count >
17614 				    un->un_reset_retry_count)) &&
17615 				    (un->un_startstop_timeid == NULL)) {
17616 					scsi_log(SD_DEVINFO(un), sd_label,
17617 					    CE_WARN, "logical unit not ready, "
17618 					    "resetting disk\n");
17619 					sd_reset_target(un, pktp);
17620 				}
17621 			}
17622 			break;
17623 
17624 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17625 			/*
17626 			 * If the target is in the process of becoming
17627 			 * ready, just proceed with the retry. This can
17628 			 * happen with CD-ROMs that take a long time to
17629 			 * read TOC after a power cycle or reset.
17630 			 */
17631 			goto do_retry;
17632 
17633 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17634 			break;
17635 
17636 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17637 			/*
17638 			 * Retries cannot help here so just fail right away.
17639 			 */
17640 			goto fail_command;
17641 
17642 		case 0x88:
17643 			/*
17644 			 * Vendor-unique code for T3/T4: it indicates a
17645 			 * path problem in a multipathed config, but as far as
17646 			 * the target driver is concerned it equates to a fatal
17647 			 * error, so we should just fail the command right away
17648 			 * (without printing anything to the console). If this
17649 			 * is not a T3/T4, fall thru to the default recovery
17650 			 * action.
17651 			 * T3/T4 is FC only; no need to check is_fibre.
17652 			 */
17653 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17654 				sd_return_failed_command(un, bp, EIO);
17655 				return;
17656 			}
17657 			/* FALLTHRU */
17658 
17659 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17660 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17661 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17662 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17663 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17664 		default:    /* Possible future codes in SCSI spec? */
17665 			/*
17666 			 * For removable-media devices, do not retry if
17667 			 * ASCQ > 2 as these result mostly from USCSI commands
17668 			 * on MMC devices issued to check status of an
17669 			 * operation initiated in immediate mode.  Also for
17670 			 * ASCQ >= 4 do not print console messages as these
17671 			 * mainly represent a user-initiated operation
17672 			 * instead of a system failure.
17673 			 */
17674 			if (un->un_f_has_removable_media) {
17675 				si.ssi_severity = SCSI_ERR_ALL;
17676 				goto fail_command;
17677 			}
17678 			break;
17679 		}
17680 
17681 		/*
17682 		 * As part of our recovery attempt for the NOT READY
17683 		 * condition, we issue a START STOP UNIT command. However
17684 		 * we want to wait for a short delay before attempting this
17685 		 * as there may still be more commands coming back from the
17686 		 * target with the check condition. To do this we use
17687 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17688 		 * the delay interval expires. (sd_start_stop_unit_callback()
17689 		 * dispatches sd_start_stop_unit_task(), which will issue
17690 		 * the actual START STOP UNIT command.) The delay interval
17691 		 * is one-half of the delay that we will use to retry the
17692 		 * command that generated the NOT READY condition.
17693 		 *
17694 		 * Note that we could just dispatch sd_start_stop_unit_task()
17695 		 * from here and allow it to sleep for the delay interval,
17696 		 * but then we would be tying up the taskq thread
17697 		 * unnecessarily for the duration of the delay.
17698 		 *
17699 		 * Do not issue the START STOP UNIT if the current command
17700 		 * is already a START STOP UNIT.
17701 		 */
17702 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17703 			break;
17704 		}
17705 
17706 		/*
17707 		 * Do not schedule the timeout if one is already pending.
17708 		 */
17709 		if (un->un_startstop_timeid != NULL) {
17710 			SD_INFO(SD_LOG_ERROR, un,
17711 			    "sd_sense_key_not_ready: restart already issued to"
17712 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17713 			    ddi_get_instance(SD_DEVINFO(un)));
17714 			break;
17715 		}
17716 
17717 		/*
17718 		 * Schedule the START STOP UNIT command, then queue the command
17719 		 * for a retry.
17720 		 *
17721 		 * Note: A timeout is not scheduled for this retry because we
17722 		 * want the retry to be serial with the START_STOP_UNIT. The
17723 		 * retry will be started when the START_STOP_UNIT is completed
17724 		 * in sd_start_stop_unit_task.
17725 		 */
17726 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17727 		    un, SD_BSY_TIMEOUT / 2);
17728 		xp->xb_retry_count++;
17729 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17730 		return;
17731 
17732 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17733 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17734 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17735 			    "unit does not respond to selection\n");
17736 		}
17737 		break;
17738 
17739 	case 0x3A:	/* MEDIUM NOT PRESENT */
17740 		if (sd_error_level >= SCSI_ERR_FATAL) {
17741 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17742 			    "Caddy not inserted in drive\n");
17743 		}
17744 
17745 		sr_ejected(un);
17746 		un->un_mediastate = DKIO_EJECTED;
17747 		/* The state has changed, inform the media watch routines */
17748 		cv_broadcast(&un->un_state_cv);
17749 		/* Just fail if no media is present in the drive. */
17750 		goto fail_command;
17751 
17752 	default:
17753 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17754 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17755 			    "Unit not Ready. Additional sense code 0x%x\n",
17756 			    asc);
17757 		}
17758 		break;
17759 	}
17760 
17761 do_retry:
17762 
17763 	/*
17764 	 * Retry the command, as some targets may report NOT READY for
17765 	 * several seconds after being reset.
17766 	 */
17767 	xp->xb_retry_count++;
17768 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17769 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17770 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
17771 
17772 	return;
17773 
17774 fail_command:
17775 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17776 	sd_return_failed_command(un, bp, EIO);
17777 }
17778 
17779 
17780 
17781 /*
17782  *    Function: sd_sense_key_medium_or_hardware_error
17783  *
17784  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17785  *		sense key.
17786  *
17787  *     Context: May be called from interrupt context
17788  */
17789 
17790 static void
17791 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17792 	uint8_t *sense_datap,
17793 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17794 {
17795 	struct sd_sense_info	si;
17796 	uint8_t sense_key = scsi_sense_key(sense_datap);
17797 	uint8_t asc = scsi_sense_asc(sense_datap);
17798 
17799 	ASSERT(un != NULL);
17800 	ASSERT(mutex_owned(SD_MUTEX(un)));
17801 	ASSERT(bp != NULL);
17802 	ASSERT(xp != NULL);
17803 	ASSERT(pktp != NULL);
17804 
17805 	si.ssi_severity = SCSI_ERR_FATAL;
17806 	si.ssi_pfa_flag = FALSE;
17807 
17808 	if (sense_key == KEY_MEDIUM_ERROR) {
17809 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17810 	}
17811 
17812 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17813 
17814 	if ((un->un_reset_retry_count != 0) &&
17815 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17816 		mutex_exit(SD_MUTEX(un));
17817 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17818 		if (un->un_f_allow_bus_device_reset == TRUE) {
17819 
17820 			boolean_t try_resetting_target = B_TRUE;
17821 
17822 			/*
17823 			 * We need to handle specific ASC values when handling
17824 			 * a KEY_HARDWARE_ERROR. In particular, taking the
17825 			 * default action of resetting the target may not be
17826 			 * the appropriate way to attempt recovery.
17827 			 * Resetting a target because of a single LUN failure
17828 			 * victimizes all LUNs on that target.
17829 			 *
17830 			 * This is true for LSI arrays: if an LSI array
17831 			 * controller returns an ASC of 0x84 (LUN Dead) we
17832 			 * should trust it.
17833 			 */
17834 
17835 			if (sense_key == KEY_HARDWARE_ERROR) {
17836 				switch (asc) {
17837 				case 0x84:
17838 					if (SD_IS_LSI(un)) {
17839 						try_resetting_target = B_FALSE;
17840 					}
17841 					break;
17842 				default:
17843 					break;
17844 				}
17845 			}
17846 
17847 			if (try_resetting_target == B_TRUE) {
17848 				int reset_retval = 0;
17849 				if (un->un_f_lun_reset_enabled == TRUE) {
17850 					SD_TRACE(SD_LOG_IO_CORE, un,
17851 					    "sd_sense_key_medium_or_hardware_"
17852 					    "error: issuing RESET_LUN\n");
17853 					reset_retval =
17854 					    scsi_reset(SD_ADDRESS(un),
17855 					    RESET_LUN);
17856 				}
17857 				if (reset_retval == 0) {
17858 					SD_TRACE(SD_LOG_IO_CORE, un,
17859 					    "sd_sense_key_medium_or_hardware_"
17860 					    "error: issuing RESET_TARGET\n");
17861 					(void) scsi_reset(SD_ADDRESS(un),
17862 					    RESET_TARGET);
17863 				}
17864 			}
17865 		}
17866 		mutex_enter(SD_MUTEX(un));
17867 	}
17868 
17869 	/*
17870 	 * This really ought to be a fatal error, but we will retry anyway
17871 	 * as some drives report this as a spurious error.
17872 	 */
17873 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17874 	    &si, EIO, (clock_t)0, NULL);
17875 }
17876 
17877 
17878 
17879 /*
17880  *    Function: sd_sense_key_illegal_request
17881  *
17882  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17883  *
17884  *     Context: May be called from interrupt context
17885  */
17886 
17887 static void
17888 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17889 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17890 {
17891 	struct sd_sense_info	si;
17892 
17893 	ASSERT(un != NULL);
17894 	ASSERT(mutex_owned(SD_MUTEX(un)));
17895 	ASSERT(bp != NULL);
17896 	ASSERT(xp != NULL);
17897 	ASSERT(pktp != NULL);
17898 
17899 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17900 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17901 
17902 	si.ssi_severity = SCSI_ERR_INFO;
17903 	si.ssi_pfa_flag = FALSE;
17904 
17905 	/* Pointless to retry if the target thinks it's an illegal request */
17906 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17907 	sd_return_failed_command(un, bp, EIO);
17908 }
17909 
17910 
17911 
17912 
17913 /*
17914  *    Function: sd_sense_key_unit_attention
17915  *
17916  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17917  *
17918  *     Context: May be called from interrupt context
17919  */
17920 
17921 static void
17922 sd_sense_key_unit_attention(struct sd_lun *un,
17923 	uint8_t *sense_datap,
17924 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17925 {
17926 	/*
17927 	 * For UNIT ATTENTION we allow retries for one minute. Devices
17928 	 * like Sonoma can keep returning UNIT ATTENTION for close to a
17929 	 * minute under certain conditions.
17930 	 */
17931 	int	retry_check_flag = SD_RETRIES_UA;
17932 	boolean_t	kstat_updated = B_FALSE;
17933 	struct	sd_sense_info		si;
17934 	uint8_t asc = scsi_sense_asc(sense_datap);
17935 
17936 	ASSERT(un != NULL);
17937 	ASSERT(mutex_owned(SD_MUTEX(un)));
17938 	ASSERT(bp != NULL);
17939 	ASSERT(xp != NULL);
17940 	ASSERT(pktp != NULL);
17941 
17942 	si.ssi_severity = SCSI_ERR_INFO;
17943 	si.ssi_pfa_flag = FALSE;
17944 
17945 
17946 	switch (asc) {
17947 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17948 		if (sd_report_pfa != 0) {
17949 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17950 			si.ssi_pfa_flag = TRUE;
17951 			retry_check_flag = SD_RETRIES_STANDARD;
17952 			goto do_retry;
17953 		}
17954 
17955 		break;
17956 
17957 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17958 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17959 			un->un_resvd_status |=
17960 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17961 		}
17962 #ifdef _LP64
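		/*
		 * Sketch of the rationale (as best understood): disks
		 * needing more than a 32-bit block address cannot have
		 * error LBAs reported in the 4-byte information field of
		 * fixed format sense data, so descriptor sense is
		 * re-enabled for them via sd_reenable_dsense_task.
		 */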
17963 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
17964 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
17965 			    un, KM_NOSLEEP) == 0) {
17966 				/*
17967 				 * If we can't dispatch the task we'll just
17968 				 * live without descriptor sense.  We can
17969 				 * try again on the next "unit attention"
17970 				 */
17971 				SD_ERROR(SD_LOG_ERROR, un,
17972 				    "sd_sense_key_unit_attention: "
17973 				    "Could not dispatch "
17974 				    "sd_reenable_dsense_task\n");
17975 			}
17976 		}
17977 #endif /* _LP64 */
17978 		/* FALLTHRU */
17979 
17980 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
17981 		if (!un->un_f_has_removable_media) {
17982 			break;
17983 		}
17984 
17985 		/*
17986 		 * When we get a unit attention from a removable-media device,
17987 		 * it may be in a state that will take a long time to recover
17988 		 * (e.g., from a reset).  Since we are executing in interrupt
17989 		 * context here, we cannot wait around for the device to come
17990 		 * back. So hand this command off to sd_media_change_task()
17991 		 * for deferred processing under taskq thread context. (Note
17992 		 * that the command still may be failed if a problem is
17993 		 * encountered at a later time.)
17994 		 */
17995 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
17996 		    KM_NOSLEEP) == 0) {
17997 			/*
17998 			 * Cannot dispatch the request so fail the command.
17999 			 */
18000 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
18001 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18002 			si.ssi_severity = SCSI_ERR_FATAL;
18003 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18004 			sd_return_failed_command(un, bp, EIO);
18005 		}
18006 
18007 		/*
18008 		 * If we failed to dispatch sd_media_change_task(), the kstats
18009 		 * were already updated above. If the dispatch succeeded, the
18010 		 * kstats will be updated later if an error is encountered.
18011 		 * Either way, set the kstat_updated flag here.
18012 		 */
18013 		kstat_updated = B_TRUE;
18014 
18015 		/*
18016 		 * Either the command has been successfully dispatched to a
18017 		 * task Q for retrying, or the dispatch failed. In either case
18018 	 * do NOT retry again by calling sd_retry_command. Doing so would
18019 	 * set up two retries of the same command; when one completed and
18020 	 * freed its resources, the other would access freed memory,
18021 	 * a bad thing.
18022 		 */
18023 		return;
18024 
18025 	default:
18026 		break;
18027 	}
18028 
18029 	/*
18030 	 * Update kstat if we haven't done that.
18031 	 */
18032 	if (!kstat_updated) {
18033 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18034 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18035 	}
18036 
18037 do_retry:
18038 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
18039 	    EIO, SD_UA_RETRY_DELAY, NULL);
18040 }
18041 
18042 
18043 
18044 /*
18045  *    Function: sd_sense_key_fail_command
18046  *
18047  * Description: Used to fail a command when we don't like the sense key that
18048  *		was returned.
18049  *
18050  *     Context: May be called from interrupt context
18051  */
18052 
18053 static void
18054 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18055 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18056 {
18057 	struct sd_sense_info	si;
18058 
18059 	ASSERT(un != NULL);
18060 	ASSERT(mutex_owned(SD_MUTEX(un)));
18061 	ASSERT(bp != NULL);
18062 	ASSERT(xp != NULL);
18063 	ASSERT(pktp != NULL);
18064 
18065 	si.ssi_severity = SCSI_ERR_FATAL;
18066 	si.ssi_pfa_flag = FALSE;
18067 
18068 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18069 	sd_return_failed_command(un, bp, EIO);
18070 }
18071 
18072 
18073 
18074 /*
18075  *    Function: sd_sense_key_blank_check
18076  *
18077  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18078  *		Has no monetary connotation.
18079  *
18080  *     Context: May be called from interrupt context
18081  */
18082 
18083 static void
18084 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18085 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18086 {
18087 	struct sd_sense_info	si;
18088 
18089 	ASSERT(un != NULL);
18090 	ASSERT(mutex_owned(SD_MUTEX(un)));
18091 	ASSERT(bp != NULL);
18092 	ASSERT(xp != NULL);
18093 	ASSERT(pktp != NULL);
18094 
18095 	/*
18096 	 * Blank check is not fatal for removable devices, therefore
18097 	 * it does not require a console message.
18098 	 */
18099 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18100 	    SCSI_ERR_FATAL;
18101 	si.ssi_pfa_flag = FALSE;
18102 
18103 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18104 	sd_return_failed_command(un, bp, EIO);
18105 }
18106 
18107 
18108 
18109 
18110 /*
18111  *    Function: sd_sense_key_aborted_command
18112  *
18113  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18114  *
18115  *     Context: May be called from interrupt context
18116  */
18117 
18118 static void
18119 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18120 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18121 {
18122 	struct sd_sense_info	si;
18123 
18124 	ASSERT(un != NULL);
18125 	ASSERT(mutex_owned(SD_MUTEX(un)));
18126 	ASSERT(bp != NULL);
18127 	ASSERT(xp != NULL);
18128 	ASSERT(pktp != NULL);
18129 
18130 	si.ssi_severity = SCSI_ERR_FATAL;
18131 	si.ssi_pfa_flag = FALSE;
18132 
18133 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18134 
18135 	/*
18136 	 * This really ought to be a fatal error, but we will retry anyway
18137 	 * as some drives report this as a spurious error.
18138 	 */
18139 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18140 	    &si, EIO, (clock_t)0, NULL);
18141 }
18142 
18143 
18144 
18145 /*
18146  *    Function: sd_sense_key_default
18147  *
18148  * Description: Default recovery action for several SCSI sense keys (basically
18149  *		attempts a retry).
18150  *
18151  *     Context: May be called from interrupt context
18152  */
18153 
18154 static void
18155 sd_sense_key_default(struct sd_lun *un,
18156 	uint8_t *sense_datap,
18157 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18158 {
18159 	struct sd_sense_info	si;
18160 	uint8_t sense_key = scsi_sense_key(sense_datap);
18161 
18162 	ASSERT(un != NULL);
18163 	ASSERT(mutex_owned(SD_MUTEX(un)));
18164 	ASSERT(bp != NULL);
18165 	ASSERT(xp != NULL);
18166 	ASSERT(pktp != NULL);
18167 
18168 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18169 
18170 	/*
18171 	 * Undecoded sense key.	Attempt retries and hope that will fix
18172 	 * the problem.  Otherwise, we're dead.
18173 	 */
18174 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18175 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18176 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18177 	}
18178 
18179 	si.ssi_severity = SCSI_ERR_FATAL;
18180 	si.ssi_pfa_flag = FALSE;
18181 
18182 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18183 	    &si, EIO, (clock_t)0, NULL);
18184 }
18185 
18186 
18187 
18188 /*
18189  *    Function: sd_print_retry_msg
18190  *
18191  * Description: Print a message indicating the retry action being taken.
18192  *
18193  *   Arguments: un - ptr to associated softstate
18194  *		bp - ptr to buf(9S) for the command
18195  *		arg - not used.
18196  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18197  *			or SD_NO_RETRY_ISSUED
18198  *
18199  *     Context: May be called from interrupt context
18200  */
18201 /* ARGSUSED */
18202 static void
18203 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18204 {
18205 	struct sd_xbuf	*xp;
18206 	struct scsi_pkt *pktp;
18207 	char *reasonp;
18208 	char *msgp;
18209 
18210 	ASSERT(un != NULL);
18211 	ASSERT(mutex_owned(SD_MUTEX(un)));
18212 	ASSERT(bp != NULL);
18213 	pktp = SD_GET_PKTP(bp);
18214 	ASSERT(pktp != NULL);
18215 	xp = SD_GET_XBUF(bp);
18216 	ASSERT(xp != NULL);
18217 
18218 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18219 	mutex_enter(&un->un_pm_mutex);
18220 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18221 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18222 	    (pktp->pkt_flags & FLAG_SILENT)) {
18223 		mutex_exit(&un->un_pm_mutex);
18224 		goto update_pkt_reason;
18225 	}
18226 	mutex_exit(&un->un_pm_mutex);
18227 
18228 	/*
18229 	 * Suppress messages if they are all the same pkt_reason; with
18230 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18231 	 * If we are in panic, then suppress the retry messages.
18232 	 */
18233 	switch (flag) {
18234 	case SD_NO_RETRY_ISSUED:
18235 		msgp = "giving up";
18236 		break;
18237 	case SD_IMMEDIATE_RETRY_ISSUED:
18238 	case SD_DELAYED_RETRY_ISSUED:
18239 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18240 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18241 		    (sd_error_level != SCSI_ERR_ALL))) {
18242 			return;
18243 		}
18244 		msgp = "retrying command";
18245 		break;
18246 	default:
18247 		goto update_pkt_reason;
18248 	}
18249 
18250 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18251 	    scsi_rname(pktp->pkt_reason));
18252 
18253 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18254 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18255 
18256 update_pkt_reason:
18257 	/*
18258 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18259 	 * This is to prevent multiple console messages for the same failure
18260 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18261 	 * when the command is retried successfully because there still may be
18262 	 * more commands coming back with the same value of pktp->pkt_reason.
18263 	 */
18264 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18265 		un->un_last_pkt_reason = pktp->pkt_reason;
18266 	}
18267 }
18268 
18269 
18270 /*
18271  *    Function: sd_print_cmd_incomplete_msg
18272  *
18273  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18274  *
18275  *   Arguments: un - ptr to associated softstate
18276  *		bp - ptr to buf(9S) for the command
18277  *		arg - passed to sd_print_retry_msg()
18278  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18279  *			or SD_NO_RETRY_ISSUED
18280  *
18281  *     Context: May be called from interrupt context
18282  */
18283 
18284 static void
18285 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18286 	int code)
18287 {
18288 	dev_info_t	*dip;
18289 
18290 	ASSERT(un != NULL);
18291 	ASSERT(mutex_owned(SD_MUTEX(un)));
18292 	ASSERT(bp != NULL);
18293 
18294 	switch (code) {
18295 	case SD_NO_RETRY_ISSUED:
18296 		/* Command was failed. Someone turned off this target? */
18297 		if (un->un_state != SD_STATE_OFFLINE) {
18298 			/*
18299 			 * Suppress message if we are detaching and
18300 			 * device has been disconnected
18301 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18302 			 * private interface and not part of the DDI
18303 			 */
18304 			dip = un->un_sd->sd_dev;
18305 			if (!(DEVI_IS_DETACHING(dip) &&
18306 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18307 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18308 				"disk not responding to selection\n");
18309 			}
18310 			New_state(un, SD_STATE_OFFLINE);
18311 		}
18312 		break;
18313 
18314 	case SD_DELAYED_RETRY_ISSUED:
18315 	case SD_IMMEDIATE_RETRY_ISSUED:
18316 	default:
18317 		/* Command was successfully queued for retry */
18318 		sd_print_retry_msg(un, bp, arg, code);
18319 		break;
18320 	}
18321 }
18322 
18323 
18324 /*
18325  *    Function: sd_pkt_reason_cmd_incomplete
18326  *
18327  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18328  *
18329  *     Context: May be called from interrupt context
18330  */
18331 
18332 static void
18333 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18334 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18335 {
18336 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18337 
18338 	ASSERT(un != NULL);
18339 	ASSERT(mutex_owned(SD_MUTEX(un)));
18340 	ASSERT(bp != NULL);
18341 	ASSERT(xp != NULL);
18342 	ASSERT(pktp != NULL);
18343 
18344 	/* Do not do a reset if selection did not complete */
18345 	/* Note: Should this not just check the bit? */
18346 	if (pktp->pkt_state != STATE_GOT_BUS) {
18347 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18348 		sd_reset_target(un, pktp);
18349 	}
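	/*
	 * (Re the note above: pkt_state is a bit mask, so the equality test
	 * skips the reset only when STATE_GOT_BUS is the sole bit set; a
	 * bit test such as (pktp->pkt_state & STATE_GOT_TARGET) == 0 would
	 * express "selection did not complete" directly.)
	 */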
18350 
18351 	/*
18352 	 * If the target was not successfully selected, then set
18353 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18354 	 * with the target, and further retries and/or commands are
18355 	 * likely to take a long time.
18356 	 */
18357 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18358 		flag |= SD_RETRIES_FAILFAST;
18359 	}
18360 
18361 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18362 
18363 	sd_retry_command(un, bp, flag,
18364 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18365 }
18366 
18367 
18368 
18369 /*
18370  *    Function: sd_pkt_reason_cmd_tran_err
18371  *
18372  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18373  *
18374  *     Context: May be called from interrupt context
18375  */
18376 
18377 static void
18378 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18379 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18380 {
18381 	ASSERT(un != NULL);
18382 	ASSERT(mutex_owned(SD_MUTEX(un)));
18383 	ASSERT(bp != NULL);
18384 	ASSERT(xp != NULL);
18385 	ASSERT(pktp != NULL);
18386 
18387 	/*
18388 	 * Do not reset if we got a parity error, or if
18389 	 * selection did not complete.
18390 	 */
18391 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18392 	/* Note: Should this not just check the bit for pkt_state? */
18393 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18394 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18395 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18396 		sd_reset_target(un, pktp);
18397 	}
18398 
18399 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18400 
18401 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18402 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18403 }
18404 
18405 
18406 
18407 /*
18408  *    Function: sd_pkt_reason_cmd_reset
18409  *
18410  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18411  *
18412  *     Context: May be called from interrupt context
18413  */
18414 
18415 static void
18416 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18417 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18418 {
18419 	ASSERT(un != NULL);
18420 	ASSERT(mutex_owned(SD_MUTEX(un)));
18421 	ASSERT(bp != NULL);
18422 	ASSERT(xp != NULL);
18423 	ASSERT(pktp != NULL);
18424 
18425 	/* The target may still be running the command, so try to reset. */
18426 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18427 	sd_reset_target(un, pktp);
18428 
18429 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18430 
18431 	/*
18432 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18433 	 * reset because another target on this bus caused it. The target
18434 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18435 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18436 	 */
18437 
18438 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18439 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18440 }
18441 
18442 
18443 
18444 
18445 /*
18446  *    Function: sd_pkt_reason_cmd_aborted
18447  *
18448  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18449  *
18450  *     Context: May be called from interrupt context
18451  */
18452 
18453 static void
18454 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18455 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18456 {
18457 	ASSERT(un != NULL);
18458 	ASSERT(mutex_owned(SD_MUTEX(un)));
18459 	ASSERT(bp != NULL);
18460 	ASSERT(xp != NULL);
18461 	ASSERT(pktp != NULL);
18462 
18463 	/* The target may still be running the command, so try to reset. */
18464 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18465 	sd_reset_target(un, pktp);
18466 
18467 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18468 
18469 	/*
18470 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18471 	 * aborted because another target on this bus caused it. The target
18472 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18473 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18474 	 */
18475 
18476 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18477 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18478 }
18479 
18480 
18481 
18482 /*
18483  *    Function: sd_pkt_reason_cmd_timeout
18484  *
18485  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18486  *
18487  *     Context: May be called from interrupt context
18488  */
18489 
18490 static void
18491 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18492 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18493 {
18494 	ASSERT(un != NULL);
18495 	ASSERT(mutex_owned(SD_MUTEX(un)));
18496 	ASSERT(bp != NULL);
18497 	ASSERT(xp != NULL);
18498 	ASSERT(pktp != NULL);
18499 
18500 
18501 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18502 	sd_reset_target(un, pktp);
18503 
18504 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18505 
18506 	/*
18507 	 * A command timeout indicates that we could not establish
18508 	 * communication with the target, so set SD_RETRIES_FAILFAST
18509 	 * as further retries/commands are likely to take a long time.
18510 	 */
18511 	sd_retry_command(un, bp,
18512 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18513 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18514 }
18515 
18516 
18517 
18518 /*
18519  *    Function: sd_pkt_reason_cmd_unx_bus_free
18520  *
18521  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18522  *
18523  *     Context: May be called from interrupt context
18524  */
18525 
18526 static void
18527 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18528 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18529 {
18530 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18531 
18532 	ASSERT(un != NULL);
18533 	ASSERT(mutex_owned(SD_MUTEX(un)));
18534 	ASSERT(bp != NULL);
18535 	ASSERT(xp != NULL);
18536 	ASSERT(pktp != NULL);
18537 
18538 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18539 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18540 
18541 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18542 	    sd_print_retry_msg : NULL;
18543 
18544 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18545 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18546 }
18547 
18548 
18549 /*
18550  *    Function: sd_pkt_reason_cmd_tag_reject
18551  *
18552  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18553  *
18554  *     Context: May be called from interrupt context
18555  */
18556 
18557 static void
18558 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18559 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18560 {
18561 	ASSERT(un != NULL);
18562 	ASSERT(mutex_owned(SD_MUTEX(un)));
18563 	ASSERT(bp != NULL);
18564 	ASSERT(xp != NULL);
18565 	ASSERT(pktp != NULL);
18566 
18567 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18568 	pktp->pkt_flags = 0;
18569 	un->un_tagflags = 0;
18570 	if (un->un_f_opt_queueing == TRUE) {
18571 		un->un_throttle = min(un->un_throttle, 3);
18572 	} else {
18573 		un->un_throttle = 1;
18574 	}
18575 	mutex_exit(SD_MUTEX(un));
18576 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18577 	mutex_enter(SD_MUTEX(un));
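	/*
	 * Note: the scsi_ifsetcap(9F) call above disables the "tagged-qing"
	 * capability (value 0) for this target only (whom == 1). SD_MUTEX
	 * is dropped around the call since the HBA's capability routine
	 * may block.
	 */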
18578 
18579 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18580 
18581 	/* Legacy behavior not to check retry counts here. */
18582 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18583 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18584 }
18585 
18586 
18587 /*
18588  *    Function: sd_pkt_reason_default
18589  *
18590  * Description: Default recovery actions for SCSA pkt_reason values that
18591  *		do not have more explicit recovery actions.
18592  *
18593  *     Context: May be called from interrupt context
18594  */
18595 
18596 static void
18597 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18598 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18599 {
18600 	ASSERT(un != NULL);
18601 	ASSERT(mutex_owned(SD_MUTEX(un)));
18602 	ASSERT(bp != NULL);
18603 	ASSERT(xp != NULL);
18604 	ASSERT(pktp != NULL);
18605 
18606 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18607 	sd_reset_target(un, pktp);
18608 
18609 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18610 
18611 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18612 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18613 }
18614 
18615 
18616 
18617 /*
18618  *    Function: sd_pkt_status_check_condition
18619  *
18620  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18621  *
18622  *     Context: May be called from interrupt context
18623  */
18624 
18625 static void
18626 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18627 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18628 {
18629 	ASSERT(un != NULL);
18630 	ASSERT(mutex_owned(SD_MUTEX(un)));
18631 	ASSERT(bp != NULL);
18632 	ASSERT(xp != NULL);
18633 	ASSERT(pktp != NULL);
18634 
18635 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18636 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18637 
18638 	/*
18639 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18640 	 * command will be retried after the request sense). Otherwise, retry
18641 	 * the command. Note: we are issuing the request sense even though the
18642 	 * retry limit may have been reached for the failed command.
18643 	 */
18644 	if (un->un_f_arq_enabled == FALSE) {
18645 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18646 		    "no ARQ, sending request sense command\n");
18647 		sd_send_request_sense_command(un, bp, pktp);
18648 	} else {
18649 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18650 		    "ARQ, retrying request sense command\n");
18651 #if defined(__i386) || defined(__amd64)
18652 		/*
18653 		 * The delay value used here needs to be adjusted whenever
18654 		 * SD_RETRY_DELAY changes in sddef.h.
18655 		 */
18656 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18657 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
18658 		    NULL);
18659 #else
18660 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18661 		    EIO, SD_RETRY_DELAY, NULL);
18662 #endif
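		/*
		 * In the x86 build above, drv_usectohz(100000) converts a
		 * 100 millisecond delay into clock ticks for fibre devices,
		 * while other devices retry immediately; other platforms
		 * simply use SD_RETRY_DELAY.
		 */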
18663 	}
18664 
18665 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18666 }
18667 
18668 
18669 /*
18670  *    Function: sd_pkt_status_busy
18671  *
18672  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18673  *
18674  *     Context: May be called from interrupt context
18675  */
18676 
18677 static void
18678 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18679 	struct scsi_pkt *pktp)
18680 {
18681 	ASSERT(un != NULL);
18682 	ASSERT(mutex_owned(SD_MUTEX(un)));
18683 	ASSERT(bp != NULL);
18684 	ASSERT(xp != NULL);
18685 	ASSERT(pktp != NULL);
18686 
18687 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18688 	    "sd_pkt_status_busy: entry\n");
18689 
18690 	/* If retries are exhausted, just fail the command. */
18691 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18692 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18693 		    "device busy too long\n");
18694 		sd_return_failed_command(un, bp, EIO);
18695 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18696 		    "sd_pkt_status_busy: exit\n");
18697 		return;
18698 	}
18699 	xp->xb_retry_count++;
18700 
18701 	/*
18702 	 * Try to reset the target. However, we do not want to perform
18703 	 * more than one reset if the device continues to fail. The reset
18704 	 * will be performed when the retry count reaches the reset
18705 	 * threshold.  This threshold should be set such that at least
18706 	 * one retry is issued before the reset is performed.
18707 	 */
18708 	if (xp->xb_retry_count ==
18709 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18710 		int rval = 0;
18711 		mutex_exit(SD_MUTEX(un));
18712 		if (un->un_f_allow_bus_device_reset == TRUE) {
18713 			/*
18714 			 * First try to reset the LUN; if we cannot then
18715 			 * try to reset the target.
18716 			 */
18717 			if (un->un_f_lun_reset_enabled == TRUE) {
18718 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18719 				    "sd_pkt_status_busy: RESET_LUN\n");
18720 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18721 			}
18722 			if (rval == 0) {
18723 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18724 				    "sd_pkt_status_busy: RESET_TARGET\n");
18725 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18726 			}
18727 		}
18728 		if (rval == 0) {
18729 			/*
18730 			 * If the RESET_LUN and/or RESET_TARGET failed,
18731 			 * try RESET_ALL
18732 			 */
18733 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18734 			    "sd_pkt_status_busy: RESET_ALL\n");
18735 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18736 		}
18737 		mutex_enter(SD_MUTEX(un));
18738 		if (rval == 0) {
18739 			/*
18740 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18741 			 * At this point we give up & fail the command.
18742 			 */
18743 			sd_return_failed_command(un, bp, EIO);
18744 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18745 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18746 			return;
18747 		}
18748 	}
18749 
18750 	/*
18751 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18752 	 * we have already checked the retry counts above.
18753 	 */
18754 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18755 	    EIO, SD_BSY_TIMEOUT, NULL);
18756 
18757 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18758 	    "sd_pkt_status_busy: exit\n");
18759 }
18760 
18761 
18762 /*
18763  *    Function: sd_pkt_status_reservation_conflict
18764  *
18765  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18766  *		command status.
18767  *
18768  *     Context: May be called from interrupt context
18769  */
18770 
18771 static void
18772 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18773 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18774 {
18775 	ASSERT(un != NULL);
18776 	ASSERT(mutex_owned(SD_MUTEX(un)));
18777 	ASSERT(bp != NULL);
18778 	ASSERT(xp != NULL);
18779 	ASSERT(pktp != NULL);
18780 
18781 	/*
18782 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT], the reservation
18783 	 * conflict could be due to various causes, such as incorrect keys or
18784 	 * an unregistered or unreserved initiator; return EACCES to the caller.
18785 	 */
18786 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18787 		int cmd = SD_GET_PKT_OPCODE(pktp);
18788 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18789 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18790 			sd_return_failed_command(un, bp, EACCES);
18791 			return;
18792 		}
18793 	}
18794 
18795 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18796 
18797 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18798 		if (sd_failfast_enable != 0) {
18799 			/* By definition, we must panic here.... */
18800 			sd_panic_for_res_conflict(un);
18801 			/*NOTREACHED*/
18802 		}
18803 		SD_ERROR(SD_LOG_IO, un,
18804 		    "sd_pkt_status_reservation_conflict: Disk Reserved\n");
18805 		sd_return_failed_command(un, bp, EACCES);
18806 		return;
18807 	}
18808 
18809 	/*
18810 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18811 	 * property is set (default is 1). Retries will not succeed
18812 	 * on a disk reserved by another initiator. HA systems
18813 	 * may reset this via sd.conf to avoid these retries.
18814 	 *
18815 	 * Note: The legacy return code for this failure is EIO, however EACCES
18816 	 * seems more appropriate for a reservation conflict.
18817 	 */
18818 	if (sd_retry_on_reservation_conflict == 0) {
18819 		SD_ERROR(SD_LOG_IO, un,
18820 		    "sd_pkt_status_reservation_conflict: Device Reserved\n");
18821 		sd_return_failed_command(un, bp, EIO);
18822 		return;
18823 	}
18824 
18825 	/*
18826 	 * Retry the command if we can.
18827 	 *
18828 	 * Note: The legacy return code for this failure is EIO, however EACCES
18829 	 * seems more appropriate for a reservation conflict.
18830 	 */
18831 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18832 	    (clock_t)2, NULL);
18833 }
18834 
18835 
18836 
18837 /*
18838  *    Function: sd_pkt_status_qfull
18839  *
18840  * Description: Handle a QUEUE FULL condition from the target.  This can
18841  *		occur if the HBA does not handle the queue full condition.
18842  *		(Basically this means third-party HBAs, since Sun HBAs
18843  *		handle the queue full condition.)  Note that if there are
18844  *		some commands already in the transport, then the queue full
18845  *		has occurred because the queue for this nexus is actually
18846  *		full. If there are no commands in the transport, then the
18847  *		queue full is resulting from some other initiator or lun
18848  *		queue full results from some other initiator or lun
18849  *
18850  *     Context: May be called from interrupt context
18851  */
18852 
18853 static void
18854 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18855 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18856 {
18857 	ASSERT(un != NULL);
18858 	ASSERT(mutex_owned(SD_MUTEX(un)));
18859 	ASSERT(bp != NULL);
18860 	ASSERT(xp != NULL);
18861 	ASSERT(pktp != NULL);
18862 
18863 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18864 	    "sd_pkt_status_qfull: entry\n");
18865 
18866 	/*
18867 	 * Just lower the QFULL throttle and retry the command.  Note that
18868 	 * we do not limit the number of retries here.
18869 	 */
18870 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18871 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18872 	    SD_RESTART_TIMEOUT, NULL);
18873 
18874 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18875 	    "sd_pkt_status_qfull: exit\n");
18876 }
18877 
18878 
18879 /*
18880  *    Function: sd_reset_target
18881  *
18882  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18883  *		RESET_TARGET, or RESET_ALL.
18884  *
18885  *     Context: May be called under interrupt context.
18886  */
18887 
18888 static void
18889 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18890 {
18891 	int rval = 0;
18892 
18893 	ASSERT(un != NULL);
18894 	ASSERT(mutex_owned(SD_MUTEX(un)));
18895 	ASSERT(pktp != NULL);
18896 
18897 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18898 
18899 	/*
18900 	 * No need to reset if the transport layer has already done so.
18901 	 */
18902 	if ((pktp->pkt_statistics &
18903 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18904 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18905 		    "sd_reset_target: no reset\n");
18906 		return;
18907 	}
18908 
18909 	mutex_exit(SD_MUTEX(un));
18910 
18911 	if (un->un_f_allow_bus_device_reset == TRUE) {
18912 		if (un->un_f_lun_reset_enabled == TRUE) {
18913 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18914 			    "sd_reset_target: RESET_LUN\n");
18915 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18916 		}
18917 		if (rval == 0) {
18918 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18919 			    "sd_reset_target: RESET_TARGET\n");
18920 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18921 		}
18922 	}
18923 
18924 	if (rval == 0) {
18925 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18926 		    "sd_reset_target: RESET_ALL\n");
18927 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18928 	}
18929 
18930 	mutex_enter(SD_MUTEX(un));
18931 
18932 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18933 }
18934 
18935 
18936 /*
18937  *    Function: sd_media_change_task
18938  *
18939  * Description: Recovery action to wait for a CDROM to become available.
18940  *
18941  *     Context: Executes in a taskq() thread context
18942  */
18943 
18944 static void
18945 sd_media_change_task(void *arg)
18946 {
18947 	struct	scsi_pkt	*pktp = arg;
18948 	struct	sd_lun		*un;
18949 	struct	buf		*bp;
18950 	struct	sd_xbuf		*xp;
18951 	int	err		= 0;
18952 	int	retry_count	= 0;
18953 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18954 	struct	sd_sense_info	si;
18955 
18956 	ASSERT(pktp != NULL);
18957 	bp = (struct buf *)pktp->pkt_private;
18958 	ASSERT(bp != NULL);
18959 	xp = SD_GET_XBUF(bp);
18960 	ASSERT(xp != NULL);
18961 	un = SD_GET_UN(bp);
18962 	ASSERT(un != NULL);
18963 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18964 	ASSERT(un->un_f_monitor_media_state);
18965 
18966 	si.ssi_severity = SCSI_ERR_INFO;
18967 	si.ssi_pfa_flag = FALSE;
18968 
18969 	/*
18970 	 * When a reset is issued on a CDROM, it takes a long time to
18971 	 * recover. The first few attempts to read capacity and other
18972 	 * unit-attention handling operations fail (with an ASC of 0x4 and
18973 	 * an ASCQ of 0x1, i.e. the LUN is becoming ready). In that case we
18974 	 * want to retry long enough for the device to recover, while still
18975 	 * limiting retries for genuine failures like no media in the drive.
18976 	 */
18977 	while (retry_count++ < retry_limit) {
18978 		if ((err = sd_handle_mchange(un)) == 0) {
18979 			break;
18980 		}
18981 		if (err == EAGAIN) {
18982 			retry_limit = SD_UNIT_ATTENTION_RETRY;
18983 		}
18984 		/* Sleep for 0.5 sec. & try again */
18985 		delay(drv_usectohz(500000));
18986 	}
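	/*
	 * With the 0.5 second delay per attempt, the loop above bounds
	 * recovery time at roughly retry_limit/2 seconds; the limit is
	 * raised to the full SD_UNIT_ATTENTION_RETRY count only while the
	 * device keeps reporting EAGAIN (i.e. it is still becoming ready).
	 */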
18987 
18988 	/*
18989 	 * Dispatch (retry or fail) the original command here,
18990 	 * along with appropriate console messages....
18991 	 *
18992 	 * Must grab the mutex before calling sd_retry_command,
18993 	 * sd_print_sense_msg and sd_return_failed_command.
18994 	 */
18995 	mutex_enter(SD_MUTEX(un));
18996 	if (err != SD_CMD_SUCCESS) {
18997 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18998 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18999 		si.ssi_severity = SCSI_ERR_FATAL;
19000 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
19001 		sd_return_failed_command(un, bp, EIO);
19002 	} else {
19003 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
19004 		    &si, EIO, (clock_t)0, NULL);
19005 	}
19006 	mutex_exit(SD_MUTEX(un));
19007 }
19008 
19009 
19010 
19011 /*
19012  *    Function: sd_handle_mchange
19013  *
19014  * Description: Perform geometry validation & other recovery after the
19015  *		medium in a CDROM drive has been changed.
19016  *
19017  * Return Code: 0 for success
19018  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19019  *		sd_send_scsi_READ_CAPACITY()
19020  *
19021  *     Context: Executes in a taskq() thread context
19022  */
19023 
19024 static int
19025 sd_handle_mchange(struct sd_lun *un)
19026 {
19027 	uint64_t	capacity;
19028 	uint32_t	lbasize;
19029 	int		rval;
19030 
19031 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19032 	ASSERT(un->un_f_monitor_media_state);
19033 
19034 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
19035 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
19036 		return (rval);
19037 	}
19038 
19039 	mutex_enter(SD_MUTEX(un));
19040 	sd_update_block_info(un, lbasize, capacity);
19041 
19042 	if (un->un_errstats != NULL) {
19043 		struct	sd_errstats *stp =
19044 		    (struct sd_errstats *)un->un_errstats->ks_data;
19045 		stp->sd_capacity.value.ui64 = (uint64_t)
19046 		    ((uint64_t)un->un_blockcount *
19047 		    (uint64_t)un->un_tgt_blocksize);
19048 	}
19049 
19050 	/*
19051 	 * Note: Maybe let the strategy/partitioning chain worry about getting
19052 	 * valid geometry.
19053 	 */
19054 	un->un_f_geometry_is_valid = FALSE;
19055 	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
19056 	if (un->un_f_geometry_is_valid == FALSE) {
19057 		mutex_exit(SD_MUTEX(un));
19058 		return (EIO);
19059 	}
19060 
19061 	mutex_exit(SD_MUTEX(un));
19062 
19063 	/*
19064 	 * Try to lock the door
19065 	 */
19066 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19067 	    SD_PATH_DIRECT_PRIORITY));
19068 }
19069 
19070 
19071 /*
19072  *    Function: sd_send_scsi_DOORLOCK
19073  *
19074  * Description: Issue the scsi DOOR LOCK command
19075  *
19076  *   Arguments: un    - pointer to driver soft state (unit) structure for
19077  *			this target.
19078  *		flag  - SD_REMOVAL_ALLOW
19079  *			SD_REMOVAL_PREVENT
19080  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19081  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19082  *			to use the USCSI "direct" chain and bypass the normal
19083  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19084  *			command is issued as part of an error recovery action.
19085  *
19086  * Return Code: 0   - Success
19087  *		errno return code from sd_send_scsi_cmd()
19088  *
19089  *     Context: Can sleep.
19090  */
19091 
19092 static int
19093 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
19094 {
19095 	union scsi_cdb		cdb;
19096 	struct uscsi_cmd	ucmd_buf;
19097 	struct scsi_extended_sense	sense_buf;
19098 	int			status;
19099 
19100 	ASSERT(un != NULL);
19101 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19102 
19103 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19104 
19105 	/* already determined doorlock is not supported, fake success */
19106 	if (un->un_f_doorlock_supported == FALSE) {
19107 		return (0);
19108 	}
19109 
19110 	bzero(&cdb, sizeof (cdb));
19111 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19112 
19113 	cdb.scc_cmd = SCMD_DOORLOCK;
19114 	cdb.cdb_opaque[4] = (uchar_t)flag;
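	/*
	 * SCMD_DOORLOCK is the PREVENT/ALLOW MEDIUM REMOVAL opcode (0x1E);
	 * bit 0 of CDB byte 4 selects the action (1 == prevent removal,
	 * 0 == allow), which is what the SD_REMOVAL_* flag values are
	 * expected to encode.
	 */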
19115 
19116 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19117 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19118 	ucmd_buf.uscsi_bufaddr	= NULL;
19119 	ucmd_buf.uscsi_buflen	= 0;
19120 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19121 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19122 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19123 	ucmd_buf.uscsi_timeout	= 15;
19124 
19125 	SD_TRACE(SD_LOG_IO, un,
19126 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
19127 
19128 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19129 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19130 
19131 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19132 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19133 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19134 		/* fake success and skip subsequent doorlock commands */
19135 		un->un_f_doorlock_supported = FALSE;
19136 		return (0);
19137 	}
19138 
19139 	return (status);
19140 }
19141 
19142 /*
19143  *    Function: sd_send_scsi_READ_CAPACITY
19144  *
19145  * Description: This routine uses the scsi READ CAPACITY command to determine
19146  *		the device capacity in number of blocks and the device native
19147  *		block size. If this function returns a failure, then the
19148  *		values in *capp and *lbap are undefined.  If the capacity
19149  *		returned is 0xffffffff then the lun is too large for a
19150  *		normal READ CAPACITY command and the results of a
19151  *		READ CAPACITY 16 will be used instead.
19152  *
19153  *   Arguments: un   - ptr to soft state struct for the target
19154  *		capp - ptr to unsigned 64-bit variable to receive the
19155  *			capacity value from the command.
19156  *		lbap - ptr to unsigned 32-bit variable to receive the
19157  *			block size value from the command
19158  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19159  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19160  *			to use the USCSI "direct" chain and bypass the normal
19161  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19162  *			command is issued as part of an error recovery action.
19163  *
19164  * Return Code: 0   - Success
19165  *		EIO - IO error
19166  *		EACCES - Reservation conflict detected
19167  *		EAGAIN - Device is becoming ready
19168  *		errno return code from sd_send_scsi_cmd()
19169  *
19170  *     Context: Can sleep.  Blocks until command completes.
19171  */
19172 
19173 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19174 
19175 static int
19176 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
19177 	int path_flag)
19178 {
19179 	struct	scsi_extended_sense	sense_buf;
19180 	struct	uscsi_cmd	ucmd_buf;
19181 	union	scsi_cdb	cdb;
19182 	uint32_t		*capacity_buf;
19183 	uint64_t		capacity;
19184 	uint32_t		lbasize;
19185 	int			status;
19186 
19187 	ASSERT(un != NULL);
19188 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19189 	ASSERT(capp != NULL);
19190 	ASSERT(lbap != NULL);
19191 
19192 	SD_TRACE(SD_LOG_IO, un,
19193 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19194 
19195 	/*
19196 	 * First send a READ_CAPACITY command to the target.
19197 	 * (This command is mandatory under SCSI-2.)
19198 	 *
19199 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19200 	 * Medium Indicator bit is cleared.  The address field must be
19201 	 * zero if the PMI bit is zero.
19202 	 */
19203 	bzero(&cdb, sizeof (cdb));
19204 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19205 
19206 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19207 
19208 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19209 
19210 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19211 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19212 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19213 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19214 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19215 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19216 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19217 	ucmd_buf.uscsi_timeout	= 60;
19218 
19219 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19220 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19221 
19222 	switch (status) {
19223 	case 0:
19224 		/* Return failure if we did not get valid capacity data. */
19225 		if (ucmd_buf.uscsi_resid != 0) {
19226 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19227 			return (EIO);
19228 		}
19229 
19230 		/*
19231 		 * Read capacity and block size from the READ CAPACITY 10 data.
19232 		 * This data may be adjusted later due to device specific
19233 		 * issues.
19234 		 *
19235 		 * According to the SCSI spec, the READ CAPACITY 10
19236 		 * command returns the following:
19237 		 *
19238 		 *  bytes 0-3: Maximum logical block address available.
19239 		 *		(MSB in byte:0 & LSB in byte:3)
19240 		 *
19241 		 *  bytes 4-7: Block length in bytes
19242 		 *		(MSB in byte:4 & LSB in byte:7)
19243 		 *
19244 		 */
19245 		capacity = BE_32(capacity_buf[0]);
19246 		lbasize = BE_32(capacity_buf[1]);
19247 
19248 		/*
19249 		 * Done with capacity_buf
19250 		 */
19251 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19252 
19253 		/*
19254 		 * if the reported capacity is set to all 0xf's, then
19255 		 * this disk is too large and requires SBC-2 commands.
19256 		 * Reissue the request using READ CAPACITY 16.
19257 		 */
19258 		if (capacity == 0xffffffff) {
19259 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19260 			    &lbasize, path_flag);
19261 			if (status != 0) {
19262 				return (status);
19263 			}
19264 		}
19265 		break;	/* Success! */
19266 	case EIO:
19267 		switch (ucmd_buf.uscsi_status) {
19268 		case STATUS_RESERVATION_CONFLICT:
19269 			status = EACCES;
19270 			break;
19271 		case STATUS_CHECK:
19272 			/*
19273 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19274 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19275 			 */
19276 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19277 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19278 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19279 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19280 				return (EAGAIN);
19281 			}
19282 			break;
19283 		default:
19284 			break;
19285 		}
19286 		/* FALLTHRU */
19287 	default:
19288 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19289 		return (status);
19290 	}
19291 
19292 	/*
19293 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19294 	 * (2352 and 0 are common) so for these devices always force the value
19295 	 * to 2048 as required by the ATAPI specs.
19296 	 */
19297 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19298 		lbasize = 2048;
19299 	}
19300 
19301 	/*
19302 	 * Get the maximum LBA value from the READ CAPACITY data.
19303 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19304 	 * was cleared when issuing the command. This means that the LBA
19305 	 * returned from the device is the LBA of the last logical block
19306 	 * on the logical unit.  The actual logical block count will be
19307 	 * this value plus one.
19308 	 *
19309 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19310 	 * so scale the capacity value to reflect this.
19311 	 */
19312 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
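	/*
	 * For example, a device reporting a maximum LBA of 0x0FFFFFFF with
	 * 2048-byte blocks yields (0x0FFFFFFF + 1) * (2048 / 512) =
	 * 0x40000000 blocks, assuming the usual 512-byte un_sys_blocksize.
	 */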
19313 
19314 #if defined(__i386) || defined(__amd64)
19315 	/*
19316 	 * Refer to comments related to off-by-1 at the
19317 	 * header of this file.
19318 	 * Treat 1TB disk as (1T - 512)B.
19319 	 */
19320 	if (un->un_f_capacity_adjusted == 1)
19321 		capacity = DK_MAX_BLOCKS;
19322 #endif
19323 
19324 	/*
19325 	 * Copy the values from the READ CAPACITY command into the space
19326 	 * provided by the caller.
19327 	 */
19328 	*capp = capacity;
19329 	*lbap = lbasize;
19330 
19331 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19332 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19333 
19334 	/*
19335 	 * Both the lbasize and capacity from the device must be nonzero,
19336 	 * otherwise we assume that the values are not valid and return
19337 	 * failure to the caller. (4203735)
19338 	 */
19339 	if ((capacity == 0) || (lbasize == 0)) {
19340 		return (EIO);
19341 	}
19342 
19343 	return (0);
19344 }
19345 
19346 /*
19347  *    Function: sd_send_scsi_READ_CAPACITY_16
19348  *
19349  * Description: This routine uses the scsi READ CAPACITY 16 command to
19350  *		determine the device capacity in number of blocks and the
19351  *		device native block size.  If this function returns a failure,
19352  *		then the values in *capp and *lbap are undefined.
19353  *		This routine should always be called by
19354  *		sd_send_scsi_READ_CAPACITY which will appy any device
19355  *		sd_send_scsi_READ_CAPACITY which will apply any device
19356  *
19357  *   Arguments: un   - ptr to soft state struct for the target
19358  *		capp - ptr to unsigned 64-bit variable to receive the
19359  *			capacity value from the command.
19360  *		lbap - ptr to unsigned 32-bit variable to receive the
19361  *			block size value from the command
19362  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19363  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19364  *			to use the USCSI "direct" chain and bypass the normal
19365  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19366  *			this command is issued as part of an error recovery
19367  *			action.
19368  *
19369  * Return Code: 0   - Success
19370  *		EIO - IO error
19371  *		EACCES - Reservation conflict detected
19372  *		EAGAIN - Device is becoming ready
19373  *		errno return code from sd_send_scsi_cmd()
19374  *
19375  *     Context: Can sleep.  Blocks until command completes.
19376  */
19377 
19378 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19379 
19380 static int
19381 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19382 	uint32_t *lbap, int path_flag)
19383 {
19384 	struct	scsi_extended_sense	sense_buf;
19385 	struct	uscsi_cmd	ucmd_buf;
19386 	union	scsi_cdb	cdb;
19387 	uint64_t		*capacity16_buf;
19388 	uint64_t		capacity;
19389 	uint32_t		lbasize;
19390 	int			status;
19391 
19392 	ASSERT(un != NULL);
19393 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19394 	ASSERT(capp != NULL);
19395 	ASSERT(lbap != NULL);
19396 
19397 	SD_TRACE(SD_LOG_IO, un,
19398 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19399 
19400 	/*
19401 	 * First send a READ_CAPACITY_16 command to the target.
19402 	 *
19403 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19404 	 * Medium Indicator bit is cleared.  The address field must be
19405 	 * zero if the PMI bit is zero.
19406 	 */
19407 	bzero(&cdb, sizeof (cdb));
19408 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19409 
19410 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19411 
19412 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19413 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19414 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19415 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19416 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19417 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19418 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19419 	ucmd_buf.uscsi_timeout	= 60;
19420 
19421 	/*
19422 	 * Read Capacity (16) is a Service Action In command.  One
19423 	 * command byte (0x9E) is overloaded for multiple operations,
19424 	 * with the second CDB byte specifying the desired operation
19425 	 */
19426 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19427 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19428 
19429 	/*
19430 	 * Fill in allocation length field
19431 	 */
19432 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
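	/*
	 * (In the 16-byte service-action CDB the allocation length occupies
	 * bytes 10-13; FORMG4COUNT stores uscsi_buflen there.)
	 */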
19433 
19434 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19435 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19436 
19437 	switch (status) {
19438 	case 0:
19439 		/* Return failure if we did not get valid capacity data. */
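		/*
		 * (With the 32-byte READ CAPACITY 16 parameter data, a
		 * residual greater than 20 means fewer than the 12 bytes
		 * holding the capacity and block length were returned.)
		 */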
19440 		if (ucmd_buf.uscsi_resid > 20) {
19441 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19442 			return (EIO);
19443 		}
19444 
19445 		/*
19446 		 * Read capacity and block size from the READ CAPACITY 16 data.
19447 		 * This data may be adjusted later due to device specific
19448 		 * issues.
19449 		 *
19450 		 * According to the SCSI spec, the READ CAPACITY 16
19451 		 * command returns the following:
19452 		 *
19453 		 *  bytes 0-7: Maximum logical block address available.
19454 		 *		(MSB in byte:0 & LSB in byte:7)
19455 		 *
19456 		 *  bytes 8-11: Block length in bytes
19457 		 *		(MSB in byte:8 & LSB in byte:11)
19458 		 *
19459 		 */
19460 		capacity = BE_64(capacity16_buf[0]);
19461 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19462 
19463 		/*
19464 		 * Done with capacity16_buf
19465 		 */
19466 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19467 
19468 		/*
19469 		 * if the reported capacity is set to all 0xf's, then
19470 		 * this disk is too large.  This could only happen with
19471 		 * a device that supports LBAs larger than 64 bits which
19472 		 * are not defined by any current T10 standards.
19473 		 */
19474 		if (capacity == 0xffffffffffffffff) {
19475 			return (EIO);
19476 		}
19477 		break;	/* Success! */
19478 	case EIO:
19479 		switch (ucmd_buf.uscsi_status) {
19480 		case STATUS_RESERVATION_CONFLICT:
19481 			status = EACCES;
19482 			break;
19483 		case STATUS_CHECK:
19484 			/*
19485 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19486 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19487 			 */
19488 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19489 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19490 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19491 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19492 				return (EAGAIN);
19493 			}
19494 			break;
19495 		default:
19496 			break;
19497 		}
19498 		/* FALLTHRU */
19499 	default:
19500 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19501 		return (status);
19502 	}
19503 
19504 	*capp = capacity;
19505 	*lbap = lbasize;
19506 
19507 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19508 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19509 
19510 	return (0);
19511 }
19512 
19513 
19514 /*
19515  *    Function: sd_send_scsi_START_STOP_UNIT
19516  *
19517  * Description: Issue a scsi START STOP UNIT command to the target.
19518  *
19519  *   Arguments: un    - pointer to driver soft state (unit) structure for
19520  *			this target.
19521  *		flag  - SD_TARGET_START
19522  *			SD_TARGET_STOP
19523  *			SD_TARGET_EJECT
19524  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19525  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19526  *			to use the USCSI "direct" chain and bypass the normal
19527  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19528  *			command is issued as part of an error recovery action.
19529  *
19530  * Return Code: 0   - Success
19531  *		EIO - IO error
19532  *		EACCES - Reservation conflict detected
19533  *		ENXIO  - Not Ready, medium not present
19534  *		errno return code from sd_send_scsi_cmd()
19535  *
19536  *     Context: Can sleep.
19537  */
19538 
19539 static int
19540 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19541 {
19542 	struct	scsi_extended_sense	sense_buf;
19543 	union scsi_cdb		cdb;
19544 	struct uscsi_cmd	ucmd_buf;
19545 	int			status;
19546 
19547 	ASSERT(un != NULL);
19548 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19549 
19550 	SD_TRACE(SD_LOG_IO, un,
19551 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19552 
19553 	if (un->un_f_check_start_stop &&
19554 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19555 	    (un->un_f_start_stop_supported != TRUE)) {
19556 		return (0);
19557 	}
19558 
19559 	bzero(&cdb, sizeof (cdb));
19560 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19561 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19562 
19563 	cdb.scc_cmd = SCMD_START_STOP;
19564 	cdb.cdb_opaque[4] = (uchar_t)flag;
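	/*
	 * In the START STOP UNIT CDB, bit 0 of byte 4 is START and bit 1
	 * is LOEJ: 0x01 spins the unit up, 0x00 spins it down, and 0x02
	 * ejects the medium. The SD_TARGET_* flag values are expected to
	 * encode these combinations directly.
	 */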
19565 
19566 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19567 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19568 	ucmd_buf.uscsi_bufaddr	= NULL;
19569 	ucmd_buf.uscsi_buflen	= 0;
19570 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19571 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19572 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19573 	ucmd_buf.uscsi_timeout	= 200;
19574 
19575 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19576 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19577 
19578 	switch (status) {
19579 	case 0:
19580 		break;	/* Success! */
19581 	case EIO:
19582 		switch (ucmd_buf.uscsi_status) {
19583 		case STATUS_RESERVATION_CONFLICT:
19584 			status = EACCES;
19585 			break;
19586 		case STATUS_CHECK:
19587 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19588 				switch (scsi_sense_key(
19589 						(uint8_t *)&sense_buf)) {
19590 				case KEY_ILLEGAL_REQUEST:
19591 					status = ENOTSUP;
19592 					break;
19593 				case KEY_NOT_READY:
19594 					if (scsi_sense_asc(
19595 						    (uint8_t *)&sense_buf)
19596 					    == 0x3A) {
19597 						status = ENXIO;
19598 					}
19599 					break;
19600 				default:
19601 					break;
19602 				}
19603 			}
19604 			break;
19605 		default:
19606 			break;
19607 		}
19608 		break;
19609 	default:
19610 		break;
19611 	}
19612 
19613 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19614 
19615 	return (status);
19616 }
19617 
19618 
19619 /*
19620  *    Function: sd_start_stop_unit_callback
19621  *
19622  * Description: timeout(9F) callback to begin recovery process for a
19623  *		device that has spun down.
19624  *
19625  *   Arguments: arg - pointer to associated softstate struct.
19626  *
19627  *     Context: Executes in a timeout(9F) thread context
19628  */
19629 
19630 static void
19631 sd_start_stop_unit_callback(void *arg)
19632 {
19633 	struct sd_lun	*un = arg;
19634 	ASSERT(un != NULL);
19635 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19636 
19637 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19638 
19639 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
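	/*
	 * KM_NOSLEEP is required above because timeout(9F) callbacks must
	 * not block; if the dispatch fails, this recovery attempt is
	 * simply dropped.
	 */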
19640 }
19641 
19642 
19643 /*
19644  *    Function: sd_start_stop_unit_task
19645  *
19646  * Description: Recovery procedure when a drive is spun down.
19647  *
19648  *   Arguments: arg - pointer to associated softstate struct.
19649  *
19650  *     Context: Executes in a taskq() thread context
19651  */
19652 
19653 static void
19654 sd_start_stop_unit_task(void *arg)
19655 {
19656 	struct sd_lun	*un = arg;
19657 
19658 	ASSERT(un != NULL);
19659 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19660 
19661 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19662 
19663 	/*
19664 	 * Some unformatted drives report a not ready error; there is no
19665 	 * need to restart if a format has been initiated.
19666 	 */
19667 	mutex_enter(SD_MUTEX(un));
19668 	if (un->un_f_format_in_progress == TRUE) {
19669 		mutex_exit(SD_MUTEX(un));
19670 		return;
19671 	}
19672 	mutex_exit(SD_MUTEX(un));
19673 
19674 	/*
19675 	 * When a START STOP command is issued from here, it is part of a
19676 	 * failure recovery operation and must be issued before any other
19677 	 * commands, including any pending retries. Thus it must be sent
19678 	 * using SD_PATH_DIRECT_PRIORITY. Whether or not the spin up
19679 	 * succeeds, we will start I/O after the attempt.
19680 	 */
19681 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19682 	    SD_PATH_DIRECT_PRIORITY);
19683 
19684 	/*
19685 	 * The above call blocks until the START_STOP_UNIT command completes.
19686 	 * Now that it has completed, we must re-try the original IO that
19687 	 * received the NOT READY condition in the first place. There are
19688 	 * three possible conditions here:
19689 	 *
19690 	 *  (1) The original IO is on un_retry_bp.
19691 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19692 	 *	is NULL.
19693 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19694 	 *	points to some other, unrelated bp.
19695 	 *
19696 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19697 	 * as the argument. If un_retry_bp is NULL, this will initiate
19698 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19699 	 * then this will process the bp on un_retry_bp. That may or may not
19700 	 * be the original IO, but that does not matter: the important thing
19701 	 * is to keep the IO processing going at this point.
19702 	 *
19703 	 * Note: This is a very specific error recovery sequence associated
19704 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19705 	 * serialize the I/O with completion of the spin-up.
19706 	 */
19707 	mutex_enter(SD_MUTEX(un));
19708 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19709 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19710 	    un, un->un_retry_bp);
19711 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19712 	sd_start_cmds(un, un->un_retry_bp);
19713 	mutex_exit(SD_MUTEX(un));
19714 
19715 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19716 }
19717 
19718 
19719 /*
19720  *    Function: sd_send_scsi_INQUIRY
19721  *
19722  * Description: Issue the scsi INQUIRY command.
19723  *
19724  *   Arguments: un - pointer to the softstate struct for the target.
19725  *		bufaddr - buffer to receive the inquiry data.
19726  *		buflen - size of the buffer (and of the data transfer).
19727  *		evpd - EVPD flag, placed in byte 1 of the CDB.
19728  *		page_code - VPD page to request when evpd is set.
19729  *		residp - pointer to receive the residual (may be NULL).
19730  *
19731  * Return Code: 0   - Success
19732  *		errno return code from sd_send_scsi_cmd()
19733  *
19734  *     Context: Can sleep. Does not return until command is completed.
19735  */
19736 
19737 static int
19738 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19739 	uchar_t evpd, uchar_t page_code, size_t *residp)
19740 {
19741 	union scsi_cdb		cdb;
19742 	struct uscsi_cmd	ucmd_buf;
19743 	int			status;
19744 
19745 	ASSERT(un != NULL);
19746 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19747 	ASSERT(bufaddr != NULL);
19748 
19749 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19750 
19751 	bzero(&cdb, sizeof (cdb));
19752 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19753 	bzero(bufaddr, buflen);
19754 
19755 	cdb.scc_cmd = SCMD_INQUIRY;
19756 	cdb.cdb_opaque[1] = evpd;
19757 	cdb.cdb_opaque[2] = page_code;
19758 	FORMG0COUNT(&cdb, buflen);
19759 
19760 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19761 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19762 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19763 	ucmd_buf.uscsi_buflen	= buflen;
19764 	ucmd_buf.uscsi_rqbuf	= NULL;
19765 	ucmd_buf.uscsi_rqlen	= 0;
19766 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19767 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19768 
19769 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19770 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19771 
19772 	if ((status == 0) && (residp != NULL)) {
19773 		*residp = ucmd_buf.uscsi_resid;
19774 	}
19775 
19776 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19777 
19778 	return (status);
19779 }
19780 
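/*
 * Usage sketch (hypothetical caller, not part of the driver): fetching a
 * vital product data page.  Passing evpd == 0x01 sets the EVPD bit in CDB
 * byte 1 so that page_code selects a VPD page; page 0x83 (device
 * identification) and the buffer size are illustrative choices:
 *
 *	uchar_t	inqbuf[0xff];
 *	size_t	resid;
 *
 *	if (sd_send_scsi_INQUIRY(un, inqbuf, sizeof (inqbuf), 0x01, 0x83,
 *	    &resid) == 0) {
 *		(sizeof (inqbuf) - resid) bytes of page data are valid
 *	}
 */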
19781 
19782 /*
19783  *    Function: sd_send_scsi_TEST_UNIT_READY
19784  *
19785  * Description: Issue the scsi TEST UNIT READY command.
19786  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19787  *		prevent retrying failed commands. Use this when the intent
19788  *		is either to check for device readiness, to clear a Unit
19789  *		Attention, or to clear any outstanding sense data.
19790  *		However under specific conditions the expected behavior
19791  *		is for retries to bring a device ready, so use the flag
19792  *		with caution.
19793  *
19794  *   Arguments: un
19795  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19796  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19797  *			0: don't check for media present; do retries on cmd.
19798  *
19799  * Return Code: 0   - Success
19800  *		EIO - IO error
19801  *		EACCES - Reservation conflict detected
19802  *		ENXIO  - Not Ready, medium not present
19803  *		errno return code from sd_send_scsi_cmd()
19804  *
19805  *     Context: Can sleep. Does not return until command is completed.
19806  */
19807 
19808 static int
19809 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19810 {
19811 	struct	scsi_extended_sense	sense_buf;
19812 	union scsi_cdb		cdb;
19813 	struct uscsi_cmd	ucmd_buf;
19814 	int			status;
19815 
19816 	ASSERT(un != NULL);
19817 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19818 
19819 	SD_TRACE(SD_LOG_IO, un,
19820 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19821 
19822 	/*
19823 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19824 	 * timeouts when they receive a TUR and the queue is not empty. Check
19825 	 * the configuration flag set during attach (indicating the drive has
19826 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19827 	 * TUR. If there are pending commands, return success without issuing
19828 	 * the TUR. This is a bit arbitrary, but it is acceptable for
19829 	 * non-removables (i.e. the eliteI disks) and non-clustering
19830 	 * configurations.
19831 	 */
19832 	if (un->un_f_cfg_tur_check == TRUE) {
19833 		mutex_enter(SD_MUTEX(un));
19834 		if (un->un_ncmds_in_transport != 0) {
19835 			mutex_exit(SD_MUTEX(un));
19836 			return (0);
19837 		}
19838 		mutex_exit(SD_MUTEX(un));
19839 	}
19840 
19841 	bzero(&cdb, sizeof (cdb));
19842 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19843 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19844 
19845 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19846 
19847 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19848 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19849 	ucmd_buf.uscsi_bufaddr	= NULL;
19850 	ucmd_buf.uscsi_buflen	= 0;
19851 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19852 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19853 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19854 
19855 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19856 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19857 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19858 	}
19859 	ucmd_buf.uscsi_timeout	= 60;
19860 
19861 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19862 	    UIO_SYSSPACE, UIO_SYSSPACE,
19863 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19864 
19865 	switch (status) {
19866 	case 0:
19867 		break;	/* Success! */
19868 	case EIO:
19869 		switch (ucmd_buf.uscsi_status) {
19870 		case STATUS_RESERVATION_CONFLICT:
19871 			status = EACCES;
19872 			break;
19873 		case STATUS_CHECK:
19874 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19875 				break;
19876 			}
19877 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19878 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19879 				KEY_NOT_READY) &&
19880 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
19881 				status = ENXIO;
19882 			}
19883 			break;
19884 		default:
19885 			break;
19886 		}
19887 		break;
19888 	default:
19889 		break;
19890 	}
19891 
19892 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19893 
19894 	return (status);
19895 }
19896 
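/*
 * Usage sketch (hypothetical caller): probing for media without allowing
 * the command to be retried.  Note that ENXIO distinguishes "medium not
 * present" only when SD_CHECK_FOR_MEDIA is passed:
 *
 *	switch (sd_send_scsi_TEST_UNIT_READY(un,
 *	    SD_CHECK_FOR_MEDIA | SD_DONT_RETRY_TUR)) {
 *	case 0:		device is ready
 *	case ENXIO:	medium not present
 *	case EACCES:	reserved by another initiator
 *	default:	other I/O error
 *	}
 */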
19897 
19898 /*
19899  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19900  *
19901  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19902  *
19903  *   Arguments: un - pointer to the softstate struct for the target.
       *		usr_cmd - the service action: SD_READ_KEYS or SD_READ_RESV.
       *		data_len - length of the data buffer (0 is allowed when
       *			data_bufp is NULL).
       *		data_bufp - buffer to receive the parameter data; if NULL,
       *			a default MHIOC_RESV_KEY_SIZE buffer is allocated.
19904  *
19905  * Return Code: 0   - Success
19906  *		EACCES
19907  *		ENOTSUP
19908  *		errno return code from sd_send_scsi_cmd()
19909  *
19910  *     Context: Can sleep. Does not return until command is completed.
19911  */
19912 
19913 static int
19914 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19915 	uint16_t data_len, uchar_t *data_bufp)
19916 {
19917 	struct scsi_extended_sense	sense_buf;
19918 	union scsi_cdb		cdb;
19919 	struct uscsi_cmd	ucmd_buf;
19920 	int			status;
19921 	int			no_caller_buf = FALSE;
19922 
19923 	ASSERT(un != NULL);
19924 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19925 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19926 
19927 	SD_TRACE(SD_LOG_IO, un,
19928 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19929 
19930 	bzero(&cdb, sizeof (cdb));
19931 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19932 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19933 	if (data_bufp == NULL) {
19934 		/* Allocate a default buf if the caller did not give one */
19935 		ASSERT(data_len == 0);
19936 		data_len  = MHIOC_RESV_KEY_SIZE;
19937 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19938 		no_caller_buf = TRUE;
19939 	}
19940 
19941 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19942 	cdb.cdb_opaque[1] = usr_cmd;
19943 	FORMG1COUNT(&cdb, data_len);
19944 
19945 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19946 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19947 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19948 	ucmd_buf.uscsi_buflen	= data_len;
19949 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19950 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19951 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19952 	ucmd_buf.uscsi_timeout	= 60;
19953 
19954 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19955 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19956 
19957 	switch (status) {
19958 	case 0:
19959 		break;	/* Success! */
19960 	case EIO:
19961 		switch (ucmd_buf.uscsi_status) {
19962 		case STATUS_RESERVATION_CONFLICT:
19963 			status = EACCES;
19964 			break;
19965 		case STATUS_CHECK:
19966 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19967 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19968 				KEY_ILLEGAL_REQUEST)) {
19969 				status = ENOTSUP;
19970 			}
19971 			break;
19972 		default:
19973 			break;
19974 		}
19975 		break;
19976 	default:
19977 		break;
19978 	}
19979 
19980 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
19981 
19982 	if (no_caller_buf == TRUE) {
19983 		kmem_free(data_bufp, data_len);
19984 	}
19985 
19986 	return (status);
19987 }
19988 
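/*
 * Usage sketch (hypothetical caller): reading the registered keys with a
 * minimal buffer.  Callers typically size the buffer from the key count
 * in the returned PRIN header and reissue the command if it was too small:
 *
 *	uint16_t len  = MHIOC_RESV_KEY_SIZE;
 *	uchar_t	*bufp = kmem_zalloc(len, KM_SLEEP);
 *
 *	if (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
 *	    len, bufp) == ENOTSUP) {
 *		device does not support SCSI-3 persistent reservations
 *	}
 *	kmem_free(bufp, len);
 */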
19989 
19990 /*
19991  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
19992  *
19993  * Description: This routine is the driver entry point for handling
19994  *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
19995  *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT and
       *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending the SCSI-3 PROUT
19996  *		commands to the device.
19997  *
19998  *   Arguments: un  -   Pointer to soft state struct for the target.
19999  *		usr_cmd SCSI-3 reservation facility command (one of
20000  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
20001  *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_REGISTERANDIGNOREKEY)
20002  *		usr_bufp - user provided pointer to a register, reserve
20003  *			descriptor, or preempt-and-abort structure
20004  *			(mhioc_register_t, mhioc_resv_desc_t,
       *			mhioc_preemptandabort_t, mhioc_registerandignorekey_t)
20005  *
20006  * Return Code: 0   - Success
20007  *		EACCES
20008  *		ENOTSUP
20009  *		errno return code from sd_send_scsi_cmd()
20010  *
20011  *     Context: Can sleep. Does not return until command is completed.
20012  */
20013 
20014 static int
20015 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
20016 	uchar_t	*usr_bufp)
20017 {
20018 	struct scsi_extended_sense	sense_buf;
20019 	union scsi_cdb		cdb;
20020 	struct uscsi_cmd	ucmd_buf;
20021 	int			status;
20022 	uchar_t			data_len = sizeof (sd_prout_t);
20023 	sd_prout_t		*prp;
20024 
20025 	ASSERT(un != NULL);
20026 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20027 	ASSERT(data_len == 24);	/* required by scsi spec */
20028 
20029 	SD_TRACE(SD_LOG_IO, un,
20030 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20031 
20032 	if (usr_bufp == NULL) {
20033 		return (EINVAL);
20034 	}
20035 
20036 	bzero(&cdb, sizeof (cdb));
20037 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20038 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20039 	prp = kmem_zalloc(data_len, KM_SLEEP);
20040 
20041 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20042 	cdb.cdb_opaque[1] = usr_cmd;
20043 	FORMG1COUNT(&cdb, data_len);
20044 
20045 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20046 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20047 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20048 	ucmd_buf.uscsi_buflen	= data_len;
20049 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20050 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20051 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20052 	ucmd_buf.uscsi_timeout	= 60;
20053 
20054 	switch (usr_cmd) {
20055 	case SD_SCSI3_REGISTER: {
20056 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
20057 
20058 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20059 		bcopy(ptr->newkey.key, prp->service_key,
20060 		    MHIOC_RESV_KEY_SIZE);
20061 		prp->aptpl = ptr->aptpl;
20062 		break;
20063 	}
20064 	case SD_SCSI3_RESERVE:
20065 	case SD_SCSI3_RELEASE: {
20066 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20067 
20068 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20069 		prp->scope_address = BE_32(ptr->scope_specific_addr);
20070 		cdb.cdb_opaque[2] = ptr->type;
20071 		break;
20072 	}
20073 	case SD_SCSI3_PREEMPTANDABORT: {
20074 		mhioc_preemptandabort_t *ptr =
20075 		    (mhioc_preemptandabort_t *)usr_bufp;
20076 
20077 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20078 		bcopy(ptr->victim_key.key, prp->service_key,
20079 		    MHIOC_RESV_KEY_SIZE);
20080 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
20081 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
20082 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
20083 		break;
20084 	}
20085 	case SD_SCSI3_REGISTERANDIGNOREKEY:
20086 	{
20087 		mhioc_registerandignorekey_t *ptr;
20088 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
20089 		bcopy(ptr->newkey.key,
20090 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
20091 		prp->aptpl = ptr->aptpl;
20092 		break;
20093 	}
20094 	default:
20095 		ASSERT(FALSE);
20096 		break;
20097 	}
20098 
20099 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20100 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20101 
20102 	switch (status) {
20103 	case 0:
20104 		break;	/* Success! */
20105 	case EIO:
20106 		switch (ucmd_buf.uscsi_status) {
20107 		case STATUS_RESERVATION_CONFLICT:
20108 			status = EACCES;
20109 			break;
20110 		case STATUS_CHECK:
20111 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20112 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20113 				KEY_ILLEGAL_REQUEST)) {
20114 				status = ENOTSUP;
20115 			}
20116 			break;
20117 		default:
20118 			break;
20119 		}
20120 		break;
20121 	default:
20122 		break;
20123 	}
20124 
20125 	kmem_free(prp, data_len);
20126 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20127 	return (status);
20128 }
20129 
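/*
 * Usage sketch (hypothetical caller): registering a reservation key, as
 * the MHIOCGRP_REGISTER ioctl handler does further below.  The key bytes
 * are illustrative:
 *
 *	mhioc_register_t reg;
 *
 *	bzero(&reg, sizeof (reg));		(old key of all zeros)
 *	bcopy("examplek", reg.newkey.key, MHIOC_RESV_KEY_SIZE);
 *	reg.aptpl = FALSE;
 *
 *	if (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
 *	    (uchar_t *)&reg) == EACCES) {
 *		a conflicting registration or reservation exists
 *	}
 */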
20130 
20131 /*
20132  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20133  *
20134  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20135  *
20136  *   Arguments: un - pointer to the target's soft state struct
       *		dkc - pointer to a dk_callback describing an asynchronous
       *			completion callback; NULL makes the call synchronous
20137  *
20138  * Return Code: 0 - success
20139  *		errno-type error code
20140  *
20141  *     Context: kernel thread context only.
20142  */
20143 
20144 static int
20145 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20146 {
20147 	struct sd_uscsi_info	*uip;
20148 	struct uscsi_cmd	*uscmd;
20149 	union scsi_cdb		*cdb;
20150 	struct buf		*bp;
20151 	int			rval = 0;
20152 
20153 	SD_TRACE(SD_LOG_IO, un,
20154 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20155 
20156 	ASSERT(un != NULL);
20157 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20158 
20159 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20160 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20161 
20162 	/*
20163 	 * Get some memory for the uscsi_cmd struct and initialize it for
20164 	 * the SYNCHRONIZE_CACHE cmd. (The cdb was allocated just above.)
20165 	 */
20166 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20167 	uscmd->uscsi_cdblen = CDB_GROUP1;
20168 	uscmd->uscsi_cdb = (caddr_t)cdb;
20169 	uscmd->uscsi_bufaddr = NULL;
20170 	uscmd->uscsi_buflen = 0;
20171 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20172 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20173 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20174 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20175 	uscmd->uscsi_timeout = sd_io_time;
20176 
20177 	/*
20178 	 * Allocate an sd_uscsi_info struct and fill it with the info
20179 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20180 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20181 	 * since we allocate the buf here in this function, we do not
20182 	 * need to preserve the prior contents of b_private.
20183 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20184 	 */
20185 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20186 	uip->ui_flags = SD_PATH_DIRECT;
20187 	uip->ui_cmdp  = uscmd;
20188 
20189 	bp = getrbuf(KM_SLEEP);
20190 	bp->b_private = uip;
20191 
20192 	/*
20193 	 * Setup buffer to carry uscsi request.
20194 	 */
20195 	bp->b_flags  = B_BUSY;
20196 	bp->b_bcount = 0;
20197 	bp->b_blkno  = 0;
20198 
20199 	if (dkc != NULL) {
20200 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20201 		uip->ui_dkc = *dkc;
20202 	}
20203 
20204 	bp->b_edev = SD_GET_DEV(un);
20205 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20206 
20207 	(void) sd_uscsi_strategy(bp);
20208 
20209 	/*
20210 	 * If this is a synchronous request, wait for completion.
20211 	 * If it is asynchronous, just return and let the b_iodone
20212 	 * callback clean up.
20213 	 * NOTE: On return, un_ncmds_in_driver will be decremented,
20214 	 * but it was also incremented in sd_uscsi_strategy(), so
20215 	 * we should be ok.
20216 	 */
20217 	if (dkc == NULL) {
20218 		(void) biowait(bp);
20219 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20220 	}
20221 
20222 	return (rval);
20223 }
20224 
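/*
 * Usage sketch (hypothetical caller): an asynchronous flush whose
 * completion is reported through the dk_callback.  The callback function
 * and cookie names are assumptions:
 *
 *	struct dk_callback dkc;
 *
 *	dkc.dkc_callback = my_flush_done;
 *	dkc.dkc_cookie	 = my_cookie;
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
 *
 * Passing dkc == NULL instead makes the call synchronous: the routine
 * biowait()s on the buf and returns the final status itself.
 */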
20225 
20226 static int
20227 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20228 {
20229 	struct sd_uscsi_info *uip;
20230 	struct uscsi_cmd *uscmd;
20231 	uint8_t *sense_buf;
20232 	struct sd_lun *un;
20233 	int status;
20234 
20235 	uip = (struct sd_uscsi_info *)(bp->b_private);
20236 	ASSERT(uip != NULL);
20237 
20238 	uscmd = uip->ui_cmdp;
20239 	ASSERT(uscmd != NULL);
20240 
20241 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20242 	ASSERT(sense_buf != NULL);
20243 
20244 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20245 	ASSERT(un != NULL);
20246 
20247 	status = geterror(bp);
20248 	switch (status) {
20249 	case 0:
20250 		break;	/* Success! */
20251 	case EIO:
20252 		switch (uscmd->uscsi_status) {
20253 		case STATUS_RESERVATION_CONFLICT:
20254 			/* Ignore reservation conflict */
20255 			status = 0;
20256 			goto done;
20257 
20258 		case STATUS_CHECK:
20259 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20260 			    (scsi_sense_key(sense_buf) ==
20261 				KEY_ILLEGAL_REQUEST)) {
20262 				/* Ignore Illegal Request error */
20263 				mutex_enter(SD_MUTEX(un));
20264 				un->un_f_sync_cache_supported = FALSE;
20265 				mutex_exit(SD_MUTEX(un));
20266 				status = ENOTSUP;
20267 				goto done;
20268 			}
20269 			break;
20270 		default:
20271 			break;
20272 		}
20273 		/* FALLTHRU */
20274 	default:
20275 		/* Ignore error if the media is not present */
20276 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
20277 			status = 0;
20278 			goto done;
20279 		}
20280 		/* If we reach this, we had an error */
20281 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20282 		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20283 		break;
20284 	}
20285 
20286 done:
20287 	if (uip->ui_dkc.dkc_callback != NULL) {
20288 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20289 	}
20290 
20291 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20292 	freerbuf(bp);
20293 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20294 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20295 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20296 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20297 
20298 	return (status);
20299 }
20300 
20301 
20302 /*
20303  *    Function: sd_send_scsi_GET_CONFIGURATION
20304  *
20305  * Description: Issues the get configuration command to the device.
20306  *		Called from sd_check_for_writable_cd & sd_get_media_info;
20307  *		the caller must ensure that buflen == SD_PROFILE_HEADER_LEN.
       *
20308  *   Arguments: un - pointer to the softstate struct for the target.
20309  *		ucmdbuf - uscsi_cmd struct to be initialized and issued.
20310  *		rqbuf - buffer to receive any request sense data.
20311  *		rqbuflen - size of the request sense buffer.
20312  *		bufaddr - buffer to receive the configuration header data.
20313  *		buflen - size of the data buffer.
20314  *
20315  * Return Code: 0   - Success
20316  *		errno return code from sd_send_scsi_cmd()
20317  *
20318  *     Context: Can sleep. Does not return until command is completed.
20319  *
20320  */
20321 
20322 static int
20323 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20324 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20325 {
20326 	char	cdb[CDB_GROUP1];
20327 	int	status;
20328 
20329 	ASSERT(un != NULL);
20330 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20331 	ASSERT(bufaddr != NULL);
20332 	ASSERT(ucmdbuf != NULL);
20333 	ASSERT(rqbuf != NULL);
20334 
20335 	SD_TRACE(SD_LOG_IO, un,
20336 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20337 
20338 	bzero(cdb, sizeof (cdb));
20339 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20340 	bzero(rqbuf, rqbuflen);
20341 	bzero(bufaddr, buflen);
20342 
20343 	/*
20344 	 * Set up cdb field for the get configuration command.
20345 	 */
20346 	cdb[0] = SCMD_GET_CONFIGURATION;
20347 	cdb[1] = 0x02;  /* Requested Type */
20348 	cdb[8] = SD_PROFILE_HEADER_LEN;
20349 	ucmdbuf->uscsi_cdb = cdb;
20350 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20351 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20352 	ucmdbuf->uscsi_buflen = buflen;
20353 	ucmdbuf->uscsi_timeout = sd_io_time;
20354 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20355 	ucmdbuf->uscsi_rqlen = rqbuflen;
20356 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20357 
20358 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20359 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20360 
20361 	switch (status) {
20362 	case 0:
20363 		break;  /* Success! */
20364 	case EIO:
20365 		switch (ucmdbuf->uscsi_status) {
20366 		case STATUS_RESERVATION_CONFLICT:
20367 			status = EACCES;
20368 			break;
20369 		default:
20370 			break;
20371 		}
20372 		break;
20373 	default:
20374 		break;
20375 	}
20376 
20377 	if (status == 0) {
20378 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20379 		    "sd_send_scsi_GET_CONFIGURATION: data",
20380 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20381 	}
20382 
20383 	SD_TRACE(SD_LOG_IO, un,
20384 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20385 
20386 	return (status);
20387 }
20388 
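/*
 * Usage sketch (hypothetical caller): reading just the feature header to
 * learn the current profile of the media, much as sd_get_media_info()
 * does.  Buffer names are illustrative; per the note above, buflen must
 * equal SD_PROFILE_HEADER_LEN:
 *
 *	struct uscsi_cmd com;
 *	uchar_t	rqbuf[SENSE_LENGTH];
 *	uchar_t	hdr[SD_PROFILE_HEADER_LEN];
 *
 *	if (sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
 *	    hdr, SD_PROFILE_HEADER_LEN) == 0) {
 *		hdr[6] and hdr[7] hold the current profile number
 *	}
 */
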
20389 /*
20390  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20391  *
20392  * Description: Issues the get configuration command to the device to
20393  *              retrieve a specific feature. Called from
20394  *		sd_check_for_writable_cd & sd_set_mmc_caps.
       *
20395  *   Arguments: un - pointer to the softstate struct for the target.
20396  *              ucmdbuf - uscsi_cmd struct to be initialized and issued.
20397  *              rqbuf - buffer to receive any request sense data.
20398  *              rqbuflen - size of the request sense buffer.
20399  *              bufaddr - buffer to receive the feature data.
20400  *              buflen - size of the data buffer.
20401  *		feature - feature code to request (placed in CDB byte 3).
20402  *
20403  * Return Code: 0   - Success
20404  *              errno return code from sd_send_scsi_cmd()
20405  *
20406  *     Context: Can sleep. Does not return until command is completed.
20407  *
20408  */
20409 static int
20410 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20411 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20412 	uchar_t *bufaddr, uint_t buflen, char feature)
20413 {
20414 	char    cdb[CDB_GROUP1];
20415 	int	status;
20416 
20417 	ASSERT(un != NULL);
20418 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20419 	ASSERT(bufaddr != NULL);
20420 	ASSERT(ucmdbuf != NULL);
20421 	ASSERT(rqbuf != NULL);
20422 
20423 	SD_TRACE(SD_LOG_IO, un,
20424 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20425 
20426 	bzero(cdb, sizeof (cdb));
20427 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20428 	bzero(rqbuf, rqbuflen);
20429 	bzero(bufaddr, buflen);
20430 
20431 	/*
20432 	 * Set up cdb field for the get configuration command.
20433 	 */
20434 	cdb[0] = SCMD_GET_CONFIGURATION;
20435 	cdb[1] = 0x02;  /* Requested Type */
20436 	cdb[3] = feature;
20437 	cdb[8] = buflen;
20438 	ucmdbuf->uscsi_cdb = cdb;
20439 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20440 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20441 	ucmdbuf->uscsi_buflen = buflen;
20442 	ucmdbuf->uscsi_timeout = sd_io_time;
20443 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20444 	ucmdbuf->uscsi_rqlen = rqbuflen;
20445 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20446 
20447 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20448 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20449 
20450 	switch (status) {
20451 	case 0:
20452 		break;  /* Success! */
20453 	case EIO:
20454 		switch (ucmdbuf->uscsi_status) {
20455 		case STATUS_RESERVATION_CONFLICT:
20456 			status = EACCES;
20457 			break;
20458 		default:
20459 			break;
20460 		}
20461 		break;
20462 	default:
20463 		break;
20464 	}
20465 
20466 	if (status == 0) {
20467 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20468 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20469 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20470 	}
20471 
20472 	SD_TRACE(SD_LOG_IO, un,
20473 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20474 
20475 	return (status);
20476 }
20477 
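/*
 * Usage sketch (hypothetical caller): probing a single feature descriptor,
 * as sd_set_mmc_caps() does when it checks whether the media is randomly
 * writable.  The feature code shown (RANDOM_WRITABLE) is assumed to be
 * available to callers in this file:
 *
 *	struct uscsi_cmd com;
 *	uchar_t	rqbuf[SENSE_LENGTH];
 *	uchar_t	out[SD_PROFILE_HEADER_LEN];
 *
 *	(void) sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf,
 *	    SENSE_LENGTH, out, SD_PROFILE_HEADER_LEN, RANDOM_WRITABLE);
 */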
20478 
20479 /*
20480  *    Function: sd_send_scsi_MODE_SENSE
20481  *
20482  * Description: Utility function for issuing a scsi MODE SENSE command.
20483  *		Note: This routine uses a consistent implementation for Group0,
20484  *		Group1, and Group2 commands across all platforms. ATAPI devices
20485  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20486  *
20487  *   Arguments: un - pointer to the softstate struct for the target.
20488  *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte), or
20489  *			  CDB_GROUP[1|2] (10 byte)).
20490  *		bufaddr - buffer for page data retrieved from the target.
20491  *		buflen - size of page to be retrieved.
20492  *		page_code - page code of data to be retrieved from the target.
20493  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20494  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20495  *			to use the USCSI "direct" chain and bypass the normal
20496  *			command waitq.
20497  *
20498  * Return Code: 0   - Success
20499  *		errno return code from sd_send_scsi_cmd()
20500  *
20501  *     Context: Can sleep. Does not return until command is completed.
20502  */
20503 
20504 static int
20505 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20506 	size_t buflen,  uchar_t page_code, int path_flag)
20507 {
20508 	struct	scsi_extended_sense	sense_buf;
20509 	union scsi_cdb		cdb;
20510 	struct uscsi_cmd	ucmd_buf;
20511 	int			status;
20512 	int			headlen;
20513 
20514 	ASSERT(un != NULL);
20515 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20516 	ASSERT(bufaddr != NULL);
20517 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20518 	    (cdbsize == CDB_GROUP2));
20519 
20520 	SD_TRACE(SD_LOG_IO, un,
20521 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20522 
20523 	bzero(&cdb, sizeof (cdb));
20524 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20525 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20526 	bzero(bufaddr, buflen);
20527 
20528 	if (cdbsize == CDB_GROUP0) {
20529 		cdb.scc_cmd = SCMD_MODE_SENSE;
20530 		cdb.cdb_opaque[2] = page_code;
20531 		FORMG0COUNT(&cdb, buflen);
20532 		headlen = MODE_HEADER_LENGTH;
20533 	} else {
20534 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20535 		cdb.cdb_opaque[2] = page_code;
20536 		FORMG1COUNT(&cdb, buflen);
20537 		headlen = MODE_HEADER_LENGTH_GRP2;
20538 	}
20539 
20540 	ASSERT(headlen <= buflen);
20541 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20542 
20543 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20544 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20545 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20546 	ucmd_buf.uscsi_buflen	= buflen;
20547 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20548 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20549 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20550 	ucmd_buf.uscsi_timeout	= 60;
20551 
20552 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20553 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20554 
20555 	switch (status) {
20556 	case 0:
20557 		/*
20558 		 * sr_check_wp() uses the 0x3f page code and checks the mode
20559 		 * page header to determine if the target device is
20560 		 * write-protected. But some USB devices return 0 bytes for
20561 		 * the 0x3f page code. For this case, make sure that at least
20562 		 * the mode page header is returned.
20563 		 */
20564 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
20565 			status = EIO;
20566 		break;	/* Success! */
20567 	case EIO:
20568 		switch (ucmd_buf.uscsi_status) {
20569 		case STATUS_RESERVATION_CONFLICT:
20570 			status = EACCES;
20571 			break;
20572 		default:
20573 			break;
20574 		}
20575 		break;
20576 	default:
20577 		break;
20578 	}
20579 
20580 	if (status == 0) {
20581 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20582 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20583 	}
20584 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20585 
20586 	return (status);
20587 }
20588 
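/*
 * Usage sketch (hypothetical caller): fetching the error-recovery mode
 * page with a 6-byte CDB on the standard command path.  The page constant
 * and buffer size are illustrative:
 *
 *	uchar_t	page[MODE_HEADER_LENGTH + 0xff];
 *
 *	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, page, sizeof (page),
 *	    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD) == 0) {
 *		page data begins after the mode header and any block
 *		descriptors
 *	}
 */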
20589 
20590 /*
20591  *    Function: sd_send_scsi_MODE_SELECT
20592  *
20593  * Description: Utility function for issuing a scsi MODE SELECT command.
20594  *		Note: This routine uses a consistent implementation for Group0,
20595  *		Group1, and Group2 commands across all platforms. ATAPI devices
20596  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20597  *
20598  *   Arguments: un - pointer to the softstate struct for the target.
20599  *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte), or
20600  *			  CDB_GROUP[1|2] (10 byte)).
20601  *		bufaddr - buffer containing the page data to be sent to
       *			the target.
20602  *		buflen - size of the page data to be transferred.
20603  *		save_page - boolean to determine if the SP bit should be set.
20604  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20605  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20606  *			to use the USCSI "direct" chain and bypass the normal
20607  *			command waitq.
20608  *
20609  * Return Code: 0   - Success
20610  *		errno return code from sd_send_scsi_cmd()
20611  *
20612  *     Context: Can sleep. Does not return until command is completed.
20613  */
20614 
20615 static int
20616 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20617 	size_t buflen,  uchar_t save_page, int path_flag)
20618 {
20619 	struct	scsi_extended_sense	sense_buf;
20620 	union scsi_cdb		cdb;
20621 	struct uscsi_cmd	ucmd_buf;
20622 	int			status;
20623 
20624 	ASSERT(un != NULL);
20625 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20626 	ASSERT(bufaddr != NULL);
20627 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20628 	    (cdbsize == CDB_GROUP2));
20629 
20630 	SD_TRACE(SD_LOG_IO, un,
20631 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20632 
20633 	bzero(&cdb, sizeof (cdb));
20634 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20635 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20636 
20637 	/* Set the PF bit for many third party drives */
20638 	cdb.cdb_opaque[1] = 0x10;
20639 
20640 	/* Set the savepage(SP) bit if given */
20641 	if (save_page == SD_SAVE_PAGE) {
20642 		cdb.cdb_opaque[1] |= 0x01;
20643 	}
20644 
20645 	if (cdbsize == CDB_GROUP0) {
20646 		cdb.scc_cmd = SCMD_MODE_SELECT;
20647 		FORMG0COUNT(&cdb, buflen);
20648 	} else {
20649 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20650 		FORMG1COUNT(&cdb, buflen);
20651 	}
20652 
20653 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20654 
20655 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20656 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20657 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20658 	ucmd_buf.uscsi_buflen	= buflen;
20659 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20660 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20661 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20662 	ucmd_buf.uscsi_timeout	= 60;
20663 
20664 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20665 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20666 
20667 	switch (status) {
20668 	case 0:
20669 		break;	/* Success! */
20670 	case EIO:
20671 		switch (ucmd_buf.uscsi_status) {
20672 		case STATUS_RESERVATION_CONFLICT:
20673 			status = EACCES;
20674 			break;
20675 		default:
20676 			break;
20677 		}
20678 		break;
20679 	default:
20680 		break;
20681 	}
20682 
20683 	if (status == 0) {
20684 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20685 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20686 	}
20687 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20688 
20689 	return (status);
20690 }
20691 
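/*
 * Usage sketch (hypothetical caller): writing back a page previously
 * obtained via sd_send_scsi_MODE_SENSE(), asking the device to persist
 * it by setting the SP bit.  page/pagelen are assumed to come from that
 * earlier MODE SENSE:
 *
 *	(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, page, pagelen,
 *	    SD_SAVE_PAGE, SD_PATH_STANDARD);
 *
 * Callers normally clear the mode-header device-specific and reserved
 * fields captured by MODE SENSE before re-sending the data.
 */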
20692 
20693 /*
20694  *    Function: sd_send_scsi_RDWR
20695  *
20696  * Description: Issue a scsi READ or WRITE command with the given parameters.
20697  *
20698  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20699  *		cmd:	 SCMD_READ or SCMD_WRITE
20700  *		bufaddr: Address of the caller's buffer that supplies (for a
       *			 WRITE) or receives (for a READ) the RDWR data.
20701  *		buflen:  Length of the caller's buffer for the RDWR data.
20702  *		start_block: Block number for the start of the RDWR operation.
20703  *			 (Assumes target-native block size.)
20706  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20707  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20708  *			to use the USCSI "direct" chain and bypass the normal
20709  *			command waitq.
20710  *
20711  * Return Code: 0   - Success
20712  *		errno return code from sd_send_scsi_cmd()
20713  *
20714  *     Context: Can sleep. Does not return until command is completed.
20715  */
20716 
20717 static int
20718 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
20719 	size_t buflen, daddr_t start_block, int path_flag)
20720 {
20721 	struct	scsi_extended_sense	sense_buf;
20722 	union scsi_cdb		cdb;
20723 	struct uscsi_cmd	ucmd_buf;
20724 	uint32_t		block_count;
20725 	int			status;
20726 	int			cdbsize;
20727 	uchar_t			flag;
20728 
20729 	ASSERT(un != NULL);
20730 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20731 	ASSERT(bufaddr != NULL);
20732 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
20733 
20734 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
20735 
20736 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
20737 		return (EINVAL);
20738 	}
20739 
20740 	mutex_enter(SD_MUTEX(un));
20741 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
20742 	mutex_exit(SD_MUTEX(un));
20743 
20744 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
20745 
20746 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
20747 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
20748 	    bufaddr, buflen, start_block, block_count);
20749 
20750 	bzero(&cdb, sizeof (cdb));
20751 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20752 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20753 
20754 	/* Compute CDB size to use */
20755 	if (start_block > 0xffffffff)
20756 		cdbsize = CDB_GROUP4;
20757 	else if ((start_block & 0xFFE00000) ||
20758 	    (un->un_f_cfg_is_atapi == TRUE))
20759 		cdbsize = CDB_GROUP1;
20760 	else
20761 		cdbsize = CDB_GROUP0;
20762 
20763 	switch (cdbsize) {
20764 	case CDB_GROUP0:	/* 6-byte CDBs */
20765 		cdb.scc_cmd = cmd;
20766 		FORMG0ADDR(&cdb, start_block);
20767 		FORMG0COUNT(&cdb, block_count);
20768 		break;
20769 	case CDB_GROUP1:	/* 10-byte CDBs */
20770 		cdb.scc_cmd = cmd | SCMD_GROUP1;
20771 		FORMG1ADDR(&cdb, start_block);
20772 		FORMG1COUNT(&cdb, block_count);
20773 		break;
20774 	case CDB_GROUP4:	/* 16-byte CDBs */
20775 		cdb.scc_cmd = cmd | SCMD_GROUP4;
20776 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
20777 		FORMG4COUNT(&cdb, block_count);
20778 		break;
20779 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
20780 	default:
20781 		/* All others reserved */
20782 		return (EINVAL);
20783 	}
20784 
20785 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
20786 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20787 
20788 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20789 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20790 	ucmd_buf.uscsi_bufaddr	= bufaddr;
20791 	ucmd_buf.uscsi_buflen	= buflen;
20792 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20793 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20794 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
20795 	ucmd_buf.uscsi_timeout	= 60;
20796 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20797 				UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20798 	switch (status) {
20799 	case 0:
20800 		break;	/* Success! */
20801 	case EIO:
20802 		switch (ucmd_buf.uscsi_status) {
20803 		case STATUS_RESERVATION_CONFLICT:
20804 			status = EACCES;
20805 			break;
20806 		default:
20807 			break;
20808 		}
20809 		break;
20810 	default:
20811 		break;
20812 	}
20813 
20814 	if (status == 0) {
20815 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
20816 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20817 	}
20818 
20819 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
20820 
20821 	return (status);
20822 }
20823 
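/*
 * Usage sketch (hypothetical caller): reading the last block of the
 * device over the direct chain, e.g. to validate a reported capacity.
 * The capacity variable is an assumption; un_tgt_blocksize is the
 * target-native block size whose validity is checked above:
 *
 *	uchar_t	*buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
 *
 *	(void) sd_send_scsi_RDWR(un, SCMD_READ, buf, un->un_tgt_blocksize,
 *	    capacity - 1, SD_PATH_DIRECT);
 *	kmem_free(buf, un->un_tgt_blocksize);
 */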
20824 
20825 /*
20826  *    Function: sd_send_scsi_LOG_SENSE
20827  *
20828  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20829  *
20830  *   Arguments: un:      Pointer to the sd_lun struct for the target.
       *		bufaddr: Buffer to receive the log page data.
       *		buflen:  Length of the data buffer (allocation length).
       *		page_code: Log page to request (CDB byte 2, bits 0-5).
       *		page_control: Page control field (CDB byte 2, bits 6-7).
       *		param_ptr: Parameter pointer (CDB bytes 5 and 6).
       *		path_flag: SD_PATH_DIRECT or SD_PATH_STANDARD, as for
       *			the other sd_send_scsi_* routines.
20831  *
20832  * Return Code: 0   - Success
20833  *		errno return code from sd_send_scsi_cmd()
20834  *
20835  *     Context: Can sleep. Does not return until command is completed.
20836  */
20837 
20838 static int
20839 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
20840 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
20841 	int path_flag)
20842 
20843 {
20844 	struct	scsi_extended_sense	sense_buf;
20845 	union scsi_cdb		cdb;
20846 	struct uscsi_cmd	ucmd_buf;
20847 	int			status;
20848 
20849 	ASSERT(un != NULL);
20850 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20851 
20852 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
20853 
20854 	bzero(&cdb, sizeof (cdb));
20855 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20856 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20857 
20858 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
20859 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
20860 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
20861 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
20862 	FORMG1COUNT(&cdb, buflen);
20863 
20864 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20865 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20866 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20867 	ucmd_buf.uscsi_buflen	= buflen;
20868 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20869 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20870 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20871 	ucmd_buf.uscsi_timeout	= 60;
20872 
20873 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20874 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20875 
20876 	switch (status) {
20877 	case 0:
20878 		break;
20879 	case EIO:
20880 		switch (ucmd_buf.uscsi_status) {
20881 		case STATUS_RESERVATION_CONFLICT:
20882 			status = EACCES;
20883 			break;
20884 		case STATUS_CHECK:
20885 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20886 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20887 				KEY_ILLEGAL_REQUEST) &&
20888 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
20889 				/*
20890 				 * ASC 0x24: INVALID FIELD IN CDB
20891 				 */
20892 				switch (page_code) {
20893 				case START_STOP_CYCLE_PAGE:
20894 					/*
20895 					 * The start stop cycle counter is
20896 					 * implemented as page 0x31 in earlier
20897 					 * generation disks. In new generation
20898 					 * disks the start stop cycle counter is
20899 					 * implemented as page 0xE. To properly
20900 					 * handle this case if an attempt for
20901 					 * log page 0xE is made and fails we
20902 					 * will try again using page 0x31.
20903 					 *
20904 					 * Network storage BU committed to
20905 					 * maintain the page 0x31 for this
20906 					 * purpose and will not have any other
20907 					 * page implemented with page code 0x31
20908 					 * until all disks transition to the
20909 					 * standard page.
20910 					 */
20911 					mutex_enter(SD_MUTEX(un));
20912 					un->un_start_stop_cycle_page =
20913 					    START_STOP_CYCLE_VU_PAGE;
20914 					cdb.cdb_opaque[2] =
20915 					    (char)(page_control << 6) |
20916 					    un->un_start_stop_cycle_page;
20917 					mutex_exit(SD_MUTEX(un));
20918 					status = sd_send_scsi_cmd(
20919 					    SD_GET_DEV(un), &ucmd_buf,
20920 					    UIO_SYSSPACE, UIO_SYSSPACE,
20921 					    UIO_SYSSPACE, path_flag);
20922 
20923 					break;
20924 				case TEMPERATURE_PAGE:
20925 					status = ENOTTY;
20926 					break;
20927 				default:
20928 					break;
20929 				}
20930 			}
20931 			break;
20932 		default:
20933 			break;
20934 		}
20935 		break;
20936 	default:
20937 		break;
20938 	}
20939 
20940 	if (status == 0) {
20941 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
20942 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20943 	}
20944 
20945 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
20946 
20947 	return (status);
20948 }
20949 
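/*
 * Usage sketch (hypothetical caller): reading the current temperature
 * log page, as the DKIOCGTEMPERATURE handler ultimately does.  The
 * buffer size is illustrative; page_control 1 requests the current
 * cumulative values:
 *
 *	uchar_t	logbuf[0xff];
 *
 *	if (sd_send_scsi_LOG_SENSE(un, logbuf, sizeof (logbuf),
 *	    TEMPERATURE_PAGE, 1, 0, SD_PATH_STANDARD) == ENOTTY) {
 *		device does not implement the temperature page
 *	}
 */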
20950 
20951 /*
20952  *    Function: sdioctl
20953  *
20954  * Description: Driver's ioctl(9e) entry point function.
20955  *
20956  *   Arguments: dev     - device number
20957  *		cmd     - ioctl operation to be performed
20958  *		arg     - user argument, contains data to be set or reference
20959  *			  parameter for get
20960  *		flag    - bit flag, indicating open settings, 32/64 bit type
20961  *		cred_p  - user credential pointer
20962  *		rval_p  - calling process return value (OPT)
20963  *
20964  * Return Code: EINVAL
20965  *		ENOTTY
20966  *		ENXIO
20967  *		EIO
20968  *		EFAULT
20969  *		ENOTSUP
20970  *		EPERM
20971  *
20972  *     Context: Called from the device switch at normal priority.
20973  */
20974 
20975 static int
20976 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
20977 {
20978 	struct sd_lun	*un = NULL;
20979 	int		geom_validated = FALSE;
20980 	int		err = 0;
20981 	int		i = 0;
20982 	cred_t		*cr;
20983 
20984 	/*
20985 	 * All device accesses go through sdstrategy, where we check on
20986 	 * suspend status.
20987 	 */
20988 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20989 		return (ENXIO);
20990 	}
20991 
20992 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20993 
20994 	/*
20995 	 * Moved this wait from sd_uscsi_strategy to here for
20996 	 * reasons of deadlock prevention. Internal driver commands,
20997 	 * specifically those to change a device's power level, result
20998 	 * in a call to sd_uscsi_strategy.
20999 	 */
21000 	mutex_enter(SD_MUTEX(un));
21001 	while ((un->un_state == SD_STATE_SUSPENDED) ||
21002 	    (un->un_state == SD_STATE_PM_CHANGING)) {
21003 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21004 	}
21005 	/*
21006 	 * Twiddling the counter here protects commands from now
21007 	 * through to the top of sd_uscsi_strategy. Without the
21008 	 * counter increment, a power down, for example, could get in
21009 	 * after the above check for state is made and before
21010 	 * execution gets to the top of sd_uscsi_strategy.
21011 	 * That would cause problems.
21012 	 */
21013 	un->un_ncmds_in_driver++;
21014 
21015 	if ((un->un_f_geometry_is_valid == FALSE) &&
21016 	    (flag & (FNDELAY | FNONBLOCK))) {
21017 		switch (cmd) {
21018 		case CDROMPAUSE:
21019 		case CDROMRESUME:
21020 		case CDROMPLAYMSF:
21021 		case CDROMPLAYTRKIND:
21022 		case CDROMREADTOCHDR:
21023 		case CDROMREADTOCENTRY:
21024 		case CDROMSTOP:
21025 		case CDROMSTART:
21026 		case CDROMVOLCTRL:
21027 		case CDROMSUBCHNL:
21028 		case CDROMREADMODE2:
21029 		case CDROMREADMODE1:
21030 		case CDROMREADOFFSET:
21031 		case CDROMSBLKMODE:
21032 		case CDROMGBLKMODE:
21033 		case CDROMGDRVSPEED:
21034 		case CDROMSDRVSPEED:
21035 		case CDROMCDDA:
21036 		case CDROMCDXA:
21037 		case CDROMSUBCODE:
21038 			if (!ISCD(un)) {
21039 				un->un_ncmds_in_driver--;
21040 				ASSERT(un->un_ncmds_in_driver >= 0);
21041 				mutex_exit(SD_MUTEX(un));
21042 				return (ENOTTY);
21043 			}
21044 			break;
21045 		case FDEJECT:
21046 		case DKIOCEJECT:
21047 		case CDROMEJECT:
21048 			if (!un->un_f_eject_media_supported) {
21049 				un->un_ncmds_in_driver--;
21050 				ASSERT(un->un_ncmds_in_driver >= 0);
21051 				mutex_exit(SD_MUTEX(un));
21052 				return (ENOTTY);
21053 			}
21054 			break;
21055 		case DKIOCSVTOC:
21056 		case DKIOCSETEFI:
21057 		case DKIOCSMBOOT:
21058 		case DKIOCFLUSHWRITECACHE:
21059 			mutex_exit(SD_MUTEX(un));
21060 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
21061 			if (err != 0) {
21062 				mutex_enter(SD_MUTEX(un));
21063 				un->un_ncmds_in_driver--;
21064 				ASSERT(un->un_ncmds_in_driver >= 0);
21065 				mutex_exit(SD_MUTEX(un));
21066 				return (EIO);
21067 			}
21068 			mutex_enter(SD_MUTEX(un));
21069 			/* FALLTHROUGH */
21070 		case DKIOCREMOVABLE:
21071 		case DKIOCHOTPLUGGABLE:
21072 		case DKIOCINFO:
21073 		case DKIOCGMEDIAINFO:
21074 		case MHIOCENFAILFAST:
21075 		case MHIOCSTATUS:
21076 		case MHIOCTKOWN:
21077 		case MHIOCRELEASE:
21078 		case MHIOCGRP_INKEYS:
21079 		case MHIOCGRP_INRESV:
21080 		case MHIOCGRP_REGISTER:
21081 		case MHIOCGRP_RESERVE:
21082 		case MHIOCGRP_PREEMPTANDABORT:
21083 		case MHIOCGRP_REGISTERANDIGNOREKEY:
21084 		case CDROMCLOSETRAY:
21085 		case USCSICMD:
21086 			goto skip_ready_valid;
21087 		default:
21088 			break;
21089 		}
21090 
21091 		mutex_exit(SD_MUTEX(un));
21092 		err = sd_ready_and_valid(un);
21093 		mutex_enter(SD_MUTEX(un));
21094 		if (err == SD_READY_NOT_VALID) {
21095 			switch (cmd) {
21096 			case DKIOCGAPART:
21097 			case DKIOCGGEOM:
21098 			case DKIOCSGEOM:
21099 			case DKIOCGVTOC:
21100 			case DKIOCSVTOC:
21101 			case DKIOCSAPART:
21102 			case DKIOCG_PHYGEOM:
21103 			case DKIOCG_VIRTGEOM:
21104 				err = ENOTSUP;
21105 				un->un_ncmds_in_driver--;
21106 				ASSERT(un->un_ncmds_in_driver >= 0);
21107 				mutex_exit(SD_MUTEX(un));
21108 				return (err);
21109 			}
21110 		}
21111 		if (err != SD_READY_VALID) {
21112 			switch (cmd) {
21113 			case DKIOCSTATE:
21114 			case CDROMGDRVSPEED:
21115 			case CDROMSDRVSPEED:
21116 			case FDEJECT:	/* for eject command */
21117 			case DKIOCEJECT:
21118 			case CDROMEJECT:
21119 			case DKIOCGETEFI:
21120 			case DKIOCSGEOM:
21121 			case DKIOCREMOVABLE:
21122 			case DKIOCHOTPLUGGABLE:
21123 			case DKIOCSAPART:
21124 			case DKIOCSETEFI:
21125 				break;
21126 			default:
21127 				if (un->un_f_has_removable_media) {
21128 					err = ENXIO;
21129 				} else {
21130 					/* Do not map EACCES to EIO */
21131 					if (err != EACCES)
21132 						err = EIO;
21133 				}
21134 				un->un_ncmds_in_driver--;
21135 				ASSERT(un->un_ncmds_in_driver >= 0);
21136 				mutex_exit(SD_MUTEX(un));
21137 				return (err);
21138 			}
21139 		}
21140 		geom_validated = TRUE;
21141 	}
21142 	if ((un->un_f_geometry_is_valid == TRUE) &&
21143 	    (un->un_solaris_size > 0)) {
21144 		/*
21145 		 * The "geometry_is_valid" flag could be true if we
21146 		 * have an fdisk table but no Solaris partition.
21147 		 */
21148 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
21149 			/* it is EFI, so return ENOTSUP for these */
21150 			switch (cmd) {
21151 			case DKIOCGAPART:
21152 			case DKIOCGGEOM:
21153 			case DKIOCGVTOC:
21154 			case DKIOCSVTOC:
21155 			case DKIOCSAPART:
21156 				err = ENOTSUP;
21157 				un->un_ncmds_in_driver--;
21158 				ASSERT(un->un_ncmds_in_driver >= 0);
21159 				mutex_exit(SD_MUTEX(un));
21160 				return (err);
21161 			}
21162 		}
21163 	}
21164 
21165 skip_ready_valid:
21166 	mutex_exit(SD_MUTEX(un));
21167 
21168 	switch (cmd) {
21169 	case DKIOCINFO:
21170 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21171 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21172 		break;
21173 
21174 	case DKIOCGMEDIAINFO:
21175 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21176 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21177 		break;
21178 
21179 	case DKIOCGGEOM:
21180 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21181 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21182 		    geom_validated);
21183 		break;
21184 
21185 	case DKIOCSGEOM:
21186 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21187 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21188 		break;
21189 
21190 	case DKIOCGAPART:
21191 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21192 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21193 		    geom_validated);
21194 		break;
21195 
21196 	case DKIOCSAPART:
21197 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21198 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21199 		break;
21200 
21201 	case DKIOCGVTOC:
21202 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21203 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21204 		    geom_validated);
21205 		break;
21206 
21207 	case DKIOCGETEFI:
21208 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21209 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21210 		break;
21211 
21212 	case DKIOCPARTITION:
21213 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21214 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21215 		break;
21216 
21217 	case DKIOCSVTOC:
21218 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21219 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21220 		break;
21221 
21222 	case DKIOCSETEFI:
21223 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21224 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21225 		break;
21226 
21227 	case DKIOCGMBOOT:
21228 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21229 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21230 		break;
21231 
21232 	case DKIOCSMBOOT:
21233 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21234 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21235 		break;
21236 
21237 	case DKIOCLOCK:
21238 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21239 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21240 		    SD_PATH_STANDARD);
21241 		break;
21242 
21243 	case DKIOCUNLOCK:
21244 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21245 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21246 		    SD_PATH_STANDARD);
21247 		break;
21248 
21249 	case DKIOCSTATE: {
21250 		enum dkio_state		state;
21251 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21252 
21253 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21254 			err = EFAULT;
21255 		} else {
21256 			err = sd_check_media(dev, state);
21257 			if (err == 0) {
21258 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21259 				    sizeof (int), flag) != 0)
21260 					err = EFAULT;
21261 			}
21262 		}
21263 		break;
21264 	}
21265 
21266 	case DKIOCREMOVABLE:
21267 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21268 		/*
21269 		 * At present, vold only does automount for removable-media
21270 		 * devices. In order not to break current applications, we
21271 		 * still let hotpluggable devices pretend to be removable-media
21272 		 * devices for vold. In the near future, once vold is EOL'ed,
21273 		 * we should remove this workaround.
21274 		 */
21275 		if (un->un_f_has_removable_media || un->un_f_is_hotpluggable) {
21276 			i = 1;
21277 		} else {
21278 			i = 0;
21279 		}
21280 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21281 			err = EFAULT;
21282 		} else {
21283 			err = 0;
21284 		}
21285 		break;
21286 
21287 	case DKIOCHOTPLUGGABLE:
21288 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21289 		if (un->un_f_is_hotpluggable) {
21290 			i = 1;
21291 		} else {
21292 			i = 0;
21293 		}
21294 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21295 			err = EFAULT;
21296 		} else {
21297 			err = 0;
21298 		}
21299 		break;
21300 
21301 	case DKIOCGTEMPERATURE:
21302 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21303 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21304 		break;
21305 
21306 	case MHIOCENFAILFAST:
21307 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21308 		if ((err = drv_priv(cred_p)) == 0) {
21309 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21310 		}
21311 		break;
21312 
21313 	case MHIOCTKOWN:
21314 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21315 		if ((err = drv_priv(cred_p)) == 0) {
21316 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21317 		}
21318 		break;
21319 
21320 	case MHIOCRELEASE:
21321 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21322 		if ((err = drv_priv(cred_p)) == 0) {
21323 			err = sd_mhdioc_release(dev);
21324 		}
21325 		break;
21326 
21327 	case MHIOCSTATUS:
21328 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21329 		if ((err = drv_priv(cred_p)) == 0) {
21330 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21331 			case 0:
21332 				err = 0;
21333 				break;
21334 			case EACCES:
21335 				*rval_p = 1;
21336 				err = 0;
21337 				break;
21338 			default:
21339 				err = EIO;
21340 				break;
21341 			}
21342 		}
21343 		break;
21344 
21345 	case MHIOCQRESERVE:
21346 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21347 		if ((err = drv_priv(cred_p)) == 0) {
21348 			err = sd_reserve_release(dev, SD_RESERVE);
21349 		}
21350 		break;
21351 
21352 	case MHIOCREREGISTERDEVID:
21353 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21354 		if (drv_priv(cred_p) == EPERM) {
21355 			err = EPERM;
21356 		} else if (!un->un_f_devid_supported) {
21357 			err = ENOTTY;
21358 		} else {
21359 			err = sd_mhdioc_register_devid(dev);
21360 		}
21361 		break;
21362 
21363 	case MHIOCGRP_INKEYS:
21364 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21365 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21366 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21367 				err = ENOTSUP;
21368 			} else {
21369 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21370 				    flag);
21371 			}
21372 		}
21373 		break;
21374 
21375 	case MHIOCGRP_INRESV:
21376 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21377 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21378 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21379 				err = ENOTSUP;
21380 			} else {
21381 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21382 			}
21383 		}
21384 		break;
21385 
21386 	case MHIOCGRP_REGISTER:
21387 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21388 		if ((err = drv_priv(cred_p)) != EPERM) {
21389 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21390 				err = ENOTSUP;
21391 			} else if (arg != NULL) {
21392 				mhioc_register_t reg;
21393 				if (ddi_copyin((void *)arg, &reg,
21394 				    sizeof (mhioc_register_t), flag) != 0) {
21395 					err = EFAULT;
21396 				} else {
21397 					err =
21398 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21399 					    un, SD_SCSI3_REGISTER,
21400 					    (uchar_t *)&reg);
21401 				}
21402 			}
21403 		}
21404 		break;
21405 
21406 	case MHIOCGRP_RESERVE:
21407 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21408 		if ((err = drv_priv(cred_p)) != EPERM) {
21409 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21410 				err = ENOTSUP;
21411 			} else if (arg != NULL) {
21412 				mhioc_resv_desc_t resv_desc;
21413 				if (ddi_copyin((void *)arg, &resv_desc,
21414 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21415 					err = EFAULT;
21416 				} else {
21417 					err =
21418 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21419 					    un, SD_SCSI3_RESERVE,
21420 					    (uchar_t *)&resv_desc);
21421 				}
21422 			}
21423 		}
21424 		break;
21425 
21426 	case MHIOCGRP_PREEMPTANDABORT:
21427 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21428 		if ((err = drv_priv(cred_p)) != EPERM) {
21429 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21430 				err = ENOTSUP;
21431 			} else if (arg != NULL) {
21432 				mhioc_preemptandabort_t preempt_abort;
21433 				if (ddi_copyin((void *)arg, &preempt_abort,
21434 				    sizeof (mhioc_preemptandabort_t),
21435 				    flag) != 0) {
21436 					err = EFAULT;
21437 				} else {
21438 					err =
21439 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21440 					    un, SD_SCSI3_PREEMPTANDABORT,
21441 					    (uchar_t *)&preempt_abort);
21442 				}
21443 			}
21444 		}
21445 		break;
21446 
21447 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21448 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21449 		if ((err = drv_priv(cred_p)) != EPERM) {
21450 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21451 				err = ENOTSUP;
21452 			} else if (arg != NULL) {
21453 				mhioc_registerandignorekey_t r_and_i;
21454 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21455 				    sizeof (mhioc_registerandignorekey_t),
21456 				    flag) != 0) {
21457 					err = EFAULT;
21458 				} else {
21459 					err =
21460 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21461 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21462 					    (uchar_t *)&r_and_i);
21463 				}
21464 			}
21465 		}
21466 		break;
21467 
21468 	case USCSICMD:
21469 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21470 		cr = ddi_get_cred();
21471 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21472 			err = EPERM;
21473 		} else {
21474 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21475 		}
21476 		break;
21477 
21478 	case CDROMPAUSE:
21479 	case CDROMRESUME:
21480 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21481 		if (!ISCD(un)) {
21482 			err = ENOTTY;
21483 		} else {
21484 			err = sr_pause_resume(dev, cmd);
21485 		}
21486 		break;
21487 
21488 	case CDROMPLAYMSF:
21489 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21490 		if (!ISCD(un)) {
21491 			err = ENOTTY;
21492 		} else {
21493 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21494 		}
21495 		break;
21496 
21497 	case CDROMPLAYTRKIND:
21498 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21499 #if defined(__i386) || defined(__amd64)
21500 		/*
21501 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21502 		 */
21503 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21504 #else
21505 		if (!ISCD(un)) {
21506 #endif
21507 			err = ENOTTY;
21508 		} else {
21509 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21510 		}
21511 		break;
21512 
21513 	case CDROMREADTOCHDR:
21514 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21515 		if (!ISCD(un)) {
21516 			err = ENOTTY;
21517 		} else {
21518 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21519 		}
21520 		break;
21521 
21522 	case CDROMREADTOCENTRY:
21523 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21524 		if (!ISCD(un)) {
21525 			err = ENOTTY;
21526 		} else {
21527 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21528 		}
21529 		break;
21530 
21531 	case CDROMSTOP:
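	/*
	 * Illustrative sketch (not part of the driver): reading the
	 * lead-out entry of the TOC from user space in MSF format;
	 * fd is an assumed open raw CD-ROM descriptor.
	 *
	 *	struct cdrom_tocentry te;
	 *
	 *	te.cdte_track = CDROM_LEADOUT;
	 *	te.cdte_format = CDROM_MSF;
	 *	(void) ioctl(fd, CDROMREADTOCENTRY, &te);
	 */
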
21532 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21533 		if (!ISCD(un)) {
21534 			err = ENOTTY;
21535 		} else {
21536 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21537 			    SD_PATH_STANDARD);
21538 		}
21539 		break;
21540 
21541 	case CDROMSTART:
21542 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21543 		if (!ISCD(un)) {
21544 			err = ENOTTY;
21545 		} else {
21546 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21547 			    SD_PATH_STANDARD);
21548 		}
21549 		break;
21550 
21551 	case CDROMCLOSETRAY:
21552 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21553 		if (!ISCD(un)) {
21554 			err = ENOTTY;
21555 		} else {
21556 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21557 			    SD_PATH_STANDARD);
21558 		}
21559 		break;
21560 
21561 	case FDEJECT:	/* for eject command */
21562 	case DKIOCEJECT:
21563 	case CDROMEJECT:
21564 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21565 		if (!un->un_f_eject_media_supported) {
21566 			err = ENOTTY;
21567 		} else {
21568 			err = sr_eject(dev);
21569 		}
21570 		break;
21571 
21572 	case CDROMVOLCTRL:
21573 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21574 		if (!ISCD(un)) {
21575 			err = ENOTTY;
21576 		} else {
21577 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21578 		}
21579 		break;
21580 
21581 	case CDROMSUBCHNL:
21582 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21583 		if (!ISCD(un)) {
21584 			err = ENOTTY;
21585 		} else {
21586 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21587 		}
21588 		break;
21589 
21590 	case CDROMREADMODE2:
21591 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21592 		if (!ISCD(un)) {
21593 			err = ENOTTY;
21594 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21595 			/*
21596 			 * If the drive supports READ CD, use that instead of
21597 			 * switching the LBA size via a MODE SELECT
21598 			 * Block Descriptor
21599 			 */
21600 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21601 		} else {
21602 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21603 		}
21604 		break;
21605 
21606 	case CDROMREADMODE1:
21607 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21608 		if (!ISCD(un)) {
21609 			err = ENOTTY;
21610 		} else {
21611 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21612 		}
21613 		break;
21614 
21615 	case CDROMREADOFFSET:
21616 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21617 		if (!ISCD(un)) {
21618 			err = ENOTTY;
21619 		} else {
21620 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21621 			    flag);
21622 		}
21623 		break;
21624 
21625 	case CDROMSBLKMODE:
21626 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21627 		/*
21628 		 * There is no means of changing the block size on ATAPI
21629 		 * drives, so return ENOTTY if the drive type is ATAPI.
21630 		 */
21631 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21632 			err = ENOTTY;
21633 		} else if (un->un_f_mmc_cap == TRUE) {
21634 
21635 			/*
21636 			 * MMC Devices do not support changing the
21637 			 * logical block size
21638 			 *
21639 			 * Note: EINVAL is being returned instead of ENOTTY to
21640 		 * maintain consistency with the original mmc
21641 			 * driver update.
21642 			 */
21643 			err = EINVAL;
21644 		} else {
21645 			mutex_enter(SD_MUTEX(un));
21646 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21647 			    (un->un_ncmds_in_transport > 0)) {
21648 				mutex_exit(SD_MUTEX(un));
21649 				err = EINVAL;
21650 			} else {
21651 				mutex_exit(SD_MUTEX(un));
21652 				err = sr_change_blkmode(dev, cmd, arg, flag);
21653 			}
21654 		}
21655 		break;
21656 
21657 	case CDROMGBLKMODE:
21658 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21659 		if (!ISCD(un)) {
21660 			err = ENOTTY;
21661 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21662 		    (un->un_f_blockcount_is_valid != FALSE)) {
21663 			/*
21664 			 * Drive is an ATAPI drive so return target block
21665 			 * size for ATAPI drives since we cannot change the
21666 			 * blocksize on ATAPI drives. Used primarily to detect
21667 			 * if an ATAPI cdrom is present.
21668 			 */
21669 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21670 			    sizeof (int), flag) != 0) {
21671 				err = EFAULT;
21672 			} else {
21673 				err = 0;
21674 			}
21675 
21676 		} else {
21677 			/*
21678 			 * Drive supports changing block sizes via a Mode
21679 			 * Select.
21680 			 */
21681 			err = sr_change_blkmode(dev, cmd, arg, flag);
21682 		}
21683 		break;
21684 
21685 	case CDROMGDRVSPEED:
21686 	case CDROMSDRVSPEED:
21687 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21688 		if (!ISCD(un)) {
21689 			err = ENOTTY;
21690 		} else if (un->un_f_mmc_cap == TRUE) {
21691 			/*
21692 			 * Note: In the future the driver implementation
21693 			 * for getting and setting cd speed should
21694 			 * entail:
21695 			 * 1) If non-mmc try the Toshiba mode page
21696 			 *    (sr_change_speed)
21697 			 * 2) If mmc but no support for Real Time Streaming try
21698 			 *    the SET CD SPEED (0xBB) command
21699 			 *    (sr_atapi_change_speed)
21700 			 * 3) If mmc and support for Real Time Streaming
21701 			 *    try the GET PERFORMANCE and SET STREAMING
21702 			 *    commands (not yet implemented, 4380808)
21703 			 */
21704 			/*
21705 			 * As per recent MMC spec, CD-ROM speed is variable
21706 			 * and changes with LBA. Since there is no such
21707 			 * thing as drive speed now, fail this ioctl.
21708 			 *
21709 			 * Note: EINVAL is returned for consistency with the
21710 			 * original implementation, which included support for
21711 			 * getting the drive speed of mmc devices but not
21712 			 * setting the drive speed. Thus EINVAL would be
21713 			 * returned if a set request was made for an mmc
21714 			 * device. We no longer support get or set speed for
21715 			 * mmc but need to remain consistent with regard
21716 			 * to the error code returned.
21717 			 */
21718 			err = EINVAL;
21719 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21720 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21721 		} else {
21722 			err = sr_change_speed(dev, cmd, arg, flag);
21723 		}
21724 		break;
21725 
21726 	case CDROMCDDA:
21727 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21728 		if (!ISCD(un)) {
21729 			err = ENOTTY;
21730 		} else {
21731 			err = sr_read_cdda(dev, (void *)arg, flag);
21732 		}
21733 		break;
21734 
21735 	case CDROMCDXA:
21736 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21737 		if (!ISCD(un)) {
21738 			err = ENOTTY;
21739 		} else {
21740 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21741 		}
21742 		break;
21743 
21744 	case CDROMSUBCODE:
21745 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21746 		if (!ISCD(un)) {
21747 			err = ENOTTY;
21748 		} else {
21749 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21750 		}
21751 		break;
21752 
21753 	case DKIOCPARTINFO: {
21754 		/*
21755 		 * Return parameters describing the selected disk slice.
21756 		 * Note: this ioctl is for the intel platform only
21757 		 */
21758 #if defined(__i386) || defined(__amd64)
21759 		int part;
21760 
21761 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21762 		part = SDPART(dev);
21763 
21764 		/* don't check un_solaris_size for pN */
21765 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21766 			err = EIO;
21767 		} else {
21768 			struct part_info p;
21769 
21770 			p.p_start = (daddr_t)un->un_offset[part];
21771 			p.p_length = (int)un->un_map[part].dkl_nblk;
21772 #ifdef _MULTI_DATAMODEL
21773 			switch (ddi_model_convert_from(flag & FMODELS)) {
21774 			case DDI_MODEL_ILP32:
21775 			{
21776 				struct part_info32 p32;
21777 
21778 				p32.p_start = (daddr32_t)p.p_start;
21779 				p32.p_length = p.p_length;
21780 				if (ddi_copyout(&p32, (void *)arg,
21781 				    sizeof (p32), flag))
21782 					err = EFAULT;
21783 				break;
21784 			}
21785 
21786 			case DDI_MODEL_NONE:
21787 			{
21788 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21789 				    flag))
21790 					err = EFAULT;
21791 				break;
21792 			}
21793 			}
21794 #else /* ! _MULTI_DATAMODEL */
21795 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21796 				err = EFAULT;
21797 #endif /* _MULTI_DATAMODEL */
21798 		}
21799 #else
21800 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21801 		err = ENOTTY;
21802 #endif
21803 		break;
21804 	}
21805 
21806 	case DKIOCG_PHYGEOM: {
21807 		/* Return the driver's notion of the media physical geometry */
21808 #if defined(__i386) || defined(__amd64)
21809 		uint64_t	capacity;
21810 		struct dk_geom	disk_geom;
21811 		struct dk_geom	*dkgp = &disk_geom;
21812 
21813 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21814 		mutex_enter(SD_MUTEX(un));
21815 
21816 		if (un->un_g.dkg_nhead != 0 &&
21817 		    un->un_g.dkg_nsect != 0) {
21818 			/*
21819 			 * We succeeded in getting a geometry, but
21820 			 * right now it is being reported as just the
21821 			 * Solaris fdisk partition, just like for
21822 			 * DKIOCGGEOM. We need to change that to be
21823 			 * correct for the entire disk now.
21824 			 */
21825 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21826 			dkgp->dkg_acyl = 0;
21827 			dkgp->dkg_ncyl = un->un_blockcount /
21828 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21829 		} else {
21830 			bzero(dkgp, sizeof (struct dk_geom));
21831 			/*
21832 			 * This disk does not have a Solaris VTOC
21833 			 * so we must present a physical geometry
21834 			 * that will remain consistent regardless
21835 			 * of how the disk is used. This will ensure
21836 			 * that the geometry does not change regardless
21837 			 * of the fdisk partition type (ie. EFI, FAT32,
21838 			 * Solaris, etc).
21839 			 */
21840 			if (ISCD(un)) {
21841 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21842 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21843 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21844 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21845 			} else {
21846 				/*
21847 				 * Invalid un_blockcount can generate invalid
21848 				 * dk_geom and may result in division by zero
21849 				 * system failure. Should make sure blockcount
21850 				 * is valid before using it here.
21851 				 */
21852 				if (un->un_f_blockcount_is_valid == FALSE) {
21853 					mutex_exit(SD_MUTEX(un));
21854 					err = EIO;
21855 
21856 					break;
21857 				}
21858 
21859 				/*
21860 				 * Refer to comments related to off-by-1 at the
21861 				 * header of this file
21862 				 */
21863 				if (!un->un_f_capacity_adjusted &&
21864 				    !un->un_f_has_removable_media &&
21865 				    !un->un_f_is_hotpluggable &&
21866 				    (un->un_tgt_blocksize ==
21867 				    un->un_sys_blocksize))
21868 					capacity = un->un_blockcount - 1;
21869 				else
21870 					capacity = un->un_blockcount;
21871 
21872 				sd_convert_geometry(capacity, dkgp);
21873 				dkgp->dkg_acyl = 0;
21874 				dkgp->dkg_ncyl = capacity /
21875 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21876 			}
21877 		}
21878 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21879 
21880 		if (ddi_copyout(dkgp, (void *)arg,
21881 		    sizeof (struct dk_geom), flag)) {
21882 			mutex_exit(SD_MUTEX(un));
21883 			err = EFAULT;
21884 		} else {
21885 			mutex_exit(SD_MUTEX(un));
21886 			err = 0;
21887 		}
21888 #else
21889 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21890 		err = ENOTTY;
21891 #endif
21892 		break;
21893 	}
21894 
21895 	case DKIOCG_VIRTGEOM: {
21896 		/* Return the driver's notion of the media's logical geometry */
21897 #if defined(__i386) || defined(__amd64)
21898 		struct dk_geom	disk_geom;
21899 		struct dk_geom	*dkgp = &disk_geom;
21900 
21901 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21902 		mutex_enter(SD_MUTEX(un));
21903 		/*
21904 		 * If there is no HBA geometry available, or
21905 		 * if the HBA returned us something that doesn't
21906 		 * really fit into an Int 13/function 8 geometry
21907 		 * result, just fail the ioctl.  See PSARC 1998/313.
21908 		 */
21909 		if (un->un_lgeom.g_nhead == 0 ||
21910 		    un->un_lgeom.g_nsect == 0 ||
21911 		    un->un_lgeom.g_ncyl > 1024) {
21912 			mutex_exit(SD_MUTEX(un));
21913 			err = EINVAL;
21914 		} else {
21915 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21916 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21917 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21918 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21919 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21920 
21921 			if (ddi_copyout(dkgp, (void *)arg,
21922 			    sizeof (struct dk_geom), flag)) {
21923 				mutex_exit(SD_MUTEX(un));
21924 				err = EFAULT;
21925 			} else {
21926 				mutex_exit(SD_MUTEX(un));
21927 				err = 0;
21928 			}
21929 		}
21930 #else
21931 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21932 		err = ENOTTY;
21933 #endif
21934 		break;
21935 	}
21936 #ifdef SDDEBUG
21937 /* RESET/ABORTS testing ioctls */
21938 	case DKIOCRESET: {
21939 		int	reset_level;
21940 
21941 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21942 			err = EFAULT;
21943 		} else {
21944 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21945 			    "reset_level = 0x%x\n", reset_level);
21946 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21947 				err = 0;
21948 			} else {
21949 				err = EIO;
21950 			}
21951 		}
21952 		break;
21953 	}
21954 
21955 	case DKIOCABORT:
21956 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21957 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21958 			err = 0;
21959 		} else {
21960 			err = EIO;
21961 		}
21962 		break;
21963 #endif
21964 
21965 #ifdef SD_FAULT_INJECTION
21966 /* SDIOC FaultInjection testing ioctls */
21967 	case SDIOCSTART:
21968 	case SDIOCSTOP:
21969 	case SDIOCINSERTPKT:
21970 	case SDIOCINSERTXB:
21971 	case SDIOCINSERTUN:
21972 	case SDIOCINSERTARQ:
21973 	case SDIOCPUSH:
21974 	case SDIOCRETRIEVE:
21975 	case SDIOCRUN:
21976 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
21977 		    "SDIOC detected cmd:0x%X:\n", cmd);
21978 		/* call error generator */
21979 		sd_faultinjection_ioctl(cmd, arg, un);
21980 		err = 0;
21981 		break;
21982 
21983 #endif /* SD_FAULT_INJECTION */
21984 
21985 	case DKIOCFLUSHWRITECACHE:
21986 		{
21987 			struct dk_callback *dkc = (struct dk_callback *)arg;
21988 
21989 			mutex_enter(SD_MUTEX(un));
21990 			if (!un->un_f_sync_cache_supported ||
21991 			    !un->un_f_write_cache_enabled) {
21992 				err = un->un_f_sync_cache_supported ?
21993 				    0 : ENOTSUP;
21994 				mutex_exit(SD_MUTEX(un));
21995 				if ((flag & FKIOCTL) && dkc != NULL &&
21996 				    dkc->dkc_callback != NULL) {
21997 					(*dkc->dkc_callback)(dkc->dkc_cookie,
21998 					    err);
21999 					/*
22000 					 * Did callback and reported error.
22001 					 * Since we did a callback, ioctl
22002 					 * should return 0.
22003 					 */
22004 					err = 0;
22005 				}
22006 				break;
22007 			}
22008 			mutex_exit(SD_MUTEX(un));
22009 
22010 			if ((flag & FKIOCTL) && dkc != NULL &&
22011 			    dkc->dkc_callback != NULL) {
22012 				/* async SYNC CACHE request */
22013 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22014 			} else {
22015 				/* synchronous SYNC CACHE request */
22016 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22017 			}
22018 		}
22019 		break;
22020 
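	/*
	 * Illustrative sketch (not part of the driver): an in-kernel
	 * consumer can request an asynchronous flush through the LDI by
	 * passing FKIOCTL and a dk_callback; lh, my_done, my_state and
	 * rval are hypothetical names.
	 *
	 *	struct dk_callback dkc;
	 *
	 *	dkc.dkc_callback = my_done;
	 *	dkc.dkc_cookie = my_state;
	 *	(void) ldi_ioctl(lh, DKIOCFLUSHWRITECACHE, (intptr_t)&dkc,
	 *	    FKIOCTL | FWRITE, kcred, &rval);
	 */
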
22021 	case DKIOCGETWCE: {
22022 
22023 		int wce;
22024 
22025 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
22026 			break;
22027 		}
22028 
22029 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22030 			err = EFAULT;
22031 		}
22032 		break;
22033 	}
22034 
22035 	case DKIOCSETWCE: {
22036 
22037 		int wce, sync_supported;
22038 
22039 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22040 			err = EFAULT;
22041 			break;
22042 		}
22043 
22044 		/*
22045 		 * Synchronize multiple threads trying to enable
22046 		 * or disable the cache via the un_f_wcc_cv
22047 		 * condition variable.
22048 		 */
22049 		mutex_enter(SD_MUTEX(un));
22050 
22051 		/*
22052 		 * Don't allow the cache to be enabled if the
22053 		 * config file has it disabled.
22054 		 */
22055 		if (un->un_f_opt_disable_cache && wce) {
22056 			mutex_exit(SD_MUTEX(un));
22057 			err = EINVAL;
22058 			break;
22059 		}
22060 
22061 		/*
22062 		 * Wait for write cache change in progress
22063 		 * bit to be clear before proceeding.
22064 		 */
22065 		while (un->un_f_wcc_inprog)
22066 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22067 
22068 		un->un_f_wcc_inprog = 1;
22069 
22070 		if (un->un_f_write_cache_enabled && wce == 0) {
22071 			/*
22072 			 * Disable the write cache.  Don't clear
22073 			 * un_f_write_cache_enabled until after
22074 			 * the mode select and flush are complete.
22075 			 */
22076 			sync_supported = un->un_f_sync_cache_supported;
22077 			mutex_exit(SD_MUTEX(un));
22078 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22079 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
22080 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22081 			}
22082 
22083 			mutex_enter(SD_MUTEX(un));
22084 			if (err == 0) {
22085 				un->un_f_write_cache_enabled = 0;
22086 			}
22087 
22088 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22089 			/*
22090 			 * Set un_f_write_cache_enabled first, so there is
22091 			 * no window where the cache is enabled, but the
22092 			 * bit says it isn't.
22093 			 */
22094 			un->un_f_write_cache_enabled = 1;
22095 			mutex_exit(SD_MUTEX(un));
22096 
22097 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22098 			    SD_CACHE_ENABLE);
22099 
22100 			mutex_enter(SD_MUTEX(un));
22101 
22102 			if (err) {
22103 				un->un_f_write_cache_enabled = 0;
22104 			}
22105 		}
22106 
22107 		un->un_f_wcc_inprog = 0;
22108 		cv_broadcast(&un->un_wcc_cv);
22109 		mutex_exit(SD_MUTEX(un));
22110 		break;
22111 	}
22112 
22113 	default:
22114 		err = ENOTTY;
22115 		break;
22116 	}
22117 	mutex_enter(SD_MUTEX(un));
22118 	un->un_ncmds_in_driver--;
22119 	ASSERT(un->un_ncmds_in_driver >= 0);
22120 	mutex_exit(SD_MUTEX(un));
22121 
22122 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22123 	return (err);
22124 }
22125 
22126 
22127 /*
22128  *    Function: sd_uscsi_ioctl
22129  *
22130  * Description: This routine is the driver entry point for handling USCSI ioctl
22131  *		requests (USCSICMD).
22132  *
22133  *   Arguments: dev	- the device number
22134  *		arg	- user provided scsi command
22135  *		flag	- this argument is a pass through to ddi_copyxxx()
22136  *			  directly from the mode argument of ioctl().
22137  *
22138  * Return Code: code returned by sd_send_scsi_cmd
22139  *		ENXIO
22140  *		EFAULT
22141  *		EAGAIN
22142  */
22143 
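/*
 * Illustrative sketch (not part of the driver): a minimal user-space
 * caller of USCSICMD issuing a TEST UNIT READY (opcode 0x00, a six
 * byte CDB). The fd is assumed to be an open raw disk descriptor; the
 * function name is for illustration only.
 *
 *	#include <sys/scsi/impl/uscsi.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	do_tur(int fd)
 *	{
 *		struct uscsi_cmd ucmd;
 *		char cdb[6];
 *
 *		(void) memset(&ucmd, 0, sizeof (ucmd));
 *		(void) memset(cdb, 0, sizeof (cdb));
 *		ucmd.uscsi_cdb = cdb;
 *		ucmd.uscsi_cdblen = sizeof (cdb);
 *		ucmd.uscsi_timeout = 30;
 *		ucmd.uscsi_flags = USCSI_SILENT;
 *		return (ioctl(fd, USCSICMD, &ucmd));
 *	}
 */
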
22144 static int
22145 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
22146 {
22147 #ifdef _MULTI_DATAMODEL
22148 	/*
22149 	 * For use when a 32 bit app makes a call into a
22150 	 * 64 bit ioctl
22151 	 */
22152 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
22153 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
22154 	model_t			model;
22155 #endif /* _MULTI_DATAMODEL */
22156 	struct uscsi_cmd	*scmd = NULL;
22157 	struct sd_lun		*un = NULL;
22158 	enum uio_seg		uioseg;
22159 	char			cdb[CDB_GROUP0];
22160 	int			rval = 0;
22161 
22162 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22163 		return (ENXIO);
22164 	}
22165 
22166 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
22167 
22168 	scmd = (struct uscsi_cmd *)
22169 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
22170 
22171 #ifdef _MULTI_DATAMODEL
22172 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
22173 	case DDI_MODEL_ILP32:
22174 	{
22175 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
22176 			rval = EFAULT;
22177 			goto done;
22178 		}
22179 		/*
22180 		 * Convert the ILP32 uscsi data from the
22181 		 * application to LP64 for internal use.
22182 		 */
22183 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
22184 		break;
22185 	}
22186 	case DDI_MODEL_NONE:
22187 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22188 			rval = EFAULT;
22189 			goto done;
22190 		}
22191 		break;
22192 	}
22193 #else /* ! _MULTI_DATAMODEL */
22194 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22195 		rval = EFAULT;
22196 		goto done;
22197 	}
22198 #endif /* _MULTI_DATAMODEL */
22199 
22200 	scmd->uscsi_flags &= ~USCSI_NOINTR;
22201 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
22202 	if (un->un_f_format_in_progress == TRUE) {
22203 		rval = EAGAIN;
22204 		goto done;
22205 	}
22206 
22207 	/*
22208 	 * We must do the ddi_copyin() on the uscsi_cdb here so that
22209 	 * we will have a valid cdb[0] to test.
22210 	 */
22211 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
22212 	    (cdb[0] == SCMD_FORMAT)) {
22213 		SD_TRACE(SD_LOG_IOCTL, un,
22214 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22215 		mutex_enter(SD_MUTEX(un));
22216 		un->un_f_format_in_progress = TRUE;
22217 		mutex_exit(SD_MUTEX(un));
22218 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22219 		    SD_PATH_STANDARD);
22220 		mutex_enter(SD_MUTEX(un));
22221 		un->un_f_format_in_progress = FALSE;
22222 		mutex_exit(SD_MUTEX(un));
22223 	} else {
22224 		SD_TRACE(SD_LOG_IOCTL, un,
22225 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22226 		/*
22227 		 * It's OK to fall into here even if the ddi_copyin()
22228 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
22229 		 * does this same copyin and will return the EFAULT
22230 		 * if it fails.
22231 		 */
22232 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22233 		    SD_PATH_STANDARD);
22234 	}
22235 #ifdef _MULTI_DATAMODEL
22236 	switch (model) {
22237 	case DDI_MODEL_ILP32:
22238 		/*
22239 		 * Convert back to ILP32 before copyout to the
22240 		 * application
22241 		 */
22242 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
22243 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
22244 			if (rval != 0) {
22245 				rval = EFAULT;
22246 			}
22247 		}
22248 		break;
22249 	case DDI_MODEL_NONE:
22250 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22251 			if (rval != 0) {
22252 				rval = EFAULT;
22253 			}
22254 		}
22255 		break;
22256 	}
22257 #else /* ! _MULTI_DATAMODEL */
22258 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22259 		if (rval != 0) {
22260 			rval = EFAULT;
22261 		}
22262 	}
22263 #endif /* _MULTI_DATAMODEL */
22264 done:
22265 	kmem_free(scmd, sizeof (struct uscsi_cmd));
22266 
22267 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
22268 
22269 	return (rval);
22270 }
22271 
22272 
22273 /*
22274  *    Function: sd_dkio_ctrl_info
22275  *
22276  * Description: This routine is the driver entry point for handling controller
22277  *		information ioctl requests (DKIOCINFO).
22278  *
22279  *   Arguments: dev  - the device number
22280  *		arg  - pointer to user provided dk_cinfo structure
22281  *		       specifying the controller type and attributes.
22282  *		flag - this argument is a pass through to ddi_copyxxx()
22283  *		       directly from the mode argument of ioctl().
22284  *
22285  * Return Code: 0
22286  *		EFAULT
22287  *		ENXIO
22288  */
22289 
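/*
 * Illustrative sketch (not part of the driver): querying controller
 * and unit information from user space. The device path is an
 * assumption for illustration only.
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	struct dk_cinfo cinfo;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCINFO, &cinfo) == 0) {
 *		(void) printf("ctype %d, maxtransfer %u blocks\n",
 *		    cinfo.dki_ctype, cinfo.dki_maxtransfer);
 *	}
 */
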
22290 static int
22291 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22292 {
22293 	struct sd_lun	*un = NULL;
22294 	struct dk_cinfo	*info;
22295 	dev_info_t	*pdip;
22296 	int		lun, tgt;
22297 
22298 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22299 		return (ENXIO);
22300 	}
22301 
22302 	info = (struct dk_cinfo *)
22303 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22304 
22305 	switch (un->un_ctype) {
22306 	case CTYPE_CDROM:
22307 		info->dki_ctype = DKC_CDROM;
22308 		break;
22309 	default:
22310 		info->dki_ctype = DKC_SCSI_CCS;
22311 		break;
22312 	}
22313 	pdip = ddi_get_parent(SD_DEVINFO(un));
22314 	info->dki_cnum = ddi_get_instance(pdip);
22315 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22316 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22317 	} else {
22318 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22319 		    DK_DEVLEN - 1);
22320 	}
22321 
22322 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22323 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22324 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22325 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22326 
22327 	/* Unit Information */
22328 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22329 	info->dki_slave = ((tgt << 3) | lun);
22330 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22331 	    DK_DEVLEN - 1);
22332 	info->dki_flags = DKI_FMTVOL;
22333 	info->dki_partition = SDPART(dev);
22334 
22335 	/* Max Transfer size of this device in blocks */
22336 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22337 	info->dki_addr = 0;
22338 	info->dki_space = 0;
22339 	info->dki_prio = 0;
22340 	info->dki_vec = 0;
22341 
22342 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22343 		kmem_free(info, sizeof (struct dk_cinfo));
22344 		return (EFAULT);
22345 	} else {
22346 		kmem_free(info, sizeof (struct dk_cinfo));
22347 		return (0);
22348 	}
22349 }
22350 
22351 
22352 /*
22353  *    Function: sd_get_media_info
22354  *
22355  * Description: This routine is the driver entry point for handling ioctl
22356  *		requests for the media type or command set profile used by the
22357  *		drive to operate on the media (DKIOCGMEDIAINFO).
22358  *
22359  *   Arguments: dev	- the device number
22360  *		arg	- pointer to user provided dk_minfo structure
22361  *			  specifying the media type, logical block size and
22362  *			  drive capacity.
22363  *		flag	- this argument is a pass through to ddi_copyxxx()
22364  *			  directly from the mode argument of ioctl().
22365  *
22366  * Return Code: 0
22367  *		EACCES
22368  *		EFAULT
22369  *		ENXIO
22370  *		EIO
22371  */
22372 
22373 static int
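/*
 * Illustrative sketch (not part of the driver): retrieving the media
 * type, logical block size and capacity from user space; fd is an
 * assumed open raw disk descriptor.
 *
 *	struct dk_minfo minfo;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0) {
 *		(void) printf("type 0x%x, lbsize %u, capacity %llu\n",
 *		    minfo.dki_media_type, minfo.dki_lbsize,
 *		    (unsigned long long)minfo.dki_capacity);
 *	}
 */
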
22374 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22375 {
22376 	struct sd_lun		*un = NULL;
22377 	struct uscsi_cmd	com;
22378 	struct scsi_inquiry	*sinq;
22379 	struct dk_minfo		media_info;
22380 	u_longlong_t		media_capacity;
22381 	uint64_t		capacity;
22382 	uint_t			lbasize;
22383 	uchar_t			*out_data;
22384 	uchar_t			*rqbuf;
22385 	int			rval = 0;
22386 	int			rtn;
22387 
22388 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22389 	    (un->un_state == SD_STATE_OFFLINE)) {
22390 		return (ENXIO);
22391 	}
22392 
22393 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22394 
22395 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22396 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22397 
22398 	/* Issue a TUR to determine if the drive is ready with media present */
22399 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
22400 	if (rval == ENXIO) {
22401 		goto done;
22402 	}
22403 
22404 	/* Now get configuration data */
22405 	if (ISCD(un)) {
22406 		media_info.dki_media_type = DK_CDROM;
22407 
22408 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22409 		if (un->un_f_mmc_cap == TRUE) {
22410 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
22411 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
22412 
22413 			if (rtn) {
22414 				/*
22415 				 * Failed for other than an illegal request
22416 				 * or command not supported
22417 				 */
22418 				if ((com.uscsi_status == STATUS_CHECK) &&
22419 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22420 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22421 					    (rqbuf[12] != 0x20)) {
22422 						rval = EIO;
22423 						goto done;
22424 					}
22425 				}
22426 			} else {
22427 				/*
22428 				 * The GET CONFIGURATION command succeeded
22429 				 * so set the media type according to the
22430 				 * returned data
22431 				 */
22432 				media_info.dki_media_type = out_data[6];
22433 				media_info.dki_media_type <<= 8;
22434 				media_info.dki_media_type |= out_data[7];
22435 			}
22436 		}
22437 	} else {
22438 		/*
22439 		 * The profile list is not available, so we attempt to identify
22440 		 * the media type based on the inquiry data
22441 		 */
22442 		sinq = un->un_sd->sd_inq;
22443 		if (sinq->inq_qual == 0) {
22444 			/* This is a direct access device */
22445 			media_info.dki_media_type = DK_FIXED_DISK;
22446 
22447 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22448 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22449 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22450 					media_info.dki_media_type = DK_ZIP;
22451 				} else if (
22452 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22453 					media_info.dki_media_type = DK_JAZ;
22454 				}
22455 			}
22456 		} else {
22457 			/* Not a CD or direct access so return unknown media */
22458 			media_info.dki_media_type = DK_UNKNOWN;
22459 		}
22460 	}
22461 
22462 	/* Now read the capacity so we can provide the lbasize and capacity */
22463 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
22464 	    SD_PATH_DIRECT)) {
22465 	case 0:
22466 		break;
22467 	case EACCES:
22468 		rval = EACCES;
22469 		goto done;
22470 	default:
22471 		rval = EIO;
22472 		goto done;
22473 	}
22474 
22475 	media_info.dki_lbsize = lbasize;
22476 	media_capacity = capacity;
22477 
22478 	/*
22479 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
22480 	 * un->un_sys_blocksize chunks. So we need to convert it into
22481 	 * cap.lbasize chunks.
22482 	 */
22483 	media_capacity *= un->un_sys_blocksize;
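	/*
	 * Worked example (illustrative): if READ CAPACITY reported
	 * 1000000 blocks in 512-byte system-block units and the media
	 * lbasize is 2048, then (1000000 * 512) / 2048 = 250000 blocks
	 * of 2048 bytes are reported back to the caller.
	 */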
22484 	media_capacity /= lbasize;
22485 	media_info.dki_capacity = media_capacity;
22486 
22487 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22488 		rval = EFAULT;
22489 		/* Use a goto in case more code is added below in the future */
22490 		goto done;
22491 	}
22492 done:
22493 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22494 	kmem_free(rqbuf, SENSE_LENGTH);
22495 	return (rval);
22496 }
22497 
22498 
22499 /*
22500  *    Function: sd_dkio_get_geometry
22501  *
22502  * Description: This routine is the driver entry point for handling user
22503  *		requests to get the device geometry (DKIOCGGEOM).
22504  *
22505  *   Arguments: dev  - the device number
22506  *		arg  - pointer to user provided dk_geom structure specifying
22507  *			the controller's notion of the current geometry.
22508  *		flag - this argument is a pass through to ddi_copyxxx()
22509  *		       directly from the mode argument of ioctl().
22510  *		geom_validated - flag indicating if the device geometry has been
22511  *				 previously validated in the sdioctl routine.
22512  *
22513  * Return Code: 0
22514  *		EFAULT
22515  *		ENXIO
22516  *		EIO
22517  */
22518 
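/*
 * Illustrative sketch (not part of the driver): reading the geometry
 * from user space; fd is an assumed open raw disk descriptor.
 *
 *	struct dk_geom geom;
 *
 *	if (ioctl(fd, DKIOCGGEOM, &geom) == 0) {
 *		(void) printf("%u cyl, %u head, %u sect\n",
 *		    geom.dkg_ncyl, geom.dkg_nhead, geom.dkg_nsect);
 *	}
 */
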
22519 static int
22520 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22521 {
22522 	struct sd_lun	*un = NULL;
22523 	struct dk_geom	*tmp_geom = NULL;
22524 	int		rval = 0;
22525 
22526 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22527 		return (ENXIO);
22528 	}
22529 
22530 	if (geom_validated == FALSE) {
22531 		/*
22532 		 * sd_validate_geometry does not spin a disk up
22533 		 * if it was spun down. We need to make sure it
22534 		 * is ready.
22535 		 */
22536 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22537 			return (rval);
22538 		}
22539 		mutex_enter(SD_MUTEX(un));
22540 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22541 		mutex_exit(SD_MUTEX(un));
22542 	}
22543 	if (rval)
22544 		return (rval);
22545 
22546 	/*
22547 	 * It is possible that un_solaris_size is 0 (uninitialized)
22548 	 * after sd_unit_attach. Reservation conflict may cause the
22549 	 * above situation. Thus, the zero check of un_solaris_size
22550 	 * should occur after the sd_validate_geometry() call.
22551 	 */
22552 #if defined(__i386) || defined(__amd64)
22553 	if (un->un_solaris_size == 0) {
22554 		return (EIO);
22555 	}
22556 #endif
22557 
22558 	/*
22559 	 * Make a local copy of the soft state geometry to avoid some potential
22560 	 * race conditions associated with holding the mutex and updating the
22561 	 * write_reinstruct value
22562 	 */
22563 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22564 	mutex_enter(SD_MUTEX(un));
22565 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22566 	mutex_exit(SD_MUTEX(un));
22567 
22568 	if (tmp_geom->dkg_write_reinstruct == 0) {
22569 		tmp_geom->dkg_write_reinstruct =
22570 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22571 		    sd_rot_delay) / (int)60000);
22572 	}
22573 
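	/*
	 * Worked example (illustrative, assuming sd_rot_delay is a
	 * rotation delay in milliseconds): with 600 sectors per track
	 * at 10000 rpm and a 4ms delay, (600 * 10000 * 4) / 60000 = 400
	 * sectors pass under the head during the delay.
	 */
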
22574 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22575 	    flag);
22576 	if (rval != 0) {
22577 		rval = EFAULT;
22578 	}
22579 
22580 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22581 	return (rval);
22582 
22583 }
22584 
22585 
22586 /*
22587  *    Function: sd_dkio_set_geometry
22588  *
22589  * Description: This routine is the driver entry point for handling user
22590  *		requests to set the device geometry (DKIOCSGEOM). The actual
22591  *		device geometry is not updated, just the driver "notion" of it.
22592  *
22593  *   Arguments: dev  - the device number
22594  *		arg  - pointer to user provided dk_geom structure used to set
22595  *			the controller's notion of the current geometry.
22596  *		flag - this argument is a pass through to ddi_copyxxx()
22597  *		       directly from the mode argument of ioctl().
22598  *
22599  * Return Code: 0
22600  *		EFAULT
22601  *		ENXIO
22602  *		EIO
22603  */
22604 
22605 static int
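/*
 * Illustrative sketch (not part of the driver): the usual user-space
 * pattern is a read-modify-write of the geometry; the field and value
 * chosen here are assumptions for illustration only.
 *
 *	struct dk_geom geom;
 *
 *	if (ioctl(fd, DKIOCGGEOM, &geom) == 0) {
 *		geom.dkg_rpm = 7200;
 *		(void) ioctl(fd, DKIOCSGEOM, &geom);
 *	}
 */
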
22606 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22607 {
22608 	struct sd_lun	*un = NULL;
22609 	struct dk_geom	*tmp_geom;
22610 	struct dk_map	*lp;
22611 	int		rval = 0;
22612 	int		i;
22613 
22614 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22615 		return (ENXIO);
22616 	}
22617 
22618 	/*
22619 	 * Make sure there is no reservation conflict on the lun.
22620 	 */
22621 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22622 		return (EACCES);
22623 	}
22624 
22625 #if defined(__i386) || defined(__amd64)
22626 	if (un->un_solaris_size == 0) {
22627 		return (EIO);
22628 	}
22629 #endif
22630 
22631 	/*
22632 	 * We need to copy the user specified geometry into local
22633 	 * storage and then update the softstate. We don't want to hold
22634 	 * the mutex and copyin directly from the user to the soft state
22635 	 */
22636 	tmp_geom = (struct dk_geom *)
22637 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22638 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22639 	if (rval != 0) {
22640 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22641 		return (EFAULT);
22642 	}
22643 
22644 	mutex_enter(SD_MUTEX(un));
22645 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22646 	for (i = 0; i < NDKMAP; i++) {
22647 		lp  = &un->un_map[i];
22648 		un->un_offset[i] =
22649 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22650 #if defined(__i386) || defined(__amd64)
22651 		un->un_offset[i] += un->un_solaris_offset;
22652 #endif
22653 	}
22654 	un->un_f_geometry_is_valid = FALSE;
22655 	mutex_exit(SD_MUTEX(un));
22656 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22657 
22658 	return (rval);
22659 }
22660 
22661 
22662 /*
22663  *    Function: sd_dkio_get_partition
22664  *
22665  * Description: This routine is the driver entry point for handling user
22666  *		requests to get the partition table (DKIOCGAPART).
22667  *
22668  *   Arguments: dev  - the device number
22669  *		arg  - pointer to user provided dk_allmap structure specifying
22670  *			the controller's notion of the current partition table.
22671  *		flag - this argument is a pass through to ddi_copyxxx()
22672  *		       directly from the mode argument of ioctl().
22673  *		geom_validated - flag indicating if the device geometry has been
22674  *				 previously validated in the sdioctl routine.
22675  *
22676  * Return Code: 0
22677  *		EFAULT
22678  *		ENXIO
22679  *		EIO
22680  */
22681 
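/*
 * Illustrative sketch (not part of the driver): fetching the entire
 * slice map from user space; fd is an assumed open raw disk descriptor.
 *
 *	struct dk_allmap allmap;
 *
 *	if (ioctl(fd, DKIOCGAPART, &allmap) == 0) {
 *		(void) printf("slice 0: cyl %ld, %ld blocks\n",
 *		    (long)allmap.dka_map[0].dkl_cylno,
 *		    (long)allmap.dka_map[0].dkl_nblk);
 *	}
 */
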
22682 static int
22683 sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22684 {
22685 	struct sd_lun	*un = NULL;
22686 	int		rval = 0;
22687 	int		size;
22688 
22689 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22690 		return (ENXIO);
22691 	}
22692 
22693 	/*
22694 	 * Make sure the geometry is valid before getting the partition
22695 	 * information.
22696 	 */
22697 	mutex_enter(SD_MUTEX(un));
22698 	if (geom_validated == FALSE) {
22699 		/*
22700 		 * sd_validate_geometry does not spin a disk up
22701 		 * if it was spun down. We need to make sure it
22702 		 * is ready before validating the geometry.
22703 		 */
22704 		mutex_exit(SD_MUTEX(un));
22705 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22706 			return (rval);
22707 		}
22708 		mutex_enter(SD_MUTEX(un));
22709 
22710 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22711 			mutex_exit(SD_MUTEX(un));
22712 			return (rval);
22713 		}
22714 	}
22715 	mutex_exit(SD_MUTEX(un));
22716 
22717 	/*
22718 	 * It is possible that un_solaris_size is 0 (uninitialized)
22719 	 * after sd_unit_attach. Reservation conflict may cause the
22720 	 * above situation. Thus, the zero check of un_solaris_size
22721 	 * should occur after the sd_validate_geometry() call.
22722 	 */
22723 #if defined(__i386) || defined(__amd64)
22724 	if (un->un_solaris_size == 0) {
22725 		return (EIO);
22726 	}
22727 #endif
22728 
22729 #ifdef _MULTI_DATAMODEL
22730 	switch (ddi_model_convert_from(flag & FMODELS)) {
22731 	case DDI_MODEL_ILP32: {
22732 		struct dk_map32 dk_map32[NDKMAP];
22733 		int		i;
22734 
22735 		for (i = 0; i < NDKMAP; i++) {
22736 			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
22737 			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
22738 		}
22739 		size = NDKMAP * sizeof (struct dk_map32);
22740 		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
22741 		if (rval != 0) {
22742 			rval = EFAULT;
22743 		}
22744 		break;
22745 	}
22746 	case DDI_MODEL_NONE:
22747 		size = NDKMAP * sizeof (struct dk_map);
22748 		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22749 		if (rval != 0) {
22750 			rval = EFAULT;
22751 		}
22752 		break;
22753 	}
22754 #else /* ! _MULTI_DATAMODEL */
22755 	size = NDKMAP * sizeof (struct dk_map);
22756 	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22757 	if (rval != 0) {
22758 		rval = EFAULT;
22759 	}
22760 #endif /* _MULTI_DATAMODEL */
22761 	return (rval);
22762 }
22763 
22764 
22765 /*
22766  *    Function: sd_dkio_set_partition
22767  *
22768  * Description: This routine is the driver entry point for handling user
22769  *		requests to set the partition table (DKIOCSAPART). The actual
22770  *		device partition is not updated.
22771  *
22772  *   Arguments: dev  - the device number
22773  *		arg  - pointer to user provided dk_allmap structure used to set
22774  *			the controller's notion of the partition table.
22775  *		flag - this argument is a pass through to ddi_copyxxx()
22776  *		       directly from the mode argument of ioctl().
22777  *
22778  * Return Code: 0
22779  *		EINVAL
22780  *		EFAULT
22781  *		ENXIO
22782  *		EIO
22783  */
22784 
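/*
 * Illustrative sketch (not part of the driver): as with the geometry,
 * user-space callers typically read the slice map, adjust an entry,
 * and write the whole map back; the slice chosen is an assumption for
 * illustration only.
 *
 *	struct dk_allmap allmap;
 *
 *	if (ioctl(fd, DKIOCGAPART, &allmap) == 0) {
 *		allmap.dka_map[1].dkl_nblk = 0;
 *		(void) ioctl(fd, DKIOCSAPART, &allmap);
 *	}
 */
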
22785 static int
22786 sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
22787 {
22788 	struct sd_lun	*un = NULL;
22789 	struct dk_map	dk_map[NDKMAP];
22790 	struct dk_map	*lp;
22791 	int		rval = 0;
22792 	int		size;
22793 	int		i;
22794 #if defined(_SUNOS_VTOC_16)
22795 	struct dkl_partition	*vp;
22796 #endif
22797 
22798 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22799 		return (ENXIO);
22800 	}
22801 
22802 	/*
22803 	 * Set the map for all logical partitions.  We lock
22804 	 * the priority just to make sure an interrupt doesn't
22805 	 * come in while the map is half updated.
22806 	 */
22807 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
22808 	mutex_enter(SD_MUTEX(un));
22809 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22810 		mutex_exit(SD_MUTEX(un));
22811 		return (ENOTSUP);
22812 	}
22813 	mutex_exit(SD_MUTEX(un));
22814 
22815 	/*
22816 	 * Make sure there is no reservation conflict on the lun.
22817 	 */
22818 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22819 		return (EACCES);
22820 	}
22821 
22822 #if defined(__i386) || defined(__amd64)
22823 	if (un->un_solaris_size == 0) {
22824 		return (EIO);
22825 	}
22826 #endif
22827 
22828 #ifdef _MULTI_DATAMODEL
22829 	switch (ddi_model_convert_from(flag & FMODELS)) {
22830 	case DDI_MODEL_ILP32: {
22831 		struct dk_map32 dk_map32[NDKMAP];
22832 
22833 		size = NDKMAP * sizeof (struct dk_map32);
22834 		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
22835 		if (rval != 0) {
22836 			return (EFAULT);
22837 		}
22838 		for (i = 0; i < NDKMAP; i++) {
22839 			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
22840 			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
22841 		}
22842 		break;
22843 	}
22844 	case DDI_MODEL_NONE:
22845 		size = NDKMAP * sizeof (struct dk_map);
22846 		rval = ddi_copyin((void *)arg, dk_map, size, flag);
22847 		if (rval != 0) {
22848 			return (EFAULT);
22849 		}
22850 		break;
22851 	}
22852 #else /* ! _MULTI_DATAMODEL */
22853 	size = NDKMAP * sizeof (struct dk_map);
22854 	rval = ddi_copyin((void *)arg, dk_map, size, flag);
22855 	if (rval != 0) {
22856 		return (EFAULT);
22857 	}
22858 #endif /* _MULTI_DATAMODEL */
22859 
22860 	mutex_enter(SD_MUTEX(un));
22861 	/* Note: The size used in this bcopy is set based upon the data model */
22862 	bcopy(dk_map, un->un_map, size);
22863 #if defined(_SUNOS_VTOC_16)
22864 	vp = (struct dkl_partition *)&(un->un_vtoc);
22865 #endif	/* defined(_SUNOS_VTOC_16) */
22866 	for (i = 0; i < NDKMAP; i++) {
22867 		lp  = &un->un_map[i];
22868 		un->un_offset[i] =
22869 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22870 #if defined(_SUNOS_VTOC_16)
22871 		vp->p_start = un->un_offset[i];
22872 		vp->p_size = lp->dkl_nblk;
22873 		vp++;
22874 #endif	/* defined(_SUNOS_VTOC_16) */
22875 #if defined(__i386) || defined(__amd64)
22876 		un->un_offset[i] += un->un_solaris_offset;
22877 #endif
22878 	}
22879 	mutex_exit(SD_MUTEX(un));
22880 	return (rval);
22881 }
22882 
22883 
22884 /*
22885  *    Function: sd_dkio_get_vtoc
22886  *
22887  * Description: This routine is the driver entry point for handling user
22888  *		requests to get the current volume table of contents
22889  *		(DKIOCGVTOC).
22890  *
22891  *   Arguments: dev  - the device number
22892  *		arg  - pointer to user provided vtoc structure specifying
22893  *			the current vtoc.
22894  *		flag - this argument is a pass through to ddi_copyxxx()
22895  *		       directly from the mode argument of ioctl().
22896  *		geom_validated - flag indicating if the device geometry has been
22897  *				 previously validated in the sdioctl routine.
22898  *
22899  * Return Code: 0
22900  *		EFAULT
22901  *		ENXIO
22902  *		EIO
22903  */
22904 
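/*
 * Illustrative sketch (not part of the driver): reading and
 * sanity-checking the vtoc from user space; fd is an assumed open raw
 * disk descriptor.
 *
 *	#include <sys/vtoc.h>
 *
 *	struct vtoc vt;
 *
 *	if (ioctl(fd, DKIOCGVTOC, &vt) == 0 && vt.v_sanity == VTOC_SANE) {
 *		(void) printf("%d partitions, %d-byte sectors\n",
 *		    vt.v_nparts, vt.v_sectorsz);
 *	}
 */
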
22905 static int
22906 sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
22907 {
22908 	struct sd_lun	*un = NULL;
22909 #if defined(_SUNOS_VTOC_8)
22910 	struct vtoc	user_vtoc;
22911 #endif	/* defined(_SUNOS_VTOC_8) */
22912 	int		rval = 0;
22913 
22914 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22915 		return (ENXIO);
22916 	}
22917 
22918 	mutex_enter(SD_MUTEX(un));
22919 	if (geom_validated == FALSE) {
22920 		/*
22921 		 * sd_validate_geometry does not spin a disk up
22922 		 * if it was spun down. We need to make sure it
22923 		 * is ready.
22924 		 */
22925 		mutex_exit(SD_MUTEX(un));
22926 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22927 			return (rval);
22928 		}
22929 		mutex_enter(SD_MUTEX(un));
22930 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22931 			mutex_exit(SD_MUTEX(un));
22932 			return (rval);
22933 		}
22934 	}
22935 
22936 #if defined(_SUNOS_VTOC_8)
22937 	sd_build_user_vtoc(un, &user_vtoc);
22938 	mutex_exit(SD_MUTEX(un));
22939 
22940 #ifdef _MULTI_DATAMODEL
22941 	switch (ddi_model_convert_from(flag & FMODELS)) {
22942 	case DDI_MODEL_ILP32: {
22943 		struct vtoc32 user_vtoc32;
22944 
22945 		vtoctovtoc32(user_vtoc, user_vtoc32);
22946 		if (ddi_copyout(&user_vtoc32, (void *)arg,
22947 		    sizeof (struct vtoc32), flag)) {
22948 			return (EFAULT);
22949 		}
22950 		break;
22951 	}
22952 
22953 	case DDI_MODEL_NONE:
22954 		if (ddi_copyout(&user_vtoc, (void *)arg,
22955 		    sizeof (struct vtoc), flag)) {
22956 			return (EFAULT);
22957 		}
22958 		break;
22959 	}
22960 #else /* ! _MULTI_DATAMODEL */
22961 	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
22962 		return (EFAULT);
22963 	}
22964 #endif /* _MULTI_DATAMODEL */
22965 
22966 #elif defined(_SUNOS_VTOC_16)
22967 	mutex_exit(SD_MUTEX(un));
22968 
22969 #ifdef _MULTI_DATAMODEL
22970 	/*
22971 	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
22972 	 * 32-bit to maintain compatibility with existing on-disk
22973 	 * structures.  Thus, we need to convert the structure when copying
22974 	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
22975 	 * program.  If the target is a 32-bit program, then no conversion
22976 	 * is necessary.
22977 	 */
22978 	/* LINTED: logical expression always true: op "||" */
22979 	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
22980 	switch (ddi_model_convert_from(flag & FMODELS)) {
22981 	case DDI_MODEL_ILP32:
22982 		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
22983 		    sizeof (un->un_vtoc), flag)) {
22984 			return (EFAULT);
22985 		}
22986 		break;
22987 
22988 	case DDI_MODEL_NONE: {
22989 		struct vtoc user_vtoc;
22990 
22991 		vtoc32tovtoc(un->un_vtoc, user_vtoc);
22992 		if (ddi_copyout(&user_vtoc, (void *)arg,
22993 		    sizeof (struct vtoc), flag)) {
22994 			return (EFAULT);
22995 		}
22996 		break;
22997 	}
22998 	}
22999 #else /* ! _MULTI_DATAMODEL */
23000 	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
23001 	    flag)) {
23002 		return (EFAULT);
23003 	}
23004 #endif /* _MULTI_DATAMODEL */
23005 #else
23006 #error "No VTOC format defined."
23007 #endif
23008 
23009 	return (rval);
23010 }
23011 
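/*
 *    Function: sd_dkio_get_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to read EFI label data from the media
 *		(DKIOCGETEFI).
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided dk_efi structure specifying
 *			the location and length of the EFI data to be read.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EFAULT
 *		ENXIO
 *		EINVAL
 *		EIO
 */
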
23012 static int
23013 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
23014 {
23015 	struct sd_lun	*un = NULL;
23016 	dk_efi_t	user_efi;
23017 	int		rval = 0;
23018 	void		*buffer;
23019 
23020 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23021 		return (ENXIO);
23022 
23023 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23024 		return (EFAULT);
23025 
23026 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23027 
23028 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23029 	    (user_efi.dki_length > un->un_max_xfer_size))
23030 		return (EINVAL);
23031 
23032 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23033 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
23034 	    user_efi.dki_lba, SD_PATH_DIRECT);
23035 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
23036 	    user_efi.dki_length, flag) != 0)
23037 		rval = EFAULT;
23038 
23039 	kmem_free(buffer, user_efi.dki_length);
23040 	return (rval);
23041 }
23042 
23043 /*
23044  *    Function: sd_build_user_vtoc
23045  *
23046  * Description: This routine populates a pass by reference variable with the
23047  *		current volume table of contents.
23048  *
23049  *   Arguments: un - driver soft state (unit) structure
23050  *		user_vtoc - pointer to vtoc structure to be populated
23051  */
23052 
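/*
 * Worked example (illustrative): with dkg_nhead = 16 and dkg_nsect = 63,
 * nblks is 1008 sectors per cylinder, so a partition whose dkl_cylno is
 * 2 is reported with p_start = 2 * 1008 = 2016 sectors.
 */
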
23053 static void
23054 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23055 {
23056 	struct dk_map2		*lpart;
23057 	struct dk_map		*lmap;
23058 	struct partition	*vpart;
23059 	int			nblks;
23060 	int			i;
23061 
23062 	ASSERT(mutex_owned(SD_MUTEX(un)));
23063 
23064 	/*
23065 	 * Return vtoc structure fields in the provided VTOC area, addressed
23066 	 * by *vtoc.
23067 	 */
23068 	bzero(user_vtoc, sizeof (struct vtoc));
23069 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
23070 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
23071 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
23072 	user_vtoc->v_sanity	= VTOC_SANE;
23073 	user_vtoc->v_version	= un->un_vtoc.v_version;
23074 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
23075 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
23076 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
23077 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
23078 	    sizeof (un->un_vtoc.v_reserved));
23079 	/*
23080 	 * Convert partitioning information.
23081 	 *
23082 	 * Note the conversion from starting cylinder number
23083 	 * to starting sector number.
23084 	 */
23085 	lmap = un->un_map;
23086 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
23087 	vpart = user_vtoc->v_part;
23088 
23089 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23090 
23091 	for (i = 0; i < V_NUMPAR; i++) {
23092 		vpart->p_tag	= lpart->p_tag;
23093 		vpart->p_flag	= lpart->p_flag;
23094 		vpart->p_start	= lmap->dkl_cylno * nblks;
23095 		vpart->p_size	= lmap->dkl_nblk;
23096 		lmap++;
23097 		lpart++;
23098 		vpart++;
23099 
23100 		/* (4364927) */
23101 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
23102 	}
23103 
23104 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
23105 }
23106 
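/*
 *    Function: sd_dkio_partition
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to get the partition information of an EFI-labeled
 *		disk (DKIOCPARTITION).
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided partition64 structure
 *			specifying the partition number of interest.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EFAULT
 *		ENXIO
 *		ESRCH
 *		EINVAL
 *		EIO
 */
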
23107 static int
23108 sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
23109 {
23110 	struct sd_lun		*un = NULL;
23111 	struct partition64	p64;
23112 	int			rval = 0;
23113 	uint_t			nparts;
23114 	efi_gpe_t		*partitions;
23115 	efi_gpt_t		*buffer;
23116 	diskaddr_t		gpe_lba;
23117 
23118 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23119 		return (ENXIO);
23120 	}
23121 
23122 	if (ddi_copyin((const void *)arg, &p64,
23123 	    sizeof (struct partition64), flag)) {
23124 		return (EFAULT);
23125 	}
23126 
23127 	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
23128 	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
23129 	    1, SD_PATH_DIRECT);
23130 	if (rval != 0)
23131 		goto done_error;
23132 
23133 	sd_swap_efi_gpt(buffer);
23134 
23135 	if ((rval = sd_validate_efi(buffer)) != 0)
23136 		goto done_error;
23137 
23138 	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
23139 	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
23140 	if (p64.p_partno >= nparts) {
23141 		/* couldn't find it */
23142 		rval = ESRCH;
23143 		goto done_error;
23144 	}
23145 	/*
23146 	 * if we're dealing with a partition that's out of the normal
23147 	 * 16K block, adjust accordingly
23148 	 */
23149 	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
23150 	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
23151 	    gpe_lba, SD_PATH_DIRECT);
23152 	if (rval) {
23153 		goto done_error;
23154 	}
23155 	partitions = (efi_gpe_t *)buffer;
23156 
23157 	sd_swap_efi_gpe(nparts, partitions);
23158 
23159 	partitions += p64.p_partno;
23160 	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
23161 	    sizeof (struct uuid));
23162 	p64.p_start = partitions->efi_gpe_StartingLBA;
23163 	p64.p_size = partitions->efi_gpe_EndingLBA -
23164 	    p64.p_start + 1;
23165 
23166 	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
23167 		rval = EFAULT;
23168 
23169 done_error:
23170 	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
23171 	return (rval);
23172 }
23173 
23174 
23175 /*
23176  *    Function: sd_dkio_set_vtoc
23177  *
23178  * Description: This routine is the driver entry point for handling user
23179  *		requests to set the current volume table of contents
23180  *		(DKIOCSVTOC).
23181  *
23182  *   Arguments: dev  - the device number
23183  *		arg  - pointer to user provided vtoc structure used to set the
23184  *			current vtoc.
23185  *		flag - this argument is a pass through to ddi_copyxxx()
23186  *		       directly from the mode argument of ioctl().
23187  *
23188  * Return Code: 0
23189  *		EFAULT
23190  *		ENXIO
23191  *		EINVAL
23192  *		ENOTSUP
23193  */
23194 
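/*
 * Illustrative sketch (not part of the driver): user-space labeling
 * tools typically read the vtoc, adjust a slice, and write the whole
 * table back; the slice and values here are assumptions for
 * illustration only.
 *
 *	struct vtoc vt;
 *
 *	if (ioctl(fd, DKIOCGVTOC, &vt) == 0) {
 *		vt.v_part[5].p_start = 0;
 *		vt.v_part[5].p_size = 0;
 *		(void) ioctl(fd, DKIOCSVTOC, &vt);
 *	}
 */
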
23195 static int
23196 sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
23197 {
23198 	struct sd_lun	*un = NULL;
23199 	struct vtoc	user_vtoc;
23200 	int		rval = 0;
23201 
23202 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23203 		return (ENXIO);
23204 	}
23205 
23206 #if defined(__i386) || defined(__amd64)
23207 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
23208 		return (EINVAL);
23209 	}
23210 #endif
23211 
23212 #ifdef _MULTI_DATAMODEL
23213 	switch (ddi_model_convert_from(flag & FMODELS)) {
23214 	case DDI_MODEL_ILP32: {
23215 		struct vtoc32 user_vtoc32;
23216 
23217 		if (ddi_copyin((const void *)arg, &user_vtoc32,
23218 		    sizeof (struct vtoc32), flag)) {
23219 			return (EFAULT);
23220 		}
23221 		vtoc32tovtoc(user_vtoc32, user_vtoc);
23222 		break;
23223 	}
23224 
23225 	case DDI_MODEL_NONE:
23226 		if (ddi_copyin((const void *)arg, &user_vtoc,
23227 		    sizeof (struct vtoc), flag)) {
23228 			return (EFAULT);
23229 		}
23230 		break;
23231 	}
23232 #else /* ! _MULTI_DATAMODEL */
23233 	if (ddi_copyin((const void *)arg, &user_vtoc,
23234 	    sizeof (struct vtoc), flag)) {
23235 		return (EFAULT);
23236 	}
23237 #endif /* _MULTI_DATAMODEL */
23238 
23239 	mutex_enter(SD_MUTEX(un));
23240 	if (un->un_blockcount > DK_MAX_BLOCKS) {
23241 		mutex_exit(SD_MUTEX(un));
23242 		return (ENOTSUP);
23243 	}
23244 	if (un->un_g.dkg_ncyl == 0) {
23245 		mutex_exit(SD_MUTEX(un));
23246 		return (EINVAL);
23247 	}
23248 
23249 	mutex_exit(SD_MUTEX(un));
23250 	sd_clear_efi(un);
23251 	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
23252 	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
23253 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
23254 	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23255 	    un->un_node_type, NULL);
23256 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
23257 	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23258 	    un->un_node_type, NULL);
23259 	mutex_enter(SD_MUTEX(un));
23260 
23261 	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
23262 		if ((rval = sd_write_label(dev)) == 0) {
23263 			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
23264 			    != 0) {
23265 				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
23266 				    "sd_dkio_set_vtoc: "
23267 				    "Failed validate geometry\n");
23268 			}
23269 		}
23270 	}
23271 
23272 	/*
23273 	 * If sd_build_label_vtoc, or sd_write_label failed above write the
23274 	 * devid anyway, what can it hurt? Also preserve the device id by
23275 	 * writing to the disk acyl for the case where a devid has been
23276 	 * fabricated.
23277 	 */
23278 	if (un->un_f_devid_supported &&
23279 	    (un->un_f_opt_fab_devid == TRUE)) {
23280 		if (un->un_devid == NULL) {
23281 			sd_register_devid(un, SD_DEVINFO(un),
23282 			    SD_TARGET_IS_UNRESERVED);
23283 		} else {
23284 			/*
23285 			 * The device id for this disk has been
23286 			 * fabricated. Fabricated device id's are
23287 			 * managed by storing them in the last 2
23288 			 * available sectors on the drive. The device
23289 			 * id must be preserved by writing it back out
23290 			 * to this location.
23291 			 */
23292 			if (sd_write_deviceid(un) != 0) {
23293 				ddi_devid_free(un->un_devid);
23294 				un->un_devid = NULL;
23295 			}
23296 		}
23297 	}
23298 	mutex_exit(SD_MUTEX(un));
23299 	return (rval);
23300 }
23301 
23302 
23303 /*
23304  *    Function: sd_build_label_vtoc
23305  *
23306  * Description: This routine updates the driver soft state current volume table
23307  *		of contents based on a user specified vtoc.
23308  *
23309  *   Arguments: un - driver soft state (unit) structure
23310  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23311  *			    to update the driver soft state.
23312  *
23313  * Return Code: 0
23314  *		EINVAL
23315  */
23316 
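/*
 * Worked example (illustrative): with nblks = 1008 sectors per
 * cylinder, a partition with p_start = 2016 converts to dkl_cylno = 2;
 * on the 8-slice VTOC a p_start that is not a multiple of nblks fails
 * the sanity check below and the routine returns EINVAL.
 */
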
23317 static int
23318 sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23319 {
23320 	struct dk_map		*lmap;
23321 	struct partition	*vpart;
23322 	int			nblks;
23323 #if defined(_SUNOS_VTOC_8)
23324 	int			ncyl;
23325 	struct dk_map2		*lpart;
23326 #endif	/* defined(_SUNOS_VTOC_8) */
23327 	int			i;
23328 
23329 	ASSERT(mutex_owned(SD_MUTEX(un)));
23330 
23331 	/* Sanity-check the vtoc */
23332 	if (user_vtoc->v_sanity != VTOC_SANE ||
23333 	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
23334 	    user_vtoc->v_nparts != V_NUMPAR) {
23335 		return (EINVAL);
23336 	}
23337 
23338 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23339 	if (nblks == 0) {
23340 		return (EINVAL);
23341 	}
23342 
23343 #if defined(_SUNOS_VTOC_8)
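	/*
	 * Check that each partition starts on a cylinder boundary and
	 * does not extend past the last cylinder. For example, with an
	 * illustrative geometry of nhead=16 and nsect=63, nblks is
	 * 1008, so a partition with p_start=2016 begins on cylinder 2.
	 */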
23344 	vpart = user_vtoc->v_part;
23345 	for (i = 0; i < V_NUMPAR; i++) {
23346 		if ((vpart->p_start % nblks) != 0) {
23347 			return (EINVAL);
23348 		}
23349 		ncyl = vpart->p_start / nblks;
23350 		ncyl += vpart->p_size / nblks;
23351 		if ((vpart->p_size % nblks) != 0) {
23352 			ncyl++;
23353 		}
23354 		if (ncyl > (int)un->un_g.dkg_ncyl) {
23355 			return (EINVAL);
23356 		}
23357 		vpart++;
23358 	}
23359 #endif	/* defined(_SUNOS_VTOC_8) */
23360 
23361 	/* Put appropriate vtoc structure fields into the disk label */
23362 #if defined(_SUNOS_VTOC_16)
23363 	/*
23364 	 * The vtoc is always a 32bit data structure to maintain the
23365 	 * on-disk format. Convert "in place" instead of bcopying it.
23366 	 */
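	/* (vtoctovtoc32() is the field-by-field conversion macro from
	 * <sys/vtoc.h>.) */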
23367 	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));
23368 
23369 	/*
23370 	 * in the 16-slice vtoc, starting sectors are expressed in
23371 	 * numbers *relative* to the start of the Solaris fdisk partition.
23372 	 */
23373 	lmap = un->un_map;
23374 	vpart = user_vtoc->v_part;
23375 
23376 	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
23377 		lmap->dkl_cylno = vpart->p_start / nblks;
23378 		lmap->dkl_nblk = vpart->p_size;
23379 	}
23380 
23381 #elif defined(_SUNOS_VTOC_8)
23382 
23383 	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
23384 	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
23385 	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];
23386 
23387 	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
23388 	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;
23389 
23390 	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);
23391 
23392 	un->un_vtoc.v_nparts = user_vtoc->v_nparts;
23393 
23394 	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
23395 	    sizeof (un->un_vtoc.v_reserved));
23396 
23397 	/*
23398 	 * Note the conversion from starting sector number
23399 	 * to starting cylinder number.
23400 	 * Return error if division results in a remainder.
23401 	 */
23402 	lmap = un->un_map;
23403 	lpart = un->un_vtoc.v_part;
23404 	vpart = user_vtoc->v_part;
23405 
23406 	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
23407 		lpart->p_tag  = vpart->p_tag;
23408 		lpart->p_flag = vpart->p_flag;
23409 		lmap->dkl_cylno = vpart->p_start / nblks;
23410 		lmap->dkl_nblk = vpart->p_size;
23411 
23412 		lmap++;
23413 		lpart++;
23414 		vpart++;
23415 
23416 		/* (4387723) */
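		/*
		 * Clamp 64-bit time_t values so they fit the 32-bit
		 * v_timestamp field of the on-disk vtoc.
		 */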
23417 #ifdef _LP64
23418 		if (user_vtoc->timestamp[i] > TIME32_MAX) {
23419 			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
23420 		} else {
23421 			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23422 		}
23423 #else
23424 		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23425 #endif
23426 	}
23427 
23428 	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
23429 #else
23430 #error "No VTOC format defined."
23431 #endif
23432 	return (0);
23433 }
23434 
23435 /*
23436  *    Function: sd_clear_efi
23437  *
23438  * Description: This routine clears all EFI labels.
23439  *
23440  *   Arguments: un - driver soft state (unit) structure
23441  *
23442  * Return Code: void
23443  */
23444 
23445 static void
23446 sd_clear_efi(struct sd_lun *un)
23447 {
23448 	efi_gpt_t	*gpt;
23449 	uint_t		lbasize;
23450 	uint64_t	cap;
23451 	int rval;
23452 
23453 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23454 
23455 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
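	/*
	 * The primary GPT header lives at LBA 1 (LBA 0 holds the
	 * protective MBR); the standard backup header is in the last
	 * LBA of the disk.
	 */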
23456 
23457 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23458 		goto done;
23459 	}
23460 
23461 	sd_swap_efi_gpt(gpt);
23462 	rval = sd_validate_efi(gpt);
23463 	if (rval == 0) {
23464 		/* clear primary */
23465 		bzero(gpt, sizeof (efi_gpt_t));
23466 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
23467 		    SD_PATH_DIRECT)) != 0) {
23468 			SD_INFO(SD_LOG_IO_PARTITION, un,
23469 			    "sd_clear_efi: clear primary label failed\n");
23470 		}
23471 	}
23472 	/* the backup */
23473 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23474 	    SD_PATH_DIRECT);
23475 	if (rval) {
23476 		goto done;
23477 	}
23478 	/*
23479 	 * The MMC standard allows READ CAPACITY to be
23480 	 * inaccurate by a bounded amount (in the interest of
23481 	 * response latency).  As a result, failed READs are
23482 	 * commonplace (due to the reading of metadata and not
23483 	 * data). Depending on the per-Vendor/drive Sense data,
23484 	 * the failed READ can cause many (unnecessary) retries.
23485 	 */
23486 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23487 	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23488 	    SD_PATH_DIRECT)) != 0) {
23489 		goto done;
23490 	}
23491 	sd_swap_efi_gpt(gpt);
23492 	rval = sd_validate_efi(gpt);
23493 	if (rval == 0) {
23494 		/* clear backup */
23495 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
23496 		    cap-1);
23497 		bzero(gpt, sizeof (efi_gpt_t));
23498 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23499 		    cap-1, SD_PATH_DIRECT)) != 0) {
23500 			SD_INFO(SD_LOG_IO_PARTITION, un,
23501 			    "sd_clear_efi: clear backup label failed\n");
23502 		}
23503 	} else {
23504 		/*
23505 		 * Refer to comments related to off-by-1 at the
23506 		 * header of this file
23507 		 */
23508 		if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23509 		    cap - 2, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23510 		    SD_PATH_DIRECT)) != 0) {
23511 			goto done;
23512 		}
23513 		sd_swap_efi_gpt(gpt);
23514 		rval = sd_validate_efi(gpt);
23515 		if (rval == 0) {
23516 			/* clear legacy backup EFI label */
23517 			SD_TRACE(SD_LOG_IOCTL, un,
23518 			    "sd_clear_efi clear backup@%lu\n", cap-2);
23519 			bzero(gpt, sizeof (efi_gpt_t));
23520 			if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23521 			    cap-2, SD_PATH_DIRECT)) != 0) {
23522 				SD_INFO(SD_LOG_IO_PARTITION, un,
23523 				    "sd_clear_efi: "
23524 				    "clear legacy backup label failed\n");
23525 			}
23526 		}
23527 	}
23528 
23529 done:
23530 	kmem_free(gpt, sizeof (efi_gpt_t));
23531 }
23532 
23533 /*
23534  *    Function: sd_set_vtoc
23535  *
23536  * Description: This routine writes the disk label to its on-disk locations.
23537  *
23538  *   Arguments: un - driver soft state (unit) structure
23539  *              dkl  - the data to be written
23540  *
23541  * Return: 0 for success or an errno-type return code on failure
23542  */
23543 
23544 static int
23545 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23546 {
23547 	void			*shadow_buf;
23548 	uint_t			label_addr;
23549 	int			sec;
23550 	int			blk;
23551 	int			head;
23552 	int			cyl;
23553 	int			rval;
23554 
23555 #if defined(__i386) || defined(__amd64)
23556 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23557 #else
23558 	/* Write the primary label at block 0 of the solaris partition. */
23559 	label_addr = 0;
23560 #endif
23561 
23562 	if (NOT_DEVBSIZE(un)) {
23563 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23564 		/*
23565 		 * Read the target's first block.
23566 		 */
23567 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23568 		    un->un_tgt_blocksize, label_addr,
23569 		    SD_PATH_STANDARD)) != 0) {
23570 			goto exit;
23571 		}
23572 		/*
23573 		 * Copy the contents of the label into the shadow buffer
23574 		 * which is of the size of target block size.
23575 		 */
23576 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23577 	}
23578 
23579 	/* Write the primary label */
23580 	if (NOT_DEVBSIZE(un)) {
23581 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23582 		    label_addr, SD_PATH_STANDARD);
23583 	} else {
23584 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23585 		    label_addr, SD_PATH_STANDARD);
23586 	}
23587 	if (rval != 0) {
23588 		goto exit;	/* don't leak shadow_buf on error */
23589 	}
23590 
23591 	/*
23592 	 * Calculate where the backup labels go.  They are always on
23593 	 * the last alternate cylinder, but some older drives put them
23594 	 * on head 2 instead of the last head.	They are always on the
23595 	 * first 5 odd sectors of the appropriate track.
23596 	 *
23597 	 * We have no choice at this point but to believe that the
23598 	 * disk label is valid.	 Use the geometry of the disk
23599 	 * as described in the label.
23600 	 */
23601 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23602 	head = dkl->dkl_nhead - 1;
23603 
23604 	/*
23605 	 * Write and verify the backup labels. Make sure we don't try to
23606 	 * write past the last cylinder.
23607 	 */
23608 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23609 		blk = (daddr_t)(
23610 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23611 		    (head * dkl->dkl_nsect) + sec);
23612 #if defined(__i386) || defined(__amd64)
23613 		blk += un->un_solaris_offset;
23614 #endif
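		/*
		 * For example, with an illustrative geometry of
		 * ncyl=100, acyl=2, nhead=4, nsect=10 and apc=0,
		 * cyl is 101 and head is 3, so the first backup
		 * label (sec=1) lands at block 101*40 + 3*10 + 1
		 * = 4071.
		 */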
23615 		if (NOT_DEVBSIZE(un)) {
23616 			uint64_t	tblk;
23617 			/*
23618 			 * Need to read the block first for read modify write.
23619 			 */
23620 			tblk = (uint64_t)blk;
23621 			blk = (int)((tblk * un->un_sys_blocksize) /
23622 			    un->un_tgt_blocksize);
23623 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23624 			    un->un_tgt_blocksize, blk,
23625 			    SD_PATH_STANDARD)) != 0) {
23626 				goto exit;
23627 			}
23628 			/*
23629 			 * Modify the shadow buffer with the label.
23630 			 */
23631 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23632 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23633 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23634 		} else {
23635 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23636 			    blk, SD_PATH_STANDARD);
23637 			SD_INFO(SD_LOG_IO_PARTITION, un,
23638 			    "sd_set_vtoc: wrote backup label %d\n", blk);
23639 		}
23640 		if (rval != 0) {
23641 			goto exit;
23642 		}
23643 	}
23644 exit:
23645 	if (NOT_DEVBSIZE(un)) {
23646 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23647 	}
23648 	return (rval);
23649 }
23650 
23651 /*
23652  *    Function: sd_clear_vtoc
23653  *
23654  * Description: This routine clears out the VTOC labels.
23655  *
23656  *   Arguments: un - driver soft state (unit) structure
23657  *
23658  * Return: void
23659  */
23660 
23661 static void
23662 sd_clear_vtoc(struct sd_lun *un)
23663 {
23664 	struct dk_label		*dkl;
23665 
23666 	mutex_exit(SD_MUTEX(un));
23667 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23668 	mutex_enter(SD_MUTEX(un));
23669 	/*
23670 	 * sd_set_vtoc uses these fields in order to figure out
23671 	 * where to overwrite the backup labels
23672 	 */
23673 	dkl->dkl_apc    = un->un_g.dkg_apc;
23674 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23675 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23676 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23677 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23678 	mutex_exit(SD_MUTEX(un));
23679 	(void) sd_set_vtoc(un, dkl);
23680 	kmem_free(dkl, sizeof (struct dk_label));
23681 
23682 	mutex_enter(SD_MUTEX(un));
23683 }
23684 
23685 /*
23686  *    Function: sd_write_label
23687  *
23688  * Description: This routine will validate and write the driver soft state vtoc
23689  *		contents to the device.
23690  *
23691  *   Arguments: dev - the device number
23692  *
23693  * Return Code: the code returned by sd_send_scsi_cmd()
23694  *		0
23695  *		EINVAL
23696  *		ENXIO
23697  *		ENOMEM
23698  */
23699 
23700 static int
23701 sd_write_label(dev_t dev)
23702 {
23703 	struct sd_lun		*un;
23704 	struct dk_label		*dkl;
23705 	short			sum;
23706 	short			*sp;
23707 	int			i;
23708 	int			rval;
23709 
23710 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23711 	    (un->un_state == SD_STATE_OFFLINE)) {
23712 		return (ENXIO);
23713 	}
23714 	ASSERT(mutex_owned(SD_MUTEX(un)));
23715 	mutex_exit(SD_MUTEX(un));
23716 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23717 	mutex_enter(SD_MUTEX(un));
23718 
23719 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23720 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23721 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23722 	dkl->dkl_apc	= un->un_g.dkg_apc;
23723 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23724 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23725 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23726 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23727 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23728 
23729 #if defined(_SUNOS_VTOC_8)
23730 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23731 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23732 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23733 	for (i = 0; i < NDKMAP; i++) {
23734 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23735 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23736 	}
23737 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23738 #elif defined(_SUNOS_VTOC_16)
23739 	dkl->dkl_skew	= un->un_dkg_skew;
23740 #else
23741 #error "No VTOC format defined."
23742 #endif
23743 
23744 	dkl->dkl_magic			= DKL_MAGIC;
23745 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23746 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23747 
23748 	/* Construct checksum for the new disk label */
23749 	sum = 0;
23750 	sp = (short *)dkl;
23751 	i = sizeof (struct dk_label) / sizeof (short);
23752 	while (i--) {
23753 		sum ^= *sp++;
23754 	}
23755 	dkl->dkl_cksum = sum;
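	/*
	 * Note that the checksum is the XOR of every 16-bit word in
	 * the label; since dkl_cksum was zero while the loop ran,
	 * XOR-ing all words of the finished label (dkl_cksum included)
	 * yields zero, which is how label readers validate it.
	 */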
23756 
23757 	mutex_exit(SD_MUTEX(un));
23758 
23759 	rval = sd_set_vtoc(un, dkl);
23760 exit:
23761 	kmem_free(dkl, sizeof (struct dk_label));
23762 	mutex_enter(SD_MUTEX(un));
23763 	return (rval);
23764 }
23765 
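/*
 *    Function: sd_dkio_set_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to set the device EFI label (DKIOCSETEFI). It also
 *		clears any existing VTOC label and swaps the "h"/"h,raw"
 *		minor nodes for the "wd"/"wd,raw" ones.
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided dk_efi structure specifying
 *			the EFI data to be written.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EINVAL
 *		EFAULT
 *		ENXIO
 *		or the value returned by sd_send_scsi_WRITE()
 */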
23766 static int
23767 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23768 {
23769 	struct sd_lun	*un = NULL;
23770 	dk_efi_t	user_efi;
23771 	int		rval = 0;
23772 	void		*buffer;
23773 
23774 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23775 		return (ENXIO);
23776 
23777 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23778 		return (EFAULT);
23779 
23780 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23781 
23782 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23783 	    (user_efi.dki_length > un->un_max_xfer_size))
23784 		return (EINVAL);
23785 
23786 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23787 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23788 		rval = EFAULT;
23789 	} else {
23790 		/*
23791 		 * let's clear the vtoc labels and clear the softstate
23792 		 * vtoc.
23793 		 */
23794 		mutex_enter(SD_MUTEX(un));
23795 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23796 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23797 			    "sd_dkio_set_efi: CLEAR VTOC\n");
23798 			sd_clear_vtoc(un);
23799 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23800 			mutex_exit(SD_MUTEX(un));
23801 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23802 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23803 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23804 			    S_IFBLK,
23805 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23806 			    un->un_node_type, NULL);
23807 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23808 			    S_IFCHR,
23809 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23810 			    un->un_node_type, NULL);
23811 		} else
23812 			mutex_exit(SD_MUTEX(un));
23813 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23814 		    user_efi.dki_lba, SD_PATH_DIRECT);
23815 		if (rval == 0) {
23816 			mutex_enter(SD_MUTEX(un));
23817 			un->un_f_geometry_is_valid = FALSE;
23818 			mutex_exit(SD_MUTEX(un));
23819 		}
23820 	}
23821 	kmem_free(buffer, user_efi.dki_length);
23822 	return (rval);
23823 }
23824 
23825 /*
23826  *    Function: sd_dkio_get_mboot
23827  *
23828  * Description: This routine is the driver entry point for handling user
23829  *		requests to get the current device mboot (DKIOCGMBOOT)
23830  *
23831  *   Arguments: dev  - the device number
23832  *		arg  - pointer to user provided mboot structure specifying
23833  *			the current mboot.
23834  *		flag - this argument is a pass through to ddi_copyxxx()
23835  *		       directly from the mode argument of ioctl().
23836  *
23837  * Return Code: 0
23838  *		EINVAL
23839  *		EFAULT
23840  *		ENXIO
23841  */
23842 
23843 static int
23844 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23845 {
23846 	struct sd_lun	*un;
23847 	struct mboot	*mboot;
23848 	int		rval;
23849 	size_t		buffer_size;
23850 
23851 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23852 	    (un->un_state == SD_STATE_OFFLINE)) {
23853 		return (ENXIO);
23854 	}
23855 
23856 	if (!un->un_f_mboot_supported || arg == NULL) {
23857 		return (EINVAL);
23858 	}
23859 
23860 	/*
23861 	 * Read the mboot block, located at absolute block 0 on the target.
23862 	 */
23863 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23864 
23865 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23866 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23867 
23868 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23869 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23870 	    SD_PATH_STANDARD)) == 0) {
23871 		if (ddi_copyout(mboot, (void *)arg,
23872 		    sizeof (struct mboot), flag) != 0) {
23873 			rval = EFAULT;
23874 		}
23875 	}
23876 	kmem_free(mboot, buffer_size);
23877 	return (rval);
23878 }
23879 
23880 
23881 /*
23882  *    Function: sd_dkio_set_mboot
23883  *
23884  * Description: This routine is the driver entry point for handling user
23885  *		requests to validate and set the device master boot
23886  *		(DKIOCSMBOOT).
23887  *
23888  *   Arguments: dev  - the device number
23889  *		arg  - pointer to user provided mboot structure used to set the
23890  *			master boot.
23891  *		flag - this argument is a pass through to ddi_copyxxx()
23892  *		       directly from the mode argument of ioctl().
23893  *
23894  * Return Code: 0
23895  *		EINVAL
23896  *		EFAULT
23897  *		ENXIO
23898  */
23899 
23900 static int
23901 sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
23902 {
23903 	struct sd_lun	*un = NULL;
23904 	struct mboot	*mboot = NULL;
23905 	int		rval;
23906 	ushort_t	magic;
23907 
23908 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23909 		return (ENXIO);
23910 	}
23911 
23912 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23913 
23914 	if (!un->un_f_mboot_supported) {
23915 		return (EINVAL);
23916 	}
23917 
23918 	if (arg == NULL) {
23919 		return (EINVAL);
23920 	}
23921 
23922 	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
23923 
23924 	if (ddi_copyin((const void *)arg, mboot,
23925 	    sizeof (struct mboot), flag) != 0) {
23926 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23927 		return (EFAULT);
23928 	}
23929 
23930 	/* Is this really a master boot record? */
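	/* (MBB_MAGIC is the standard 0xAA55 boot-sector signature.) */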
23931 	magic = LE_16(mboot->signature);
23932 	if (magic != MBB_MAGIC) {
23933 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23934 		return (EINVAL);
23935 	}
23936 
23937 	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
23938 	    SD_PATH_STANDARD);
23939 
23940 	mutex_enter(SD_MUTEX(un));
23941 #if defined(__i386) || defined(__amd64)
23942 	if (rval == 0) {
23943 		/*
23944 		 * mboot has been written successfully.
23945 		 * update the fdisk and vtoc tables in memory
23946 		 */
23947 		rval = sd_update_fdisk_and_vtoc(un);
23948 		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
23949 			mutex_exit(SD_MUTEX(un));
23950 			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23951 			return (rval);
23952 		}
23953 	}
23954 
23955 	/*
23956 	 * If the mboot write fails, write the devid anyway; what can it hurt?
23957 	 * Also preserve the device id by writing to the disk acyl for the case
23958 	 * where a devid has been fabricated.
23959 	 */
23960 	if (un->un_f_devid_supported && un->un_f_opt_fab_devid) {
23961 		if (un->un_devid == NULL) {
23962 			sd_register_devid(un, SD_DEVINFO(un),
23963 			    SD_TARGET_IS_UNRESERVED);
23964 		} else {
23965 			/*
23966 			 * The device id for this disk has been
23967 			 * fabricated. Fabricated device id's are
23968 			 * managed by storing them in the last 2
23969 			 * available sectors on the drive. The device
23970 			 * id must be preserved by writing it back out
23971 			 * to this location.
23972 			 */
23973 			if (sd_write_deviceid(un) != 0) {
23974 				ddi_devid_free(un->un_devid);
23975 				un->un_devid = NULL;
23976 			}
23977 		}
23978 	}
23979 
23980 #ifdef __lock_lint
23981 	sd_setup_default_geometry(un);
23982 #endif
23983 
23984 #else
23985 	if (rval == 0) {
23986 		/*
23987 		 * mboot has been written successfully.
23988 		 * set up the default geometry and VTOC
23989 		 */
23990 		if (un->un_blockcount <= DK_MAX_BLOCKS)
23991 			sd_setup_default_geometry(un);
23992 	}
23993 #endif
23994 	mutex_exit(SD_MUTEX(un));
23995 	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23996 	return (rval);
23997 }
23998 
23999 
24000 /*
24001  *    Function: sd_setup_default_geometry
24002  *
24003  * Description: This local utility routine sets the default geometry as part of
24004  *		setting the device mboot.
24005  *
24006  *   Arguments: un - driver soft state (unit) structure
24007  *
24008  * Note: This may be redundant with sd_build_default_label.
24009  */
24010 
24011 static void
24012 sd_setup_default_geometry(struct sd_lun *un)
24013 {
24014 	/* zero out the soft state geometry and partition table. */
24015 	bzero(&un->un_g, sizeof (struct dk_geom));
24016 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
24017 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
24018 	un->un_asciilabel[0] = '\0';
24019 
24020 	/*
24021 	 * For the rpm, we use the minimum for the disk.
24022 	 * For the heads, cylinders and sectors per track:
24023 	 * if the capacity is <= 1GB, use head = 64, sect = 32;
24024 	 * else use head = 255, sect = 63.
24025 	 * Note: the capacity must equal the C*H*S product, so
24026 	 * this causes some truncation of the size due to
24027 	 * round-off errors. For CD-ROMs, this truncation can
24028 	 * have adverse side effects, so we return ncyl and
24029 	 * nhead as 1. (nsect would overflow for most CD-ROMs,
24030 	 * since nsect is of type ushort.)
24031 	 */
24032 	if (ISCD(un)) {
24033 		un->un_g.dkg_ncyl = 1;
24034 		un->un_g.dkg_nhead = 1;
24035 		un->un_g.dkg_nsect = un->un_blockcount;
24036 	} else {
24037 		if (un->un_blockcount <= 0x1000) {
24038 			/* Needed for unlabeled SCSI floppies. */
24039 			un->un_g.dkg_nhead = 2;
24040 			un->un_g.dkg_ncyl = 80;
24041 			un->un_g.dkg_pcyl = 80;
24042 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
24043 		} else if (un->un_blockcount <= 0x200000) {
24044 			un->un_g.dkg_nhead = 64;
24045 			un->un_g.dkg_nsect = 32;
24046 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
24047 		} else {
24048 			un->un_g.dkg_nhead = 255;
24049 			un->un_g.dkg_nsect = 63;
24050 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
24051 		}
24052 		un->un_blockcount = un->un_g.dkg_ncyl *
24053 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
24054 	}
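	/*
	 * For example, a 4,200,000-block disk (illustrative) takes the
	 * 255x63 branch above: ncyl = 4200000 / 16065 = 261, so
	 * un_blockcount is truncated to 261 * 16065 = 4,192,965.
	 */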
24055 	un->un_g.dkg_acyl = 0;
24056 	un->un_g.dkg_bcyl = 0;
24057 	un->un_g.dkg_intrlv = 1;
24058 	un->un_g.dkg_rpm = 200;
24059 	un->un_g.dkg_read_reinstruct = 0;
24060 	un->un_g.dkg_write_reinstruct = 0;
24061 	if (un->un_g.dkg_pcyl == 0) {
24062 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
24063 	}
24064 
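	/*
	 * Set up default slices 'a' (slice 0) and 'c' (slice 2, which
	 * by convention covers the whole disk) to span all blocks.
	 */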
24065 	un->un_map['a'-'a'].dkl_cylno = 0;
24066 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
24067 	un->un_map['c'-'a'].dkl_cylno = 0;
24068 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
24069 	un->un_f_geometry_is_valid = FALSE;
24070 }
24071 
24072 
24073 #if defined(__i386) || defined(__amd64)
24074 /*
24075  *    Function: sd_update_fdisk_and_vtoc
24076  *
24077  * Description: This local utility routine updates the device fdisk and vtoc
24078  *		as part of setting the device mboot.
24079  *
24080  *   Arguments: un - driver soft state (unit) structure
24081  *
24082  * Return Code: 0 for success or errno-type return code.
24083  *
24084  *    Note:x86: This looks like a duplicate of sd_validate_geometry(), but
24085  *		these did exist separately in x86 sd.c!!!
24086  */
24087 
24088 static int
24089 sd_update_fdisk_and_vtoc(struct sd_lun *un)
24090 {
24091 	static char	labelstring[128];
24092 	static char	buf[256];
24093 	char		*label = 0;
24094 	int		count;
24095 	int		label_rc = 0;
24096 	int		gvalid = un->un_f_geometry_is_valid;
24097 	int		fdisk_rval;
24098 	int		lbasize;
24099 	int		capacity;
24100 
24101 	ASSERT(mutex_owned(SD_MUTEX(un)));
24102 
24103 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
24104 		return (EINVAL);
24105 	}
24106 
24107 	if (un->un_f_blockcount_is_valid == FALSE) {
24108 		return (EINVAL);
24109 	}
24110 
24111 #if defined(_SUNOS_VTOC_16)
24112 	/*
24113 	 * Set up the "whole disk" fdisk partition; this should always
24114 	 * exist, regardless of whether the disk contains an fdisk table
24115 	 * or vtoc.
24116 	 */
24117 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
24118 	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
24119 #endif	/* defined(_SUNOS_VTOC_16) */
24120 
24121 	/*
24122 	 * copy the lbasize and capacity so that if they're
24123 	 * reset while we're not holding the SD_MUTEX(un), we will
24124 	 * continue to use valid values after the SD_MUTEX(un) is
24125 	 * reacquired.
24126 	 */
24127 	lbasize  = un->un_tgt_blocksize;
24128 	capacity = un->un_blockcount;
24129 
24130 	/*
24131 	 * refresh the logical and physical geometry caches.
24132 	 * (data from mode sense format/rigid disk geometry pages,
24133 	 * and scsi_ifgetcap("geometry")).
24134 	 */
24135 	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
24136 
24137 	/*
24138 	 * Only DIRECT ACCESS devices will have Sun labels.
24139 	 * CD's supposedly have a Sun label, too
24140 	 */
24141 	if (un->un_f_vtoc_label_supported) {
24142 		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
24143 		    SD_PATH_DIRECT);
24144 		if (fdisk_rval == SD_CMD_FAILURE) {
24145 			ASSERT(mutex_owned(SD_MUTEX(un)));
24146 			return (EIO);
24147 		}
24148 
24149 		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
24150 			ASSERT(mutex_owned(SD_MUTEX(un)));
24151 			return (EACCES);
24152 		}
24153 
24154 		if (un->un_solaris_size <= DK_LABEL_LOC) {
24155 			/*
24156 			 * Found fdisk table but no Solaris partition entry,
24157 			 * so don't call sd_uselabel() and don't create
24158 			 * a default label.
24159 			 */
24160 			label_rc = 0;
24161 			un->un_f_geometry_is_valid = TRUE;
24162 			goto no_solaris_partition;
24163 		}
24164 
24165 #if defined(_SUNOS_VTOC_8)
24166 		label = (char *)un->un_asciilabel;
24167 #elif defined(_SUNOS_VTOC_16)
24168 		label = (char *)un->un_vtoc.v_asciilabel;
24169 #else
24170 #error "No VTOC format defined."
24171 #endif
24172 	} else if (capacity < 0) {
24173 		ASSERT(mutex_owned(SD_MUTEX(un)));
24174 		return (EINVAL);
24175 	}
24176 
24177 	/*
24178 	 * For removable media, we reach here if we have found a
24179 	 * SOLARIS PARTITION.
24180 	 * If un_f_geometry_is_valid is FALSE, it indicates that the SOLARIS
24181 	 * PARTITION has changed from the previous one, hence we will set up
24182 	 * a default VTOC in this case.
24183 	 */
24184 	if (un->un_f_geometry_is_valid == FALSE) {
24185 		sd_build_default_label(un);
24186 		label_rc = 0;
24187 	}
24188 
24189 no_solaris_partition:
24190 	if ((!un->un_f_has_removable_media ||
24191 	    (un->un_f_has_removable_media &&
24192 	    un->un_mediastate == DKIO_EJECTED)) &&
24193 	    (un->un_state == SD_STATE_NORMAL && !gvalid)) {
24194 		/*
24195 		 * Print out a message indicating who and what we are.
24196 		 * We do this only when we happen to really validate the
24197 		 * geometry. We may call sd_validate_geometry() at other
24198 		 * times, e.g. from ioctl()'s like Get VTOC, in which case
24199 		 * we don't want to print the label.
24200 		 * If the geometry is valid, print the label string;
24201 		 * else print vendor and product info, if available.
24202 		 */
24203 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
24204 			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
24205 		} else {
24206 			mutex_enter(&sd_label_mutex);
24207 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
24208 			    labelstring);
24209 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
24210 			    &labelstring[64]);
24211 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
24212 			    labelstring, &labelstring[64]);
24213 			if (un->un_f_blockcount_is_valid == TRUE) {
24214 				(void) sprintf(&buf[strlen(buf)],
24215 				    ", %" PRIu64 " %u byte blocks\n",
24216 				    un->un_blockcount,
24217 				    un->un_tgt_blocksize);
24218 			} else {
24219 				(void) sprintf(&buf[strlen(buf)],
24220 				    ", (unknown capacity)\n");
24221 			}
24222 			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
24223 			mutex_exit(&sd_label_mutex);
24224 		}
24225 	}
24226 
24227 #if defined(_SUNOS_VTOC_16)
24228 	/*
24229 	 * If we have valid geometry, set up the remaining fdisk partitions.
24230 	 * Note that dkl_cylno is not used for the fdisk map entries, so
24231 	 * we set it to an entirely bogus value.
24232 	 */
24233 	for (count = 0; count < FD_NUMPART; count++) {
24234 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
24235 		un->un_map[FDISK_P1 + count].dkl_nblk =
24236 		    un->un_fmap[count].fmap_nblk;
24237 		un->un_offset[FDISK_P1 + count] =
24238 		    un->un_fmap[count].fmap_start;
24239 	}
24240 #endif
24241 
24242 	for (count = 0; count < NDKMAP; count++) {
24243 #if defined(_SUNOS_VTOC_8)
24244 		struct dk_map *lp  = &un->un_map[count];
24245 		un->un_offset[count] =
24246 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
24247 #elif defined(_SUNOS_VTOC_16)
24248 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
24249 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
24250 #else
24251 #error "No VTOC format defined."
24252 #endif
24253 	}
24254 
24255 	ASSERT(mutex_owned(SD_MUTEX(un)));
24256 	return (label_rc);
24257 }
24258 #endif
24259 
24260 
24261 /*
24262  *    Function: sd_check_media
24263  *
24264  * Description: This utility routine implements the functionality for the
24265  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24266  *		driver state changes from that specified by the user
24267  *		(inserted or ejected). For example, if the user specifies
24268  *		DKIO_EJECTED and the current media state is inserted this
24269  *		routine will immediately return DKIO_INSERTED. However, if the
24270  *		current media state is not inserted the user thread will be
24271  *		blocked until the drive state changes. If DKIO_NONE is specified
24272  *		the user thread will block until a drive state change occurs.
24273  *
24274  *   Arguments: dev  - the device number
24275  *		state  - user pointer to a dkio_state, updated with the current
24276  *			drive state at return.
24277  *
24278  * Return Code: ENXIO
24279  *		EIO
24280  *		EAGAIN
24281  *		EINTR
24282  */
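
/*
 * A minimal (hypothetical) sketch of how a userland consumer might
 * drive DKIOCSTATE, assuming fd is an open descriptor for the raw
 * device:
 *
 *	enum dkio_state state = DKIO_NONE;
 *
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		if (state == DKIO_INSERTED) {
 *			(handle the newly inserted media)
 *		}
 *	}
 *
 * Each call blocks until the media state differs from the state
 * passed in, then copies the new state back out.
 */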
24283 
24284 static int
24285 sd_check_media(dev_t dev, enum dkio_state state)
24286 {
24287 	struct sd_lun		*un = NULL;
24288 	enum dkio_state		prev_state;
24289 	opaque_t		token = NULL;
24290 	int			rval = 0;
24291 
24292 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24293 		return (ENXIO);
24294 	}
24295 
24296 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24297 
24298 	mutex_enter(SD_MUTEX(un));
24299 
24300 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24301 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24302 
24303 	prev_state = un->un_mediastate;
24304 
24305 	/* is there anything to do? */
24306 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24307 		/*
24308 		 * submit the request to the scsi_watch service;
24309 		 * scsi_media_watch_cb() does the real work
24310 		 */
24311 		mutex_exit(SD_MUTEX(un));
24312 
24313 		/*
24314 		 * This change handles the case where a scsi watch request is
24315 		 * added to a device that is powered down. To accomplish this
24316 		 * we power up the device before adding the scsi watch request,
24317 		 * since the scsi watch sends a TUR directly to the device
24318 		 * which the device cannot handle if it is powered down.
24319 		 */
24320 		if (sd_pm_entry(un) != DDI_SUCCESS) {
24321 			mutex_enter(SD_MUTEX(un));
24322 			goto done;
24323 		}
24324 
24325 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
24326 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24327 		    (caddr_t)dev);
24328 
24329 		sd_pm_exit(un);
24330 
24331 		mutex_enter(SD_MUTEX(un));
24332 		if (token == NULL) {
24333 			rval = EAGAIN;
24334 			goto done;
24335 		}
24336 
24337 		/*
24338 		 * This is a special case IOCTL that doesn't return
24339 		 * until the media state changes. Routine sdpower
24340 		 * knows about and handles this, so don't count it
24341 		 * as an active cmd in the driver, which would
24342 		 * keep the device marked busy to the pm framework.
24343 		 * If the count isn't decremented, the device can't
24344 		 * be powered down.
24345 		 */
24346 		un->un_ncmds_in_driver--;
24347 		ASSERT(un->un_ncmds_in_driver >= 0);
24348 
24349 		/*
24350 		 * if a prior request had been made, this will be the same
24351 		 * token, as scsi_watch was designed that way.
24352 		 */
24353 		un->un_swr_token = token;
24354 		un->un_specified_mediastate = state;
24355 
24356 		/*
24357 		 * now wait for media change
24358 		 * we will not be signalled until mediastate != state, but it is
24359 		 * still better to re-test the condition, since there is a
24360 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
24361 		 */
24362 		SD_TRACE(SD_LOG_COMMON, un,
24363 		    "sd_check_media: waiting for media state change\n");
24364 		while (un->un_mediastate == state) {
24365 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24366 				SD_TRACE(SD_LOG_COMMON, un,
24367 				    "sd_check_media: waiting for media state "
24368 				    "was interrupted\n");
24369 				un->un_ncmds_in_driver++;
24370 				rval = EINTR;
24371 				goto done;
24372 			}
24373 			SD_TRACE(SD_LOG_COMMON, un,
24374 			    "sd_check_media: received signal, state=%x\n",
24375 			    un->un_mediastate);
24376 		}
24377 		/*
24378 		 * Inc the counter to indicate the device once again
24379 		 * has an active outstanding cmd.
24380 		 */
24381 		un->un_ncmds_in_driver++;
24382 	}
24383 
24384 	/* invalidate geometry */
24385 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24386 		sr_ejected(un);
24387 	}
24388 
24389 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24390 		uint64_t	capacity;
24391 		uint_t		lbasize;
24392 
24393 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24394 		mutex_exit(SD_MUTEX(un));
24395 		/*
24396 		 * Since the following routines use SD_PATH_DIRECT, we must
24397 		 * call PM directly before the upcoming disk accesses. This
24398 		 * may cause the disk to be powered up and spun up.
24399 		 */
24400 
24401 		if (sd_pm_entry(un) == DDI_SUCCESS) {
24402 			rval = sd_send_scsi_READ_CAPACITY(un,
24403 			    &capacity,
24404 			    &lbasize, SD_PATH_DIRECT);
24405 			if (rval != 0) {
24406 				sd_pm_exit(un);
24407 				mutex_enter(SD_MUTEX(un));
24408 				goto done;
24409 			}
24410 		} else {
24411 			rval = EIO;
24412 			mutex_enter(SD_MUTEX(un));
24413 			goto done;
24414 		}
24415 		mutex_enter(SD_MUTEX(un));
24416 
24417 		sd_update_block_info(un, lbasize, capacity);
24418 
24419 		un->un_f_geometry_is_valid	= FALSE;
24420 		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24421 
24422 		mutex_exit(SD_MUTEX(un));
24423 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24424 		    SD_PATH_DIRECT);
24425 		sd_pm_exit(un);
24426 
24427 		mutex_enter(SD_MUTEX(un));
24428 	}
24429 done:
24430 	un->un_f_watcht_stopped = FALSE;
24431 	if (un->un_swr_token) {
24432 		/*
24433 		 * Use of this local token and the mutex ensures that we avoid
24434 		 * some race conditions associated with terminating the
24435 		 * scsi watch.
24436 		 */
24437 		token = un->un_swr_token;
24438 		un->un_swr_token = (opaque_t)NULL;
24439 		mutex_exit(SD_MUTEX(un));
24440 		(void) scsi_watch_request_terminate(token,
24441 		    SCSI_WATCH_TERMINATE_WAIT);
24442 		mutex_enter(SD_MUTEX(un));
24443 	}
24444 
24445 	/*
24446 	 * Update the capacity kstat value if there was no media previously
24447 	 * (capacity kstat is 0) and media has now been inserted
24448 	 * (un_f_blockcount_is_valid == TRUE).
24449 	 */
24450 	if (un->un_errstats) {
24451 		struct sd_errstats	*stp = NULL;
24452 
24453 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24454 		if ((stp->sd_capacity.value.ui64 == 0) &&
24455 		    (un->un_f_blockcount_is_valid == TRUE)) {
24456 			stp->sd_capacity.value.ui64 =
24457 			    (uint64_t)((uint64_t)un->un_blockcount *
24458 			    un->un_sys_blocksize);
24459 		}
24460 	}
24461 	mutex_exit(SD_MUTEX(un));
24462 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24463 	return (rval);
24464 }
24465 
24466 
24467 /*
24468  *    Function: sd_delayed_cv_broadcast
24469  *
24470  * Description: Delayed cv_broadcast to allow the target to recover from media
24471  *		insertion.
24472  *
24473  *   Arguments: arg - driver soft state (unit) structure
24474  */
24475 
24476 static void
24477 sd_delayed_cv_broadcast(void *arg)
24478 {
24479 	struct sd_lun *un = arg;
24480 
24481 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24482 
24483 	mutex_enter(SD_MUTEX(un));
24484 	un->un_dcvb_timeid = NULL;
24485 	cv_broadcast(&un->un_state_cv);
24486 	mutex_exit(SD_MUTEX(un));
24487 }
24488 
24489 
24490 /*
24491  *    Function: sd_media_watch_cb
24492  *
24493  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24494  *		routine processes the TUR sense data and updates the driver
24495  *		state if a transition has occurred. The user thread
24496  *		(sd_check_media) is then signalled.
24497  *
24498  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24499  *			among multiple watches that share this callback function
24500  *		resultp - scsi watch facility result packet containing scsi
24501  *			  packet, status byte and sense data
24502  *
24503  * Return Code: 0 for success, -1 for failure
24504  */
24505 
24506 static int
24507 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24508 {
24509 	struct sd_lun			*un;
24510 	struct scsi_status		*statusp = resultp->statusp;
24511 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
24512 	enum dkio_state			state = DKIO_NONE;
24513 	dev_t				dev = (dev_t)arg;
24514 	uchar_t				actual_sense_length;
24515 	uint8_t				skey, asc, ascq;
24516 
24517 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24518 		return (-1);
24519 	}
24520 	actual_sense_length = resultp->actual_sense_length;
24521 
24522 	mutex_enter(SD_MUTEX(un));
24523 	SD_TRACE(SD_LOG_COMMON, un,
24524 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24525 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24526 
24527 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24528 		un->un_mediastate = DKIO_DEV_GONE;
24529 		cv_broadcast(&un->un_state_cv);
24530 		mutex_exit(SD_MUTEX(un));
24531 
24532 		return (0);
24533 	}
24534 
24535 	/*
24536 	 * If there was a check condition then sensep points to valid sense data
24537 	 * If status was not a check condition but a reservation or busy status
24538 	 * then the new state is DKIO_NONE
24539 	 */
24540 	if (sensep != NULL) {
24541 		skey = scsi_sense_key(sensep);
24542 		asc = scsi_sense_asc(sensep);
24543 		ascq = scsi_sense_ascq(sensep);
24544 
24545 		SD_INFO(SD_LOG_COMMON, un,
24546 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24547 		    skey, asc, ascq);
24548 		/* This routine only uses up to 13 bytes of sense data. */
24549 		if (actual_sense_length >= 13) {
24550 			if (skey == KEY_UNIT_ATTENTION) {
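				/* ASC 0x28: medium may have changed */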
24551 				if (asc == 0x28) {
24552 					state = DKIO_INSERTED;
24553 				}
24554 			} else {
24555 				/*
24556 				 * Sense data of 02/04/02 means that the
24557 				 * host should send a start command.
24558 				 * Explicitly leave the media state as is
24559 				 * (inserted), as the media is inserted
24560 				 * and the host has stopped the device
24561 				 * for PM reasons. The next true
24562 				 * read/write to this media will bring
24563 				 * the device to the right state for
24564 				 * media access.
24565 				 */
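				/* ASC 0x3a: medium not present */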
24566 				if ((skey == KEY_NOT_READY) &&
24567 				    (asc == 0x3a)) {
24568 					state = DKIO_EJECTED;
24569 				}
24570 
24571 				/*
24572 				 * If the drive is busy with an operation
24573 				 * or long write, keep the media in an
24574 				 * inserted state.
24575 				 */
24576 
24577 				if ((skey == KEY_NOT_READY) &&
24578 				    (asc == 0x04) &&
24579 				    ((ascq == 0x02) ||
24580 				    (ascq == 0x07) ||
24581 				    (ascq == 0x08))) {
24582 					state = DKIO_INSERTED;
24583 				}
24584 			}
24585 		}
24586 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24587 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24588 		state = DKIO_INSERTED;
24589 	}
24590 
24591 	SD_TRACE(SD_LOG_COMMON, un,
24592 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24593 	    state, un->un_specified_mediastate);
24594 
24595 	/*
24596 	 * now signal the waiting thread if this is *not* the specified state;
24597 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24598 	 * to recover
24599 	 */
24600 	if (state != un->un_specified_mediastate) {
24601 		un->un_mediastate = state;
24602 		if (state == DKIO_INSERTED) {
24603 			/*
24604 			 * delay the signal to give the drive a chance
24605 			 * to do what it apparently needs to do
24606 			 */
24607 			SD_TRACE(SD_LOG_COMMON, un,
24608 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24609 			if (un->un_dcvb_timeid == NULL) {
24610 				un->un_dcvb_timeid =
24611 				    timeout(sd_delayed_cv_broadcast, un,
24612 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24613 			}
24614 		} else {
24615 			SD_TRACE(SD_LOG_COMMON, un,
24616 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24617 			cv_broadcast(&un->un_state_cv);
24618 		}
24619 	}
24620 	mutex_exit(SD_MUTEX(un));
24621 	return (0);
24622 }
24623 
24624 
24625 /*
24626  *    Function: sd_dkio_get_temp
24627  *
24628  * Description: This routine is the driver entry point for handling ioctl
24629  *		requests to get the disk temperature.
24630  *
24631  *   Arguments: dev  - the device number
24632  *		arg  - pointer to user provided dk_temperature structure.
24633  *		flag - this argument is a pass through to ddi_copyxxx()
24634  *		       directly from the mode argument of ioctl().
24635  *
24636  * Return Code: 0
24637  *		EFAULT
24638  *		ENXIO
24639  *		EAGAIN
24640  */
24641 
24642 static int
24643 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24644 {
24645 	struct sd_lun		*un = NULL;
24646 	struct dk_temperature	*dktemp = NULL;
24647 	uchar_t			*temperature_page;
24648 	int			rval = 0;
24649 	int			path_flag = SD_PATH_STANDARD;
24650 
24651 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24652 		return (ENXIO);
24653 	}
24654 
24655 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24656 
24657 	/* copyin the disk temp argument to get the user flags */
24658 	if (ddi_copyin((void *)arg, dktemp,
24659 	    sizeof (struct dk_temperature), flag) != 0) {
24660 		rval = EFAULT;
24661 		goto done;
24662 	}
24663 
24664 	/* Initialize the temperature to invalid. */
24665 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24666 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24667 
24668 	/*
24669 	 * Note: Investigate removing the "bypass pm" semantic.
24670 	 * Can we just bypass PM always?
24671 	 */
24672 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24673 		path_flag = SD_PATH_DIRECT;
24674 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24675 		mutex_enter(&un->un_pm_mutex);
24676 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24677 			/*
24678 			 * If DKT_BYPASS_PM is set, and the drive happens to be
24679 			 * in low power mode, we cannot wake it up; we need to
24680 			 * return EAGAIN.
24681 			 */
24682 			mutex_exit(&un->un_pm_mutex);
24683 			rval = EAGAIN;
24684 			goto done;
24685 		} else {
24686 			/*
24687 			 * Indicate to PM the device is busy. This is required
24688 			 * to avoid a race - i.e. the ioctl is issuing a
24689 			 * command and the pm framework brings down the device
24690 			 * to low power mode (possible power cut-off on some
24691 			 * platforms).
24692 			 */
24693 			mutex_exit(&un->un_pm_mutex);
24694 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24695 				rval = EAGAIN;
24696 				goto done;
24697 			}
24698 		}
24699 	}
24700 
24701 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24702 
24703 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
24704 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
24705 		goto done2;
24706 	}
24707 
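	/*
	 * Each log parameter is a 4-byte header (a 2-byte parameter
	 * code, a control byte and a length byte) followed by the
	 * parameter data. After the 4-byte page header, parameter
	 * 0x0000 (current temperature) occupies bytes 4-9 and
	 * parameter 0x0001 (reference temperature) bytes 10-15, with
	 * the temperature value in the last byte of each.
	 */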
24708 	/*
24709 	 * For the current temperature verify that the parameter length is 0x02
24710 	 * and the parameter code is 0x00
24711 	 */
24712 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24713 	    (temperature_page[5] == 0x00)) {
24714 		if (temperature_page[9] == 0xFF) {
24715 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24716 		} else {
24717 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24718 		}
24719 	}
24720 
24721 	/*
24722 	 * For the reference temperature verify that the parameter
24723 	 * length is 0x02 and the parameter code is 0x01
24724 	 */
24725 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24726 	    (temperature_page[11] == 0x01)) {
24727 		if (temperature_page[15] == 0xFF) {
24728 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24729 		} else {
24730 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24731 		}
24732 	}
24733 
24734 	/* Do the copyout regardless of the temperature commands status. */
24735 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24736 	    flag) != 0) {
24737 		rval = EFAULT;
24738 	}
24739 
24740 done2:
24741 	if (path_flag == SD_PATH_DIRECT) {
24742 		sd_pm_exit(un);
24743 	}
24744 
24745 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24746 done:
24747 	if (dktemp != NULL) {
24748 		kmem_free(dktemp, sizeof (struct dk_temperature));
24749 	}
24750 
24751 	return (rval);
24752 }
24753 
24754 
24755 /*
24756  *    Function: sd_log_page_supported
24757  *
24758  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24759  *		supported log pages.
24760  *
24761  *   Arguments: un - driver soft state (unit) structure
24762  *		log_page - the code of the log page to look for
24763  *
24764  * Return Code: -1 - on error (log sense is optional and may not be supported).
24765  *		0  - log page not found.
24766  *		1  - log page found.
24767  */
24768 
24769 static int
24770 sd_log_page_supported(struct sd_lun *un, int log_page)
24771 {
24772 	uchar_t *log_page_data;
24773 	int	i;
24774 	int	match = 0;
24775 	int	log_size;
24776 
24777 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24778 
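	/*
	 * Fetch log page 0x00, the "supported log pages" page: its
	 * 4-byte header carries the page length in byte 3, and the
	 * supported page codes follow, one per byte.
	 */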
24779 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24780 	    SD_PATH_DIRECT) != 0) {
24781 		SD_ERROR(SD_LOG_COMMON, un,
24782 		    "sd_log_page_supported: failed log page retrieval\n");
24783 		kmem_free(log_page_data, 0xFF);
24784 		return (-1);
24785 	}
24786 	log_size = log_page_data[3];
24787 
24788 	/*
24789 	 * The list of supported log pages starts at the fourth byte. Check
24790 	 * until we run out of log pages or a match is found.
24791 	 */
24792 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24793 		if (log_page_data[i] == log_page) {
24794 			match++;
24795 		}
24796 	}
24797 	kmem_free(log_page_data, 0xFF);
24798 	return (match);
24799 }
24800 
24801 
24802 /*
24803  *    Function: sd_mhdioc_failfast
24804  *
24805  * Description: This routine is the driver entry point for handling ioctl
24806  *		requests to enable/disable the multihost failfast option.
24807  *		(MHIOCENFAILFAST)
24808  *
24809  *   Arguments: dev	- the device number
24810  *		arg	- user specified probing interval.
24811  *		flag	- this argument is a pass through to ddi_copyxxx()
24812  *			  directly from the mode argument of ioctl().
24813  *
24814  * Return Code: 0
24815  *		EFAULT
24816  *		ENXIO
24817  */
24818 
24819 static int
24820 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24821 {
24822 	struct sd_lun	*un = NULL;
24823 	int		mh_time;
24824 	int		rval = 0;
24825 
24826 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24827 		return (ENXIO);
24828 	}
24829 
24830 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24831 		return (EFAULT);
24832 
24833 	if (mh_time) {
24834 		mutex_enter(SD_MUTEX(un));
24835 		un->un_resvd_status |= SD_FAILFAST;
24836 		mutex_exit(SD_MUTEX(un));
24837 		/*
24838 		 * If mh_time is INT_MAX, then this ioctl is being used for
24839 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24840 		 */
24841 		if (mh_time != INT_MAX) {
24842 			rval = sd_check_mhd(dev, mh_time);
24843 		}
24844 	} else {
24845 		(void) sd_check_mhd(dev, 0);
24846 		mutex_enter(SD_MUTEX(un));
24847 		un->un_resvd_status &= ~SD_FAILFAST;
24848 		mutex_exit(SD_MUTEX(un));
24849 	}
24850 	return (rval);
24851 }
24852 
24853 
24854 /*
24855  *    Function: sd_mhdioc_takeown
24856  *
24857  * Description: This routine is the driver entry point for handling ioctl
24858  *		requests to forcefully acquire exclusive access rights to the
24859  *		multihost disk (MHIOCTKOWN).
24860  *
24861  *   Arguments: dev	- the device number
24862  *		arg	- user provided structure specifying the delay
24863  *			  parameters in milliseconds
24864  *		flag	- this argument is a pass through to ddi_copyxxx()
24865  *			  directly from the mode argument of ioctl().
24866  *
24867  * Return Code: 0
24868  *		EFAULT
24869  *		ENXIO
24870  */
24871 
24872 static int
24873 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24874 {
24875 	struct sd_lun		*un = NULL;
24876 	struct mhioctkown	*tkown = NULL;
24877 	int			rval = 0;
24878 
24879 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24880 		return (ENXIO);
24881 	}
24882 
24883 	if (arg != NULL) {
24884 		tkown = (struct mhioctkown *)
24885 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24886 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24887 		if (rval != 0) {
24888 			rval = EFAULT;
24889 			goto error;
24890 		}
24891 	}
24892 
24893 	rval = sd_take_ownership(dev, tkown);
24894 	mutex_enter(SD_MUTEX(un));
24895 	if (rval == 0) {
24896 		un->un_resvd_status |= SD_RESERVE;
24897 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24898 			sd_reinstate_resv_delay =
24899 			    tkown->reinstate_resv_delay * 1000;
24900 		} else {
24901 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24902 		}
24903 		/*
24904 		 * Give the scsi_watch routine interval set by
24905 		 * the MHIOCENFAILFAST ioctl precedence here.
24906 		 */
24907 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24908 			mutex_exit(SD_MUTEX(un));
24909 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
24910 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24911 			    "sd_mhdioc_takeown : %d\n",
24912 			    sd_reinstate_resv_delay);
24913 		} else {
24914 			mutex_exit(SD_MUTEX(un));
24915 		}
24916 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24917 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24918 	} else {
24919 		un->un_resvd_status &= ~SD_RESERVE;
24920 		mutex_exit(SD_MUTEX(un));
24921 	}
24922 
24923 error:
24924 	if (tkown != NULL) {
24925 		kmem_free(tkown, sizeof (struct mhioctkown));
24926 	}
24927 	return (rval);
24928 }
24929 
24930 
24931 /*
24932  *    Function: sd_mhdioc_release
24933  *
24934  * Description: This routine is the driver entry point for handling ioctl
24935  *		requests to release exclusive access rights to the multihost
24936  *		disk (MHIOCRELEASE).
24937  *
24938  *   Arguments: dev	- the device number
24939  *
24940  * Return Code: 0
24941  *		ENXIO
24942  */
24943 
24944 static int
24945 sd_mhdioc_release(dev_t dev)
24946 {
24947 	struct sd_lun		*un = NULL;
24948 	timeout_id_t		resvd_timeid_save;
24949 	int			resvd_status_save;
24950 	int			rval = 0;
24951 
24952 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24953 		return (ENXIO);
24954 	}
24955 
24956 	mutex_enter(SD_MUTEX(un));
24957 	resvd_status_save = un->un_resvd_status;
24958 	un->un_resvd_status &=
24959 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24960 	if (un->un_resvd_timeid) {
24961 		resvd_timeid_save = un->un_resvd_timeid;
24962 		un->un_resvd_timeid = NULL;
24963 		mutex_exit(SD_MUTEX(un));
24964 		(void) untimeout(resvd_timeid_save);
24965 	} else {
24966 		mutex_exit(SD_MUTEX(un));
24967 	}
24968 
24969 	/*
24970 	 * destroy any pending timeout thread that may be attempting to
24971 	 * reinstate reservation on this device.
24972 	 */
24973 	sd_rmv_resv_reclaim_req(dev);
24974 
24975 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24976 		mutex_enter(SD_MUTEX(un));
24977 		if ((un->un_mhd_token) &&
24978 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24979 			mutex_exit(SD_MUTEX(un));
24980 			(void) sd_check_mhd(dev, 0);
24981 		} else {
24982 			mutex_exit(SD_MUTEX(un));
24983 		}
24984 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24985 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24986 	} else {
24987 		/*
24988 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
24989 		 */
24990 		mutex_enter(SD_MUTEX(un));
24991 		un->un_resvd_status = resvd_status_save;
24992 		mutex_exit(SD_MUTEX(un));
24993 	}
24994 	return (rval);
24995 }
24996 
24997 
24998 /*
24999  *    Function: sd_mhdioc_register_devid
25000  *
25001  * Description: This routine is the driver entry point for handling ioctl
25002  *		requests to register the device id (MHIOCREREGISTERDEVID).
25003  *
25004  *		Note: The implementation for this ioctl has been updated to
25005  *		be consistent with the original PSARC case (1999/357)
25006  *		(4375899, 4241671, 4220005)
25007  *
25008  *   Arguments: dev	- the device number
25009  *
25010  * Return Code: 0
25011  *		ENXIO
25012  */
25013 
25014 static int
25015 sd_mhdioc_register_devid(dev_t dev)
25016 {
25017 	struct sd_lun	*un = NULL;
25018 	int		rval = 0;
25019 
25020 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25021 		return (ENXIO);
25022 	}
25023 
25024 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25025 
25026 	mutex_enter(SD_MUTEX(un));
25027 
25028 	/* If a devid already exists, de-register it */
25029 	if (un->un_devid != NULL) {
25030 		ddi_devid_unregister(SD_DEVINFO(un));
25031 		/*
25032 		 * After unregistering the devid, we need to free the devid memory
25033 		 */
25034 		ddi_devid_free(un->un_devid);
25035 		un->un_devid = NULL;
25036 	}
25037 
25038 	/* Check for reservation conflict */
25039 	mutex_exit(SD_MUTEX(un));
25040 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
25041 	mutex_enter(SD_MUTEX(un));
25042 
25043 	switch (rval) {
25044 	case 0:
25045 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
25046 		break;
25047 	case EACCES:
25048 		break;
25049 	default:
25050 		rval = EIO;
25051 	}
25052 
25053 	mutex_exit(SD_MUTEX(un));
25054 	return (rval);
25055 }
25056 
25057 
25058 /*
25059  *    Function: sd_mhdioc_inkeys
25060  *
25061  * Description: This routine is the driver entry point for handling ioctl
25062  *		requests to issue the SCSI-3 Persistent In Read Keys command
25063  *		to the device (MHIOCGRP_INKEYS).
25064  *
25065  *   Arguments: dev	- the device number
25066  *		arg	- user provided in_keys structure
25067  *		flag	- this argument is a pass through to ddi_copyxxx()
25068  *			  directly from the mode argument of ioctl().
25069  *
25070  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
25071  *		ENXIO
25072  *		EFAULT
25073  */
25074 
25075 static int
25076 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
25077 {
25078 	struct sd_lun		*un;
25079 	mhioc_inkeys_t		inkeys;
25080 	int			rval = 0;
25081 
25082 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25083 		return (ENXIO);
25084 	}
25085 
25086 #ifdef _MULTI_DATAMODEL
25087 	switch (ddi_model_convert_from(flag & FMODELS)) {
25088 	case DDI_MODEL_ILP32: {
25089 		struct mhioc_inkeys32	inkeys32;
25090 
25091 		if (ddi_copyin(arg, &inkeys32,
25092 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
25093 			return (EFAULT);
25094 		}
25095 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
25096 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25097 		    &inkeys, flag)) != 0) {
25098 			return (rval);
25099 		}
25100 		inkeys32.generation = inkeys.generation;
25101 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
25102 		    flag) != 0) {
25103 			return (EFAULT);
25104 		}
25105 		break;
25106 	}
25107 	case DDI_MODEL_NONE:
25108 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
25109 		    flag) != 0) {
25110 			return (EFAULT);
25111 		}
25112 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25113 		    &inkeys, flag)) != 0) {
25114 			return (rval);
25115 		}
25116 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
25117 		    flag) != 0) {
25118 			return (EFAULT);
25119 		}
25120 		break;
25121 	}
25122 
25123 #else /* ! _MULTI_DATAMODEL */
25124 
25125 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
25126 		return (EFAULT);
25127 	}
25128 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
25129 	if (rval != 0) {
25130 		return (rval);
25131 	}
25132 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
25133 		return (EFAULT);
25134 	}
25135 
25136 #endif /* _MULTI_DATAMODEL */
25137 
25138 	return (rval);
25139 }
25140 
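/*
 * Illustrative sketch only, not part of the driver: the two-call
 * pattern a userland caller typically uses with MHIOCGRP_INKEYS.  The
 * first call, made with listsize 0, just reports listlen (the number
 * of registered keys); the caller then allocates that many entries
 * and repeats the ioctl.  fd and the error handling are assumptions
 * for the example; see mhd(7I) for the authoritative interface.
 *
 *	mhioc_inkeys_t k;
 *	mhioc_key_list_t kl;
 *
 *	bzero(&kl, sizeof (kl));		// listsize == 0: count only
 *	k.li = &kl;
 *	if (ioctl(fd, MHIOCGRP_INKEYS, &k) != 0)
 *		return (errno);
 *	kl.listsize = kl.listlen;
 *	kl.list = calloc(kl.listlen, sizeof (mhioc_resv_key_t));
 *	if (ioctl(fd, MHIOCGRP_INKEYS, &k) == 0)
 *		...				// kl.list now holds the keys
 */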
25141 
25142 /*
25143  *    Function: sd_mhdioc_inresv
25144  *
25145  * Description: This routine is the driver entry point for handling ioctl
25146  *		requests to issue the SCSI-3 Persistent Reserve In (Read
25147  *		Reservations) command to the device (MHIOCGRP_INRESV).
25148  *
25149  *   Arguments: dev	- the device number
25150  *		arg	- user provided in_resv structure
25151  *		flag	- this argument is a pass through to ddi_copyxxx()
25152  *			  directly from the mode argument of ioctl().
25153  *
25154  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25155  *		ENXIO
25156  *		EFAULT
25157  */
25158 
25159 static int
25160 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25161 {
25162 	struct sd_lun		*un;
25163 	mhioc_inresvs_t		inresvs;
25164 	int			rval = 0;
25165 
25166 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25167 		return (ENXIO);
25168 	}
25169 
25170 #ifdef _MULTI_DATAMODEL
25171 
25172 	switch (ddi_model_convert_from(flag & FMODELS)) {
25173 	case DDI_MODEL_ILP32: {
25174 		struct mhioc_inresvs32	inresvs32;
25175 
25176 		if (ddi_copyin(arg, &inresvs32,
25177 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25178 			return (EFAULT);
25179 		}
25180 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25181 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25182 		    &inresvs, flag)) != 0) {
25183 			return (rval);
25184 		}
25185 		inresvs32.generation = inresvs.generation;
25186 		if (ddi_copyout(&inresvs32, arg,
25187 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25188 			return (EFAULT);
25189 		}
25190 		break;
25191 	}
25192 	case DDI_MODEL_NONE:
25193 		if (ddi_copyin(arg, &inresvs,
25194 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25195 			return (EFAULT);
25196 		}
25197 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25198 		    &inresvs, flag)) != 0) {
25199 			return (rval);
25200 		}
25201 		if (ddi_copyout(&inresvs, arg,
25202 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25203 			return (EFAULT);
25204 		}
25205 		break;
25206 	}
25207 
25208 #else /* ! _MULTI_DATAMODEL */
25209 
25210 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25211 		return (EFAULT);
25212 	}
25213 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25214 	if (rval != 0) {
25215 		return (rval);
25216 	}
25217 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag) != 0) {
25218 		return (EFAULT);
25219 	}
25220 
25221 #endif /* _MULTI_DATAMODEL */
25222 
25223 	return (rval);
25224 }
25225 
25226 
25227 /*
25228  * The following routines support the clustering functionality described below
25229  * and implement lost reservation reclaim functionality.
25230  *
25231  * Clustering
25232  * ----------
25233  * The clustering code uses two different, independent forms of SCSI
25234  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25235  * Persistent Group Reservations. For any particular disk, it will use either
25236  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25237  *
25238  * SCSI-2
25239  * The cluster software takes ownership of a multi-hosted disk by issuing the
25240  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
25241  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl: just
25242  * after taking ownership of the disk with the MHIOCTKOWN ioctl, a cluster
25243  * issues the MHIOCENFAILFAST ioctl, which "enables failfast" in the driver. The
25244  * meaning of failfast is that if the driver (on this host) ever encounters the
25245  * scsi error return code RESERVATION_CONFLICT from the device, it should
25246  * immediately panic the host. The motivation for this ioctl is that if this
25247  * host does encounter reservation conflict, the underlying cause is that some
25248  * other host of the cluster has decided that this host is no longer in the
25249  * cluster and has seized control of the disks for itself. Since this host is no
25250  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25251  * does two things:
25252  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25253  *      error to panic the host
25254  *      (b) it sets up a periodic timer to test whether this host still has
25255  *      "access" (in that no other host has reserved the device):  if the
25256  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25257  *      purpose of that periodic timer is to handle scenarios where the host is
25258  *      otherwise temporarily quiescent, temporarily doing no real i/o.
25259  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25260  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25261  * the device itself.
25262  *
25263  * SCSI-3 PGR
25264  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25265  * facility is supported through the shared multihost disk ioctls
25266  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25267  * MHIOCGRP_PREEMPTANDABORT)
25268  *
25269  * Reservation Reclaim:
25270  * --------------------
25271  * To support the lost reservation reclaim operations this driver creates a
25272  * single thread to handle reinstating reservations on all devices that have
25273  * lost reservations.  sd_resv_reclaim_requests are logged for all devices
25274  * that have lost reservations when the scsi watch facility calls back
25275  * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
25276  * requests to regain the lost reservations.
25277  */
25278 
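/*
 * Illustrative sketch only, not part of the driver: the SCSI-2
 * ownership sequence described above as a cluster agent might issue
 * it from userland.  fd (open on the raw device) and the two-second
 * failfast interval are assumptions for the example; see mhd(7I).
 *
 *	struct mhioctkown tkown;
 *	uint_t millisecs = 2000;		// failfast poll interval
 *
 *	bzero(&tkown, sizeof (tkown));		// 0 => driver defaults
 *	if (ioctl(fd, MHIOCTKOWN, &tkown) != 0)
 *		return (errno);			// could not take the disk
 *	(void) ioctl(fd, MHIOCENFAILFAST, &millisecs);
 *	...					// disk is safely owned here
 *	(void) ioctl(fd, MHIOCRELEASE, 0);	// give up ownership
 */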
25279 /*
25280  *    Function: sd_check_mhd()
25281  *
25282  * Description: This function sets up and submits a scsi watch request or
25283  *		terminates an existing watch request. This routine is used in
25284  *		support of reservation reclaim.
25285  *
25286  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25287  *			 among multiple watches that share the callback function
25288  *		interval - the watch interval, in milliseconds, at which
25289  *			   TEST UNIT READY commands are issued. If
25290  *			   set to 0 the watch should be terminated. If the
25291  *			   interval is set to 0 and if the device is required
25292  *			   to hold reservation while disabling failfast, the
25293  *			   watch is restarted with an interval of
25294  *			   sd_reinstate_resv_delay.
25295  *
25296  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25297  *		ENXIO      - Indicates an invalid device was specified
25298  *		EAGAIN     - Unable to submit the scsi watch request
25299  */
25300 
25301 static int
25302 sd_check_mhd(dev_t dev, int interval)
25303 {
25304 	struct sd_lun	*un;
25305 	opaque_t	token;
25306 
25307 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25308 		return (ENXIO);
25309 	}
25310 
25311 	/* is this a watch termination request? */
25312 	if (interval == 0) {
25313 		mutex_enter(SD_MUTEX(un));
25314 		/* if there is an existing watch task then terminate it */
25315 		if (un->un_mhd_token) {
25316 			token = un->un_mhd_token;
25317 			un->un_mhd_token = NULL;
25318 			mutex_exit(SD_MUTEX(un));
25319 			(void) scsi_watch_request_terminate(token,
25320 			    SCSI_WATCH_TERMINATE_WAIT);
25321 			mutex_enter(SD_MUTEX(un));
25322 		} else {
25323 			mutex_exit(SD_MUTEX(un));
25324 			/*
25325 			 * Note: If we return here we don't check for the
25326 			 * failfast case. This is the original legacy
25327 			 * implementation but perhaps we should be checking
25328 			 * the failfast case.
25329 			 */
25330 			return (0);
25331 		}
25332 		/*
25333 		 * If the device is required to hold reservation while
25334 		 * disabling failfast, we need to restart the scsi_watch
25335 		 * routine with an interval of reinstate_resv_delay.
25336 		 */
25337 		if (un->un_resvd_status & SD_RESERVE) {
25338 			interval = sd_reinstate_resv_delay/1000;
25339 		} else {
25340 			/* no failfast so bail */
25341 			mutex_exit(SD_MUTEX(un));
25342 			return (0);
25343 		}
25344 		mutex_exit(SD_MUTEX(un));
25345 	}
25346 
25347 	/*
25348 	 * adjust minimum time interval to 1 second,
25349 	 * and convert from msecs to usecs
25350 	 */
25351 	if (interval > 0 && interval < 1000) {
25352 		interval = 1000;
25353 	}
25354 	interval *= 1000;
25355 
25356 	/*
25357 	 * submit the request to the scsi_watch service
25358 	 */
25359 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25360 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25361 	if (token == NULL) {
25362 		return (EAGAIN);
25363 	}
25364 
25365 	/*
25366 	 * save token for termination later on
25367 	 */
25368 	mutex_enter(SD_MUTEX(un));
25369 	un->un_mhd_token = token;
25370 	mutex_exit(SD_MUTEX(un));
25371 	return (0);
25372 }
25373 
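/*
 * Usage sketch (an assumption about typical callers, shown for
 * clarity): the failfast/reservation paths start the watch with,
 * e.g., sd_check_mhd(dev, 1000) -- one TEST UNIT READY per second --
 * and stop it with sd_check_mhd(dev, 0).  As the code above shows, a
 * terminate request quietly turns back into a submit at
 * sd_reinstate_resv_delay when SD_RESERVE is still held.
 */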
25374 
25375 /*
25376  *    Function: sd_mhd_watch_cb()
25377  *
25378  * Description: This function is the call back function used by the scsi watch
25379  *		facility. The scsi watch facility sends the "Test Unit Ready"
25380  *		and processes the status. If applicable (i.e. a "Unit Attention"
25381  *		status and automatic "Request Sense" not used) the scsi watch
25382  *		facility will send a "Request Sense" and retrieve the sense data
25383  *		to be passed to this callback function. In either case
25384  *		(automatic "Request Sense" or one submitted by the facility),
25385  *		this callback is passed the status and sense data.
25386  *
25387  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25388  *			among multiple watches that share this callback function
25389  *		resultp - scsi watch facility result packet containing scsi
25390  *			  packet, status byte and sense data
25391  *
25392  * Return Code: 0 - continue the watch task
25393  *		non-zero - terminate the watch task
25394  */
25395 
25396 static int
25397 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
25398 {
25399 	struct sd_lun			*un;
25400 	struct scsi_status		*statusp;
25401 	uint8_t				*sensep;
25402 	struct scsi_pkt			*pkt;
25403 	uchar_t				actual_sense_length;
25404 	dev_t				dev = (dev_t)arg;
25405 
25406 	ASSERT(resultp != NULL);
25407 	statusp			= resultp->statusp;
25408 	sensep			= (uint8_t *)resultp->sensep;
25409 	pkt			= resultp->pkt;
25410 	actual_sense_length	= resultp->actual_sense_length;
25411 
25412 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25413 		return (ENXIO);
25414 	}
25415 
25416 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25417 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25418 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25419 
25420 	/* Begin processing of the status and/or sense data */
25421 	if (pkt->pkt_reason != CMD_CMPLT) {
25422 		/* Handle the incomplete packet */
25423 		sd_mhd_watch_incomplete(un, pkt);
25424 		return (0);
25425 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25426 		if (*((unsigned char *)statusp)
25427 		    == STATUS_RESERVATION_CONFLICT) {
25428 			/*
25429 			 * Handle a reservation conflict by panicking if
25430 			 * configured for failfast or by logging the conflict
25431 			 * and updating the reservation status
25432 			 */
25433 			mutex_enter(SD_MUTEX(un));
25434 			if ((un->un_resvd_status & SD_FAILFAST) &&
25435 			    (sd_failfast_enable)) {
25436 				sd_panic_for_res_conflict(un);
25437 				/*NOTREACHED*/
25438 			}
25439 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25440 			    "sd_mhd_watch_cb: Reservation Conflict\n");
25441 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25442 			mutex_exit(SD_MUTEX(un));
25443 		}
25444 	}
25445 
25446 	if (sensep != NULL) {
25447 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25448 			mutex_enter(SD_MUTEX(un));
25449 			if ((scsi_sense_asc(sensep) ==
25450 			    SD_SCSI_RESET_SENSE_CODE) &&
25451 			    (un->un_resvd_status & SD_RESERVE)) {
25452 				/*
25453 				 * The additional sense code indicates a power
25454 				 * on or bus device reset has occurred; update
25455 				 * the reservation status.
25456 				 */
25457 				un->un_resvd_status |=
25458 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25459 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25460 				    "sd_mhd_watch_cb: Lost Reservation\n");
25461 			}
25462 		} else {
25463 			return (0);
25464 		}
25465 	} else {
25466 		mutex_enter(SD_MUTEX(un));
25467 	}
25468 
25469 	if ((un->un_resvd_status & SD_RESERVE) &&
25470 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25471 		if (un->un_resvd_status & SD_WANT_RESERVE) {
25472 			/*
25473 			 * A reset occurred in between the last probe and this
25474 			 * one so if a timeout is pending cancel it.
25475 			 */
25476 			if (un->un_resvd_timeid) {
25477 				timeout_id_t temp_id = un->un_resvd_timeid;
25478 				un->un_resvd_timeid = NULL;
25479 				mutex_exit(SD_MUTEX(un));
25480 				(void) untimeout(temp_id);
25481 				mutex_enter(SD_MUTEX(un));
25482 			}
25483 			un->un_resvd_status &= ~SD_WANT_RESERVE;
25484 		}
25485 		if (un->un_resvd_timeid == NULL) {
25486 			/* Schedule a timeout to handle the lost reservation */
25487 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25488 			    (void *)dev,
25489 			    drv_usectohz(sd_reinstate_resv_delay));
25490 		}
25491 	}
25492 	mutex_exit(SD_MUTEX(un));
25493 	return (0);
25494 }
25495 
25496 
25497 /*
25498  *    Function: sd_mhd_watch_incomplete()
25499  *
25500  * Description: This function is used to find out why a scsi pkt sent by the
25501  *		scsi watch facility was not completed. Under some scenarios this
25502  *		routine will return. Otherwise it will send a bus reset to see
25503  *		if the drive is still online.
25504  *
25505  *   Arguments: un  - driver soft state (unit) structure
25506  *		pkt - incomplete scsi pkt
25507  */
25508 
25509 static void
25510 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25511 {
25512 	int	be_chatty;
25513 	int	perr;
25514 
25515 	ASSERT(pkt != NULL);
25516 	ASSERT(un != NULL);
25517 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25518 	perr		= (pkt->pkt_statistics & STAT_PERR);
25519 
25520 	mutex_enter(SD_MUTEX(un));
25521 	if (un->un_state == SD_STATE_DUMPING) {
25522 		mutex_exit(SD_MUTEX(un));
25523 		return;
25524 	}
25525 
25526 	switch (pkt->pkt_reason) {
25527 	case CMD_UNX_BUS_FREE:
25528 		/*
25529 		 * If we had a parity error that caused the target to drop BSY*,
25530 		 * don't be chatty about it.
25531 		 */
25532 		if (perr && be_chatty) {
25533 			be_chatty = 0;
25534 		}
25535 		break;
25536 	case CMD_TAG_REJECT:
25537 		/*
25538 		 * The SCSI-2 spec states that a tag reject will be sent by the
25539 		 * target if tagged queuing is not supported. A tag reject may
25540 		 * also be sent during certain initialization periods or to
25541 		 * control internal resources. For the latter case the target
25542 		 * may also return Queue Full.
25543 		 *
25544 		 * If this driver receives a tag reject from a target that is
25545 		 * going through an init period or controlling internal
25546 		 * resources tagged queuing will be disabled. This is a less
25547 		 * than optimal behavior but the driver is unable to determine
25548  *		the target state and assumes tagged queuing is not supported.
25549 		 */
25550 		pkt->pkt_flags = 0;
25551 		un->un_tagflags = 0;
25552 
25553 		if (un->un_f_opt_queueing == TRUE) {
25554 			un->un_throttle = min(un->un_throttle, 3);
25555 		} else {
25556 			un->un_throttle = 1;
25557 		}
25558 		mutex_exit(SD_MUTEX(un));
25559 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25560 		mutex_enter(SD_MUTEX(un));
25561 		break;
25562 	case CMD_INCOMPLETE:
25563 		/*
25564 		 * The transport stopped in an abnormal state; fall through and
25565 		 * reset the target and/or bus unless selection did not complete
25566 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
25567 		 * go through a target/bus reset
25568 		 */
25569 		if (pkt->pkt_state == STATE_GOT_BUS) {
25570 			break;
25571 		}
25572 		/*FALLTHROUGH*/
25573 
25574 	case CMD_TIMEOUT:
25575 	default:
25576 		/*
25577 		 * The lun may still be running the command, so a lun reset
25578 		 * should be attempted. If the lun reset fails or cannot be
25579 		 * issued, then try a target reset. Lastly try a bus reset.
25580 		 */
25581 		if ((pkt->pkt_statistics &
25582 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25583 			int reset_retval = 0;
25584 			mutex_exit(SD_MUTEX(un));
25585 			if (un->un_f_allow_bus_device_reset == TRUE) {
25586 				if (un->un_f_lun_reset_enabled == TRUE) {
25587 					reset_retval =
25588 					    scsi_reset(SD_ADDRESS(un),
25589 					    RESET_LUN);
25590 				}
25591 				if (reset_retval == 0) {
25592 					reset_retval =
25593 					    scsi_reset(SD_ADDRESS(un),
25594 					    RESET_TARGET);
25595 				}
25596 			}
25597 			if (reset_retval == 0) {
25598 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25599 			}
25600 			mutex_enter(SD_MUTEX(un));
25601 		}
25602 		break;
25603 	}
25604 
25605 	/* A device/bus reset has occurred; update the reservation status. */
25606 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25607 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25608 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25609 			un->un_resvd_status |=
25610 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25611 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25612 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25613 		}
25614 	}
25615 
25616 	/*
25617 	 * The disk has been turned off; update the device state.
25618 	 *
25619 	 * Note: Should we be offlining the disk here?
25620 	 */
25621 	if (pkt->pkt_state == STATE_GOT_BUS) {
25622 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25623 		    "Disk not responding to selection\n");
25624 		if (un->un_state != SD_STATE_OFFLINE) {
25625 			New_state(un, SD_STATE_OFFLINE);
25626 		}
25627 	} else if (be_chatty) {
25628 		/*
25629 		 * suppress messages if they are all the same pkt reason;
25630 		 * with TQ, many (up to 256) are returned with the same
25631 		 * pkt_reason
25632 		 */
25633 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25634 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25635 			    "sd_mhd_watch_incomplete: "
25636 			    "SCSI transport failed: reason '%s'\n",
25637 			    scsi_rname(pkt->pkt_reason));
25638 		}
25639 	}
25640 	un->un_last_pkt_reason = pkt->pkt_reason;
25641 	mutex_exit(SD_MUTEX(un));
25642 }
25643 
25644 
25645 /*
25646  *    Function: sd_sname()
25647  *
25648  * Description: This is a simple little routine to return a string containing
25649  *		a printable description of command status byte for use in
25650  *		logging.
25651  *
25652  *   Arguments: status - pointer to a status byte
25653  *
25654  * Return Code: char * - string containing status description.
25655  */
25656 
25657 static char *
25658 sd_sname(uchar_t status)
25659 {
25660 	switch (status & STATUS_MASK) {
25661 	case STATUS_GOOD:
25662 		return ("good status");
25663 	case STATUS_CHECK:
25664 		return ("check condition");
25665 	case STATUS_MET:
25666 		return ("condition met");
25667 	case STATUS_BUSY:
25668 		return ("busy");
25669 	case STATUS_INTERMEDIATE:
25670 		return ("intermediate");
25671 	case STATUS_INTERMEDIATE_MET:
25672 		return ("intermediate - condition met");
25673 	case STATUS_RESERVATION_CONFLICT:
25674 		return ("reservation_conflict");
25675 	case STATUS_TERMINATED:
25676 		return ("command terminated");
25677 	case STATUS_QFULL:
25678 		return ("queue full");
25679 	default:
25680 		return ("<unknown status>");
25681 	}
25682 }
25683 
25684 
25685 /*
25686  *    Function: sd_mhd_resvd_recover()
25687  *
25688  * Description: This function adds a reservation entry to the
25689  *		sd_resv_reclaim_request list and signals the reservation
25690  *		reclaim thread that there is work pending. If the reservation
25691  *		reclaim thread has not been previously created this function
25692  *		will kick it off.
25693  *
25694  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25695  *			among multiple watches that share this callback function
25696  *
25697  *     Context: This routine is called by timeout() and is run in interrupt
25698  *		context. It must not sleep or call other functions which may
25699  *		sleep.
25700  */
25701 
25702 static void
25703 sd_mhd_resvd_recover(void *arg)
25704 {
25705 	dev_t			dev = (dev_t)arg;
25706 	struct sd_lun		*un;
25707 	struct sd_thr_request	*sd_treq = NULL;
25708 	struct sd_thr_request	*sd_cur = NULL;
25709 	struct sd_thr_request	*sd_prev = NULL;
25710 	int			already_there = 0;
25711 
25712 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25713 		return;
25714 	}
25715 
25716 	mutex_enter(SD_MUTEX(un));
25717 	un->un_resvd_timeid = NULL;
25718 	if (un->un_resvd_status & SD_WANT_RESERVE) {
25719 		/*
25720 		 * There was a reset so don't issue the reserve, allow the
25721 		 * sd_mhd_watch_cb callback function to notice this and
25722 		 * reschedule the timeout for reservation.
25723 		 */
25724 		mutex_exit(SD_MUTEX(un));
25725 		return;
25726 	}
25727 	mutex_exit(SD_MUTEX(un));
25728 
25729 	/*
25730 	 * Add this device to the sd_resv_reclaim_request list and the
25731 	 * sd_resv_reclaim_thread should take care of the rest.
25732 	 *
25733 	 * Note: We can't sleep in this context so if the memory allocation
25734 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
25735 	 * reschedule the timeout for reservation.  (4378460)
25736 	 */
25737 	sd_treq = (struct sd_thr_request *)
25738 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25739 	if (sd_treq == NULL) {
25740 		return;
25741 	}
25742 
25743 	sd_treq->sd_thr_req_next = NULL;
25744 	sd_treq->dev = dev;
25745 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25746 	if (sd_tr.srq_thr_req_head == NULL) {
25747 		sd_tr.srq_thr_req_head = sd_treq;
25748 	} else {
25749 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25750 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25751 			if (sd_cur->dev == dev) {
25752 				/*
25753 				 * already in Queue so don't log
25754 				 * another request for the device
25755 				 */
25756 				already_there = 1;
25757 				break;
25758 			}
25759 			sd_prev = sd_cur;
25760 		}
25761 		if (!already_there) {
25762 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25763 			    "logging request for %lx\n", dev);
25764 			sd_prev->sd_thr_req_next = sd_treq;
25765 		} else {
25766 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25767 		}
25768 	}
25769 
25770 	/*
25771 	 * Create a kernel thread to do the reservation reclaim and free up this
25772 	 * thread. We cannot block this thread while we go away to do the
25773 	 * reservation reclaim
25774 	 */
25775 	if (sd_tr.srq_resv_reclaim_thread == NULL)
25776 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25777 		    sd_resv_reclaim_thread, NULL,
25778 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
25779 
25780 	/* Tell the reservation reclaim thread that it has work to do */
25781 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25782 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25783 }
25784 
25785 /*
25786  *    Function: sd_resv_reclaim_thread()
25787  *
25788  * Description: This function implements the reservation reclaim operations
25789  *
25790  *   Arguments: none.  (This is the reclaim thread's start routine; it
25791  *		      pulls requests off the sd_tr.srq_thr_req_head list.)
25792  */
25793 
25794 static void
25795 sd_resv_reclaim_thread()
25796 {
25797 	struct sd_lun		*un;
25798 	struct sd_thr_request	*sd_mhreq;
25799 
25800 	/* Wait for work */
25801 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25802 	if (sd_tr.srq_thr_req_head == NULL) {
25803 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25804 		    &sd_tr.srq_resv_reclaim_mutex);
25805 	}
25806 
25807 	/* Loop while we have work */
25808 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25809 		un = ddi_get_soft_state(sd_state,
25810 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25811 		if (un == NULL) {
25812 			/*
25813 			 * softstate structure is NULL so just
25814 			 * dequeue the request and continue
25815 			 */
25816 			sd_tr.srq_thr_req_head =
25817 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25818 			kmem_free(sd_tr.srq_thr_cur_req,
25819 			    sizeof (struct sd_thr_request));
25820 			continue;
25821 		}
25822 
25823 		/* dequeue the request */
25824 		sd_mhreq = sd_tr.srq_thr_cur_req;
25825 		sd_tr.srq_thr_req_head =
25826 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25827 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25828 
25829 		/*
25830 		 * Reclaim reservation only if SD_RESERVE is still set. There
25831 		 * may have been a call to MHIOCRELEASE before we got here.
25832 		 */
25833 		mutex_enter(SD_MUTEX(un));
25834 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25835 			/*
25836 			 * Note: The SD_LOST_RESERVE flag is cleared before
25837 			 * reclaiming the reservation. If this is done after the
25838 			 * call to sd_reserve_release a reservation loss in the
25839 			 * window between pkt completion of reserve cmd and
25840 			 * mutex_enter below may not be recognized
25841 			 */
25842 			un->un_resvd_status &= ~SD_LOST_RESERVE;
25843 			mutex_exit(SD_MUTEX(un));
25844 
25845 			if (sd_reserve_release(sd_mhreq->dev,
25846 			    SD_RESERVE) == 0) {
25847 				mutex_enter(SD_MUTEX(un));
25848 				un->un_resvd_status |= SD_RESERVE;
25849 				mutex_exit(SD_MUTEX(un));
25850 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25851 				    "sd_resv_reclaim_thread: "
25852 				    "Reservation Recovered\n");
25853 			} else {
25854 				mutex_enter(SD_MUTEX(un));
25855 				un->un_resvd_status |= SD_LOST_RESERVE;
25856 				mutex_exit(SD_MUTEX(un));
25857 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25858 				    "sd_resv_reclaim_thread: Failed "
25859 				    "Reservation Recovery\n");
25860 			}
25861 		} else {
25862 			mutex_exit(SD_MUTEX(un));
25863 		}
25864 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25865 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25866 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25867 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25868 		/*
25869 		 * wakeup the destroy thread if anyone is waiting on
25870 		 * us to complete.
25871 		 */
25872 		cv_signal(&sd_tr.srq_inprocess_cv);
25873 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25874 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
25875 	}
25876 
25877 	/*
25878 	 * cleanup the sd_tr structure now that this thread will not exist
25879 	 */
25880 	ASSERT(sd_tr.srq_thr_req_head == NULL);
25881 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25882 	sd_tr.srq_resv_reclaim_thread = NULL;
25883 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25884 	thread_exit();
25885 }
25886 
25887 
25888 /*
25889  *    Function: sd_rmv_resv_reclaim_req()
25890  *
25891  * Description: This function removes any pending reservation reclaim requests
25892  *		for the specified device.
25893  *
25894  *   Arguments: dev - the device 'dev_t'
25895  */
25896 
25897 static void
25898 sd_rmv_resv_reclaim_req(dev_t dev)
25899 {
25900 	struct sd_thr_request *sd_mhreq;
25901 	struct sd_thr_request *sd_prev;
25902 
25903 	/* Remove a reservation reclaim request from the list */
25904 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25905 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25906 		/*
25907 		 * We are attempting to reinstate reservation for
25908 		 * this device. We wait for sd_reserve_release()
25909 		 * to return before we return.
25910 		 */
25911 		cv_wait(&sd_tr.srq_inprocess_cv,
25912 		    &sd_tr.srq_resv_reclaim_mutex);
25913 	} else {
25914 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25915 		if (sd_mhreq && sd_mhreq->dev == dev) {
25916 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25917 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25918 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25919 			return;
25920 		}
25921 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25922 			if (sd_mhreq->dev == dev) {
25923 				break;
25924 			}
25925 			sd_prev = sd_mhreq;
25926 		}
25927 		if (sd_mhreq != NULL) {
25928 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25929 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25930 		}
25931 	}
25932 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25933 }
25934 
25935 
25936 /*
25937  *    Function: sd_mhd_reset_notify_cb()
25938  *
25939  * Description: This is a call back function for scsi_reset_notify. This
25940  *		function updates the softstate reserved status and logs the
25941  *		reset. The driver scsi watch facility callback function
25942  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25943  *		will reclaim the reservation.
25944  *
25945  *   Arguments: arg  - driver soft state (unit) structure
25946  */
25947 
25948 static void
25949 sd_mhd_reset_notify_cb(caddr_t arg)
25950 {
25951 	struct sd_lun *un = (struct sd_lun *)arg;
25952 
25953 	mutex_enter(SD_MUTEX(un));
25954 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25955 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25956 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25957 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25958 	}
25959 	mutex_exit(SD_MUTEX(un));
25960 }
25961 
25962 
25963 /*
25964  *    Function: sd_take_ownership()
25965  *
25966  * Description: This routine implements an algorithm to achieve a stable
25967  *		reservation on disks which don't implement priority reserve,
25968  *		and makes sure that other hosts' re-reservation attempts fail.
25969  *		The algorithm consists of a loop that keeps issuing the RESERVE
25970  *		for some period of time (min_ownership_delay, default 6 seconds).
25971  *		During that loop, it looks to see if there has been a bus device
25972  *		reset or bus reset (both of which cause an existing reservation
25973  *		to be lost). If the reservation is lost, RESERVE is reissued
25974  *		until a period of min_ownership_delay with no resets has gone
25975  *		by, or until max_ownership_delay has expired. This loop ensures that
25976  *		the host really did manage to reserve the device, in spite of
25977  *		resets. The looping for min_ownership_delay (default six
25978  *		seconds) is important to early generation clustering products,
25979  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25980  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25981  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25982  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25983  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25984  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25985  *		no longer "owns" the disk and will have panicked itself.  Thus,
25986  *		the host issuing the MHIOCTKOWN is assured (with timing
25987  *		dependencies) that by the time it actually starts to use the
25988  *		disk for real work, the old owner is no longer accessing it.
25989  *
25990  *		min_ownership_delay is the minimum amount of time for which the
25991  *		disk must be reserved continuously devoid of resets before the
25992  *		MHIOCTKOWN ioctl will return success.
25993  *
25994  *		max_ownership_delay indicates the amount of time by which the
25995  *		take ownership should succeed or timeout with an error.
25996  *
25997  *   Arguments: dev - the device 'dev_t'
25998  *		*p  - struct containing timing info.
25999  *
26000  * Return Code: 0 for success or error code
26001  */
26002 
26003 static int
26004 sd_take_ownership(dev_t dev, struct mhioctkown *p)
26005 {
26006 	struct sd_lun	*un;
26007 	int		rval;
26008 	int		err;
26009 	int		reservation_count   = 0;
26010 	int		min_ownership_delay =  6000000; /* in usec */
26011 	int		max_ownership_delay = 30000000; /* in usec */
26012 	clock_t		start_time;	/* starting time of this algorithm */
26013 	clock_t		end_time;	/* time limit for giving up */
26014 	clock_t		ownership_time;	/* time limit for stable ownership */
26015 	clock_t		current_time;
26016 	clock_t		previous_current_time;
26017 
26018 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26019 		return (ENXIO);
26020 	}
26021 
26022 	/*
26023 	 * Attempt a device reservation. A priority reservation is requested.
26024 	 */
26025 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
26026 	    != SD_SUCCESS) {
26027 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26028 		    "sd_take_ownership: return(1)=%d\n", rval);
26029 		return (rval);
26030 	}
26031 
26032 	/* Update the softstate reserved status to indicate the reservation */
26033 	mutex_enter(SD_MUTEX(un));
26034 	un->un_resvd_status |= SD_RESERVE;
26035 	un->un_resvd_status &=
26036 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
26037 	mutex_exit(SD_MUTEX(un));
26038 
26039 	if (p != NULL) {
26040 		if (p->min_ownership_delay != 0) {
26041 			min_ownership_delay = p->min_ownership_delay * 1000;
26042 		}
26043 		if (p->max_ownership_delay != 0) {
26044 			max_ownership_delay = p->max_ownership_delay * 1000;
26045 		}
26046 	}
26047 	SD_INFO(SD_LOG_IOCTL_MHD, un,
26048 	    "sd_take_ownership: min, max delays: %d, %d\n",
26049 	    min_ownership_delay, max_ownership_delay);
26050 
26051 	start_time = ddi_get_lbolt();
26052 	current_time	= start_time;
26053 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
26054 	end_time	= start_time + drv_usectohz(max_ownership_delay);
26055 
26056 	while (current_time - end_time < 0) {
26057 		delay(drv_usectohz(500000));
26058 
26059 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
26060 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
26061 				mutex_enter(SD_MUTEX(un));
26062 				rval = (un->un_resvd_status &
26063 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
26064 				mutex_exit(SD_MUTEX(un));
26065 				break;
26066 			}
26067 		}
26068 		previous_current_time = current_time;
26069 		current_time = ddi_get_lbolt();
26070 		mutex_enter(SD_MUTEX(un));
26071 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
26072 			ownership_time = ddi_get_lbolt() +
26073 			    drv_usectohz(min_ownership_delay);
26074 			reservation_count = 0;
26075 		} else {
26076 			reservation_count++;
26077 		}
26078 		un->un_resvd_status |= SD_RESERVE;
26079 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
26080 		mutex_exit(SD_MUTEX(un));
26081 
26082 		SD_INFO(SD_LOG_IOCTL_MHD, un,
26083 		    "sd_take_ownership: ticks for loop iteration=%ld, "
26084 		    "reservation=%s\n", (current_time - previous_current_time),
26085 		    reservation_count ? "ok" : "reclaimed");
26086 
26087 		if (current_time - ownership_time >= 0 &&
26088 		    reservation_count >= 4) {
26089 			rval = 0; /* Achieved a stable ownership */
26090 			break;
26091 		}
26092 		if (current_time - end_time >= 0) {
26093 			rval = EACCES; /* No ownership in max possible time */
26094 			break;
26095 		}
26096 	}
26097 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
26098 	    "sd_take_ownership: return(2)=%d\n", rval);
26099 	return (rval);
26100 }
26101 
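/*
 * A note on the loop arithmetic above: the time checks are written as
 * signed deltas, e.g. (current_time - end_time >= 0) rather than
 * (current_time >= end_time), so they stay correct even if the lbolt
 * counter wraps between readings.  With the defaults, the loop polls
 * every 500 msec, and success requires both that min_ownership_delay
 * (6 seconds) has elapsed since the last observed reset and that at
 * least four consecutive iterations (about 2 seconds) completed with
 * the reservation intact.
 */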
26102 
26103 /*
26104  *    Function: sd_reserve_release()
26105  *
26106  * Description: This function builds and sends scsi RESERVE, RELEASE, and
26107  *		PRIORITY RESERVE commands based on a user specified command type
26108  *
26109  *   Arguments: dev - the device 'dev_t'
26110  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
26111  *		      SD_RESERVE, SD_RELEASE
26112  *
26113  * Return Code: 0 or Error Code
26114  */
26115 
26116 static int
26117 sd_reserve_release(dev_t dev, int cmd)
26118 {
26119 	struct uscsi_cmd	*com = NULL;
26120 	struct sd_lun		*un = NULL;
26121 	char			cdb[CDB_GROUP0];
26122 	int			rval;
26123 
26124 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
26125 	    (cmd == SD_PRIORITY_RESERVE));
26126 
26127 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26128 		return (ENXIO);
26129 	}
26130 
26131 	/* instantiate and initialize the command and cdb */
26132 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26133 	bzero(cdb, CDB_GROUP0);
26134 	com->uscsi_flags   = USCSI_SILENT;
26135 	com->uscsi_timeout = un->un_reserve_release_time;
26136 	com->uscsi_cdblen  = CDB_GROUP0;
26137 	com->uscsi_cdb	   = cdb;
26138 	if (cmd == SD_RELEASE) {
26139 		cdb[0] = SCMD_RELEASE;
26140 	} else {
26141 		cdb[0] = SCMD_RESERVE;
26142 	}
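	/*
	 * Descriptive note: the resulting six-byte Group 0 CDB carries
	 * only the opcode (SCMD_RESERVE or SCMD_RELEASE); bytes 1-5
	 * remain zero, so this is a plain unit reservation or release
	 * with no third-party or extent options requested.
	 */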
26143 
26144 	/* Send the command. */
26145 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26146 	    UIO_SYSSPACE, SD_PATH_STANDARD);
26147 
26148 	/*
26149 	 * "break" a reservation that is held by another host, by issuing a
26150 	 * reset if priority reserve is desired, and we could not get the
26151 	 * device.
26152 	 */
26153 	if ((cmd == SD_PRIORITY_RESERVE) &&
26154 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26155 		/*
26156 		 * First try to reset the LUN. If we cannot, then try a target
26157 		 * reset, followed by a bus reset if the target reset fails.
26158 		 */
26159 		int reset_retval = 0;
26160 		if (un->un_f_lun_reset_enabled == TRUE) {
26161 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
26162 		}
26163 		if (reset_retval == 0) {
26164 			/* The LUN reset either failed or was not issued */
26165 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26166 		}
26167 		if ((reset_retval == 0) &&
26168 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
26169 			rval = EIO;
26170 			kmem_free(com, sizeof (*com));
26171 			return (rval);
26172 		}
26173 
26174 		bzero(com, sizeof (struct uscsi_cmd));
26175 		com->uscsi_flags   = USCSI_SILENT;
26176 		com->uscsi_cdb	   = cdb;
26177 		com->uscsi_cdblen  = CDB_GROUP0;
26178 		com->uscsi_timeout = 5;
26179 
26180 		/*
26181 		 * Reissue the last reserve command, this time without request
26182 		 * sense.  Assume that it is just a regular reserve command.
26183 		 */
26184 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26185 		    UIO_SYSSPACE, SD_PATH_STANDARD);
26186 	}
26187 
26188 	/* Return an error if still getting a reservation conflict. */
26189 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26190 		rval = EACCES;
26191 	}
26192 
26193 	kmem_free(com, sizeof (*com));
26194 	return (rval);
26195 }
26196 
26197 
26198 #define	SD_NDUMP_RETRIES	12
26199 /*
26200  *	System Crash Dump routine
26201  */
26202 
26203 static int
26204 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
26205 {
26206 	int		instance;
26207 	int		partition;
26208 	int		i;
26209 	int		err;
26210 	struct sd_lun	*un;
26211 	struct dk_map	*lp;
26212 	struct scsi_pkt *wr_pktp;
26213 	struct buf	*wr_bp;
26214 	struct buf	wr_buf;
26215 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
26216 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
26217 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
26218 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
26219 	size_t		io_start_offset;
26220 	int		doing_rmw = FALSE;
26221 	int		rval;
26222 #if defined(__i386) || defined(__amd64)
26223 	ssize_t dma_resid;
26224 	daddr_t oblkno;
26225 #endif
26226 
26227 	instance = SDUNIT(dev);
26228 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
26229 	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
26230 		return (ENXIO);
26231 	}
26232 
26233 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
26234 
26235 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
26236 
26237 	partition = SDPART(dev);
26238 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
26239 
26240 	/* Validate the blocks to dump against the partition size. */
26241 	lp = &un->un_map[partition];
26242 	if ((blkno + nblk) > lp->dkl_nblk) {
26243 		SD_TRACE(SD_LOG_DUMP, un,
26244 		    "sddump: dump range larger than partition: "
26245 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26246 		    blkno, nblk, lp->dkl_nblk);
26247 		return (EINVAL);
26248 	}
26249 
26250 	mutex_enter(&un->un_pm_mutex);
26251 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
26252 		struct scsi_pkt *start_pktp;
26253 
26254 		mutex_exit(&un->un_pm_mutex);
26255 
26256 		/*
26257 		 * use pm framework to power on HBA 1st
26258 		 */
26259 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
26260 
26261 		/*
26262 		 * Dump no longer uses sdpower to power on a device; the
26263 		 * power-on is done in-line here so it can run in polled mode.
26264 		 */
26265 
26266 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
26267 
26268 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
26269 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
26270 
26271 		if (start_pktp == NULL) {
26272 			/* We were not given a SCSI packet, fail. */
26273 			return (EIO);
26274 		}
26275 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
26276 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
26277 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
26278 		start_pktp->pkt_flags = FLAG_NOINTR;
26279 
26280 		mutex_enter(SD_MUTEX(un));
26281 		SD_FILL_SCSI1_LUN(un, start_pktp);
26282 		mutex_exit(SD_MUTEX(un));
26283 		/*
26284 		 * Scsi_poll returns 0 (success) if the command completes and
26285 		 * the status block is STATUS_GOOD.
26286 		 */
26287 		if (sd_scsi_poll(un, start_pktp) != 0) {
26288 			scsi_destroy_pkt(start_pktp);
26289 			return (EIO);
26290 		}
26291 		scsi_destroy_pkt(start_pktp);
26292 		(void) sd_ddi_pm_resume(un);
26293 	} else {
26294 		mutex_exit(&un->un_pm_mutex);
26295 	}
26296 
26297 	mutex_enter(SD_MUTEX(un));
26298 	un->un_throttle = 0;
26299 
26300 	/*
26301 	 * The first time through, reset the specific target device.
26302 	 * However, when cpr calls sddump we know that sd is in a
26303 	 * good state, so no bus reset is required.
26304 	 * Clear sense data via Request Sense cmd.
26305 	 * In sddump we don't care about allow_bus_device_reset anymore
26306 	 */
26307 
26308 	if ((un->un_state != SD_STATE_SUSPENDED) &&
26309 	    (un->un_state != SD_STATE_DUMPING)) {
26310 
26311 		New_state(un, SD_STATE_DUMPING);
26312 
26313 		if (un->un_f_is_fibre == FALSE) {
26314 			mutex_exit(SD_MUTEX(un));
26315 			/*
26316 			 * Attempt a bus reset for parallel scsi.
26317 			 *
26318 			 * Note: A bus reset is required because on some host
26319 			 * systems (i.e. E420R) a bus device reset is
26320 			 * insufficient to reset the state of the target.
26321 			 *
26322 			 * Note: Don't issue the reset for fibre-channel,
26323 			 * because this tends to hang the bus (loop) for
26324 			 * too long while everyone is logging out and in
26325 			 * and the deadman timer for dumping will fire
26326 			 * before the dump is complete.
26327 			 */
26328 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
26329 				mutex_enter(SD_MUTEX(un));
26330 				Restore_state(un);
26331 				mutex_exit(SD_MUTEX(un));
26332 				return (EIO);
26333 			}
26334 
26335 			/* Delay to give the device some recovery time. */
26336 			drv_usecwait(10000);
26337 
26338 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
26339 				SD_INFO(SD_LOG_DUMP, un,
26340 					"sddump: sd_send_polled_RQS failed\n");
26341 			}
26342 			mutex_enter(SD_MUTEX(un));
26343 		}
26344 	}
26345 
26346 	/*
26347 	 * Convert the partition-relative block number to a
26348 	 * disk physical block number.
26349 	 */
26350 	blkno += un->un_offset[partition];
26351 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
26352 
26353 
26354 	/*
26355 	 * Check if the device has a non-512 block size.
26356 	 */
26357 	wr_bp = NULL;
26358 	if (NOT_DEVBSIZE(un)) {
26359 		tgt_byte_offset = blkno * un->un_sys_blocksize;
26360 		tgt_byte_count = nblk * un->un_sys_blocksize;
26361 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
26362 		    (tgt_byte_count % un->un_tgt_blocksize)) {
26363 			doing_rmw = TRUE;
26364 			/*
26365 			 * Calculate the block number and number of blocks
26366 			 * in terms of the media block size.
26367 			 */
26368 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26369 			tgt_nblk =
26370 			    ((tgt_byte_offset + tgt_byte_count +
26371 				(un->un_tgt_blocksize - 1)) /
26372 				un->un_tgt_blocksize) - tgt_blkno;
26373 
26374 			/*
26375 			 * Invoke the routine which is going to do read part
26376 			 * of read-modify-write.
26377 			 * Note that this routine returns a pointer to
26378 			 * a valid bp in wr_bp.
26379 			 */
26380 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
26381 			    &wr_bp);
26382 			if (err) {
26383 				mutex_exit(SD_MUTEX(un));
26384 				return (err);
26385 			}
26386 			/*
26387 			 * The offset into the read buffer is calculated as
26388 			 * (original block # * system block size) -
26389 			 * (target block # * target block size)
26390 			 */
26391 			io_start_offset =
26392 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
26393 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
26394 
26395 			ASSERT((io_start_offset >= 0) &&
26396 			    (io_start_offset < un->un_tgt_blocksize));
26397 			/*
26398 			 * Do the modify portion of read modify write.
26399 			 */
26400 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
26401 			    (size_t)nblk * un->un_sys_blocksize);
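			/*
			 * Worked example of the arithmetic above (numbers
			 * are assumptions): with 512-byte system blocks,
			 * 2048-byte target blocks, blkno = 5 and nblk = 2:
			 *	tgt_byte_offset = 5 * 512 = 2560
			 *	tgt_byte_count  = 2 * 512 = 1024
			 *	tgt_blkno       = 2560 / 2048 = 1
			 *	tgt_nblk        = ((2560 + 1024 + 2047) /
			 *			  2048) - 1 = 1
			 *	io_start_offset = 2560 - 2048 = 512
			 * so the 1024 bytes being dumped land at byte 512
			 * of the single-block read buffer.
			 */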
26402 		} else {
26403 			doing_rmw = FALSE;
26404 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26405 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
26406 		}
26407 
26408 		/* Convert blkno and nblk to target blocks */
26409 		blkno = tgt_blkno;
26410 		nblk = tgt_nblk;
26411 	} else {
26412 		wr_bp = &wr_buf;
26413 		bzero(wr_bp, sizeof (struct buf));
26414 		wr_bp->b_flags		= B_BUSY;
26415 		wr_bp->b_un.b_addr	= addr;
26416 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
26417 		wr_bp->b_resid		= 0;
26418 	}
26419 
26420 	mutex_exit(SD_MUTEX(un));
26421 
26422 	/*
26423 	 * Obtain a SCSI packet for the write command.
26424 	 * It should be safe to call the allocator here without
26425 	 * worrying about being locked for DVMA mapping because
26426 	 * the address we're passed is already a DVMA mapping
26427 	 *
26428 	 * We are also not going to worry about semaphore ownership
26429 	 * in the dump buffer. Dumping is single threaded at present.
26430 	 */
26431 
26432 	wr_pktp = NULL;
26433 
26434 #if defined(__i386) || defined(__amd64)
26435 	dma_resid = wr_bp->b_bcount;
26436 	oblkno = blkno;
26437 	while (dma_resid != 0) {
26438 #endif
26439 
26440 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26441 		wr_bp->b_flags &= ~B_ERROR;
26442 
26443 #if defined(__i386) || defined(__amd64)
26444 		blkno = oblkno +
26445 			((wr_bp->b_bcount - dma_resid) /
26446 			    un->un_tgt_blocksize);
26447 		nblk = dma_resid / un->un_tgt_blocksize;
26448 
26449 		if (wr_pktp) {
26450 			/* Partial DMA transfers after initial transfer */
26451 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26452 			    blkno, nblk);
26453 		} else {
26454 			/* Initial transfer */
26455 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26456 			    un->un_pkt_flags, NULL_FUNC, NULL,
26457 			    blkno, nblk);
26458 		}
26459 #else
26460 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26461 		    0, NULL_FUNC, NULL, blkno, nblk);
26462 #endif
26463 
26464 		if (rval == 0) {
26465 			/* We were given a SCSI packet, continue. */
26466 			break;
26467 		}
26468 
26469 		if (i == 0) {
26470 			if (wr_bp->b_flags & B_ERROR) {
26471 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26472 				    "no resources for dumping; "
26473 				    "error code: 0x%x, retrying",
26474 				    geterror(wr_bp));
26475 			} else {
26476 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26477 				    "no resources for dumping; retrying");
26478 			}
26479 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26480 			if (wr_bp->b_flags & B_ERROR) {
26481 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26482 				    "no resources for dumping; error code: "
26483 				    "0x%x, retrying\n", geterror(wr_bp));
26484 			}
26485 		} else {
26486 			if (wr_bp->b_flags & B_ERROR) {
26487 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26488 				    "no resources for dumping; "
26489 				    "error code: 0x%x, retries failed, "
26490 				    "giving up.\n", geterror(wr_bp));
26491 			} else {
26492 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26493 				    "no resources for dumping; "
26494 				    "retries failed, giving up.\n");
26495 			}
26496 			mutex_enter(SD_MUTEX(un));
26497 			Restore_state(un);
26498 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26499 				mutex_exit(SD_MUTEX(un));
26500 				scsi_free_consistent_buf(wr_bp);
26501 			} else {
26502 				mutex_exit(SD_MUTEX(un));
26503 			}
26504 			return (EIO);
26505 		}
26506 		drv_usecwait(10000);
26507 	}
26508 
26509 #if defined(__i386) || defined(__amd64)
26510 	/*
26511 	 * save the resid from PARTIAL_DMA
26512 	 */
26513 	dma_resid = wr_pktp->pkt_resid;
26514 	if (dma_resid != 0)
26515 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26516 	wr_pktp->pkt_resid = 0;
26517 #endif
26518 
26519 	/* SunBug 1222170 */
26520 	wr_pktp->pkt_flags = FLAG_NOINTR;
26521 
26522 	err = EIO;
26523 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26524 
26525 		/*
26526 		 * Scsi_poll returns 0 (success) if the command completes and
26527 		 * the status block is STATUS_GOOD.  We should only check
26528 		 * errors if this condition is not true.  Even then we should
26529 		 * send our own request sense packet only if we have a check
26530 		 * condition and auto request sense has not been performed by
26531 		 * the hba.
26532 		 */
26533 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26534 
26535 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26536 		    (wr_pktp->pkt_resid == 0)) {
26537 			err = SD_SUCCESS;
26538 			break;
26539 		}
26540 
26541 		/*
26542 		 * Check CMD_DEV_GONE first; give up if the device is gone.
26543 		 */
26544 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26545 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26546 			    "Device is gone\n");
26547 			break;
26548 		}
26549 
26550 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26551 			SD_INFO(SD_LOG_DUMP, un,
26552 			    "sddump: write failed with CHECK, try # %d\n", i);
26553 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26554 				(void) sd_send_polled_RQS(un);
26555 			}
26556 
26557 			continue;
26558 		}
26559 
26560 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26561 			int reset_retval = 0;
26562 
26563 			SD_INFO(SD_LOG_DUMP, un,
26564 			    "sddump: write failed with BUSY, try # %d\n", i);
26565 
26566 			if (un->un_f_lun_reset_enabled == TRUE) {
26567 				reset_retval = scsi_reset(SD_ADDRESS(un),
26568 				    RESET_LUN);
26569 			}
26570 			if (reset_retval == 0) {
26571 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26572 			}
26573 			(void) sd_send_polled_RQS(un);
26574 
26575 		} else {
26576 			SD_INFO(SD_LOG_DUMP, un,
26577 			    "sddump: write failed with 0x%x, try # %d\n",
26578 			    SD_GET_PKT_STATUS(wr_pktp), i);
26579 			mutex_enter(SD_MUTEX(un));
26580 			sd_reset_target(un, wr_pktp);
26581 			mutex_exit(SD_MUTEX(un));
26582 		}
26583 
26584 		/*
26585 		 * If we are not getting anywhere with lun/target resets,
26586 		 * let's reset the bus.
26587 		 */
26588 		if (i == SD_NDUMP_RETRIES/2) {
26589 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26590 			(void) sd_send_polled_RQS(un);
26591 		}
26592 
26593 	}
26594 #if defined(__i386) || defined(__amd64)
26595 	}	/* dma_resid */
26596 #endif
26597 
26598 	scsi_destroy_pkt(wr_pktp);
26599 	mutex_enter(SD_MUTEX(un));
26600 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26601 		mutex_exit(SD_MUTEX(un));
26602 		scsi_free_consistent_buf(wr_bp);
26603 	} else {
26604 		mutex_exit(SD_MUTEX(un));
26605 	}
26606 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26607 	return (err);
26608 }
26609 
26610 /*
26611  *    Function: sd_scsi_poll()
26612  *
26613  * Description: This is a wrapper for the scsi_poll call.
26614  *
26615  *   Arguments: sd_lun - The unit structure
26616  *              scsi_pkt - The scsi packet being sent to the device.
26617  *
26618  * Return Code: 0 - Command completed successfully with good status
26619  *             -1 - Command failed.  This could indicate a check condition
26620  *                  or other status value requiring recovery action.
26621  *
26622  */
26623 
26624 static int
26625 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26626 {
26627 	int status;
26628 
26629 	ASSERT(un != NULL);
26630 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26631 	ASSERT(pktp != NULL);
26632 
26633 	status = SD_SUCCESS;
26634 
26635 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26636 		pktp->pkt_flags |= un->un_tagflags;
26637 		pktp->pkt_flags &= ~FLAG_NODISCON;
26638 	}
26639 
26640 	status = sd_ddi_scsi_poll(pktp);
26641 	/*
26642 	 * Scsi_poll returns 0 (success) if the command completes and the
26643 	 * status block is STATUS_GOOD.  We should only check errors if this
26644 	 * condition is not true.  Even then we should send our own request
26645 	 * sense packet only if we have a check condition and auto
26646 	 * request sense has not been performed by the hba.
26647 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26648 	 */
26649 	if ((status != SD_SUCCESS) &&
26650 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26651 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26652 	    (pktp->pkt_reason != CMD_DEV_GONE))
26653 		(void) sd_send_polled_RQS(un);
26654 
26655 	return (status);
26656 }
26657 
26658 /*
26659  *    Function: sd_send_polled_RQS()
26660  *
26661  * Description: This sends the request sense command to a device.
26662  *
26663  *   Arguments: sd_lun - The unit structure
26664  *
26665  * Return Code: 0 - Command completed successfully with good status
26666  *             -1 - Command failed.
26667  *
26668  */
26669 
26670 static int
26671 sd_send_polled_RQS(struct sd_lun *un)
26672 {
26673 	int	ret_val;
26674 	struct	scsi_pkt	*rqs_pktp;
26675 	struct	buf		*rqs_bp;
26676 
26677 	ASSERT(un != NULL);
26678 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26679 
26680 	ret_val = SD_SUCCESS;
26681 
26682 	rqs_pktp = un->un_rqs_pktp;
26683 	rqs_bp	 = un->un_rqs_bp;
26684 
26685 	mutex_enter(SD_MUTEX(un));
26686 
26687 	if (un->un_sense_isbusy) {
26688 		ret_val = SD_FAILURE;
26689 		mutex_exit(SD_MUTEX(un));
26690 		return (ret_val);
26691 	}
26692 
26693 	/*
26694 	 * If the request sense buffer (and packet) is not in use,
26695 	 * let's set the un_sense_isbusy and send our packet
26696 	 */
26697 	un->un_sense_isbusy 	= 1;
26698 	rqs_pktp->pkt_resid  	= 0;
26699 	rqs_pktp->pkt_reason 	= 0;
26700 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26701 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26702 
26703 	mutex_exit(SD_MUTEX(un));
26704 
26705 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26706 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26707 
26708 	/*
26709 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26710 	 * axle - it has a call into us!
26711 	 */
26712 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26713 		SD_INFO(SD_LOG_COMMON, un,
26714 		    "sd_send_polled_RQS: RQS failed\n");
26715 	}
26716 
26717 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26718 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26719 
26720 	mutex_enter(SD_MUTEX(un));
26721 	un->un_sense_isbusy = 0;
26722 	mutex_exit(SD_MUTEX(un));
26723 
26724 	return (ret_val);
26725 }
26726 
26727 /*
26728  * Defines needed for localized version of the scsi_poll routine.
26729  */
26730 #define	SD_CSEC		10000			/* usecs */
26731 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
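
/*
 * For reference, the arithmetic behind these defines: SD_CSEC is one
 * polling tick of 10000 usec (10 msec), so SD_SEC_TO_CSEC works out to
 * 1000000 / 10000 = 100 ticks per second.  With the default 60 second
 * pkt_time this gives sd_ddi_scsi_poll() below a budget of
 * 60 * 100 = 6000 loop iterations.
 */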
26732 
26733 
26734 /*
26735  *    Function: sd_ddi_scsi_poll()
26736  *
26737  * Description: Localized version of the scsi_poll routine.  The purpose is to
26738  *		send a scsi_pkt to a device as a polled command.  This version
26739  *		is to ensure more robust handling of transport errors.
26740  *		Specifically this routine cures not ready, coming ready
26741  *		Specifically this routine cures the not ready to ready
26742  *		transition seen on power up and reset of sonomas.  This can
26743  *		take up to 45 seconds for power-on and 20 seconds for reset
26744  * 		of a sonoma lun.
26745  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26746  *
26747  * Return Code: 0 - Command completed successfully with good status
26748  *             -1 - Command failed.
26749  *
26750  */
26751 
26752 static int
26753 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26754 {
26755 	int busy_count;
26756 	int timeout;
26757 	int rval = SD_FAILURE;
26758 	int savef;
26759 	uint8_t *sensep;
26760 	long savet;
26761 	void (*savec)();
26762 	/*
26763 	 * The following is defined in machdep.c and is used in determining if
26764 	 * the scsi transport system will do polled I/O instead of interrupt
26765 	 * I/O when called from xx_dump().
26766 	 */
26767 	extern int do_polled_io;
26768 
26769 	/*
26770 	 * save old flags in pkt, to restore at end
26771 	 */
26772 	savef = pkt->pkt_flags;
26773 	savec = pkt->pkt_comp;
26774 	savet = pkt->pkt_time;
26775 
26776 	pkt->pkt_flags |= FLAG_NOINTR;
26777 
26778 	/*
26779 	 * XXX there is nothing in the SCSA spec that states that we should not
26780 	 * do a callback for polled cmds; however, removing this will break sd
26781 	 * and probably other target drivers
26782 	 */
26783 	pkt->pkt_comp = NULL;
26784 
26785 	/*
26786 	 * we don't like a polled command without timeout.
26787 	 * 60 seconds seems long enough.
26788 	 */
26789 	if (pkt->pkt_time == 0) {
26790 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26791 	}
26792 
26793 	/*
26794 	 * Send polled cmd.
26795 	 *
26796 	 * We do some error recovery for various errors.  Transport busy,
26797 	 * queue full, and non-dispatched commands are retried every 10 msec
26798 	 * as they are typically transient failures.  Busy status and Not
26799 	 * Ready are retried every second as these conditions take a while
26800 	 * to change.  Unit Attention is retried up to pkt_time (60) times
26801 	 * with no delay.
26802 	 */
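	/*
	 * Worked example of the accounting below: with the default
	 * pkt_time of 60 seconds, timeout = 60 * SD_SEC_TO_CSEC = 6000
	 * ticks.  A 10 msec retry costs one tick per iteration, while a
	 * 1 second retry (Busy, Not Ready) also adds SD_SEC_TO_CSEC - 1
	 * = 99 extra ticks to busy_count, so each such retry charges a
	 * full second against the budget.
	 */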
26803 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
26804 
26805 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26806 		int rc;
26807 		int poll_delay;
26808 
26809 		/*
26810 		 * Initialize pkt status variables.
26811 		 */
26812 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26813 
26814 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26815 			if (rc != TRAN_BUSY) {
26816 				/* Transport failed - give up. */
26817 				break;
26818 			} else {
26819 				/* Transport busy - try again. */
26820 				poll_delay = 1 * SD_CSEC; /* 10 msec */
26821 			}
26822 		} else {
26823 			/*
26824 			 * Transport accepted - check pkt status.
26825 			 */
26826 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26827 			if (pkt->pkt_reason == CMD_CMPLT &&
26828 			    rc == STATUS_CHECK &&
26829 			    pkt->pkt_state & STATE_ARQ_DONE) {
26830 				struct scsi_arq_status *arqstat =
26831 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26832 
26833 				sensep = (uint8_t *)&arqstat->sts_sensedata;
26834 			} else {
26835 				sensep = NULL;
26836 			}
26837 
26838 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26839 			    (rc == STATUS_GOOD)) {
26840 				/* No error - we're done */
26841 				rval = SD_SUCCESS;
26842 				break;
26843 
26844 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26845 				/* Lost connection - give up */
26846 				break;
26847 
26848 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26849 			    (pkt->pkt_state == 0)) {
26850 				/* Pkt not dispatched - try again. */
26851 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26852 
26853 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26854 			    (rc == STATUS_QFULL)) {
26855 				/* Queue full - try again. */
26856 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26857 
26858 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26859 			    (rc == STATUS_BUSY)) {
26860 				/* Busy - try again. */
26861 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26862 				busy_count += (SD_SEC_TO_CSEC - 1);
26863 
26864 			} else if ((sensep != NULL) &&
26865 			    (scsi_sense_key(sensep) ==
26866 				KEY_UNIT_ATTENTION)) {
26867 				/* Unit Attention - try again */
26868 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 sec. */
26869 				continue;
26870 
26871 			} else if ((sensep != NULL) &&
26872 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
26873 			    (scsi_sense_asc(sensep) == 0x04) &&
26874 			    (scsi_sense_ascq(sensep) == 0x01)) {
26875 				/* Not ready -> ready - try again. */
26876 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26877 				busy_count += (SD_SEC_TO_CSEC - 1);
26878 
26879 			} else {
26880 				/* BAD status - give up. */
26881 				break;
26882 			}
26883 		}
26884 
26885 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
26886 		    !do_polled_io) {
26887 			delay(drv_usectohz(poll_delay));
26888 		} else {
26889 			/* busy-wait in interrupt threads or during cpr_dump */
26890 			drv_usecwait(poll_delay);
26891 		}
26892 	}
26893 
26894 	pkt->pkt_flags = savef;
26895 	pkt->pkt_comp = savec;
26896 	pkt->pkt_time = savet;
26897 	return (rval);
26898 }
26899 
26900 
26901 /*
26902  *    Function: sd_persistent_reservation_in_read_keys
26903  *
26904  * Description: This routine is the driver entry point for handling CD-ROM
26905  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26906  *		by sending the SCSI-3 PRIN commands to the device.
26907  *		Processes the read keys command response by copying the
26908  *		reservation key information into the user provided buffer.
26909  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26910  *
26911  *   Arguments: un   -  Pointer to soft state struct for the target.
26912  *		usrp -	user provided pointer to multihost Persistent In Read
26913  *			Keys structure (mhioc_inkeys_t)
26914  *		flag -	this argument is a pass through to ddi_copyxxx()
26915  *			directly from the mode argument of ioctl().
26916  *
26917  * Return Code: 0   - Success
26918  *		EACCES
26919  *		ENOTSUP
26920  *		errno return code from sd_send_scsi_cmd()
26921  *
26922  *     Context: Can sleep. Does not return until command is completed.
26923  */
26924 
26925 static int
26926 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26927     mhioc_inkeys_t *usrp, int flag)
26928 {
26929 #ifdef _MULTI_DATAMODEL
26930 	struct mhioc_key_list32	li32;
26931 #endif
26932 	sd_prin_readkeys_t	*in;
26933 	mhioc_inkeys_t		*ptr;
26934 	mhioc_key_list_t	li;
26935 	uchar_t			*data_bufp;
26936 	int 			data_len;
26937 	int			rval;
26938 	size_t			copysz;
26939 
26940 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26941 		return (EINVAL);
26942 	}
26943 	bzero(&li, sizeof (mhioc_key_list_t));
26944 
26945 	/*
26946 	 * Get the listsize from user
26947 	 */
26948 #ifdef _MULTI_DATAMODEL
26949 
26950 	switch (ddi_model_convert_from(flag & FMODELS)) {
26951 	case DDI_MODEL_ILP32:
26952 		copysz = sizeof (struct mhioc_key_list32);
26953 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26954 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26955 			    "sd_persistent_reservation_in_read_keys: "
26956 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26957 			rval = EFAULT;
26958 			goto done;
26959 		}
26960 		li.listsize = li32.listsize;
26961 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26962 		break;
26963 
26964 	case DDI_MODEL_NONE:
26965 		copysz = sizeof (mhioc_key_list_t);
26966 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26967 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26968 			    "sd_persistent_reservation_in_read_keys: "
26969 			    "failed ddi_copyin: mhioc_key_list_t\n");
26970 			rval = EFAULT;
26971 			goto done;
26972 		}
26973 		break;
26974 	}
26975 
26976 #else /* ! _MULTI_DATAMODEL */
26977 	copysz = sizeof (mhioc_key_list_t);
26978 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26979 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26980 		    "sd_persistent_reservation_in_read_keys: "
26981 		    "failed ddi_copyin: mhioc_key_list_t\n");
26982 		rval = EFAULT;
26983 		goto done;
26984 	}
26985 #endif
26986 
26987 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26988 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26989 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
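	/*
	 * Sizing example for the allocation above, assuming the usual
	 * 8-byte MHIOC_RESV_KEY_SIZE: a caller with listsize = 4 gets
	 * data_len = 4 * 8 + (sizeof (sd_prin_readkeys_t) -
	 * sizeof (caddr_t)), i.e. 32 bytes of key data plus the PRIN
	 * READ KEYS header, with the trailing key-list placeholder
	 * subtracted back out.
	 */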
26990 
26991 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
26992 	    data_len, data_bufp)) != 0) {
26993 		goto done;
26994 	}
26995 	in = (sd_prin_readkeys_t *)data_bufp;
26996 	ptr->generation = BE_32(in->generation);
26997 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26998 
26999 	/*
27000 	 * Return the min(listsize, listlen) keys
27001 	 */
27002 #ifdef _MULTI_DATAMODEL
27003 
27004 	switch (ddi_model_convert_from(flag & FMODELS)) {
27005 	case DDI_MODEL_ILP32:
27006 		li32.listlen = li.listlen;
27007 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
27008 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27009 			    "sd_persistent_reservation_in_read_keys: "
27010 			    "failed ddi_copyout: mhioc_key_list32_t\n");
27011 			rval = EFAULT;
27012 			goto done;
27013 		}
27014 		break;
27015 
27016 	case DDI_MODEL_NONE:
27017 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27018 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27019 			    "sd_persistent_reservation_in_read_keys: "
27020 			    "failed ddi_copyout: mhioc_key_list_t\n");
27021 			rval = EFAULT;
27022 			goto done;
27023 		}
27024 		break;
27025 	}
27026 
27027 #else /* ! _MULTI_DATAMODEL */
27028 
27029 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27030 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27031 		    "sd_persistent_reservation_in_read_keys: "
27032 		    "failed ddi_copyout: mhioc_key_list_t\n");
27033 		rval = EFAULT;
27034 		goto done;
27035 	}
27036 
27037 #endif /* _MULTI_DATAMODEL */
27038 
27039 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
27040 	    li.listsize * MHIOC_RESV_KEY_SIZE);
27041 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
27042 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27043 		    "sd_persistent_reservation_in_read_keys: "
27044 		    "failed ddi_copyout: keylist\n");
27045 		rval = EFAULT;
27046 	}
27047 done:
27048 	kmem_free(data_bufp, data_len);
27049 	return (rval);
27050 }
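
/*
 * Illustrative userland use of the MHIOCGRP_INKEYS path handled above
 * (a sketch only, not part of the driver; error handling omitted):
 *
 *	mhioc_resv_key_t keys[4];
 *	mhioc_key_list_t kl;
 *	mhioc_inkeys_t	 ik;
 *	int		 fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *
 *	kl.listsize = 4;
 *	kl.list = keys;
 *	ik.li = &kl;
 *	(void) ioctl(fd, MHIOCGRP_INKEYS, &ik);
 *
 * On success ik.generation and kl.listlen are valid, and at most
 * min(kl.listsize, kl.listlen) keys have been copied into keys[].
 */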
27051 
27052 
27053 /*
27054  *    Function: sd_persistent_reservation_in_read_resv
27055  *
27056  * Description: This routine is the driver entry point for handling CD-ROM
27057  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
27058  *		by sending the SCSI-3 PRIN commands to the device.
27059  *		Process the read persistent reservations command response by
27060  *		copying the reservation information into the user provided
27061  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
27062  *
27063  *   Arguments: un   -  Pointer to soft state struct for the target.
27064  *		usrp -	user provided pointer to multihost Persistent In Read
27065  *			Reservations structure (mhioc_inresvs_t)
27066  *		flag -	this argument is a pass through to ddi_copyxxx()
27067  *			directly from the mode argument of ioctl().
27068  *
27069  * Return Code: 0   - Success
27070  *		EACCES
27071  *		ENOTSUP
27072  *		errno return code from sd_send_scsi_cmd()
27073  *
27074  *     Context: Can sleep. Does not return until command is completed.
27075  */
27076 
27077 static int
27078 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
27079     mhioc_inresvs_t *usrp, int flag)
27080 {
27081 #ifdef _MULTI_DATAMODEL
27082 	struct mhioc_resv_desc_list32 resvlist32;
27083 #endif
27084 	sd_prin_readresv_t	*in;
27085 	mhioc_inresvs_t		*ptr;
27086 	sd_readresv_desc_t	*readresv_ptr;
27087 	mhioc_resv_desc_list_t	resvlist;
27088 	mhioc_resv_desc_t 	resvdesc;
27089 	uchar_t			*data_bufp;
27090 	int 			data_len;
27091 	int			rval;
27092 	int			i;
27093 	size_t			copysz;
27094 	mhioc_resv_desc_t	*bufp;
27095 
27096 	if ((ptr = usrp) == NULL) {
27097 		return (EINVAL);
27098 	}
27099 
27100 	/*
27101 	 * Get the listsize from user
27102 	 */
27103 #ifdef _MULTI_DATAMODEL
27104 	switch (ddi_model_convert_from(flag & FMODELS)) {
27105 	case DDI_MODEL_ILP32:
27106 		copysz = sizeof (struct mhioc_resv_desc_list32);
27107 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
27108 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27109 			    "sd_persistent_reservation_in_read_resv: "
27110 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27111 			rval = EFAULT;
27112 			goto done;
27113 		}
27114 		resvlist.listsize = resvlist32.listsize;
27115 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
27116 		break;
27117 
27118 	case DDI_MODEL_NONE:
27119 		copysz = sizeof (mhioc_resv_desc_list_t);
27120 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27121 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27122 			    "sd_persistent_reservation_in_read_resv: "
27123 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27124 			rval = EFAULT;
27125 			goto done;
27126 		}
27127 		break;
27128 	}
27129 #else /* ! _MULTI_DATAMODEL */
27130 	copysz = sizeof (mhioc_resv_desc_list_t);
27131 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27132 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27133 		    "sd_persistent_reservation_in_read_resv: "
27134 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27135 		rval = EFAULT;
27136 		goto done;
27137 	}
27138 #endif /* ! _MULTI_DATAMODEL */
27139 
27140 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
27141 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
27142 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
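	/*
	 * Sizing example, assuming the standard 16-byte SCSI-3
	 * reservation descriptor (SCSI3_RESV_DESC_LEN): listsize = 2
	 * gives data_len = 2 * 16 + (sizeof (sd_prin_readresv_t) -
	 * sizeof (caddr_t)), i.e. room for two descriptors plus the
	 * PRIN READ RESERVATION header.
	 */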
27143 
27144 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
27145 	    data_len, data_bufp)) != 0) {
27146 		goto done;
27147 	}
27148 	in = (sd_prin_readresv_t *)data_bufp;
27149 	ptr->generation = BE_32(in->generation);
27150 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
27151 
27152 	/*
27153 	 * Return the min(listsize, listlen) keys
27154 	 */
27155 #ifdef _MULTI_DATAMODEL
27156 
27157 	switch (ddi_model_convert_from(flag & FMODELS)) {
27158 	case DDI_MODEL_ILP32:
27159 		resvlist32.listlen = resvlist.listlen;
27160 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27161 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27162 			    "sd_persistent_reservation_in_read_resv: "
27163 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27164 			rval = EFAULT;
27165 			goto done;
27166 		}
27167 		break;
27168 
27169 	case DDI_MODEL_NONE:
27170 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27171 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27172 			    "sd_persistent_reservation_in_read_resv: "
27173 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27174 			rval = EFAULT;
27175 			goto done;
27176 		}
27177 		break;
27178 	}
27179 
27180 #else /* ! _MULTI_DATAMODEL */
27181 
27182 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27183 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27184 		    "sd_persistent_reservation_in_read_resv: "
27185 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27186 		rval = EFAULT;
27187 		goto done;
27188 	}
27189 
27190 #endif /* ! _MULTI_DATAMODEL */
27191 
27192 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27193 	bufp = resvlist.list;
27194 	copysz = sizeof (mhioc_resv_desc_t);
27195 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27196 	    i++, readresv_ptr++, bufp++) {
27197 
27198 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27199 		    MHIOC_RESV_KEY_SIZE);
27200 		resvdesc.type  = readresv_ptr->type;
27201 		resvdesc.scope = readresv_ptr->scope;
27202 		resvdesc.scope_specific_addr =
27203 		    BE_32(readresv_ptr->scope_specific_addr);
27204 
27205 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27206 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27207 			    "sd_persistent_reservation_in_read_resv: "
27208 			    "failed ddi_copyout: resvlist\n");
27209 			rval = EFAULT;
27210 			goto done;
27211 		}
27212 	}
27213 done:
27214 	kmem_free(data_bufp, data_len);
27215 	return (rval);
27216 }
27217 
27218 
27219 /*
27220  *    Function: sr_change_blkmode()
27221  *
27222  * Description: This routine is the driver entry point for handling CD-ROM
27223  *		block mode ioctl requests. Support for returning and changing
27224  *		the current block size in use by the device is implemented. The
27225  *		LBA size is changed via a MODE SELECT Block Descriptor.
27226  *
27227  *		This routine issues a mode sense with an allocation length of
27228  *		12 bytes for the mode page header and a single block descriptor.
27229  *
27230  *   Arguments: dev - the device 'dev_t'
27231  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27232  *		      CDROMSBLKMODE (set)
27233  *		data - current block size or requested block size
27234  *		flag - this argument is a pass through to ddi_copyxxx() directly
27235  *		       from the mode argument of ioctl().
27236  *
27237  * Return Code: the code returned by sd_send_scsi_cmd()
27238  *		EINVAL if invalid arguments are provided
27239  *		EFAULT if ddi_copyxxx() fails
27240  *		ENXIO if ddi_get_soft_state() fails
27241  *		EIO if invalid mode sense block descriptor length
27242  *
27243  */
27244 
27245 static int
27246 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
27247 {
27248 	struct sd_lun			*un = NULL;
27249 	struct mode_header		*sense_mhp, *select_mhp;
27250 	struct block_descriptor		*sense_desc, *select_desc;
27251 	int				current_bsize;
27252 	int				rval = EINVAL;
27253 	uchar_t				*sense = NULL;
27254 	uchar_t				*select = NULL;
27255 
27256 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
27257 
27258 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27259 		return (ENXIO);
27260 	}
27261 
27262 	/*
27263 	 * The block length is changed via the Mode Select block descriptor, the
27264 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
27265 	 * required as part of this routine. Therefore the mode sense allocation
27266 	 * length is specified to be the length of a mode page header and a
27267 	 * block descriptor.
27268 	 */
27269 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27270 
27271 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27272 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
27273 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27274 		    "sr_change_blkmode: Mode Sense Failed\n");
27275 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27276 		return (rval);
27277 	}
27278 
27279 	/* Check the block descriptor len to handle only 1 block descriptor */
27280 	sense_mhp = (struct mode_header *)sense;
27281 	if ((sense_mhp->bdesc_length == 0) ||
27282 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
27283 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27284 		    "sr_change_blkmode: Mode Sense returned invalid block"
27285 		    " descriptor length\n");
27286 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27287 		return (EIO);
27288 	}
27289 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
27290 	current_bsize = ((sense_desc->blksize_hi << 16) |
27291 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
27292 
27293 	/* Process command */
27294 	switch (cmd) {
27295 	case CDROMGBLKMODE:
27296 		/* Return the block size obtained during the mode sense */
27297 		if (ddi_copyout(&current_bsize, (void *)data,
27298 		    sizeof (int), flag) != 0)
27299 			rval = EFAULT;
27300 		break;
27301 	case CDROMSBLKMODE:
27302 		/* Validate the requested block size */
27303 		switch (data) {
27304 		case CDROM_BLK_512:
27305 		case CDROM_BLK_1024:
27306 		case CDROM_BLK_2048:
27307 		case CDROM_BLK_2056:
27308 		case CDROM_BLK_2336:
27309 		case CDROM_BLK_2340:
27310 		case CDROM_BLK_2352:
27311 		case CDROM_BLK_2368:
27312 		case CDROM_BLK_2448:
27313 		case CDROM_BLK_2646:
27314 		case CDROM_BLK_2647:
27315 			break;
27316 		default:
27317 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27318 			    "sr_change_blkmode: "
27319 			    "Block Size '%ld' Not Supported\n", data);
27320 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27321 			return (EINVAL);
27322 		}
27323 
27324 		/*
27325 		 * The current block size matches the requested block size so
27326 		 * there is no need to send the mode select to change the size
27327 		 */
27328 		if (current_bsize == data) {
27329 			break;
27330 		}
27331 
27332 		/* Build the select data for the requested block size */
27333 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27334 		select_mhp = (struct mode_header *)select;
27335 		select_desc =
27336 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
27337 		/*
27338 		 * The LBA size is changed via the block descriptor, so the
27339 		 * descriptor is built according to the user data
27340 		 */
27341 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
27342 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
27343 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
27344 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
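		/*
		 * For example, a requested block size of 2048 (0x000800)
		 * is encoded as blksize_hi = 0x00, blksize_mid = 0x08,
		 * blksize_lo = 0x00; the mode sense path above decodes
		 * the same fields with (hi << 16) | (mid << 8) | lo.
		 */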
27345 
27346 		/* Send the mode select for the requested block size */
27347 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27348 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27349 		    SD_PATH_STANDARD)) != 0) {
27350 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27351 			    "sr_change_blkmode: Mode Select Failed\n");
27352 			/*
27353 			 * The mode select failed for the requested block size,
27354 			 * so reset the data for the original block size and
27355 			 * send it to the target. The error is indicated by the
27356 			 * return value for the failed mode select.
27357 			 */
27358 			select_desc->blksize_hi  = sense_desc->blksize_hi;
27359 			select_desc->blksize_mid = sense_desc->blksize_mid;
27360 			select_desc->blksize_lo  = sense_desc->blksize_lo;
27361 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27362 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27363 			    SD_PATH_STANDARD);
27364 		} else {
27365 			ASSERT(!mutex_owned(SD_MUTEX(un)));
27366 			mutex_enter(SD_MUTEX(un));
27367 			sd_update_block_info(un, (uint32_t)data, 0);
27368 
27369 			mutex_exit(SD_MUTEX(un));
27370 		}
27371 		break;
27372 	default:
27373 		/* should not reach here, but check anyway */
27374 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27375 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
27376 		rval = EINVAL;
27377 		break;
27378 	}
27379 
27380 	if (select) {
27381 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
27382 	}
27383 	if (sense) {
27384 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27385 	}
27386 	return (rval);
27387 }
27388 
27389 
27390 /*
27391  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27392  * implement driver support for getting and setting the CD speed. The command
27393  * set used will be based on the device type. If the device has not been
27394  * identified as MMC the Toshiba vendor specific mode page will be used. If
27395  * the device is MMC but does not support the Real Time Streaming feature
27396  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27397  * be used to read the speed.
27398  */
27399 
27400 /*
27401  *    Function: sr_change_speed()
27402  *
27403  * Description: This routine is the driver entry point for handling CD-ROM
27404  *		drive speed ioctl requests for devices supporting the Toshiba
27405  *		vendor specific drive speed mode page. Support for returning
27406  *		and changing the current drive speed in use by the device is
27407  *		implemented.
27408  *
27409  *   Arguments: dev - the device 'dev_t'
27410  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27411  *		      CDROMSDRVSPEED (set)
27412  *		data - current drive speed or requested drive speed
27413  *		flag - this argument is a pass through to ddi_copyxxx() directly
27414  *		       from the mode argument of ioctl().
27415  *
27416  * Return Code: the code returned by sd_send_scsi_cmd()
27417  *		EINVAL if invalid arguments are provided
27418  *		EFAULT if ddi_copyxxx() fails
27419  *		ENXIO if ddi_get_soft_state() fails
27420  *		EIO if invalid mode sense block descriptor length
27421  */
27422 
27423 static int
27424 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27425 {
27426 	struct sd_lun			*un = NULL;
27427 	struct mode_header		*sense_mhp, *select_mhp;
27428 	struct mode_speed		*sense_page, *select_page;
27429 	int				current_speed;
27430 	int				rval = EINVAL;
27431 	int				bd_len;
27432 	uchar_t				*sense = NULL;
27433 	uchar_t				*select = NULL;
27434 
27435 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27436 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27437 		return (ENXIO);
27438 	}
27439 
27440 	/*
27441 	 * Note: The drive speed is being modified here according to a Toshiba
27442 	 * vendor specific mode page (0x31).
27443 	 */
27444 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27445 
27446 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27447 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27448 	    SD_PATH_STANDARD)) != 0) {
27449 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27450 		    "sr_change_speed: Mode Sense Failed\n");
27451 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27452 		return (rval);
27453 	}
27454 	sense_mhp  = (struct mode_header *)sense;
27455 
27456 	/* Check the block descriptor len to handle only 1 block descriptor */
27457 	bd_len = sense_mhp->bdesc_length;
27458 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27459 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27460 		    "sr_change_speed: Mode Sense returned invalid block "
27461 		    "descriptor length\n");
27462 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27463 		return (EIO);
27464 	}
27465 
27466 	sense_page = (struct mode_speed *)
27467 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27468 	current_speed = sense_page->speed;
27469 
27470 	/* Process command */
27471 	switch (cmd) {
27472 	case CDROMGDRVSPEED:
27473 		/* Return the drive speed obtained during the mode sense */
27474 		if (current_speed == 0x2) {
27475 			current_speed = CDROM_TWELVE_SPEED;
27476 		}
27477 		if (ddi_copyout(&current_speed, (void *)data,
27478 		    sizeof (int), flag) != 0) {
27479 			rval = EFAULT;
27480 		}
27481 		break;
27482 	case CDROMSDRVSPEED:
27483 		/* Validate the requested drive speed */
27484 		switch ((uchar_t)data) {
27485 		case CDROM_TWELVE_SPEED:
27486 			data = 0x2;
27487 			/*FALLTHROUGH*/
27488 		case CDROM_NORMAL_SPEED:
27489 		case CDROM_DOUBLE_SPEED:
27490 		case CDROM_QUAD_SPEED:
27491 		case CDROM_MAXIMUM_SPEED:
27492 			break;
27493 		default:
27494 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27495 			    "sr_change_speed: "
27496 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27497 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27498 			return (EINVAL);
27499 		}
27500 
27501 		/*
27502 		 * The current drive speed matches the requested drive speed so
27503 		 * there is no need to send the mode select to change the speed
27504 		 */
27505 		if (current_speed == data) {
27506 			break;
27507 		}
27508 
27509 		/* Build the select data for the requested drive speed */
27510 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27511 		select_mhp = (struct mode_header *)select;
27512 		select_mhp->bdesc_length = 0;
27513 		select_page =
27514 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27517 		select_page->mode_page.code = CDROM_MODE_SPEED;
27518 		select_page->mode_page.length = 2;
27519 		select_page->speed = (uchar_t)data;
27520 
27521 		/* Send the mode select for the requested drive speed */
27522 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27523 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27524 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27525 			/*
27526 			 * The mode select failed for the requested drive speed,
27527 			 * so reset the data for the original drive speed and
27528 			 * send it to the target. The error is indicated by the
27529 			 * return value for the failed mode select.
27530 			 */
27531 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27532 			    "sr_drive_speed: Mode Select Failed\n");
27533 			select_page->speed = sense_page->speed;
27534 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27535 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27536 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27537 		}
27538 		break;
27539 	default:
27540 		/* should not reach here, but check anyway */
27541 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27542 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27543 		rval = EINVAL;
27544 		break;
27545 	}
27546 
27547 	if (select) {
27548 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27549 	}
27550 	if (sense) {
27551 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27552 	}
27553 
27554 	return (rval);
27555 }
27556 
27557 
27558 /*
27559  *    Function: sr_atapi_change_speed()
27560  *
27561  * Description: This routine is the driver entry point for handling CD-ROM
27562  *		drive speed ioctl requests for MMC devices that do not support
27563  *		the Real Time Streaming feature (0x107).
27564  *
27565  *		Note: This routine will use the SET SPEED command which may not
27566  *		be supported by all devices.
27567  *
27568  *   Arguments: dev- the device 'dev_t'
27569  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27570  *		     CDROMSDRVSPEED (set)
27571  *		data- current drive speed or requested drive speed
27572  *		flag- this argument is a pass through to ddi_copyxxx() directly
27573  *		      from the mode argument of ioctl().
27574  *
27575  * Return Code: the code returned by sd_send_scsi_cmd()
27576  *		EINVAL if invalid arguments are provided
27577  *		EFAULT if ddi_copyxxx() fails
27578  *		ENXIO if ddi_get_soft_state() fails
27579  *		EIO if invalid mode sense block descriptor length
27580  */
27581 
27582 static int
27583 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27584 {
27585 	struct sd_lun			*un;
27586 	struct uscsi_cmd		*com = NULL;
27587 	struct mode_header_grp2		*sense_mhp;
27588 	uchar_t				*sense_page;
27589 	uchar_t				*sense = NULL;
27590 	char				cdb[CDB_GROUP5];
27591 	int				bd_len;
27592 	int				current_speed = 0;
27593 	int				max_speed = 0;
27594 	int				rval;
27595 
27596 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27597 
27598 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27599 		return (ENXIO);
27600 	}
27601 
27602 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27603 
27604 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
27605 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27606 	    SD_PATH_STANDARD)) != 0) {
27607 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27608 		    "sr_atapi_change_speed: Mode Sense Failed\n");
27609 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27610 		return (rval);
27611 	}
27612 
27613 	/* Check the block descriptor len to handle only 1 block descriptor */
27614 	sense_mhp = (struct mode_header_grp2 *)sense;
27615 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27616 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27617 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27618 		    "sr_atapi_change_speed: Mode Sense returned invalid "
27619 		    "block descriptor length\n");
27620 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27621 		return (EIO);
27622 	}
27623 
27624 	/* Calculate the current and maximum drive speeds */
27625 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27626 	current_speed = (sense_page[14] << 8) | sense_page[15];
27627 	max_speed = (sense_page[8] << 8) | sense_page[9];
27628 
27629 	/* Process the command */
27630 	switch (cmd) {
27631 	case CDROMGDRVSPEED:
27632 		current_speed /= SD_SPEED_1X;
27633 		if (ddi_copyout(&current_speed, (void *)data,
27634 		    sizeof (int), flag) != 0)
27635 			rval = EFAULT;
27636 		break;
27637 	case CDROMSDRVSPEED:
27638 		/* Convert the speed code to KB/sec */
27639 		switch ((uchar_t)data) {
27640 		case CDROM_NORMAL_SPEED:
27641 			current_speed = SD_SPEED_1X;
27642 			break;
27643 		case CDROM_DOUBLE_SPEED:
27644 			current_speed = 2 * SD_SPEED_1X;
27645 			break;
27646 		case CDROM_QUAD_SPEED:
27647 			current_speed = 4 * SD_SPEED_1X;
27648 			break;
27649 		case CDROM_TWELVE_SPEED:
27650 			current_speed = 12 * SD_SPEED_1X;
27651 			break;
27652 		case CDROM_MAXIMUM_SPEED:
27653 			current_speed = 0xffff;
27654 			break;
27655 		default:
27656 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27657 			    "sr_atapi_change_speed: invalid drive speed %d\n",
27658 			    (uchar_t)data);
27659 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27660 			return (EINVAL);
27661 		}
27662 
27663 		/* Check the request against the drive's max speed. */
27664 		if (current_speed != 0xffff) {
27665 			if (current_speed > max_speed) {
27666 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27667 				return (EINVAL);
27668 			}
27669 		}
27670 
27671 		/*
27672 		 * Build and send the SET SPEED command
27673 		 *
27674 		 * Note: The SET SPEED (0xBB) command used in this routine is
27675 		 * obsolete per the SCSI MMC spec but still supported in the
27676 		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI,
27677 		 * therefore the command is still implemented in this routine.
27678 		 */
27679 		bzero(cdb, sizeof (cdb));
27680 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
27681 		cdb[2] = (uchar_t)(current_speed >> 8);
27682 		cdb[3] = (uchar_t)current_speed;
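		/*
		 * Encoding example, assuming the customary SD_SPEED_1X of
		 * 176 KB/sec: a CDROM_QUAD_SPEED request is 4 * 176 = 704
		 * = 0x02C0 KB/sec, so cdb[2] = 0x02 and cdb[3] = 0xC0.
		 */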
27683 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27684 		com->uscsi_cdb	   = (caddr_t)cdb;
27685 		com->uscsi_cdblen  = CDB_GROUP5;
27686 		com->uscsi_bufaddr = NULL;
27687 		com->uscsi_buflen  = 0;
27688 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
27689 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
27690 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27691 		break;
27692 	default:
27693 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27694 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27695 		rval = EINVAL;
27696 	}
27697 
27698 	if (sense) {
27699 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27700 	}
27701 	if (com) {
27702 		kmem_free(com, sizeof (*com));
27703 	}
27704 	return (rval);
27705 }
27706 
27707 
27708 /*
27709  *    Function: sr_pause_resume()
27710  *
27711  * Description: This routine is the driver entry point for handling CD-ROM
27712  *		pause/resume ioctl requests. This only affects the audio play
27713  *		operation.
27714  *
27715  *   Arguments: dev - the device 'dev_t'
27716  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27717  *		      for setting the resume bit of the cdb.
27718  *
27719  * Return Code: the code returned by sd_send_scsi_cmd()
27720  *		EINVAL if invalid mode specified
27721  *
27722  */
27723 
27724 static int
27725 sr_pause_resume(dev_t dev, int cmd)
27726 {
27727 	struct sd_lun		*un;
27728 	struct uscsi_cmd	*com;
27729 	char			cdb[CDB_GROUP1];
27730 	int			rval;
27731 
27732 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27733 		return (ENXIO);
27734 	}
27735 
27736 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27737 	bzero(cdb, CDB_GROUP1);
27738 	cdb[0] = SCMD_PAUSE_RESUME;
27739 	switch (cmd) {
27740 	case CDROMRESUME:
27741 		cdb[8] = 1;
27742 		break;
27743 	case CDROMPAUSE:
27744 		cdb[8] = 0;
27745 		break;
27746 	default:
27747 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27748 		    " Command '%x' Not Supported\n", cmd);
27749 		rval = EINVAL;
27750 		goto done;
27751 	}
27752 
27753 	com->uscsi_cdb    = cdb;
27754 	com->uscsi_cdblen = CDB_GROUP1;
27755 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27756 
27757 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27758 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27759 
27760 done:
27761 	kmem_free(com, sizeof (*com));
27762 	return (rval);
27763 }
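
/*
 * Illustrative userland use of the pause/resume path above (a sketch
 * only, not part of the driver; error handling omitted).  CDROMPAUSE
 * sends the PAUSE/RESUME cdb with the resume bit (cdb[8]) clear,
 * CDROMRESUME with it set:
 *
 *	int	fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *
 *	(void) ioctl(fd, CDROMPAUSE);
 *	(void) ioctl(fd, CDROMRESUME);
 */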
27764 
27765 
27766 /*
27767  *    Function: sr_play_msf()
27768  *
27769  * Description: This routine is the driver entry point for handling CD-ROM
27770  *		ioctl requests to output the audio signals at the specified
27771  *		starting address and continue the audio play until the specified
27772  *		ending address (CDROMPLAYMSF). The address is in Minute Second
27773  *		Frame (MSF) format.
27774  *
27775  *   Arguments: dev	- the device 'dev_t'
27776  *		data	- pointer to user provided audio msf structure,
27777  *		          specifying start/end addresses.
27778  *		flag	- this argument is a pass through to ddi_copyxxx()
27779  *		          directly from the mode argument of ioctl().
27780  *
27781  * Return Code: the code returned by sd_send_scsi_cmd()
27782  *		EFAULT if ddi_copyxxx() fails
27783  *		ENXIO if ddi_get_soft_state() fails
27784  *		EINVAL if data pointer is NULL
27785  */
27786 
27787 static int
27788 sr_play_msf(dev_t dev, caddr_t data, int flag)
27789 {
27790 	struct sd_lun		*un;
27791 	struct uscsi_cmd	*com;
27792 	struct cdrom_msf	msf_struct;
27793 	struct cdrom_msf	*msf = &msf_struct;
27794 	char			cdb[CDB_GROUP1];
27795 	int			rval;
27796 
27797 	if (data == NULL) {
27798 		return (EINVAL);
27799 	}
27800 
27801 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27802 		return (ENXIO);
27803 	}
27804 
27805 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27806 		return (EFAULT);
27807 	}
27808 
27809 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27810 	bzero(cdb, CDB_GROUP1);
27811 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27812 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27813 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27814 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27815 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27816 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27817 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27818 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27819 	} else {
27820 		cdb[3] = msf->cdmsf_min0;
27821 		cdb[4] = msf->cdmsf_sec0;
27822 		cdb[5] = msf->cdmsf_frame0;
27823 		cdb[6] = msf->cdmsf_min1;
27824 		cdb[7] = msf->cdmsf_sec1;
27825 		cdb[8] = msf->cdmsf_frame1;
27826 	}
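	/*
	 * BCD example for the branch above: a start position of 12
	 * minutes, 34 seconds, frame 56 is encoded as cdb[3] = 0x12,
	 * cdb[4] = 0x34, cdb[5] = 0x56 on drives that expect BCD,
	 * versus plain binary 12/34/56 otherwise.
	 */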
27827 	com->uscsi_cdb    = cdb;
27828 	com->uscsi_cdblen = CDB_GROUP1;
27829 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27830 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27831 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27832 	kmem_free(com, sizeof (*com));
27833 	return (rval);
27834 }
27835 
27836 
27837 /*
27838  *    Function: sr_play_trkind()
27839  *
27840  * Description: This routine is the driver entry point for handling CD-ROM
27841  *		ioctl requests to output the audio signals at the specified
27842  *		starting address and continue the audio play until the specified
27843  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27844  *		format.
27845  *
27846  *   Arguments: dev	- the device 'dev_t'
27847  *		data	- pointer to user provided audio track/index structure,
27848  *		          specifying start/end addresses.
27849  *		flag	- this argument is a pass through to ddi_copyxxx()
27850  *		          directly from the mode argument of ioctl().
27851  *
27852  * Return Code: the code returned by sd_send_scsi_cmd()
27853  *		EFAULT if ddi_copyxxx() fails
27854  *		ENXIO if ddi_get_soft_state() fails
27855  *		EINVAL if data pointer is NULL
27856  */
27857 
27858 static int
27859 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27860 {
27861 	struct cdrom_ti		ti_struct;
27862 	struct cdrom_ti		*ti = &ti_struct;
27863 	struct uscsi_cmd	*com = NULL;
27864 	char			cdb[CDB_GROUP1];
27865 	int			rval;
27866 
27867 	if (data == NULL) {
27868 		return (EINVAL);
27869 	}
27870 
27871 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27872 		return (EFAULT);
27873 	}
27874 
27875 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27876 	bzero(cdb, CDB_GROUP1);
27877 	cdb[0] = SCMD_PLAYAUDIO_TI;
27878 	cdb[4] = ti->cdti_trk0;
27879 	cdb[5] = ti->cdti_ind0;
27880 	cdb[7] = ti->cdti_trk1;
27881 	cdb[8] = ti->cdti_ind1;
27882 	com->uscsi_cdb    = cdb;
27883 	com->uscsi_cdblen = CDB_GROUP1;
27884 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27885 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27886 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27887 	kmem_free(com, sizeof (*com));
27888 	return (rval);
27889 }
27890 
27891 
27892 /*
27893  *    Function: sr_read_all_subcodes()
27894  *
27895  * Description: This routine is the driver entry point for handling CD-ROM
27896  *		ioctl requests to return raw subcode data while the target is
27897  *		playing audio (CDROMSUBCODE).
27898  *
27899  *   Arguments: dev	- the device 'dev_t'
27900  *		data	- pointer to user provided cdrom subcode structure,
27901  *		          specifying the transfer length and address.
27902  *		flag	- this argument is a pass through to ddi_copyxxx()
27903  *		          directly from the mode argument of ioctl().
27904  *
27905  * Return Code: the code returned by sd_send_scsi_cmd()
27906  *		EFAULT if ddi_copyxxx() fails
27907  *		ENXIO if ddi_get_soft_state() fails
27908  *		EINVAL if data pointer is NULL
27909  */
27910 
27911 static int
27912 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27913 {
27914 	struct sd_lun		*un = NULL;
27915 	struct uscsi_cmd	*com = NULL;
27916 	struct cdrom_subcode	*subcode = NULL;
27917 	int			rval;
27918 	size_t			buflen;
27919 	char			cdb[CDB_GROUP5];
27920 
27921 #ifdef _MULTI_DATAMODEL
27922 	/* To support ILP32 applications in an LP64 world */
27923 	struct cdrom_subcode32		cdrom_subcode32;
27924 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27925 #endif
27926 	if (data == NULL) {
27927 		return (EINVAL);
27928 	}
27929 
27930 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27931 		return (ENXIO);
27932 	}
27933 
27934 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27935 
27936 #ifdef _MULTI_DATAMODEL
27937 	switch (ddi_model_convert_from(flag & FMODELS)) {
27938 	case DDI_MODEL_ILP32:
27939 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27940 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27941 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27942 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27943 			return (EFAULT);
27944 		}
27945 		/* Convert the ILP32 uscsi data from the application to LP64 */
27946 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27947 		break;
27948 	case DDI_MODEL_NONE:
27949 		if (ddi_copyin(data, subcode,
27950 		    sizeof (struct cdrom_subcode), flag)) {
27951 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27952 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27953 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27954 			return (EFAULT);
27955 		}
27956 		break;
27957 	}
27958 #else /* ! _MULTI_DATAMODEL */
27959 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27960 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27961 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27962 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27963 		return (EFAULT);
27964 	}
27965 #endif /* _MULTI_DATAMODEL */
27966 
27967 	/*
27968 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
27969 	 * reject a length input that does not fit in 3 bytes.
27970 	 */
27971 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27972 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27973 		    "sr_read_all_subcodes: "
27974 		    "cdrom transfer length too large: %d (limit %d)\n",
27975 		    subcode->cdsc_length, 0xFFFFFF);
27976 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27977 		return (EINVAL);
27978 	}
27979 
27980 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
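	/*
	 * Transfer-size example: each block carries CDROM_BLK_SUBCODE
	 * (96) bytes of raw subcode, so a cdsc_length of 75 blocks (one
	 * second of audio) gives buflen = 75 * 96 = 7200 bytes.
	 */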
27981 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27982 	bzero(cdb, CDB_GROUP5);
27983 
27984 	if (un->un_f_mmc_cap == TRUE) {
27985 		cdb[0] = (char)SCMD_READ_CD;
27986 		cdb[2] = (char)0xff;
27987 		cdb[3] = (char)0xff;
27988 		cdb[4] = (char)0xff;
27989 		cdb[5] = (char)0xff;
27990 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27991 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27992 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
27993 		cdb[10] = 1;
27994 	} else {
27995 		/*
27996 		 * Note: A vendor specific command (0xDF) is being used here to
27997 		 * request a read of all subcodes.
27998 		 */
27999 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
28000 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
28001 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28002 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28003 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
28004 	}
28005 	com->uscsi_cdb	   = cdb;
28006 	com->uscsi_cdblen  = CDB_GROUP5;
28007 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
28008 	com->uscsi_buflen  = buflen;
28009 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28010 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28011 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28012 	kmem_free(subcode, sizeof (struct cdrom_subcode));
28013 	kmem_free(com, sizeof (*com));
28014 	return (rval);
28015 }
28016 
28017 
28018 /*
28019  *    Function: sr_read_subchannel()
28020  *
28021  * Description: This routine is the driver entry point for handling CD-ROM
28022  *		ioctl requests to return the Q sub-channel data of the CD
28023  *		current position block. (CDROMSUBCHNL) The data includes the
28024  *		track number, index number, absolute CD-ROM address (LBA or MSF
28025  *		format per the user) , track relative CD-ROM address (LBA or MSF
28026  *		format per the user), control data and audio status.
28027  *
28028  *   Arguments: dev	- the device 'dev_t'
28029  *		data	- pointer to user provided cdrom sub-channel structure
28030  *		flag	- this argument is a pass through to ddi_copyxxx()
28031  *		          directly from the mode argument of ioctl().
28032  *
28033  * Return Code: the code returned by sd_send_scsi_cmd()
28034  *		EFAULT if ddi_copyxxx() fails
28035  *		ENXIO if ddi_get_soft_state() fails
28036  *		EINVAL if data pointer is NULL
28037  */
28038 
28039 static int
28040 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
28041 {
28042 	struct sd_lun		*un;
28043 	struct uscsi_cmd	*com;
28044 	struct cdrom_subchnl	subchannel;
28045 	struct cdrom_subchnl	*subchnl = &subchannel;
28046 	char			cdb[CDB_GROUP1];
28047 	caddr_t			buffer;
28048 	int			rval;
28049 
28050 	if (data == NULL) {
28051 		return (EINVAL);
28052 	}
28053 
28054 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28055 	    (un->un_state == SD_STATE_OFFLINE)) {
28056 		return (ENXIO);
28057 	}
28058 
28059 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
28060 		return (EFAULT);
28061 	}
28062 
28063 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
28064 	bzero(cdb, CDB_GROUP1);
28065 	cdb[0] = SCMD_READ_SUBCHANNEL;
28066 	/* Set the MSF bit based on the user requested address format */
28067 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
28068 	/*
28069 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
28070 	 * returned
28071 	 */
28072 	cdb[2] = 0x40;
28073 	/*
28074 	 * Set byte 3 to specify the return data format. A value of 0x01
28075 	 * indicates that the CD-ROM current position should be returned.
28076 	 */
28077 	cdb[3] = 0x01;
28078 	cdb[8] = 0x10;
28079 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28080 	com->uscsi_cdb	   = cdb;
28081 	com->uscsi_cdblen  = CDB_GROUP1;
28082 	com->uscsi_bufaddr = buffer;
28083 	com->uscsi_buflen  = 16;
28084 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28085 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28086 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28087 	if (rval != 0) {
28088 		kmem_free(buffer, 16);
28089 		kmem_free(com, sizeof (*com));
28090 		return (rval);
28091 	}
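	/*
	 * Layout of the 16-byte READ SUB-CHANNEL response parsed below:
	 * byte 1 holds the audio status, byte 5 the ADR (high nibble)
	 * and CTRL (low nibble) fields, bytes 6 and 7 the track and
	 * index numbers, bytes 8-11 the absolute address and bytes
	 * 12-15 the track relative address, the addresses being in the
	 * format (LBA or MSF) selected by the MSF bit in cdb[1].
	 */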
28092 
28093 	/* Process the returned Q sub-channel data */
28094 	subchnl->cdsc_audiostatus = buffer[1];
28095 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
28096 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
28097 	subchnl->cdsc_trk	= buffer[6];
28098 	subchnl->cdsc_ind	= buffer[7];
28099 	if (subchnl->cdsc_format & CDROM_LBA) {
28100 		subchnl->cdsc_absaddr.lba =
28101 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28102 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28103 		subchnl->cdsc_reladdr.lba =
28104 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
28105 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
28106 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
28107 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
28108 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
28109 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
28110 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
28111 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
28112 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
28113 	} else {
28114 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
28115 		subchnl->cdsc_absaddr.msf.second = buffer[10];
28116 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
28117 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
28118 		subchnl->cdsc_reladdr.msf.second = buffer[14];
28119 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
28120 	}
28121 	kmem_free(buffer, 16);
28122 	kmem_free(com, sizeof (*com));
28123 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
28124 	    != 0) {
28125 		return (EFAULT);
28126 	}
28127 	return (rval);
28128 }
28129 
28130 
28131 /*
28132  *    Function: sr_read_tocentry()
28133  *
28134  * Description: This routine is the driver entry point for handling CD-ROM
28135  *		ioctl requests to read from the Table of Contents (TOC)
28136  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
28137  *		fields, the starting address (LBA or MSF format per the user)
28138  *		and the data mode if the user specified track is a data track.
28139  *
28140  *		Note: The READ HEADER (0x44) command used in this routine is
28141  *		obsolete per the SCSI MMC spec but still supported in the
28142  *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI,
28143  *		therefore the command is still implemented in this routine.
28144  *
28145  *   Arguments: dev	- the device 'dev_t'
28146  *		data	- pointer to user provided toc entry structure,
28147  *			  specifying the track # and the address format
28148  *			  (LBA or MSF).
28149  *		flag	- this argument is a pass through to ddi_copyxxx()
28150  *		          directly from the mode argument of ioctl().
28151  *
28152  * Return Code: the code returned by sd_send_scsi_cmd()
28153  *		EFAULT if ddi_copyxxx() fails
28154  *		ENXIO if ddi_get_soft_state() fails
28155  *		EINVAL if data pointer is NULL
28156  */
28157 
28158 static int
28159 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28160 {
28161 	struct sd_lun		*un = NULL;
28162 	struct uscsi_cmd	*com;
28163 	struct cdrom_tocentry	toc_entry;
28164 	struct cdrom_tocentry	*entry = &toc_entry;
28165 	caddr_t			buffer;
28166 	int			rval;
28167 	char			cdb[CDB_GROUP1];
28168 
28169 	if (data == NULL) {
28170 		return (EINVAL);
28171 	}
28172 
28173 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28174 	    (un->un_state == SD_STATE_OFFLINE)) {
28175 		return (ENXIO);
28176 	}
28177 
28178 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28179 		return (EFAULT);
28180 	}
28181 
28182 	/* Validate the requested track and address format */
28183 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28184 		return (EINVAL);
28185 	}
28186 
28187 	if (entry->cdte_track == 0) {
28188 		return (EINVAL);
28189 	}
28190 
28191 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28192 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28193 	bzero(cdb, CDB_GROUP1);
28194 
28195 	cdb[0] = SCMD_READ_TOC;
28196 	/* Set the MSF bit based on the user requested address format  */
28197 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28198 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28199 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28200 	} else {
28201 		cdb[6] = entry->cdte_track;
28202 	}
28203 
28204 	/*
28205 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28206 	 * (4 byte TOC response header + 8 byte track descriptor)
28207 	 */
28208 	cdb[8] = 12;
28209 	com->uscsi_cdb	   = cdb;
28210 	com->uscsi_cdblen  = CDB_GROUP1;
28211 	com->uscsi_bufaddr = buffer;
28212 	com->uscsi_buflen  = 0x0C;
28213 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28214 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28215 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28216 	if (rval != 0) {
28217 		kmem_free(buffer, 12);
28218 		kmem_free(com, sizeof (*com));
28219 		return (rval);
28220 	}
28221 
28222 	/* Process the toc entry */
28223 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28224 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28225 	if (entry->cdte_format & CDROM_LBA) {
28226 		entry->cdte_addr.lba =
28227 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28228 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28229 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28230 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28231 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28232 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28233 		/*
28234 		 * Send a READ TOC command using the LBA address format to get
28235 		 * the LBA for the track requested so it can be used in the
28236 		 * READ HEADER request
28237 		 *
28238 		 * Note: The MSF bit of the READ HEADER command specifies the
28239 		 * output format. The block address specified in that command
28240 		 * must be in LBA format.
28241 		 */
28242 		cdb[1] = 0;
28243 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28244 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28245 		if (rval != 0) {
28246 			kmem_free(buffer, 12);
28247 			kmem_free(com, sizeof (*com));
28248 			return (rval);
28249 		}
28250 	} else {
28251 		entry->cdte_addr.msf.minute	= buffer[9];
28252 		entry->cdte_addr.msf.second	= buffer[10];
28253 		entry->cdte_addr.msf.frame	= buffer[11];
28254 		/*
28255 		 * Send a READ TOC command using the LBA address format to get
28256 		 * the LBA for the track requested so it can be used in the
28257 		 * READ HEADER request
28258 		 *
28259 		 * Note: The MSF bit of the READ HEADER command specifies the
28260 		 * output format. The block address specified in that command
28261 		 * must be in LBA format.
28262 		 */
28263 		cdb[1] = 0;
28264 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28265 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28266 		if (rval != 0) {
28267 			kmem_free(buffer, 12);
28268 			kmem_free(com, sizeof (*com));
28269 			return (rval);
28270 		}
28271 	}
28272 
28273 	/*
28274 	 * Build and send the READ HEADER command to determine the data mode of
28275 	 * the user specified track.
28276 	 */
28277 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28278 	    (entry->cdte_track != CDROM_LEADOUT)) {
28279 		bzero(cdb, CDB_GROUP1);
28280 		cdb[0] = SCMD_READ_HEADER;
28281 		cdb[2] = buffer[8];
28282 		cdb[3] = buffer[9];
28283 		cdb[4] = buffer[10];
28284 		cdb[5] = buffer[11];
28285 		cdb[8] = 0x08;
28286 		com->uscsi_buflen = 0x08;
28287 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28288 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28289 		if (rval == 0) {
28290 			entry->cdte_datamode = buffer[0];
28291 		} else {
28292 			/*
28293 			 * The READ HEADER command failed.  Since it is
28294 			 * obsolete in one spec, it is better to return
28295 			 * -1 for an invalid track so that we can still
28296 			 * receive the rest of the TOC data.
28297 			 */
28298 			entry->cdte_datamode = (uchar_t)-1;
28299 		}
28300 	} else {
28301 		entry->cdte_datamode = (uchar_t)-1;
28302 	}
28303 
28304 	kmem_free(buffer, 12);
28305 	kmem_free(com, sizeof (*com));
28306 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28307 		return (EFAULT);
28308 
28309 	return (rval);
28310 }
28311 
28312 
28313 /*
28314  *    Function: sr_read_tochdr()
28315  *
28316  * Description: This routine is the driver entry point for handling CD-ROM
28317  * 		ioctl requests to read the Table of Contents (TOC) header
28318  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28319  *		and ending track numbers.
28320  *
28321  *   Arguments: dev	- the device 'dev_t'
28322  *		data	- pointer to user provided toc header structure,
28323  *			  specifying the starting and ending track numbers.
28324  *		flag	- this argument is a pass through to ddi_copyxxx()
28325  *			  directly from the mode argument of ioctl().
28326  *
28327  * Return Code: the code returned by sd_send_scsi_cmd()
28328  *		EFAULT if ddi_copyxxx() fails
28329  *		ENXIO if fail ddi_get_soft_state
28330  *		EINVAL if data pointer is NULL
28331  */
28332 
28333 static int
28334 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28335 {
28336 	struct sd_lun		*un;
28337 	struct uscsi_cmd	*com;
28338 	struct cdrom_tochdr	toc_header;
28339 	struct cdrom_tochdr	*hdr = &toc_header;
28340 	char			cdb[CDB_GROUP1];
28341 	int			rval;
28342 	caddr_t			buffer;
28343 
28344 	if (data == NULL) {
28345 		return (EINVAL);
28346 	}
28347 
28348 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28349 	    (un->un_state == SD_STATE_OFFLINE)) {
28350 		return (ENXIO);
28351 	}
28352 
28353 	buffer = kmem_zalloc(4, KM_SLEEP);
28354 	bzero(cdb, CDB_GROUP1);
28355 	cdb[0] = SCMD_READ_TOC;
28356 	/*
28357 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28358 	 * that the TOC header should be returned
28359 	 */
28360 	cdb[6] = 0x00;
28361 	/*
28362 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28363 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28364 	 */
28365 	cdb[8] = 0x04;
28366 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28367 	com->uscsi_cdb	   = cdb;
28368 	com->uscsi_cdblen  = CDB_GROUP1;
28369 	com->uscsi_bufaddr = buffer;
28370 	com->uscsi_buflen  = 0x04;
28371 	com->uscsi_timeout = 300;
28372 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28373 
28374 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28375 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28376 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28377 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28378 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28379 	} else {
28380 		hdr->cdth_trk0 = buffer[2];
28381 		hdr->cdth_trk1 = buffer[3];
28382 	}
28383 	kmem_free(buffer, 4);
28384 	kmem_free(com, sizeof (*com));
28385 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28386 		return (EFAULT);
28387 	}
28388 	return (rval);
28389 }
28390 
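/*
 * Illustrative only, not part of the driver: a minimal user-level
 * sketch of the CDROMREADTOCHDR ioctl serviced by sr_read_tochdr()
 * above, assuming the struct cdrom_tochdr definition in <sys/cdio.h>.
 * The open file descriptor for a CD-ROM device node is supplied by
 * the caller.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>
#include <stdio.h>

static int
print_toc_header(int fd)
{
	struct cdrom_tochdr	hdr;

	if (ioctl(fd, CDROMREADTOCHDR, &hdr) < 0) {
		return (-1);
	}
	/* cdth_trk0 and cdth_trk1 are the starting and ending tracks */
	(void) printf("tracks %d through %d\n",
	    hdr.cdth_trk0, hdr.cdth_trk1);
	return (0);
}
#endif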
28391 
28392 /*
28393  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28394  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28395  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28396  * digital audio and extended architecture digital audio. These modes are
28397  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28398  * MMC specs.
28399  *
28400  * In addition to support for the various data formats these routines also
28401  * include support for devices that implement only the direct access READ
28402  * commands (0x08, 0x28), devices that implement the READ_CD commands
28403  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28404  * READ CDXA commands (0xD8, 0xDB)
28405  */
28406 
28407 /*
28408  *    Function: sr_read_mode1()
28409  *
28410  * Description: This routine is the driver entry point for handling CD-ROM
28411  *		ioctl read mode1 requests (CDROMREADMODE1).
28412  *
28413  *   Arguments: dev	- the device 'dev_t'
28414  *		data	- pointer to user provided cd read structure specifying
28415  *			  the lba buffer address and length.
28416  *		flag	- this argument is a pass through to ddi_copyxxx()
28417  *			  directly from the mode argument of ioctl().
28418  *
28419  * Return Code: the code returned by sd_send_scsi_cmd()
28420  *		EFAULT if ddi_copyxxx() fails
28421  *		ENXIO if fail ddi_get_soft_state
28422  *		EINVAL if data pointer is NULL
28423  */
28424 
28425 static int
28426 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28427 {
28428 	struct sd_lun		*un;
28429 	struct cdrom_read	mode1_struct;
28430 	struct cdrom_read	*mode1 = &mode1_struct;
28431 	int			rval;
28432 #ifdef _MULTI_DATAMODEL
28433 	/* To support ILP32 applications in an LP64 world */
28434 	struct cdrom_read32	cdrom_read32;
28435 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28436 #endif /* _MULTI_DATAMODEL */
28437 
28438 	if (data == NULL) {
28439 		return (EINVAL);
28440 	}
28441 
28442 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28443 	    (un->un_state == SD_STATE_OFFLINE)) {
28444 		return (ENXIO);
28445 	}
28446 
28447 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28448 	    "sd_read_mode1: entry: un:0x%p\n", un);
28449 
28450 #ifdef _MULTI_DATAMODEL
28451 	switch (ddi_model_convert_from(flag & FMODELS)) {
28452 	case DDI_MODEL_ILP32:
28453 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28454 			return (EFAULT);
28455 		}
28456 		/* Convert the ILP32 uscsi data from the application to LP64 */
28457 		cdrom_read32tocdrom_read(cdrd32, mode1);
28458 		break;
28459 	case DDI_MODEL_NONE:
28460 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28461 			return (EFAULT);
28462 		}
28463 	}
28464 #else /* ! _MULTI_DATAMODEL */
28465 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28466 		return (EFAULT);
28467 	}
28468 #endif /* _MULTI_DATAMODEL */
28469 
28470 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28471 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28472 
28473 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28474 	    "sd_read_mode1: exit: un:0x%p\n", un);
28475 
28476 	return (rval);
28477 }
28478 
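/*
 * Illustrative only, not part of the driver: a minimal user-level
 * sketch of the CDROMREADMODE1 ioctl serviced by sr_read_mode1()
 * above. One mode 1 block carries 2048 bytes of user data; the
 * struct cdrom_read layout is taken from <sys/cdio.h>.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>

static int
read_mode1_block(int fd, int lba, char *buf2048)
{
	struct cdrom_read	cdr;

	cdr.cdread_lba	   = lba;
	cdr.cdread_bufaddr = buf2048;	/* destination for user data */
	cdr.cdread_buflen  = 2048;	/* one mode 1 data block */
	return (ioctl(fd, CDROMREADMODE1, &cdr));
}
#endif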
28479 
28480 /*
28481  *    Function: sr_read_cd_mode2()
28482  *
28483  * Description: This routine is the driver entry point for handling CD-ROM
28484  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28485  *		support the READ CD (0xBE) command or the 1st generation
28486  *		READ CD (0xD4) command.
28487  *
28488  *   Arguments: dev	- the device 'dev_t'
28489  *		data	- pointer to user provided cd read structure specifying
28490  *			  the lba buffer address and length.
28491  *		flag	- this argument is a pass through to ddi_copyxxx()
28492  *			  directly from the mode argument of ioctl().
28493  *
28494  * Return Code: the code returned by sd_send_scsi_cmd()
28495  *		EFAULT if ddi_copyxxx() fails
28496  *		ENXIO if fail ddi_get_soft_state
28497  *		EINVAL if data pointer is NULL
28498  */
28499 
28500 static int
28501 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28502 {
28503 	struct sd_lun		*un;
28504 	struct uscsi_cmd	*com;
28505 	struct cdrom_read	mode2_struct;
28506 	struct cdrom_read	*mode2 = &mode2_struct;
28507 	uchar_t			cdb[CDB_GROUP5];
28508 	int			nblocks;
28509 	int			rval;
28510 #ifdef _MULTI_DATAMODEL
28511 	/*  To support ILP32 applications in an LP64 world */
28512 	struct cdrom_read32	cdrom_read32;
28513 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28514 #endif /* _MULTI_DATAMODEL */
28515 
28516 	if (data == NULL) {
28517 		return (EINVAL);
28518 	}
28519 
28520 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28521 	    (un->un_state == SD_STATE_OFFLINE)) {
28522 		return (ENXIO);
28523 	}
28524 
28525 #ifdef _MULTI_DATAMODEL
28526 	switch (ddi_model_convert_from(flag & FMODELS)) {
28527 	case DDI_MODEL_ILP32:
28528 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28529 			return (EFAULT);
28530 		}
28531 		/* Convert the ILP32 uscsi data from the application to LP64 */
28532 		cdrom_read32tocdrom_read(cdrd32, mode2);
28533 		break;
28534 	case DDI_MODEL_NONE:
28535 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28536 			return (EFAULT);
28537 		}
28538 		break;
28539 	}
28540 
28541 #else /* ! _MULTI_DATAMODEL */
28542 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28543 		return (EFAULT);
28544 	}
28545 #endif /* _MULTI_DATAMODEL */
28546 
28547 	bzero(cdb, sizeof (cdb));
28548 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28549 		/* Read command supported by 1st generation atapi drives */
28550 		cdb[0] = SCMD_READ_CDD4;
28551 	} else {
28552 		/* Universal CD Access Command */
28553 		cdb[0] = SCMD_READ_CD;
28554 	}
28555 
28556 	/*
28557 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28558 	 */
28559 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28560 
28561 	/* set the start address */
28562 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28563 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28564 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28565 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28566 
28567 	/* set the transfer length */
28568 	nblocks = mode2->cdread_buflen / 2336;
28569 	cdb[6] = (uchar_t)(nblocks >> 16);
28570 	cdb[7] = (uchar_t)(nblocks >> 8);
28571 	cdb[8] = (uchar_t)nblocks;
28572 
28573 	/* set the filter bits */
28574 	cdb[9] = CDROM_READ_CD_USERDATA;
28575 
28576 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28577 	com->uscsi_cdb = (caddr_t)cdb;
28578 	com->uscsi_cdblen = sizeof (cdb);
28579 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28580 	com->uscsi_buflen = mode2->cdread_buflen;
28581 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28582 
28583 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28584 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28585 	kmem_free(com, sizeof (*com));
28586 	return (rval);
28587 }
28588 
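/*
 * For reference, the READ CD CDB built above has the following shape.
 * This hypothetical helper simply restates the byte packing performed
 * by sr_read_cd_mode2(); it is a sketch for illustration only.
 */
#if 0
static void
pack_read_cd_cdb(uchar_t cdb[CDB_GROUP5], uint32_t lba, uint32_t nblocks)
{
	bzero(cdb, CDB_GROUP5);
	cdb[0] = SCMD_READ_CD;			/* opcode 0xBE */
	cdb[1] = CDROM_SECTOR_TYPE_MODE2;	/* 2336 byte Mode 2 sectors */
	cdb[2] = (uchar_t)((lba >> 24) & 0xFF);	/* big-endian start LBA */
	cdb[3] = (uchar_t)((lba >> 16) & 0xFF);
	cdb[4] = (uchar_t)((lba >> 8) & 0xFF);
	cdb[5] = (uchar_t)(lba & 0xFF);
	cdb[6] = (uchar_t)((nblocks >> 16) & 0xFF); /* 24-bit block count */
	cdb[7] = (uchar_t)((nblocks >> 8) & 0xFF);
	cdb[8] = (uchar_t)(nblocks & 0xFF);
	cdb[9] = CDROM_READ_CD_USERDATA;	/* user data only */
}
#endif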
28589 
28590 /*
28591  *    Function: sr_read_mode2()
28592  *
28593  * Description: This routine is the driver entry point for handling CD-ROM
28594  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28595  *		do not support the READ CD (0xBE) command.
28596  *
28597  *   Arguments: dev	- the device 'dev_t'
28598  *		data	- pointer to user provided cd read structure specifying
28599  *			  the lba buffer address and length.
28600  *		flag	- this argument is a pass through to ddi_copyxxx()
28601  *			  directly from the mode argument of ioctl().
28602  *
28603  * Return Code: the code returned by sd_send_scsi_cmd()
28604  *		EFAULT if ddi_copyxxx() fails
28605  *		ENXIO if fail ddi_get_soft_state
28606  *		EINVAL if data pointer is NULL
28607  *		EIO if fail to reset block size
28608  *		EAGAIN if commands are in progress in the driver
28609  */
28610 
28611 static int
28612 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28613 {
28614 	struct sd_lun		*un;
28615 	struct cdrom_read	mode2_struct;
28616 	struct cdrom_read	*mode2 = &mode2_struct;
28617 	int			rval;
28618 	uint32_t		restore_blksize;
28619 	struct uscsi_cmd	*com;
28620 	uchar_t			cdb[CDB_GROUP0];
28621 	int			nblocks;
28622 
28623 #ifdef _MULTI_DATAMODEL
28624 	/* To support ILP32 applications in an LP64 world */
28625 	struct cdrom_read32	cdrom_read32;
28626 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28627 #endif /* _MULTI_DATAMODEL */
28628 
28629 	if (data == NULL) {
28630 		return (EINVAL);
28631 	}
28632 
28633 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28634 	    (un->un_state == SD_STATE_OFFLINE)) {
28635 		return (ENXIO);
28636 	}
28637 
28638 	/*
28639 	 * Because this routine will update the device and driver block size
28640 	 * being used we want to make sure there are no commands in progress.
28641 	 * If commands are in progress the user will have to try again.
28642 	 *
28643 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28644 	 * in sdioctl to protect commands from sdioctl through to the top of
28645 	 * sd_uscsi_strategy. See sdioctl for details.
28646 	 */
28647 	mutex_enter(SD_MUTEX(un));
28648 	if (un->un_ncmds_in_driver != 1) {
28649 		mutex_exit(SD_MUTEX(un));
28650 		return (EAGAIN);
28651 	}
28652 	mutex_exit(SD_MUTEX(un));
28653 
28654 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28655 	    "sd_read_mode2: entry: un:0x%p\n", un);
28656 
28657 #ifdef _MULTI_DATAMODEL
28658 	switch (ddi_model_convert_from(flag & FMODELS)) {
28659 	case DDI_MODEL_ILP32:
28660 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28661 			return (EFAULT);
28662 		}
28663 		/* Convert the ILP32 uscsi data from the application to LP64 */
28664 		cdrom_read32tocdrom_read(cdrd32, mode2);
28665 		break;
28666 	case DDI_MODEL_NONE:
28667 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28668 			return (EFAULT);
28669 		}
28670 		break;
28671 	}
28672 #else /* ! _MULTI_DATAMODEL */
28673 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28674 		return (EFAULT);
28675 	}
28676 #endif /* _MULTI_DATAMODEL */
28677 
28678 	/* Store the current target block size for restoration later */
28679 	restore_blksize = un->un_tgt_blocksize;
28680 
28681 	/* Change the device and soft state target block size to 2336 */
28682 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28683 		rval = EIO;
28684 		goto done;
28685 	}
28686 
28687 
28688 	bzero(cdb, sizeof (cdb));
28689 
28690 	/* set READ operation */
28691 	cdb[0] = SCMD_READ;
28692 
28693 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28694 	mode2->cdread_lba >>= 2;
28695 
28696 	/* set the start address */
28697 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
28698 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28699 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28700 
28701 	/* set the transfer length */
28702 	nblocks = mode2->cdread_buflen / 2336;
28703 	cdb[4] = (uchar_t)nblocks & 0xFF;
28704 
28705 	/* build command */
28706 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28707 	com->uscsi_cdb = (caddr_t)cdb;
28708 	com->uscsi_cdblen = sizeof (cdb);
28709 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28710 	com->uscsi_buflen = mode2->cdread_buflen;
28711 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28712 
28713 	/*
28714 	 * Issue SCSI command with user space address for read buffer.
28715 	 *
28716 	 * This sends the command through main channel in the driver.
28717 	 *
28718 	 * Since this is accessed via an IOCTL call, we go through the
28719 	 * standard path, so that if the device was powered down, then
28720 	 * it would be 'awakened' to handle the command.
28721 	 */
28722 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28723 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28724 
28725 	kmem_free(com, sizeof (*com));
28726 
28727 	/* Restore the device and soft state target block size */
28728 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28729 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28730 		    "can't do switch back to mode 1\n");
28731 		/*
28732 		 * If sd_send_scsi_READ succeeded we still need to report
28733 		 * an error because we failed to reset the block size
28734 		 */
28735 		if (rval == 0) {
28736 			rval = EIO;
28737 		}
28738 	}
28739 
28740 done:
28741 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28742 	    "sd_read_mode2: exit: un:0x%p\n", un);
28743 
28744 	return (rval);
28745 }
28746 
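/*
 * Illustrative only, not part of the driver: a minimal user-level
 * sketch of the CDROMREADMODE2 ioctl serviced by sr_read_mode2()
 * above. Note that on this path the driver treats cdread_lba as a
 * 512 byte block address (see the >>= 2 adjustment below), the buffer
 * length is consumed in 2336 byte units, and EAGAIN is returned while
 * other commands are in progress in the driver.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>

static int
read_mode2_block(int fd, int lba512, char *buf2336)
{
	struct cdrom_read	cdr;

	cdr.cdread_lba	   = lba512;	/* in 512 byte blocks */
	cdr.cdread_bufaddr = buf2336;
	cdr.cdread_buflen  = 2336;	/* one mode 2 data block */
	return (ioctl(fd, CDROMREADMODE2, &cdr));
}
#endif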
28747 
28748 /*
28749  *    Function: sr_sector_mode()
28750  *
28751  * Description: This utility function is used by sr_read_mode2 to set the
28752  *		target block size based on the user specified size. This is
28753  *		a legacy implementation based upon a vendor specific mode page.
28754  *
28755  *   Arguments: dev	- the device 'dev_t'
28756  *		blksize	- the block size being set, either 2336 or
28757  *			  512 bytes.
28758  *
28759  * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
28760  *		sd_send_scsi_MODE_SELECT()
28761  *		ENXIO if fail ddi_get_soft_state
28762  *		0 if the block size was changed successfully
28763  */
28764 
28765 static int
28766 sr_sector_mode(dev_t dev, uint32_t blksize)
28767 {
28768 	struct sd_lun	*un;
28769 	uchar_t		*sense;
28770 	uchar_t		*select;
28771 	int		rval;
28772 
28773 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28774 	    (un->un_state == SD_STATE_OFFLINE)) {
28775 		return (ENXIO);
28776 	}
28777 
28778 	sense = kmem_zalloc(20, KM_SLEEP);
28779 
28780 	/* Note: This is a vendor specific mode page (0x81) */
28781 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28782 	    SD_PATH_STANDARD)) != 0) {
28783 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28784 		    "sr_sector_mode: Mode Sense failed\n");
28785 		kmem_free(sense, 20);
28786 		return (rval);
28787 	}
28788 	select = kmem_zalloc(20, KM_SLEEP);
28789 	select[3] = 0x08;
28790 	select[10] = ((blksize >> 8) & 0xff);
28791 	select[11] = (blksize & 0xff);
28792 	select[12] = 0x01;
28793 	select[13] = 0x06;
28794 	select[14] = sense[14];
28795 	select[15] = sense[15];
28796 	if (blksize == SD_MODE2_BLKSIZE) {
28797 		select[14] |= 0x01;
28798 	}
28799 
28800 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28801 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28802 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28803 		    "sr_sector_mode: Mode Select failed\n");
28804 	} else {
28805 		/*
28806 		 * Only update the softstate block size if we successfully
28807 		 * changed the device block mode.
28808 		 */
28809 		mutex_enter(SD_MUTEX(un));
28810 		sd_update_block_info(un, blksize, 0);
28811 		mutex_exit(SD_MUTEX(un));
28812 	}
28813 	kmem_free(sense, 20);
28814 	kmem_free(select, 20);
28815 	return (rval);
28816 }
28817 
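/*
 * For reference, a worked example of the mode select bytes built above
 * when switching to SD_MODE2_BLKSIZE (2336 == 0x920); a fragment for
 * illustration only:
 */
#if 0
	select[10] = 0x09;	/* (2336 >> 8) & 0xff */
	select[11] = 0x20;	/* 2336 & 0xff */
	select[14] |= 0x01;	/* vendor specific bit set for 2336 mode */
#endif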
28818 
28819 /*
28820  *    Function: sr_read_cdda()
28821  *
28822  * Description: This routine is the driver entry point for handling CD-ROM
28823  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
28824  *		the target supports CDDA these requests are handled via a vendor
28825  *		specific command (0xD8). If the target does not support CDDA
28826  *		these requests are handled via the READ CD command (0xBE).
28827  *
28828  *   Arguments: dev	- the device 'dev_t'
28829  *		data	- pointer to user provided CD-DA structure specifying
28830  *			  the track starting address, transfer length, and
28831  *			  subcode options.
28832  *		flag	- this argument is a pass through to ddi_copyxxx()
28833  *			  directly from the mode argument of ioctl().
28834  *
28835  * Return Code: the code returned by sd_send_scsi_cmd()
28836  *		EFAULT if ddi_copyxxx() fails
28837  *		ENXIO if fail ddi_get_soft_state
28838  *		EINVAL if invalid arguments are provided
28839  *		ENOTTY
28840  */
28841 
28842 static int
28843 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28844 {
28845 	struct sd_lun			*un;
28846 	struct uscsi_cmd		*com;
28847 	struct cdrom_cdda		*cdda;
28848 	int				rval;
28849 	size_t				buflen;
28850 	char				cdb[CDB_GROUP5];
28851 
28852 #ifdef _MULTI_DATAMODEL
28853 	/* To support ILP32 applications in an LP64 world */
28854 	struct cdrom_cdda32	cdrom_cdda32;
28855 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28856 #endif /* _MULTI_DATAMODEL */
28857 
28858 	if (data == NULL) {
28859 		return (EINVAL);
28860 	}
28861 
28862 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28863 		return (ENXIO);
28864 	}
28865 
28866 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28867 
28868 #ifdef _MULTI_DATAMODEL
28869 	switch (ddi_model_convert_from(flag & FMODELS)) {
28870 	case DDI_MODEL_ILP32:
28871 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28872 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28873 			    "sr_read_cdda: ddi_copyin Failed\n");
28874 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28875 			return (EFAULT);
28876 		}
28877 		/* Convert the ILP32 uscsi data from the application to LP64 */
28878 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28879 		break;
28880 	case DDI_MODEL_NONE:
28881 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28882 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28883 			    "sr_read_cdda: ddi_copyin Failed\n");
28884 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28885 			return (EFAULT);
28886 		}
28887 		break;
28888 	}
28889 #else /* ! _MULTI_DATAMODEL */
28890 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28891 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28892 		    "sr_read_cdda: ddi_copyin Failed\n");
28893 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28894 		return (EFAULT);
28895 	}
28896 #endif /* _MULTI_DATAMODEL */
28897 
28898 	/*
28899 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
28900 	 * check if the length input exceeds 3 bytes.
28901 	 */
28902 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28903 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28904 		    "cdrom transfer length too large: %d (limit %d)\n",
28905 		    cdda->cdda_length, 0xFFFFFF);
28906 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28907 		return (EINVAL);
28908 	}
28909 
28910 	switch (cdda->cdda_subcode) {
28911 	case CDROM_DA_NO_SUBCODE:
28912 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28913 		break;
28914 	case CDROM_DA_SUBQ:
28915 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28916 		break;
28917 	case CDROM_DA_ALL_SUBCODE:
28918 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28919 		break;
28920 	case CDROM_DA_SUBCODE_ONLY:
28921 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28922 		break;
28923 	default:
28924 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28925 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28926 		    cdda->cdda_subcode);
28927 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28928 		return (EINVAL);
28929 	}
28930 
28931 	/* Build and send the command */
28932 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28933 	bzero(cdb, CDB_GROUP5);
28934 
28935 	if (un->un_f_cfg_cdda == TRUE) {
28936 		cdb[0] = (char)SCMD_READ_CD;
28937 		cdb[1] = 0x04;
28938 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28939 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28940 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28941 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28942 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28943 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28944 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28945 		cdb[9] = 0x10;
28946 		switch (cdda->cdda_subcode) {
28947 		case CDROM_DA_NO_SUBCODE :
28948 			cdb[10] = 0x0;
28949 			break;
28950 		case CDROM_DA_SUBQ :
28951 			cdb[10] = 0x2;
28952 			break;
28953 		case CDROM_DA_ALL_SUBCODE :
28954 			cdb[10] = 0x1;
28955 			break;
28956 		case CDROM_DA_SUBCODE_ONLY :
28957 			/* FALLTHROUGH */
28958 		default :
28959 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28960 			kmem_free(com, sizeof (*com));
28961 			return (ENOTTY);
28962 		}
28963 	} else {
28964 		cdb[0] = (char)SCMD_READ_CDDA;
28965 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28966 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28967 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28968 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28969 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28970 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28971 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28972 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28973 		cdb[10] = cdda->cdda_subcode;
28974 	}
28975 
28976 	com->uscsi_cdb = cdb;
28977 	com->uscsi_cdblen = CDB_GROUP5;
28978 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28979 	com->uscsi_buflen = buflen;
28980 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28981 
28982 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28983 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28984 
28985 	kmem_free(cdda, sizeof (struct cdrom_cdda));
28986 	kmem_free(com, sizeof (*com));
28987 	return (rval);
28988 }
28989 
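/*
 * Illustrative only, not part of the driver: a minimal user-level
 * sketch of the CDROMCDDA ioctl serviced by sr_read_cdda() above,
 * reading audio frames with no subcode (2352 bytes per frame). The
 * struct cdrom_cdda layout is taken from <sys/cdio.h>.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>

static int
read_cdda_frames(int fd, unsigned int start, unsigned int nframes,
    char *buf)
{
	struct cdrom_cdda	cdda;

	cdda.cdda_addr	  = start;		/* starting LBA */
	cdda.cdda_length  = nframes;		/* at most 0xFFFFFF frames */
	cdda.cdda_data	  = buf;		/* nframes * 2352 bytes */
	cdda.cdda_subcode = CDROM_DA_NO_SUBCODE;
	return (ioctl(fd, CDROMCDDA, &cdda));
}
#endif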
28990 
28991 /*
28992  *    Function: sr_read_cdxa()
28993  *
28994  * Description: This routine is the driver entry point for handling CD-ROM
28995  *		ioctl requests to return CD-XA (Extended Architecture) data.
28996  *		(CDROMCDXA).
28997  *
28998  *   Arguments: dev	- the device 'dev_t'
28999  *		data	- pointer to user provided CD-XA structure specifying
29000  *			  the data starting address, transfer length, and format
29001  *		flag	- this argument is a pass through to ddi_copyxxx()
29002  *			  directly from the mode argument of ioctl().
29003  *
29004  * Return Code: the code returned by sd_send_scsi_cmd()
29005  *		EFAULT if ddi_copyxxx() fails
29006  *		ENXIO if fail ddi_get_soft_state
29007  *		EINVAL if data pointer is NULL
29008  */
29009 
29010 static int
29011 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
29012 {
29013 	struct sd_lun		*un;
29014 	struct uscsi_cmd	*com;
29015 	struct cdrom_cdxa	*cdxa;
29016 	int			rval;
29017 	size_t			buflen;
29018 	char			cdb[CDB_GROUP5];
29019 	uchar_t			read_flags;
29020 
29021 #ifdef _MULTI_DATAMODEL
29022 	/* To support ILP32 applications in an LP64 world */
29023 	struct cdrom_cdxa32		cdrom_cdxa32;
29024 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
29025 #endif /* _MULTI_DATAMODEL */
29026 
29027 	if (data == NULL) {
29028 		return (EINVAL);
29029 	}
29030 
29031 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29032 		return (ENXIO);
29033 	}
29034 
29035 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
29036 
29037 #ifdef _MULTI_DATAMODEL
29038 	switch (ddi_model_convert_from(flag & FMODELS)) {
29039 	case DDI_MODEL_ILP32:
29040 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
29041 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29042 			return (EFAULT);
29043 		}
29044 		/*
29045 		 * Convert the ILP32 uscsi data from the
29046 		 * application to LP64 for internal use.
29047 		 */
29048 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
29049 		break;
29050 	case DDI_MODEL_NONE:
29051 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29052 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29053 			return (EFAULT);
29054 		}
29055 		break;
29056 	}
29057 #else /* ! _MULTI_DATAMODEL */
29058 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29059 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29060 		return (EFAULT);
29061 	}
29062 #endif /* _MULTI_DATAMODEL */
29063 
29064 	/*
29065 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
29066 	 * check if the length input exceeds 3 bytes.
29067 	 */
29068 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
29069 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
29070 		    "cdrom transfer length too large: %d (limit %d)\n",
29071 		    cdxa->cdxa_length, 0xFFFFFF);
29072 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29073 		return (EINVAL);
29074 	}
29075 
29076 	switch (cdxa->cdxa_format) {
29077 	case CDROM_XA_DATA:
29078 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
29079 		read_flags = 0x10;
29080 		break;
29081 	case CDROM_XA_SECTOR_DATA:
29082 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
29083 		read_flags = 0xf8;
29084 		break;
29085 	case CDROM_XA_DATA_W_ERROR:
29086 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
29087 		read_flags = 0xfc;
29088 		break;
29089 	default:
29090 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29091 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
29092 		    cdxa->cdxa_format);
29093 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29094 		return (EINVAL);
29095 	}
29096 
29097 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29098 	bzero(cdb, CDB_GROUP5);
29099 	if (un->un_f_mmc_cap == TRUE) {
29100 		cdb[0] = (char)SCMD_READ_CD;
29101 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29102 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29103 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29104 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29105 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29106 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29107 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
29108 		cdb[9] = (char)read_flags;
29109 	} else {
29110 		/*
29111 		 * Note: A vendor specific command (0xDB) is being used here to
29112 		 * request a read of all subcodes.
29113 		 */
29114 		cdb[0] = (char)SCMD_READ_CDXA;
29115 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29116 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29117 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29118 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29119 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
29120 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29121 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29122 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
29123 		cdb[10] = cdxa->cdxa_format;
29124 	}
29125 	com->uscsi_cdb	   = cdb;
29126 	com->uscsi_cdblen  = CDB_GROUP5;
29127 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
29128 	com->uscsi_buflen  = buflen;
29129 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29130 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
29131 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29132 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29133 	kmem_free(com, sizeof (*com));
29134 	return (rval);
29135 }
29136 
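/*
 * Illustrative only, not part of the driver: a minimal user-level
 * sketch of the CDROMCDXA ioctl serviced by sr_read_cdxa() above,
 * requesting 2048 byte user data blocks (CDROM_XA_DATA).
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>

static int
read_cdxa_blocks(int fd, unsigned int start, unsigned int nblks, char *buf)
{
	struct cdrom_cdxa	cdxa;

	cdxa.cdxa_addr	 = start;		/* starting LBA */
	cdxa.cdxa_length = nblks;		/* at most 0xFFFFFF blocks */
	cdxa.cdxa_data	 = buf;			/* nblks * 2048 bytes */
	cdxa.cdxa_format = CDROM_XA_DATA;
	return (ioctl(fd, CDROMCDXA, &cdxa));
}
#endif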
29137 
29138 /*
29139  *    Function: sr_eject()
29140  *
29141  * Description: This routine is the driver entry point for handling CD-ROM
29142  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
29143  *
29144  *   Arguments: dev	- the device 'dev_t'
29145  *
29146  * Return Code: the code returned by sd_send_scsi_cmd()
29147  */
29148 
29149 static int
29150 sr_eject(dev_t dev)
29151 {
29152 	struct sd_lun	*un;
29153 	int		rval;
29154 
29155 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29156 	    (un->un_state == SD_STATE_OFFLINE)) {
29157 		return (ENXIO);
29158 	}
29159 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
29160 	    SD_PATH_STANDARD)) != 0) {
29161 		return (rval);
29162 	}
29163 
29164 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
29165 	    SD_PATH_STANDARD);
29166 
29167 	if (rval == 0) {
29168 		mutex_enter(SD_MUTEX(un));
29169 		sr_ejected(un);
29170 		un->un_mediastate = DKIO_EJECTED;
29171 		cv_broadcast(&un->un_state_cv);
29172 		mutex_exit(SD_MUTEX(un));
29173 	}
29174 	return (rval);
29175 }
29176 
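/*
 * Illustrative only, not part of the driver: a minimal user-level
 * sketch of the CDROMEJECT ioctl serviced by sr_eject() above. The
 * device path is supplied by the caller; O_NDELAY allows the open to
 * succeed even with no media present.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

static int
eject_media(const char *path)
{
	int	fd, rval;

	if ((fd = open(path, O_RDONLY | O_NDELAY)) < 0) {
		return (-1);
	}
	rval = ioctl(fd, CDROMEJECT, 0);
	(void) close(fd);
	return (rval);
}
#endif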
29177 
29178 /*
29179  *    Function: sr_ejected()
29180  *
29181  * Description: This routine updates the soft state structure to invalidate the
29182  *		geometry information after the media has been ejected or a
29183  *		media eject has been detected.
29184  *
29185  *   Arguments: un - driver soft state (unit) structure
29186  */
29187 
29188 static void
29189 sr_ejected(struct sd_lun *un)
29190 {
29191 	struct sd_errstats *stp;
29192 
29193 	ASSERT(un != NULL);
29194 	ASSERT(mutex_owned(SD_MUTEX(un)));
29195 
29196 	un->un_f_blockcount_is_valid	= FALSE;
29197 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29198 	un->un_f_geometry_is_valid	= FALSE;
29199 
29200 	if (un->un_errstats != NULL) {
29201 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29202 		stp->sd_capacity.value.ui64 = 0;
29203 	}
29204 }
29205 
29206 
29207 /*
29208  *    Function: sr_check_wp()
29209  *
29210  * Description: This routine checks the write protection of a removable
29211  *      media disk and hotpluggable devices via the write protect bit of
29212  *      the Mode Page Header device specific field. Some devices choke
29213  *      on an unsupported mode page. To work around this issue, this
29214  *      routine has been implemented to use the 0x3F mode page (request
29215  *      all pages) for all device types.
29216  *
29217  *   Arguments: dev		- the device 'dev_t'
29218  *
29219  * Return Code: int indicating if the device is write protected (1) or not (0)
29220  *
29221  *     Context: Kernel thread.
29222  *
29223  */
29224 
29225 static int
29226 sr_check_wp(dev_t dev)
29227 {
29228 	struct sd_lun	*un;
29229 	uchar_t		device_specific;
29230 	uchar_t		*sense;
29231 	int		hdrlen;
29232 	int		rval = FALSE;
29233 
29234 	/*
29235 	 * Note: The return codes for this routine should be reworked to
29236 	 * properly handle the case of a NULL softstate.
29237 	 */
29238 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29239 		return (FALSE);
29240 	}
29241 
29242 	if (un->un_f_cfg_is_atapi == TRUE) {
29243 		/*
29244 		 * The mode page contents are not required; set the allocation
29245 		 * length for the mode page header only
29246 		 */
29247 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29248 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29249 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
29250 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29251 			goto err_exit;
29252 		device_specific =
29253 		    ((struct mode_header_grp2 *)sense)->device_specific;
29254 	} else {
29255 		hdrlen = MODE_HEADER_LENGTH;
29256 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29257 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
29258 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29259 			goto err_exit;
29260 		device_specific =
29261 		    ((struct mode_header *)sense)->device_specific;
29262 	}
29263 
29264 	/*
29265 	 * If the write protect bit is set in the device specific field,
29266 	 * the media is write protected. Not all disks understand the mode
29267 	 * sense query; those that fail it return FALSE via err_exit.
29268 	 */
29269 	if (device_specific & WRITE_PROTECT) {
29270 		rval = TRUE;
29271 	}
29272 
29273 err_exit:
29274 	kmem_free(sense, hdrlen);
29275 	return (rval);
29276 }
29277 
29278 /*
29279  *    Function: sr_volume_ctrl()
29280  *
29281  * Description: This routine is the driver entry point for handling CD-ROM
29282  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29283  *
29284  *   Arguments: dev	- the device 'dev_t'
29285  *		data	- pointer to user audio volume control structure
29286  *		flag	- this argument is a pass through to ddi_copyxxx()
29287  *			  directly from the mode argument of ioctl().
29288  *
29289  * Return Code: the code returned by sd_send_scsi_cmd()
29290  *		EFAULT if ddi_copyxxx() fails
29291  *		ENXIO if fail ddi_get_soft_state
29292  *		EINVAL if data pointer is NULL
29293  *
29294  */
29295 
29296 static int
29297 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
29298 {
29299 	struct sd_lun		*un;
29300 	struct cdrom_volctrl    volume;
29301 	struct cdrom_volctrl    *vol = &volume;
29302 	uchar_t			*sense_page;
29303 	uchar_t			*select_page;
29304 	uchar_t			*sense;
29305 	uchar_t			*select;
29306 	int			sense_buflen;
29307 	int			select_buflen;
29308 	int			rval;
29309 
29310 	if (data == NULL) {
29311 		return (EINVAL);
29312 	}
29313 
29314 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29315 	    (un->un_state == SD_STATE_OFFLINE)) {
29316 		return (ENXIO);
29317 	}
29318 
29319 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
29320 		return (EFAULT);
29321 	}
29322 
29323 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29324 		struct mode_header_grp2		*sense_mhp;
29325 		struct mode_header_grp2		*select_mhp;
29326 		int				bd_len;
29327 
29328 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
29329 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
29330 		    MODEPAGE_AUDIO_CTRL_LEN;
29331 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29332 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29333 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
29334 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29335 		    SD_PATH_STANDARD)) != 0) {
29336 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29337 			    "sr_volume_ctrl: Mode Sense Failed\n");
29338 			kmem_free(sense, sense_buflen);
29339 			kmem_free(select, select_buflen);
29340 			return (rval);
29341 		}
29342 		sense_mhp = (struct mode_header_grp2 *)sense;
29343 		select_mhp = (struct mode_header_grp2 *)select;
29344 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
29345 		    sense_mhp->bdesc_length_lo;
29346 		if (bd_len > MODE_BLK_DESC_LENGTH) {
29347 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29348 			    "sr_volume_ctrl: Mode Sense returned invalid "
29349 			    "block descriptor length\n");
29350 			kmem_free(sense, sense_buflen);
29351 			kmem_free(select, select_buflen);
29352 			return (EIO);
29353 		}
29354 		sense_page = (uchar_t *)
29355 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
29356 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
29357 		select_mhp->length_msb = 0;
29358 		select_mhp->length_lsb = 0;
29359 		select_mhp->bdesc_length_hi = 0;
29360 		select_mhp->bdesc_length_lo = 0;
29361 	} else {
29362 		struct mode_header		*sense_mhp, *select_mhp;
29363 
29364 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29365 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29366 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29367 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29368 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
29369 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29370 		    SD_PATH_STANDARD)) != 0) {
29371 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29372 			    "sr_volume_ctrl: Mode Sense Failed\n");
29373 			kmem_free(sense, sense_buflen);
29374 			kmem_free(select, select_buflen);
29375 			return (rval);
29376 		}
29377 		sense_mhp  = (struct mode_header *)sense;
29378 		select_mhp = (struct mode_header *)select;
29379 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29380 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29381 			    "sr_volume_ctrl: Mode Sense returned invalid "
29382 			    "block descriptor length\n");
29383 			kmem_free(sense, sense_buflen);
29384 			kmem_free(select, select_buflen);
29385 			return (EIO);
29386 		}
29387 		sense_page = (uchar_t *)
29388 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29389 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29390 		select_mhp->length = 0;
29391 		select_mhp->bdesc_length = 0;
29392 	}
29393 	/*
29394 	 * Note: An audio control data structure could be created and overlaid
29395 	 * on the following in place of the array indexing method implemented.
29396 	 */
29397 
29398 	/* Build the select data for the user volume data */
29399 	select_page[0] = MODEPAGE_AUDIO_CTRL;
29400 	select_page[1] = 0xE;
29401 	/* Set the immediate bit */
29402 	select_page[2] = 0x04;
29403 	/* Zero out reserved fields */
29404 	select_page[3] = 0x00;
29405 	select_page[4] = 0x00;
29406 	/* Return sense data for fields not to be modified */
29407 	select_page[5] = sense_page[5];
29408 	select_page[6] = sense_page[6];
29409 	select_page[7] = sense_page[7];
29410 	/* Set the user specified volume levels for channel 0 and 1 */
29411 	select_page[8] = 0x01;
29412 	select_page[9] = vol->channel0;
29413 	select_page[10] = 0x02;
29414 	select_page[11] = vol->channel1;
29415 	/* Channels 2 and 3 are currently unsupported so return the sense data */
29416 	select_page[12] = sense_page[12];
29417 	select_page[13] = sense_page[13];
29418 	select_page[14] = sense_page[14];
29419 	select_page[15] = sense_page[15];
29420 
29421 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29422 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
29423 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29424 	} else {
29425 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
29426 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29427 	}
29428 
29429 	kmem_free(sense, sense_buflen);
29430 	kmem_free(select, select_buflen);
29431 	return (rval);
29432 }
29433 
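/*
 * Illustrative only, not part of the driver: a minimal user-level
 * sketch of the CDROMVOLCTRL ioctl serviced by sr_volume_ctrl()
 * above. Only channels 0 and 1 are honored, as noted above.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>

static int
set_volume(int fd, unsigned char level)
{
	struct cdrom_volctrl	vol;

	vol.channel0 = level;	/* 0x00 (mute) through 0xFF (maximum) */
	vol.channel1 = level;
	vol.channel2 = 0;	/* currently unsupported */
	vol.channel3 = 0;	/* currently unsupported */
	return (ioctl(fd, CDROMVOLCTRL, &vol));
}
#endif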
29434 
29435 /*
29436  *    Function: sr_read_sony_session_offset()
29437  *
29438  * Description: This routine is the driver entry point for handling CD-ROM
29439  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29440  *		The address of the first track in the last session of a
29441  *		multi-session CD-ROM is returned
29442  *
29443  *		Note: This routine uses a vendor specific key value in the
29444  *		command control field without implementing any vendor check here
29445  *		or in the ioctl routine.
29446  *
29447  *   Arguments: dev	- the device 'dev_t'
29448  *		data	- pointer to an int to hold the requested address
29449  *		flag	- this argument is a pass through to ddi_copyxxx()
29450  *			  directly from the mode argument of ioctl().
29451  *
29452  * Return Code: the code returned by sd_send_scsi_cmd()
29453  *		EFAULT if ddi_copyxxx() fails
29454  *		ENXIO if fail ddi_get_soft_state
29455  *		EINVAL if data pointer is NULL
29456  */
29457 
29458 static int
29459 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29460 {
29461 	struct sd_lun		*un;
29462 	struct uscsi_cmd	*com;
29463 	caddr_t			buffer;
29464 	char			cdb[CDB_GROUP1];
29465 	int			session_offset = 0;
29466 	int			rval;
29467 
29468 	if (data == NULL) {
29469 		return (EINVAL);
29470 	}
29471 
29472 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29473 	    (un->un_state == SD_STATE_OFFLINE)) {
29474 		return (ENXIO);
29475 	}
29476 
29477 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29478 	bzero(cdb, CDB_GROUP1);
29479 	cdb[0] = SCMD_READ_TOC;
29480 	/*
29481 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
29482 	 * (4 byte TOC response header + 8 byte response data)
29483 	 */
29484 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29485 	/* Byte 9 is the control byte. A vendor specific value is used */
29486 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29487 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29488 	com->uscsi_cdb = cdb;
29489 	com->uscsi_cdblen = CDB_GROUP1;
29490 	com->uscsi_bufaddr = buffer;
29491 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29492 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29493 
29494 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29495 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29496 	if (rval != 0) {
29497 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29498 		kmem_free(com, sizeof (*com));
29499 		return (rval);
29500 	}
29501 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29502 		session_offset =
29503 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29504 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29505 		/*
29506 		 * The offset is returned in current lbasize blocks. Convert
29507 		 * it to 2k blocks before returning it to the user.
29508 		 */
29509 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29510 			session_offset >>= 2;
29511 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29512 			session_offset >>= 1;
29513 		}
29514 	}
29515 
29516 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29517 		rval = EFAULT;
29518 	}
29519 
29520 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29521 	kmem_free(com, sizeof (*com));
29522 	return (rval);
29523 }
29524 
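/*
 * Illustrative only, not part of the driver: a minimal user-level
 * sketch of the CDROMREADOFFSET ioctl serviced by
 * sr_read_sony_session_offset() above. The offset comes back in
 * 2k blocks, or 0 if no valid session offset was reported.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>

static int
last_session_offset(int fd, int *offsetp)
{
	return (ioctl(fd, CDROMREADOFFSET, offsetp));
}
#endif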
29525 
29526 /*
29527  *    Function: sd_wm_cache_constructor()
29528  *
29529  * Description: Cache Constructor for the wmap cache for the read/modify/write
29530  * 		devices.
29531  *
29532  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29533  *		un	- sd_lun structure for the device.
29534  *		flags	- the km flags passed to the constructor
29535  *
29536  * Return Code: 0 on success.
29537  *		-1 on failure.
29538  */
29539 
29540 /*ARGSUSED*/
29541 static int
29542 sd_wm_cache_constructor(void *wm, void *un, int flags)
29543 {
29544 	bzero(wm, sizeof (struct sd_w_map));
29545 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29546 	return (0);
29547 }
29548 
29549 
29550 /*
29551  *    Function: sd_wm_cache_destructor()
29552  *
29553  * Description: Cache destructor for the wmap cache for the read/modify/write
29554  * 		devices.
29555  *
29556  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
29557  *		un	- sd_lun structure for the device.
29558  */
29559 /*ARGSUSED*/
29560 static void
29561 sd_wm_cache_destructor(void *wm, void *un)
29562 {
29563 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29564 }
29565 
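/*
 * For reference, these callbacks are wired up when the per-unit
 * write-map cache is created at attach time; a minimal sketch of that
 * wiring is shown below (the "name_str" cache name is an assumption
 * for illustration).
 */
#if 0
	un->un_wm_cache = kmem_cache_create(name_str,
	    sizeof (struct sd_w_map), 8 /* alignment */,
	    sd_wm_cache_constructor, sd_wm_cache_destructor,
	    NULL,		/* no reclaim callback */
	    (void *)un,		/* passed back as the "un" argument */
	    NULL, 0);
#endif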
29566 
29567 /*
29568  *    Function: sd_range_lock()
29569  *
29570  * Description: Lock the specified range of blocks to ensure that a
29571  *		read-modify-write is atomic and that no other i/o writes
29572  *		to the same location. The range is specified in terms
29573  *		of start and end blocks. Block numbers are the actual
29574  *		media block numbers, not system block numbers.
29575  *
29576  *   Arguments: un	- sd_lun structure for the device.
29577  *		startb - The starting block number
29578  *		endb - The end block number
29579  *		typ - type of i/o - simple/read_modify_write
29580  *
29581  * Return Code: wm  - pointer to the wmap structure.
29582  *
29583  *     Context: This routine can sleep.
29584  */
29585 
29586 static struct sd_w_map *
29587 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29588 {
29589 	struct sd_w_map *wmp = NULL;
29590 	struct sd_w_map *sl_wmp = NULL;
29591 	struct sd_w_map *tmp_wmp;
29592 	wm_state state = SD_WM_CHK_LIST;
29593 
29594 
29595 	ASSERT(un != NULL);
29596 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29597 
29598 	mutex_enter(SD_MUTEX(un));
29599 
29600 	while (state != SD_WM_DONE) {
29601 
29602 		switch (state) {
29603 		case SD_WM_CHK_LIST:
29604 			/*
29605 			 * This is the starting state. Check the wmap list
29606 			 * to see if the range is currently available.
29607 			 */
29608 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29609 				/*
29610 				 * If this is a simple write and no rmw
29611 				 * i/o is pending then try to lock the
29612 				 * range as the range should be available.
29613 				 */
29614 				state = SD_WM_LOCK_RANGE;
29615 			} else {
29616 				tmp_wmp = sd_get_range(un, startb, endb);
29617 				if (tmp_wmp != NULL) {
29618 					if ((wmp != NULL) && ONLIST(un, wmp)) {
29619 						/*
29620 						 * Should not keep onlist wmps
29621 						 * while waiting; this macro
29622 						 * also does wmp = NULL.
29623 						 */
29624 						FREE_ONLIST_WMAP(un, wmp);
29625 					}
29626 					/*
29627 					 * sl_wmp is the wmap on which wait
29628 					 * is done, since the tmp_wmp points
29629 					 * to the inuse wmap, set sl_wmp to
29630 					 * tmp_wmp and change the state to sleep
29631 					 */
29632 					sl_wmp = tmp_wmp;
29633 					state = SD_WM_WAIT_MAP;
29634 				} else {
29635 					state = SD_WM_LOCK_RANGE;
29636 				}
29637 
29638 			}
29639 			break;
29640 
29641 		case SD_WM_LOCK_RANGE:
29642 			ASSERT(un->un_wm_cache);
29643 			/*
29644 			 * The range needs to be locked, so try to get a wmap.
29645 			 * First attempt the allocation with KM_NOSLEEP; we want
29646 			 * to avoid sleeping if possible, as we would have to
29647 			 * release the sd mutex in order to sleep.
29648 			 */
29649 			if (wmp == NULL)
29650 				wmp = kmem_cache_alloc(un->un_wm_cache,
29651 				    KM_NOSLEEP);
29652 			if (wmp == NULL) {
29653 				mutex_exit(SD_MUTEX(un));
29654 				_NOTE(DATA_READABLE_WITHOUT_LOCK
29655 				    (sd_lun::un_wm_cache))
29656 				wmp = kmem_cache_alloc(un->un_wm_cache,
29657 				    KM_SLEEP);
29658 				mutex_enter(SD_MUTEX(un));
29659 				/*
29660 				 * we released the mutex so recheck and go to
29661 				 * check list state.
29662 				 */
29663 				state = SD_WM_CHK_LIST;
29664 			} else {
29665 				/*
29666 				 * We exit the state machine since we have the
29667 				 * wmap. Do the housekeeping first: place the
29668 				 * wmap on the wmap list if it is not on it
29669 				 * already, and then set the state to done.
29670 				 */
29671 				wmp->wm_start = startb;
29672 				wmp->wm_end = endb;
29673 				wmp->wm_flags = typ | SD_WM_BUSY;
29674 				if (typ & SD_WTYPE_RMW) {
29675 					un->un_rmw_count++;
29676 				}
29677 				/*
29678 				 * If not already on the list then link
29679 				 */
29680 				if (!ONLIST(un, wmp)) {
29681 					wmp->wm_next = un->un_wm;
29682 					wmp->wm_prev = NULL;
29683 					if (wmp->wm_next)
29684 						wmp->wm_next->wm_prev = wmp;
29685 					un->un_wm = wmp;
29686 				}
29687 				state = SD_WM_DONE;
29688 			}
29689 			break;
29690 
29691 		case SD_WM_WAIT_MAP:
29692 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29693 			/*
29694 			 * Wait is done on sl_wmp, which is set in the
29695 			 * check_list state.
29696 			 */
29697 			sl_wmp->wm_wanted_count++;
29698 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29699 			sl_wmp->wm_wanted_count--;
29700 			/*
29701 			 * We can reuse the memory from the completed sl_wmp
29702 			 * lock range for our new lock, but only if no one is
29703 			 * waiting for it.
29704 			 */
29705 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
29706 			if (sl_wmp->wm_wanted_count == 0) {
29707 				if (wmp != NULL)
29708 					CHK_N_FREEWMP(un, wmp);
29709 				wmp = sl_wmp;
29710 			}
29711 			sl_wmp = NULL;
29712 			/*
29713 			 * After waking up, need to recheck for availability of
29714 			 * range.
29715 			 */
29716 			state = SD_WM_CHK_LIST;
29717 			break;
29718 
29719 		default:
29720 			panic("sd_range_lock: "
29721 			    "Unknown state %d in sd_range_lock", state);
29722 			/*NOTREACHED*/
29723 		} /* switch(state) */
29724 
29725 	} /* while(state != SD_WM_DONE) */
29726 
29727 	mutex_exit(SD_MUTEX(un));
29728 
29729 	ASSERT(wmp != NULL);
29730 
29731 	return (wmp);
29732 }
29733 
29734 
29735 /*
29736  *    Function: sd_get_range()
29737  *
29738  * Description: Find if there is any I/O overlapping this one.
29739  *		Returns the write-map of the 1st such I/O, NULL otherwise.
29740  *
29741  *   Arguments: un	- sd_lun structure for the device.
29742  *		startb - The starting block number
29743  *		endb - The end block number
29744  *
29745  * Return Code: wm  - pointer to the wmap structure.
29746  */
29747 
29748 static struct sd_w_map *
29749 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29750 {
29751 	struct sd_w_map *wmp;
29752 
29753 	ASSERT(un != NULL);
29754 
29755 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29756 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29757 			continue;
29758 		}
29759 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29760 			break;
29761 		}
29762 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29763 			break;
29764 		}
29765 	}
29766 
29767 	return (wmp);
29768 }
29769 
29770 
29771 /*
29772  *    Function: sd_free_inlist_wmap()
29773  *
29774  * Description: Unlink and free a write map struct.
29775  *
29776  *   Arguments: un      - sd_lun structure for the device.
29777  *		wmp	- sd_w_map which needs to be unlinked.
29778  */
29779 
29780 static void
29781 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29782 {
29783 	ASSERT(un != NULL);
29784 
29785 	if (un->un_wm == wmp) {
29786 		un->un_wm = wmp->wm_next;
29787 	} else {
29788 		wmp->wm_prev->wm_next = wmp->wm_next;
29789 	}
29790 
29791 	if (wmp->wm_next) {
29792 		wmp->wm_next->wm_prev = wmp->wm_prev;
29793 	}
29794 
29795 	wmp->wm_next = wmp->wm_prev = NULL;
29796 
29797 	kmem_cache_free(un->un_wm_cache, wmp);
29798 }
29799 
29800 
29801 /*
29802  *    Function: sd_range_unlock()
29803  *
29804  * Description: Unlock the range locked by wm.
29805  *		Free write map if nobody else is waiting on it.
29806  *
29807  *   Arguments: un      - sd_lun structure for the device.
29808  *              wm      - sd_w_map whose range needs to be unlocked.
29809  */
29810 
29811 static void
29812 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29813 {
29814 	ASSERT(un != NULL);
29815 	ASSERT(wm != NULL);
29816 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29817 
29818 	mutex_enter(SD_MUTEX(un));
29819 
29820 	if (wm->wm_flags & SD_WTYPE_RMW) {
29821 		un->un_rmw_count--;
29822 	}
29823 
29824 	if (wm->wm_wanted_count) {
29825 		wm->wm_flags = 0;
29826 		/*
29827 		 * Broadcast that the wmap is available now.
29828 		 */
29829 		cv_broadcast(&wm->wm_avail);
29830 	} else {
29831 		/*
29832 		 * If no one is waiting on the map, it should be freed.
29833 		 */
29834 		sd_free_inlist_wmap(un, wm);
29835 	}
29836 
29837 	mutex_exit(SD_MUTEX(un));
29838 }
29839 
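/*
 * For reference, sd_range_lock() and sd_range_unlock() are intended
 * to bracket the read-modify-write sequence; a minimal sketch of the
 * pairing (a fragment for illustration only, using target-media block
 * addresses):
 */
#if 0
	struct sd_w_map	*wm;

	/* blocks until no busy wmap overlaps [startb, endb] */
	wm = sd_range_lock(un, startb, endb, SD_WTYPE_RMW);

	/* ... read the enclosing target blocks, merge, write back ... */

	/* wake any waiters, or free the wmap if nobody wants it */
	sd_range_unlock(un, wm);
#endif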
29840 
29841 /*
29842  *    Function: sd_read_modify_write_task
29843  *
29844  * Description: Called from a taskq thread to initiate the write phase of
29845  *		a read-modify-write request.  This is used for targets where
29846  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29847  *
29848  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29849  *
29850  *     Context: Called under taskq thread context.
29851  */
29852 
29853 static void
29854 sd_read_modify_write_task(void *arg)
29855 {
29856 	struct sd_mapblocksize_info	*bsp;
29857 	struct buf	*bp;
29858 	struct sd_xbuf	*xp;
29859 	struct sd_lun	*un;
29860 
29861 	bp = arg;	/* The bp is given in arg */
29862 	ASSERT(bp != NULL);
29863 
29864 	/* Get the pointer to the layer-private data struct */
29865 	xp = SD_GET_XBUF(bp);
29866 	ASSERT(xp != NULL);
29867 	bsp = xp->xb_private;
29868 	ASSERT(bsp != NULL);
29869 
29870 	un = SD_GET_UN(bp);
29871 	ASSERT(un != NULL);
29872 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29873 
29874 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29875 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29876 
29877 	/*
29878 	 * This is the write phase of a read-modify-write request, called
29879 	 * under the context of a taskq thread in response to the read
29880 	 * portion of the rmw request completing under interrupt
29881 	 * context. The write request must be sent from here down the iostart
29882 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29883 	 * we use the layer index saved in the layer-private data area.
29884 	 */
29885 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29886 
29887 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29888 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29889 }
29890 
29891 
29892 /*
29893  *    Function: sddump_do_read_of_rmw()
29894  *
29895  * Description: This routine will be called from sddump. If sddump is called
29896  *		with an I/O which is not aligned on a device blocksize boundary,
29897  *		then the write has to be converted to a read-modify-write.
29898  *		Do the read part here in order to keep sddump simple.
29899  *		Note that the sd_mutex is held across the call to this
29900  *		routine.
29901  *
29902  *   Arguments: un	- sd_lun
29903  *		blkno	- block number in terms of media block size.
29904  *		nblk	- number of blocks.
29905  *		bpp	- pointer to pointer to the buf structure. On return
29906  *			from this function, *bpp points to the valid buffer
29907  *			to which the write has to be done.
29908  *
29909  * Return Code: 0 for success or errno-type return code
29910  */
29911 
29912 static int
29913 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29914 	struct buf **bpp)
29915 {
29916 	int err;
29917 	int i;
29918 	int rval;
29919 	struct buf *bp;
29920 	struct scsi_pkt *pkt = NULL;
29921 	uint32_t target_blocksize;
29922 
29923 	ASSERT(un != NULL);
29924 	ASSERT(mutex_owned(SD_MUTEX(un)));
29925 
29926 	target_blocksize = un->un_tgt_blocksize;
29927 
29928 	mutex_exit(SD_MUTEX(un));
29929 
29930 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29931 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29932 	if (bp == NULL) {
29933 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29934 		    "no resources for dumping; giving up");
29935 		err = ENOMEM;
29936 		goto done;
29937 	}
29938 
29939 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29940 	    blkno, nblk);
29941 	if (rval != 0) {
29942 		scsi_free_consistent_buf(bp);
29943 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29944 		    "no resources for dumping; giving up");
29945 		err = ENOMEM;
29946 		goto done;
29947 	}
29948 
29949 	pkt->pkt_flags |= FLAG_NOINTR;
29950 
29951 	err = EIO;
29952 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29953 
29954 		/*
29955 		 * scsi_poll() returns 0 (success) if the command completes and
29956 		 * the status block is STATUS_GOOD.  We should only check
29957 		 * errors if this condition is not true.  Even then we should
29958 		 * send our own request sense packet only if we have a check
29959 		 * condition and auto request sense has not been performed by
29960 		 * the hba.
29961 		 */
29962 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29963 
29964 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29965 			err = 0;
29966 			break;
29967 		}
29968 
29969 		/*
29970 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
29971 		 * no need to read RQS data.
29972 		 */
29973 		if (pkt->pkt_reason == CMD_DEV_GONE) {
29974 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29975 			    "Device is gone\n");
29976 			break;
29977 		}
29978 
29979 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29980 			SD_INFO(SD_LOG_DUMP, un,
29981 			    "sddump: read failed with CHECK, try # %d\n", i);
29982 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29983 				(void) sd_send_polled_RQS(un);
29984 			}
29985 
29986 			continue;
29987 		}
29988 
29989 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
29990 			int reset_retval = 0;
29991 
29992 			SD_INFO(SD_LOG_DUMP, un,
29993 			    "sddump: read failed with BUSY, try # %d\n", i);
29994 
29995 			if (un->un_f_lun_reset_enabled == TRUE) {
29996 				reset_retval = scsi_reset(SD_ADDRESS(un),
29997 				    RESET_LUN);
29998 			}
29999 			if (reset_retval == 0) {
30000 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
30001 			}
30002 			(void) sd_send_polled_RQS(un);
30003 
30004 		} else {
30005 			SD_INFO(SD_LOG_DUMP, un,
30006 			    "sddump: read failed with 0x%x, try # %d\n",
30007 			    SD_GET_PKT_STATUS(pkt), i);
30008 			mutex_enter(SD_MUTEX(un));
30009 			sd_reset_target(un, pkt);
30010 			mutex_exit(SD_MUTEX(un));
30011 		}
30012 
30013 		/*
30014 		 * If we are not getting anywhere with lun/target resets,
30015 		 * let's reset the bus.
30016 		 */
30017 		if (i > SD_NDUMP_RETRIES / 2) {
30018 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
30019 			(void) sd_send_polled_RQS(un);
30020 		}
30021 
30022 	}
30023 	scsi_destroy_pkt(pkt);
30024 
30025 	if (err != 0) {
30026 		scsi_free_consistent_buf(bp);
30027 		*bpp = NULL;
30028 	} else {
30029 		*bpp = bp;
30030 	}
30031 
30032 done:
30033 	mutex_enter(SD_MUTEX(un));
30034 	return (err);
30035 }
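
/*
 * Illustrative sketch only, not part of the driver: how a caller such
 * as sddump() might use sddump_do_read_of_rmw() when a dump write is
 * not aligned to the target block size.  The function name and the
 * tgt_blkno/tgt_nblk/addr/io_start_offset/io_len parameters below are
 * hypothetical; the real sddump() logic differs in detail.  Note that
 * SD_MUTEX(un) must be held across the call, per the comment above.
 */
#ifdef	SD_RMW_SKETCH
static int
sd_rmw_write_sketch(struct sd_lun *un, uint64_t tgt_blkno, uint64_t tgt_nblk,
    caddr_t addr, offset_t io_start_offset, size_t io_len)
{
	struct buf	*wr_bp;
	int		rc;

	/* Read the whole target blocks that the unaligned write overlaps. */
	rc = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk, &wr_bp);
	if (rc != 0) {
		return (rc);
	}

	/* Merge the new data into the freshly read buffer... */
	bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset], io_len);

	/*
	 * ...then the caller writes wr_bp back out as whole target blocks
	 * and eventually releases it with scsi_free_consistent_buf().
	 */
	return (0);
}
#endif	/* SD_RMW_SKETCH */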
30036 
30037 
30038 /*
30039  *    Function: sd_failfast_flushq
30040  *
30041  * Description: Take all bp's on the wait queue that have B_FAILFAST set
30042  *		in b_flags and move them onto the failfast queue, then kick
30043  *		off a thread to return all bp's on the failfast queue to
30044  *		their owners with an error set.
30045  *
30046  *   Arguments: un - pointer to the soft state struct for the instance.
30047  *
30048  *     Context: may execute in interrupt context.
30049  */
30050 
30051 static void
30052 sd_failfast_flushq(struct sd_lun *un)
30053 {
30054 	struct buf *bp;
30055 	struct buf *next_waitq_bp;
30056 	struct buf *prev_waitq_bp = NULL;
30057 
30058 	ASSERT(un != NULL);
30059 	ASSERT(mutex_owned(SD_MUTEX(un)));
30060 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
30061 	ASSERT(un->un_failfast_bp == NULL);
30062 
30063 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30064 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
30065 
30066 	/*
30067 	 * Check if we should flush all bufs when entering failfast state, or
30068 	 * just those with B_FAILFAST set.
30069 	 */
30070 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
30071 		/*
30072 		 * Move *all* bp's on the wait queue to the failfast flush
30073 		 * queue, including those that do NOT have B_FAILFAST set.
30074 		 */
30075 		if (un->un_failfast_headp == NULL) {
30076 			ASSERT(un->un_failfast_tailp == NULL);
30077 			un->un_failfast_headp = un->un_waitq_headp;
30078 		} else {
30079 			ASSERT(un->un_failfast_tailp != NULL);
30080 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
30081 		}
30082 
30083 		un->un_failfast_tailp = un->un_waitq_tailp;
30084 
30085 		/* update kstat for each bp moved out of the waitq */
30086 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
30087 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30088 		}
30089 
30090 		/* empty the waitq */
30091 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
30092 
30093 	} else {
30094 		/*
30095 		 * Go through the wait queue, pick off all entries with
30096 		 * B_FAILFAST set, and move these onto the failfast queue.
30097 		 */
30098 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
30099 			/*
30100 			 * Save the pointer to the next bp on the wait queue,
30101 			 * so we get to it on the next iteration of this loop.
30102 			 */
30103 			next_waitq_bp = bp->av_forw;
30104 
30105 			/*
30106 			 * If this bp from the wait queue does NOT have
30107 			 * B_FAILFAST set, just move on to the next element
30108 			 * in the wait queue. Note, this is the only place
30109 			 * where it is correct to set prev_waitq_bp.
30110 			 */
30111 			if ((bp->b_flags & B_FAILFAST) == 0) {
30112 				prev_waitq_bp = bp;
30113 				continue;
30114 			}
30115 
30116 			/*
30117 			 * Remove the bp from the wait queue.
30118 			 */
30119 			if (bp == un->un_waitq_headp) {
30120 				/* The bp is the first element of the waitq. */
30121 				un->un_waitq_headp = next_waitq_bp;
30122 				if (un->un_waitq_headp == NULL) {
30123 					/* The wait queue is now empty */
30124 					un->un_waitq_tailp = NULL;
30125 				}
30126 			} else {
30127 				/*
30128 				 * The bp is either somewhere in the middle
30129 				 * or at the end of the wait queue.
30130 				 */
30131 				ASSERT(un->un_waitq_headp != NULL);
30132 				ASSERT(prev_waitq_bp != NULL);
30133 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
30134 				    == 0);
30135 				if (bp == un->un_waitq_tailp) {
30136 					/* bp is the last entry on the waitq. */
30137 					ASSERT(next_waitq_bp == NULL);
30138 					un->un_waitq_tailp = prev_waitq_bp;
30139 				}
30140 				prev_waitq_bp->av_forw = next_waitq_bp;
30141 			}
30142 			bp->av_forw = NULL;
30143 
30144 			/*
30145 			 * update kstat since the bp is moved out of
30146 			 * the waitq
30147 			 */
30148 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30149 
30150 			/*
30151 			 * Now put the bp onto the failfast queue.
30152 			 */
30153 			if (un->un_failfast_headp == NULL) {
30154 				/* failfast queue is currently empty */
30155 				ASSERT(un->un_failfast_tailp == NULL);
30156 				un->un_failfast_headp =
30157 				    un->un_failfast_tailp = bp;
30158 			} else {
30159 				/* Add the bp to the end of the failfast q */
30160 				ASSERT(un->un_failfast_tailp != NULL);
30161 				ASSERT(un->un_failfast_tailp->b_flags &
30162 				    B_FAILFAST);
30163 				un->un_failfast_tailp->av_forw = bp;
30164 				un->un_failfast_tailp = bp;
30165 			}
30166 		}
30167 	}
30168 
30169 	/*
30170 	 * Now return all bp's on the failfast queue to their owners.
30171 	 */
30172 	while ((bp = un->un_failfast_headp) != NULL) {
30173 
30174 		un->un_failfast_headp = bp->av_forw;
30175 		if (un->un_failfast_headp == NULL) {
30176 			un->un_failfast_tailp = NULL;
30177 		}
30178 
30179 		/*
30180 		 * We want to return the bp with a failure error code, but
30181 		 * we do not want a call to sd_start_cmds() to occur here,
30182 		 * so use sd_return_failed_command_no_restart() instead of
30183 		 * sd_return_failed_command().
30184 		 */
30185 		sd_return_failed_command_no_restart(un, bp, EIO);
30186 	}
30187 
30188 	/* Flush the xbuf queues if required. */
30189 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
30190 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
30191 	}
30192 
30193 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30194 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
30195 }
30196 
30197 
30198 /*
30199  *    Function: sd_failfast_flushq_callback
30200  *
30201  * Description: Return TRUE if the given bp meets the criteria for failfast
30202  *		flushing. Used with ddi_xbuf_flushq(9F).
30203  *
30204  *   Arguments: bp - ptr to buf struct to be examined.
30205  *
30206  *     Context: Any
30207  */
30208 
30209 static int
30210 sd_failfast_flushq_callback(struct buf *bp)
30211 {
30212 	/*
30213 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30214 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30215 	 */
30216 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30217 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30218 }
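
/*
 * Illustrative sketch only, not part of the driver: the general shape
 * of a queue flush that applies a callback such as the one above.  The
 * simple av_forw chain walked here is an assumption made for
 * illustration; the actual ddi_xbuf_flushq() implementation lives in
 * the sd_xbuf code and differs in detail.
 */
#ifdef	SD_FLUSHQ_SKETCH
static void
sd_flushq_sketch(struct buf **headp, int (*func)(struct buf *))
{
	struct buf	*bp = *headp;
	struct buf	*prev = NULL;

	while (bp != NULL) {
		struct buf	*next = bp->av_forw;

		if (func(bp)) {
			/* Unlink bp and fail it back to its owner. */
			if (prev == NULL) {
				*headp = next;
			} else {
				prev->av_forw = next;
			}
			bp->av_forw = NULL;
			bioerror(bp, EIO);
			biodone(bp);
		} else {
			prev = bp;
		}
		bp = next;
	}
}
#endif	/* SD_FLUSHQ_SKETCH */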
30219 
30220 
30221 
30222 #if defined(__i386) || defined(__amd64)
30223 /*
30224  * Function: sd_setup_next_xfer
30225  *
30226  * Description: Prepare the next I/O operation using DMA_PARTIAL.
30227  *
30228  */
30229 
30230 static int
30231 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30232     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30233 {
30234 	ssize_t	num_blks_not_xfered;
30235 	daddr_t	strt_blk_num;
30236 	ssize_t	bytes_not_xfered;
30237 	int	rval;
30238 
30239 	ASSERT(pkt->pkt_resid == 0);
30240 
30241 	/*
30242 	 * Calculate next block number and amount to be transferred.
30243 	 *
30244 	 * How much data has NOT been transferred to the HBA yet.
30245 	 */
30246 	bytes_not_xfered = xp->xb_dma_resid;
30247 
30248 	/*
30249 	 * Figure out how many blocks have NOT been transferred to the HBA yet.
30250 	 */
30251 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30252 
30253 	/*
30254 	 * Set the starting block number to the end of what WAS transferred.
30255 	 */
30256 	strt_blk_num = xp->xb_blkno +
30257 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30258 
30259 	/*
30260 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30261 	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
30262 	 * the disk mutex here.
30263 	 */
30264 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30265 	    strt_blk_num, num_blks_not_xfered);
30266 
30267 	if (rval == 0) {
30268 
30269 		/*
30270 		 * Success.
30271 		 *
30272 		 * Adjust things if there are still more blocks to be
30273 		 * transferred.
30274 		 */
30275 		xp->xb_dma_resid = pkt->pkt_resid;
30276 		pkt->pkt_resid = 0;
30277 
30278 		return (1);
30279 	}
30280 
30281 	/*
30282 	 * There is really only one possible failing return value from
30283 	 * sd_setup_next_rw_pkt, which occurs when scsi_init_pkt
30284 	 * returns NULL.
30285 	 */
30286 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30287 
30288 	bp->b_resid = bp->b_bcount;
30289 	bp->b_flags |= B_ERROR;
30290 
30291 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30292 	    "Error setting up next portion of DMA transfer\n");
30293 
30294 	return (0);
30295 }
30296 #endif
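
/*
 * Illustrative sketch only, not part of the driver: how a command
 * completion path could drive sd_setup_next_xfer() to continue a
 * DMA_PARTIAL transfer.  The function name and the surrounding error
 * handling are hypothetical; the real sdintr() logic is more involved.
 */
#ifdef	SD_DMA_PARTIAL_SKETCH
static void
sd_dma_partial_sketch(struct sd_lun *un, struct buf *bp,
    struct scsi_pkt *pkt, struct sd_xbuf *xp)
{
	if ((pkt->pkt_reason == CMD_CMPLT) && (xp->xb_dma_resid != 0)) {
		/*
		 * Part of the transfer is still outstanding; rebuild the
		 * packet for the next DMA window and re-issue it.
		 */
		if (sd_setup_next_xfer(un, bp, pkt, xp) != 0) {
			(void) scsi_transport(pkt);
			return;
		}
		/* Setup failed; sd_setup_next_xfer marked bp B_ERROR. */
	}
}
#endif	/* SD_DMA_PARTIAL_SKETCH */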
30297 
30298 /*
30299  *    Function: sd_panic_for_res_conflict
30300  *
30301  * Description: Call panic with a string formatted with "Reservation Conflict"
30302  *		and a human-readable identifier indicating the SD instance
30303  *		that experienced the reservation conflict.
30304  *
30305  *   Arguments: un - pointer to the soft state struct for the instance.
30306  *
30307  *     Context: may execute in interrupt context.
30308  */
30309 
30310 #define	SD_RESV_CONFLICT_FMT_LEN 40
30311 void
30312 sd_panic_for_res_conflict(struct sd_lun *un)
30313 {
30314 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30315 	char path_str[MAXPATHLEN];
30316 
30317 	(void) snprintf(panic_str, sizeof (panic_str),
30318 	    "Reservation Conflict\nDisk: %s",
30319 	    ddi_pathname(SD_DEVINFO(un), path_str));
30320 
30321 	panic(panic_str);
30322 }
30323 
30324 /*
30325  * Note: The following sd_faultinjection_ioctl() routines implement
30326  * driver support for fault injection, used for error analysis by
30327  * causing faults in multiple layers of the driver.
30328  *
30329  */
30330 
30331 #ifdef SD_FAULT_INJECTION
30332 static uint_t   sd_fault_injection_on = 0;
30333 
30334 /*
30335  *    Function: sd_faultinjection_ioctl()
30336  *
30337  * Description: This routine is the driver entry point for handling
30338  *              fault injection ioctls used to inject errors into the
30339  *              layered driver model.
30340  *
30341  *   Arguments: cmd	- the ioctl cmd received
30342  *		arg	- the user argument, also used to return data
30343  */
30344 
30345 static void
30346 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
30347 {
30348 	uint_t i;
30349 	uint_t rval;
30350 
30351 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30352 
30353 	mutex_enter(SD_MUTEX(un));
30354 
30355 	switch (cmd) {
30356 	case SDIOCRUN:
30357 		/* Allow pushed faults to be injected */
30358 		SD_INFO(SD_LOG_SDTEST, un,
30359 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30360 
30361 		sd_fault_injection_on = 1;
30362 
30363 		SD_INFO(SD_LOG_IOERR, un,
30364 		    "sd_faultinjection_ioctl: run finished\n");
30365 		break;
30366 
30367 	case SDIOCSTART:
30368 		/* Start Injection Session */
30369 		SD_INFO(SD_LOG_SDTEST, un,
30370 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30371 
30372 		sd_fault_injection_on = 0;
30373 		un->sd_injection_mask = 0xFFFFFFFF;
30374 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30375 			un->sd_fi_fifo_pkt[i] = NULL;
30376 			un->sd_fi_fifo_xb[i] = NULL;
30377 			un->sd_fi_fifo_un[i] = NULL;
30378 			un->sd_fi_fifo_arq[i] = NULL;
30379 		}
30380 		un->sd_fi_fifo_start = 0;
30381 		un->sd_fi_fifo_end = 0;
30382 
30383 		mutex_enter(&(un->un_fi_mutex));
30384 		un->sd_fi_log[0] = '\0';
30385 		un->sd_fi_buf_len = 0;
30386 		mutex_exit(&(un->un_fi_mutex));
30387 
30388 		SD_INFO(SD_LOG_IOERR, un,
30389 		    "sd_faultinjection_ioctl: start finished\n");
30390 		break;
30391 
30392 	case SDIOCSTOP:
30393 		/* Stop Injection Session */
30394 		SD_INFO(SD_LOG_SDTEST, un,
30395 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30396 		sd_fault_injection_on = 0;
30397 		un->sd_injection_mask = 0x0;
30398 
30399 		/* Empty stray or unused structs from fifo */
30400 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30401 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30402 				kmem_free(un->sd_fi_fifo_pkt[i],
30403 				    sizeof (struct sd_fi_pkt));
30404 			}
30405 			if (un->sd_fi_fifo_xb[i] != NULL) {
30406 				kmem_free(un->sd_fi_fifo_xb[i],
30407 				    sizeof (struct sd_fi_xb));
30408 			}
30409 			if (un->sd_fi_fifo_un[i] != NULL) {
30410 				kmem_free(un->sd_fi_fifo_un[i],
30411 				    sizeof (struct sd_fi_un));
30412 			}
30413 			if (un->sd_fi_fifo_arq[i] != NULL) {
30414 				kmem_free(un->sd_fi_fifo_arq[i],
30415 				    sizeof (struct sd_fi_arq));
30416 			}
30417 			un->sd_fi_fifo_pkt[i] = NULL;
30418 			un->sd_fi_fifo_un[i] = NULL;
30419 			un->sd_fi_fifo_xb[i] = NULL;
30420 			un->sd_fi_fifo_arq[i] = NULL;
30421 		}
30422 		un->sd_fi_fifo_start = 0;
30423 		un->sd_fi_fifo_end = 0;
30424 
30425 		SD_INFO(SD_LOG_IOERR, un,
30426 		    "sd_faultinjection_ioctl: stop finished\n");
30427 		break;
30428 
30429 	case SDIOCINSERTPKT:
30430 		/* Store a packet struct to be pushed onto fifo */
30431 		SD_INFO(SD_LOG_SDTEST, un,
30432 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30433 
30434 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30435 
30436 		sd_fault_injection_on = 0;
30437 
30438 		/* No more than SD_FI_MAX_ERROR allowed in queue */
30439 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30440 			kmem_free(un->sd_fi_fifo_pkt[i],
30441 			    sizeof (struct sd_fi_pkt));
30442 		}
30443 		if (arg != NULL) {
30444 			un->sd_fi_fifo_pkt[i] =
30445 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30446 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30447 				/* Alloc failed; don't store anything */
30448 				break;
30449 			}
30450 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30451 			    sizeof (struct sd_fi_pkt), 0);
30452 			if (rval == -1) {
30453 				kmem_free(un->sd_fi_fifo_pkt[i],
30454 				    sizeof (struct sd_fi_pkt));
30455 				un->sd_fi_fifo_pkt[i] = NULL;
30456 			}
30457 		} else {
30458 			SD_INFO(SD_LOG_IOERR, un,
30459 			    "sd_faultinjection_ioctl: pkt null\n");
30460 		}
30461 		break;
30462 
30463 	case SDIOCINSERTXB:
30464 		/* Store a xb struct to be pushed onto fifo */
30465 		SD_INFO(SD_LOG_SDTEST, un,
30466 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30467 
30468 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30469 
30470 		sd_fault_injection_on = 0;
30471 
30472 		if (un->sd_fi_fifo_xb[i] != NULL) {
30473 			kmem_free(un->sd_fi_fifo_xb[i],
30474 			    sizeof (struct sd_fi_xb));
30475 			un->sd_fi_fifo_xb[i] = NULL;
30476 		}
30477 		if (arg != NULL) {
30478 			un->sd_fi_fifo_xb[i] =
30479 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30480 			if (un->sd_fi_fifo_xb[i] == NULL) {
30481 				/* Alloc failed; don't store anything */
30482 				break;
30483 			}
30484 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30485 			    sizeof (struct sd_fi_xb), 0);
30486 
30487 			if (rval == -1) {
30488 				kmem_free(un->sd_fi_fifo_xb[i],
30489 				    sizeof (struct sd_fi_xb));
30490 				un->sd_fi_fifo_xb[i] = NULL;
30491 			}
30492 		} else {
30493 			SD_INFO(SD_LOG_IOERR, un,
30494 			    "sd_faultinjection_ioctl: xb null\n");
30495 		}
30496 		break;
30497 
30498 	case SDIOCINSERTUN:
30499 		/* Store a un struct to be pushed onto fifo */
30500 		SD_INFO(SD_LOG_SDTEST, un,
30501 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30502 
30503 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30504 
30505 		sd_fault_injection_on = 0;
30506 
30507 		if (un->sd_fi_fifo_un[i] != NULL) {
30508 			kmem_free(un->sd_fi_fifo_un[i],
30509 			    sizeof (struct sd_fi_un));
30510 			un->sd_fi_fifo_un[i] = NULL;
30511 		}
30512 		if (arg != NULL) {
30513 			un->sd_fi_fifo_un[i] =
30514 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30515 			if (un->sd_fi_fifo_un[i] == NULL) {
30516 				/* Alloc failed; don't store anything */
30517 				break;
30518 			}
30519 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30520 			    sizeof (struct sd_fi_un), 0);
30521 			if (rval == -1) {
30522 				kmem_free(un->sd_fi_fifo_un[i],
30523 				    sizeof (struct sd_fi_un));
30524 				un->sd_fi_fifo_un[i] = NULL;
30525 			}
30526 
30527 		} else {
30528 			SD_INFO(SD_LOG_IOERR, un,
30529 			    "sd_faultinjection_ioctl: un null\n");
30530 		}
30531 
30532 		break;
30533 
30534 	case SDIOCINSERTARQ:
30535 		/* Store a arq struct to be pushed onto fifo */
30536 		SD_INFO(SD_LOG_SDTEST, un,
30537 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30538 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30539 
30540 		sd_fault_injection_on = 0;
30541 
30542 		if (un->sd_fi_fifo_arq[i] != NULL) {
30543 			kmem_free(un->sd_fi_fifo_arq[i],
30544 			    sizeof (struct sd_fi_arq));
30545 			un->sd_fi_fifo_arq[i] = NULL;
30546 		}
30547 		if (arg != NULL) {
30548 			un->sd_fi_fifo_arq[i] =
30549 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30550 			if (un->sd_fi_fifo_arq[i] == NULL) {
30551 				/* Alloc failed; don't store anything */
30552 				break;
30553 			}
30554 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30555 			    sizeof (struct sd_fi_arq), 0);
30556 			if (rval == -1) {
30557 				kmem_free(un->sd_fi_fifo_arq[i],
30558 				    sizeof (struct sd_fi_arq));
30559 				un->sd_fi_fifo_arq[i] = NULL;
30560 			}
30561 
30562 		} else {
30563 			SD_INFO(SD_LOG_IOERR, un,
30564 			    "sd_faultinjection_ioctl: arq null\n");
30565 		}
30566 
30567 		break;
30568 
30569 	case SDIOCPUSH:
30570 		/* Push stored xb, pkt, un, and arq onto fifo */
30571 		sd_fault_injection_on = 0;
30572 
30573 		if (arg != NULL) {
30574 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30575 			if (rval != -1 &&
30576 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30577 				un->sd_fi_fifo_end += i;
30578 			}
30579 		} else {
30580 			SD_INFO(SD_LOG_IOERR, un,
30581 			    "sd_faultinjection_ioctl: push arg null\n");
30582 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
30583 				un->sd_fi_fifo_end++;
30584 			}
30585 		}
30586 		SD_INFO(SD_LOG_IOERR, un,
30587 		    "sd_faultinjection_ioctl: push to end=%d\n",
30588 		    un->sd_fi_fifo_end);
30589 		break;
30590 
30591 	case SDIOCRETRIEVE:
30592 		/* Return buffer of log from Injection session */
30593 		SD_INFO(SD_LOG_SDTEST, un,
30594 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
30595 
30596 		sd_fault_injection_on = 0;
30597 
30598 		mutex_enter(&(un->un_fi_mutex));
30599 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30600 		    un->sd_fi_buf_len+1, 0);
30601 		mutex_exit(&(un->un_fi_mutex));
30602 
30603 		if (rval == -1) {
30604 			/*
30605 			 * arg is possibly invalid setting
30606 			 * it to NULL for return
30607 			 */
30608 			arg = NULL;
30609 		}
30610 		break;
30611 	}
30612 
30613 	mutex_exit(SD_MUTEX(un));
30614 	SD_TRACE(SD_LOG_IOERR, un,
30615 	    "sd_faultinjection_ioctl: exit\n");
30616 }
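
/*
 * Illustrative sketch only (userland, not part of the driver): the
 * expected ioctl sequence for a fault injection session against the
 * routine above.  The function name, the logbuf parameter, and the
 * 0xFF sentinel fill (assuming -1 marks "don't touch" for SD_CONDSET)
 * are assumptions; error handling is omitted for brevity.
 */
#ifdef	SD_FI_USAGE_SKETCH
static void
sd_fi_usage_sketch(int fd, char *logbuf)
{
	struct sd_fi_pkt	fi_pkt;
	uint_t			npush = 1;

	(void) memset(&fi_pkt, 0xFF, sizeof (fi_pkt));
	fi_pkt.pkt_reason = CMD_TRAN_ERR;	/* the fault to inject */

	(void) ioctl(fd, SDIOCSTART, NULL);		/* begin a session */
	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt);	/* stage the pkt */
	(void) ioctl(fd, SDIOCPUSH, &npush);		/* push onto fifo */
	(void) ioctl(fd, SDIOCRUN, NULL);		/* arm injection */
	/* ...issue I/O against the device, then collect the log... */
	(void) ioctl(fd, SDIOCRETRIEVE, logbuf);	/* fetch the log */
	(void) ioctl(fd, SDIOCSTOP, NULL);		/* end the session */
}
#endif	/* SD_FI_USAGE_SKETCH */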
30617 
30618 
30619 /*
30620  *    Function: sd_injection_log()
30621  *
30622  * Description: This routine appends buf to the existing injection log,
30623  *              which is retrieved via sd_faultinjection_ioctl() for use
30624  *              in fault detection and recovery.
30625  *
30626  *   Arguments: buf - the string to add to the log
30627  */
30628 
30629 static void
30630 sd_injection_log(char *buf, struct sd_lun *un)
30631 {
30632 	uint_t len;
30633 
30634 	ASSERT(un != NULL);
30635 	ASSERT(buf != NULL);
30636 
30637 	mutex_enter(&(un->un_fi_mutex));
30638 
30639 	len = min(strlen(buf), 255);
30640 	/* Add logged value to Injection log to be returned later */
30641 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30642 		uint_t	offset = strlen((char *)un->sd_fi_log);
30643 		char *destp = (char *)un->sd_fi_log + offset;
30644 		int i;
30645 		for (i = 0; i < len; i++) {
30646 			*destp++ = *buf++;
30647 		}
30648 		un->sd_fi_buf_len += len;
30649 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30650 	}
30651 
30652 	mutex_exit(&(un->un_fi_mutex));
30653 }
30654 
30655 
30656 /*
30657  *    Function: sd_faultinjection()
30658  *
30659  * Description: This routine takes the pkt and changes its
30660  *		content based on the error injection scenario.
30661  *
30662  *   Arguments: pktp	- packet to be changed
30663  */
30664 
30665 static void
30666 sd_faultinjection(struct scsi_pkt *pktp)
30667 {
30668 	uint_t i;
30669 	struct sd_fi_pkt *fi_pkt;
30670 	struct sd_fi_xb *fi_xb;
30671 	struct sd_fi_un *fi_un;
30672 	struct sd_fi_arq *fi_arq;
30673 	struct buf *bp;
30674 	struct sd_xbuf *xb;
30675 	struct sd_lun *un;
30676 
30677 	ASSERT(pktp != NULL);
30678 
30679 	/* pull bp, xb, and un from pktp */
30680 	bp = (struct buf *)pktp->pkt_private;
30681 	xb = SD_GET_XBUF(bp);
30682 	un = SD_GET_UN(bp);
30683 
30684 	ASSERT(un != NULL);
30685 
30686 	mutex_enter(SD_MUTEX(un));
30687 
30688 	SD_TRACE(SD_LOG_SDTEST, un,
30689 	    "sd_faultinjection: entry Injection from sdintr\n");
30690 
30691 	/* if injection is off, return */
30692 	if (sd_fault_injection_on == 0 ||
30693 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30694 		mutex_exit(SD_MUTEX(un));
30695 		return;
30696 	}
30697 
30698 
30699 	/* take next set off fifo */
30700 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30701 
30702 	fi_pkt = un->sd_fi_fifo_pkt[i];
30703 	fi_xb = un->sd_fi_fifo_xb[i];
30704 	fi_un = un->sd_fi_fifo_un[i];
30705 	fi_arq = un->sd_fi_fifo_arq[i];
30706 
30707 
30708 	/* set variables accordingly */
30709 	/* set pkt if it was on fifo */
30710 	if (fi_pkt != NULL) {
30711 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30712 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30713 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30714 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30715 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30716 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30717 
30718 	}
30719 
30720 	/* set xb if it was on fifo */
30721 	if (fi_xb != NULL) {
30722 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30723 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30724 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30725 		SD_CONDSET(xb, xb, xb_victim_retry_count,
30726 		    "xb_victim_retry_count");
30727 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30728 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30729 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30730 
30731 		/* copy in block data from sense */
30732 		if (fi_xb->xb_sense_data[0] != -1) {
30733 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30734 			    SENSE_LENGTH);
30735 		}
30736 
30737 		/* copy in extended sense codes */
30738 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
30739 		    "es_code");
30740 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
30741 		    "es_key");
30742 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
30743 		    "es_add_code");
30744 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
30745 		    es_qual_code, "es_qual_code");
30746 	}
30747 
30748 	/* set un if it was on fifo */
30749 	if (fi_un != NULL) {
30750 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30751 		SD_CONDSET(un, un, un_ctype, "un_ctype");
30752 		SD_CONDSET(un, un, un_reset_retry_count,
30753 		    "un_reset_retry_count");
30754 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30755 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30756 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30757 		SD_CONDSET(un, un, un_f_geometry_is_valid,
30758 		    "un_f_geometry_is_valid");
30759 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30760 		    "un_f_allow_bus_device_reset");
30761 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30762 
30763 	}
30764 
30765 	/* copy in auto request sense if it was on fifo */
30766 	if (fi_arq != NULL) {
30767 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30768 	}
30769 
30770 	/* free structs */
30771 	if (un->sd_fi_fifo_pkt[i] != NULL) {
30772 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30773 	}
30774 	if (un->sd_fi_fifo_xb[i] != NULL) {
30775 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30776 	}
30777 	if (un->sd_fi_fifo_un[i] != NULL) {
30778 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30779 	}
30780 	if (un->sd_fi_fifo_arq[i] != NULL) {
30781 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30782 	}
30783 
30784 	/*
30785 	 * kmem_free does not guarantee to set the pointers to NULL.
30786 	 * Since we use these pointers to determine whether we have
30787 	 * values to set, let's make sure they are always NULL
30788 	 * after the free.
30789 	 */
30790 	un->sd_fi_fifo_pkt[i] = NULL;
30791 	un->sd_fi_fifo_un[i] = NULL;
30792 	un->sd_fi_fifo_xb[i] = NULL;
30793 	un->sd_fi_fifo_arq[i] = NULL;
30794 
30795 	un->sd_fi_fifo_start++;
30796 
30797 	mutex_exit(SD_MUTEX(un));
30798 
30799 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30800 }
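
/*
 * Illustrative sketch only: a reconstruction of the conditional-set
 * behavior that the SD_CONDSET() invocations above rely on.  The real
 * macro lives in sddef.h; treating -1 as the "don't touch" sentinel
 * and logging each field that is set is an assumption based on how it
 * is called here, not a copy of the actual definition.
 */
#ifdef	SD_CONDSET_SKETCH
#define	SD_CONDSET_SKETCHED(dest, kind, field, name)			\
	do {								\
		if (fi_##kind->field != -1) {				\
			(dest)->field = fi_##kind->field;		\
			sd_injection_log(name, un);			\
		}							\
	} while (0)
#endif	/* SD_CONDSET_SKETCH */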
30801 
30802 #endif /* SD_FAULT_INJECTION */
30803 
30804 /*
30805  * This routine is invoked in sd_unit_attach(). Before calling it, the
30806  * properties in the conf file should already have been processed, and the
30807  * "hotpluggable" property should have been processed as well.
30808  *
30809  * The sd driver distinguishes 3 different types of devices: removable media,
30810  * non-removable media, and hotpluggable. The differences are defined below:
30811  *
30812  * 1. Device ID
30813  *
30814  *     The device ID of a device is used to identify this device. Refer to
30815  *     ddi_devid_register(9F).
30816  *
30817  *     For a non-removable media disk device which can provide 0x80 or 0x83
30818  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30819  *     device ID is created to identify this device. For other non-removable
30820  *     media devices, a default device ID is created only if this device has
30821  *     at least 2 alternate cylinders. Otherwise, this device has no devid.
30822  *
30823  *     -------------------------------------------------------
30824  *     removable media   hotpluggable  | Can Have Device ID
30825  *     -------------------------------------------------------
30826  *         false             false     |     Yes
30827  *         false             true      |     Yes
30828  *         true                x       |     No
30829  *     ------------------------------------------------------
30830  *
30831  *
30832  * 2. SCSI group 4 commands
30833  *
30834  *     In SCSI specs, only some commands in group 4 command set can use
30835  *     8-byte addresses that can be used to access >2TB storage spaces.
30836  *     Other commands have no such capability. Without supporting group4,
30837  *     it is impossible to make full use of storage spaces of a disk with
30838  *     capacity larger than 2TB.
30839  *
30840  *     -----------------------------------------------
30841  *     removable media   hotpluggable   LP64  |  Group
30842  *     -----------------------------------------------
30843  *           false          false       false |   1
30844  *           false          false       true  |   4
30845  *           false          true        false |   1
30846  *           false          true        true  |   4
30847  *           true             x           x   |   5
30848  *     -----------------------------------------------
30849  *
30850  *
30851  * 3. Check for VTOC Label
30852  *
30853  *     If a direct-access disk has no EFI label, sd will check if it has a
30854  *     valid VTOC label. Now, sd also does that check for removable media
30855  *     and hotpluggable devices.
30856  *
30857  *     --------------------------------------------------------------
30858  *     Direct-Access   removable media    hotpluggable |  Check Label
30859  *     -------------------------------------------------------------
30860  *         false          false           false        |   No
30861  *         false          false           true         |   No
30862  *         false          true            false        |   Yes
30863  *         false          true            true         |   Yes
30864  *         true            x                x          |   Yes
30865  *     --------------------------------------------------------------
30866  *
30867  *
30868  * 4. Building default VTOC label
30869  *
30870  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
30871  *     If those devices have no valid VTOC label, sd(7d) will attempt to
30872  *     create default VTOC for them. Currently sd creates default VTOC label
30873  *     for all devices on x86 platform (VTOC_16), but only for removable
30874  *     media devices on SPARC (VTOC_8).
30875  *
30876  *     -----------------------------------------------------------
30877  *       removable media hotpluggable platform   |   Default Label
30878  *     -----------------------------------------------------------
30879  *             false          false    sparc     |     No
30880  *             false          true      x86      |     Yes
30881  *             false          true     sparc     |     Yes
30882  *             true             x        x       |     Yes
30883  *     ----------------------------------------------------------
30884  *
30885  *
30886  * 5. Supported blocksizes of target devices
30887  *
30888  *     Sd supports non-512-byte blocksize for removable media devices only.
30889  *     For other devices, only 512-byte blocksize is supported. This may be
30890  *     changed in near future because some RAID devices require non-512-byte
30891  *     blocksize
30892  *
30893  *     -----------------------------------------------------------
30894  *     removable media    hotpluggable    | non-512-byte blocksize
30895  *     -----------------------------------------------------------
30896  *           false          false         |   No
30897  *           false          true          |   No
30898  *           true             x           |   Yes
30899  *     -----------------------------------------------------------
30900  *
30901  *
30902  * 6. Automatic mount & unmount (i.e. vold)
30903  *
30904  *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, used to query
30905  *     if a device is a removable media device. It returns 1 for removable media
30906  *     devices, and 0 for others.
30907  *
30908  *     Vold treats a device as a removable one only if DKIOCREMOVABLE returns 1.
30909  *     And it does automounting only for removable media devices. In order to
30910  *     preserve users' experience and let vold continue to do automounting for
30911  *     USB disk devices, DKIOCREMOVABLE ioctl still returns 1 for USB/1394 disk
30912  *     devices.
30913  *
30914  *      ------------------------------------------------------
30915  *       removable media    hotpluggable   |  automatic mount
30916  *      ------------------------------------------------------
30917  *             false          false        |   No
30918  *             false          true         |   Yes
30919  *             true             x          |   Yes
30920  *      ------------------------------------------------------
30921  *
30922  *
30923  * 7. fdisk partition management
30924  *
30925  *     Fdisk is the traditional partitioning method on the x86 platform. The
30926  *     sd(7d) driver supports fdisk partitions only on x86. On SPARC, sd
30927  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
30928  *     fdisk partitions on both x86 and SPARC platform.
30929  *
30930  *     -----------------------------------------------------------
30931  *       platform   removable media  USB/1394  |  fdisk supported
30932  *     -----------------------------------------------------------
30933  *        x86         X               X        |       true
30934  *     ------------------------------------------------------------
30935  *        sparc       X               X        |       false
30936  *     ------------------------------------------------------------
30937  *
30938  *
30939  * 8. MBOOT/MBR
30940  *
30941  *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
30942  *     support reading/writing the mboot for removable media devices on SPARC.
30943  *
30944  *     -----------------------------------------------------------
30945  *       platform   removable media  USB/1394  |  mboot supported
30946  *     -----------------------------------------------------------
30947  *        x86         X               X        |       true
30948  *     ------------------------------------------------------------
30949  *        sparc      false           false     |       false
30950  *        sparc      false           true      |       true
30951  *        sparc      true            false     |       true
30952  *        sparc      true            true      |       true
30953  *     ------------------------------------------------------------
30954  *
30955  *
30956  * 9.  Error handling during device open
30957  *
30958  *     If opening a disk device fails, an errno is returned. For some
30959  *     kinds of errors, a different errno is returned depending on whether
30960  *     this device is a removable media device. This brings USB/1394 hard
30961  *     disks in line with expected hard disk behavior. It is not expected
30962  *     that this breaks any application.
30963  *
30964  *     ------------------------------------------------------
30965  *       removable media    hotpluggable   |  errno
30966  *     ------------------------------------------------------
30967  *             false          false        |   EIO
30968  *             false          true         |   EIO
30969  *             true             x          |   ENXIO
30970  *     ------------------------------------------------------
30971  *
30972  *
30973  * 10. ioctls: DKIOCEJECT, CDROMEJECT
30974  *
30975  *     These IOCTLs are applicable only to removable media devices.
30976  *
30977  *     -----------------------------------------------------------
30978  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30979  *     -----------------------------------------------------------
30980  *             false          false        |     No
30981  *             false          true         |     No
30982  *             true            x           |     Yes
30983  *     -----------------------------------------------------------
30984  *
30985  *
30986  * 11. Kstats for partitions
30987  *
30988  *     sd creates partition kstats for non-removable media devices. USB and
30989  *     Firewire hard disks now have partition kstats as well.
30990  *
30991  *      ------------------------------------------------------
30992  *       removable media    hotpluggable   |   kstat
30993  *      ------------------------------------------------------
30994  *             false          false        |    Yes
30995  *             false          true         |    Yes
30996  *             true             x          |    No
30997  *       ------------------------------------------------------
30998  *
30999  *
31000  * 12. Removable media & hotpluggable properties
31001  *
31002  *     Sd driver creates a "removable-media" property for removable media
31003  *     devices. Parent nexus drivers create a "hotpluggable" property if
31004  *     they support hotplugging.
31005  *
31006  *     ---------------------------------------------------------------------
31007  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
31008  *     ---------------------------------------------------------------------
31009  *       false            false       |    No                   No
31010  *       false            true        |    No                   Yes
31011  *       true             false       |    Yes                  No
31012  *       true             true        |    Yes                  Yes
31013  *     ---------------------------------------------------------------------
31014  *
31015  *
31016  * 13. Power Management
31017  *
31018  *     sd only power manages removable media devices or devices that support
31019  *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
31020  *
31021  *     A parent nexus that supports hotplugging can also set "pm-capable"
31022  *     if the disk can be power managed.
31023  *
31024  *     ------------------------------------------------------------
31025  *       removable media hotpluggable pm-capable  |   power manage
31026  *     ------------------------------------------------------------
31027  *             false          false     false     |     No
31028  *             false          false     true      |     Yes
31029  *             false          true      false     |     No
31030  *             false          true      true      |     Yes
31031  *             true             x        x        |     Yes
31032  *     ------------------------------------------------------------
31033  *
31034  *      USB and firewire hard disks can now be power managed independently
31035  *      of the framebuffer.
31036  *
31037  *
31038  * 14. Support for USB disks with capacity larger than 1TB
31039  *
31040  *     Currently, sd doesn't permit a fixed disk device with capacity
31041  *     larger than 1TB to be used in a 32-bit operating system environment.
31042  *     However, sd doesn't do that for removable media devices. Instead, it
31043  *     assumes that removable media devices cannot have a capacity larger
31044  *     than 1TB. Therefore, using those devices on a 32-bit system is only
31045  *     partially supported, which can cause some unexpected results.
31046  *
31047  *     ---------------------------------------------------------------------
31048  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
31049  *     ---------------------------------------------------------------------
31050  *             false          false  |   true         |     no
31051  *             false          true   |   true         |     no
31052  *             true           false  |   true         |     Yes
31053  *             true           true   |   true         |     Yes
31054  *     ---------------------------------------------------------------------
31055  *
31056  *
31057  * 15. Check write-protection at open time
31058  *
31059  *     When a removable media device is opened for writing without the NDELAY
31060  *     flag, sd will check if this device is writable. If the device is
31061  *     write-protected, the open operation will abort.
31062  *
31063  *     ------------------------------------------------------------
31064  *       removable media    USB/1394   |   WP Check
31065  *     ------------------------------------------------------------
31066  *             false          false    |     No
31067  *             false          true     |     No
31068  *             true           false    |     Yes
31069  *             true           true     |     Yes
31070  *     ------------------------------------------------------------
31071  *
31072  *
31073  * 16. syslog when corrupted VTOC is encountered
31074  *
31075  *      Currently, if an invalid VTOC is encountered, sd only prints a
31076  *      syslog message for fixed SCSI disks.
31077  *     ------------------------------------------------------------
31078  *       removable media    USB/1394   |   print syslog
31079  *     ------------------------------------------------------------
31080  *             false          false    |     Yes
31081  *             false          true     |     No
31082  *             true           false    |     No
31083  *             true           true     |     No
31084  *     ------------------------------------------------------------
31085  */
31086 static void
31087 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
31088 {
31089 	int	pm_capable_prop;
31090 
31091 	ASSERT(un->un_sd);
31092 	ASSERT(un->un_sd->sd_inq);
31093 
31094 #if defined(_SUNOS_VTOC_16)
31095 	/*
31096 	 * For VTOC_16 devices, the default label will be created for all
31097 	 * devices. (see sd_build_default_label)
31098 	 */
31099 	un->un_f_default_vtoc_supported = TRUE;
31100 #endif
31101 
31102 	if (un->un_sd->sd_inq->inq_rmb) {
31103 		/*
31104 		 * The media of this device is removable, and for this kind
31105 		 * of device it is possible to change the medium after opening
31106 		 * it. Thus we should support this operation.
31107 		 */
31108 		un->un_f_has_removable_media = TRUE;
31109 
31110 #if defined(_SUNOS_VTOC_8)
31111 		/*
31112 		 * Note: currently, for VTOC_8 devices, default label is
31113 		 * created for removable and hotpluggable devices only.
31114 		 */
31115 		un->un_f_default_vtoc_supported = TRUE;
31116 #endif
31117 		/*
31118 		 * support non-512-byte blocksize of removable media devices
31119 		 */
31120 		un->un_f_non_devbsize_supported = TRUE;
31121 
31122 		/*
31123 		 * Assume that all removable media devices support DOOR_LOCK
31124 		 */
31125 		un->un_f_doorlock_supported = TRUE;
31126 
31127 		/*
31128 		 * A removable media device can be opened with the NDELAY flag
31129 		 * when there is no media in the drive, in which case we don't
31130 		 * care if the device is writable. But without the NDELAY
31131 		 * flag, we need to check if the media is write-protected.
31132 		 */
31133 		un->un_f_chk_wp_open = TRUE;
31134 
31135 		/*
31136 		 * need to start a SCSI watch thread to monitor media state,
31137 		 * when media is being inserted or ejected, notify syseventd.
31138 		 */
31139 		un->un_f_monitor_media_state = TRUE;
31140 
31141 		/*
31142 		 * Some devices don't support START_STOP_UNIT command.
31143 		 * Therefore, we'd better check if a device supports it
31144 		 * before sending it.
31145 		 */
31146 		un->un_f_check_start_stop = TRUE;
31147 
31148 		/*
31149 		 * support eject media ioctl:
31150 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31151 		 */
31152 		un->un_f_eject_media_supported = TRUE;
31153 
31154 		/*
31155 		 * Because many removable-media devices don't support
31156 		 * LOG_SENSE, we cannot use this command to check if
31157 		 * a removable media device supports power management.
31158 		 * We assume that they support power-management via
31159 		 * START_STOP_UNIT command and can be spun up and down
31160 		 * without limitations.
31161 		 */
31162 		un->un_f_pm_supported = TRUE;
31163 
31164 		/*
31165 		 * Need to create a zero length (Boolean) property
31166 		 * removable-media for the removable media devices.
31167 		 * Note that the return value of the property creation is not
31168 		 * checked, since if we are unable to create the property we
31169 		 * do not want the attach to fail altogether. This is
31170 		 * consistent with other property creation in attach.
31171 		 */
31172 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31173 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31174 
31175 	} else {
31176 		/*
31177 		 * create device ID for device
31178 		 */
31179 		un->un_f_devid_supported = TRUE;
31180 
31181 		/*
31182 		 * Spin up non-removable-media devices once it is attached
31183 		 */
31184 		un->un_f_attach_spinup = TRUE;
31185 
31186 		/*
31187 		 * According to the SCSI specification, sense data comes in two
31188 		 * formats: fixed format and descriptor format. At present, we
31189 		 * don't support descriptor format sense data for removable
31190 		 * media.
31191 		 */
31192 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31193 			un->un_f_descr_format_supported = TRUE;
31194 		}
31195 
31196 		/*
31197 		 * kstats are created only for non-removable media devices.
31198 		 *
31199 		 * Set this in sd.conf to 0 in order to disable kstats.  The
31200 		 * default is 1, so they are enabled by default.
31201 		 */
31202 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31203 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31204 			"enable-partition-kstats", 1));
31205 
31206 		/*
31207 		 * Check if HBA has set the "pm-capable" property.
31208 		 * If "pm-capable" exists and is non-zero then we can
31209 		 * power manage the device without checking the start/stop
31210 		 * cycle count log sense page.
31211 		 *
31212 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
31213 		 * then we should not power manage the device.
31214 		 *
31215 		 * If "pm-capable" doesn't exist then pm_capable_prop will
31216 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31217 		 * sd will check the start/stop cycle count log sense page
31218 		 * and power manage the device if the cycle count limit has
31219 		 * not been exceeded.
31220 		 */
31221 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31222 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31223 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
31224 			un->un_f_log_sense_supported = TRUE;
31225 		} else {
31226 			/*
31227 			 * pm-capable property exists.
31228 			 *
31229 			 * Convert "TRUE" values for pm_capable_prop to
31230 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
31231 			 * later. "TRUE" values are any values except
31232 			 * SD_PM_CAPABLE_FALSE (0) and
31233 			 * SD_PM_CAPABLE_UNDEFINED (-1)
31234 			 */
31235 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
31236 				un->un_f_log_sense_supported = FALSE;
31237 			} else {
31238 				un->un_f_pm_supported = TRUE;
31239 			}
31240 
31241 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31242 			    "sd_unit_attach: un:0x%p pm-capable "
31243 			    "property set to %d.\n", un, un->un_f_pm_supported);
31244 		}
31245 	}
31246 
31247 	if (un->un_f_is_hotpluggable) {
31248 #if defined(_SUNOS_VTOC_8)
31249 		/*
31250 		 * Note: currently, for VTOC_8 devices, default label is
31251 		 * created for removable and hotpluggable devices only.
31252 		 */
31253 		un->un_f_default_vtoc_supported = TRUE;
31254 #endif
31255 
31256 		/*
31257 		 * Temporarily, let hotpluggable devices pretend to be
31258 		 * removable-media devices for vold.
31259 		 */
31260 		un->un_f_monitor_media_state = TRUE;
31261 
31262 		un->un_f_check_start_stop = TRUE;
31263 
31264 	}
31265 
31266 	/*
31267 	 * By default, only DIRECT ACCESS devices and CDs will have Sun
31268 	 * labels.
31269 	 */
31270 	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
31271 	    (un->un_sd->sd_inq->inq_rmb)) {
31272 		/*
31273 		 * Direct access devices have disk label
31274 		 */
31275 		un->un_f_vtoc_label_supported = TRUE;
31276 	}
31277 
31278 	/*
31279 	 * Fdisk partitions are supported for all direct access devices on
31280 	 * the x86 platform, and just for removable media and hotpluggable
31281 	 * devices on the SPARC platform. Later, we will set the following flag
31282 	 * to FALSE if the current device is not removable media or hotpluggable
31283 	 * and sd is running on the SPARC platform.
31284 	 */
31285 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31286 		un->un_f_mboot_supported = TRUE;
31287 	}
31288 
31289 	if (!un->un_f_is_hotpluggable &&
31290 	    !un->un_sd->sd_inq->inq_rmb) {
31291 
31292 #if defined(_SUNOS_VTOC_8)
31293 		/*
31294 		 * Don't support fdisk on fixed disk
31295 		 */
31296 		un->un_f_mboot_supported = FALSE;
31297 #endif
31298 
31299 		/*
31300 		 * Fixed disks support SYNC CACHE
31301 		 */
31302 		un->un_f_sync_cache_supported = TRUE;
31303 
31304 		/*
31305 		 * For a fixed disk, if its VTOC is not valid, we will write
31306 		 * an error message to the system log.
31307 		 */
31308 		if (un->un_f_vtoc_label_supported)
31309 			un->un_f_vtoc_errlog_supported = TRUE;
31310 	}
31311 }
31312
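
/*
 * Illustrative sketch only (userland, not part of the driver): querying
 * the DKIOCREMOVABLE ioctl described in section 6 of the comment block
 * above.  The function name is hypothetical and error handling is
 * minimal; <sys/dkio.h> supplies DKIOCREMOVABLE.
 */
#ifdef	SD_DKIOCREMOVABLE_SKETCH
static int
sd_is_removable_sketch(int fd)
{
	int	removable = 0;

	/* Returns 1 for removable media (and USB/1394) devices, else 0. */
	if (ioctl(fd, DKIOCREMOVABLE, &removable) != 0) {
		return (-1);
	}
	return (removable);
}
#endif	/* SD_DKIOCREMOVABLE_SKETCH */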