/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>


/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides, things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif
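
/*
 * For illustration, a hypothetical sd.conf fragment using the names above
 * (the vid/pid string, data property name, and values here are made up
 * for this example, not taken from any shipped configuration):
 *
 *	sd_max_xfer_size=0x100000;
 *	sd-config-list= "ACME    SUPERDISK", "acme-data";
 *	acme-data= 1,0x1,32;
 *
 * The first integer of the data property is the property-list version
 * (currently 1) and the second is a flags word selecting which of the
 * remaining values apply (here, hypothetically, just a throttle of 32);
 * see sd_chk_vers1_data() and sd_set_vers1_properties() below for how
 * such a list is validated and consumed.
 */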

/*
 * Driver global variables
 */

136 #if (defined(__fibre))
137 /*
138  * These #defines are to avoid namespace collisions that occur because this
139  * code is currently used to compile two seperate driver modules: sd and ssd.
140  * All global variables need to be treated this way (even if declared static)
141  * in order to allow the debugger to resolve the names properly.
142  * It is anticipated that in the near future the ssd module will be obsoleted,
143  * at which time this namespace issue should go away.
144  */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;


/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel scsi and non-self-identify device only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
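
/*
 * In rough outline (a sketch of the idea only; the real logic lives in
 * sd_scsi_probe_with_cache() below), the cache lets a probe of a target
 * that previously did not respond return immediately instead of paying
 * for another scsi_probe() on the wire:
 *
 *	cp = ...look up (or allocate) the entry whose pdip matches the
 *	    probed device's parent HBA node...;
 *	if (cp->cache[tgt] == SCSIPROBE_NORESP)
 *		return (SCSIPROBE_NORESP);	-- skip the slow probe
 *	cp->cache[tgt] = scsi_probe(devp, fn);
 *	return (cp->cache[tgt]);
 */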


/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))

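/*
 * For example, SD_TOUPPER('x') evaluates to 'X', while 'X' and '3' pass
 * through unchanged. Like any function-like macro it may evaluate its
 * argument more than once, so it must not be given an expression with
 * side effects (e.g. *p++).
 */
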
/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or perhaps
 * something else, as defined by the flags, at some future time).  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  The inquiry data is compared only as far as the device_id
 * string is defined.  Flags defines which values are to be set in the
 * driver from the properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device.
 *	 MAM3182FC, MAM3364FC and MAM3738FC do not appear to have ever been
 *	 made with an FC connection; the entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);


/*
 * Return codes of sd_uselabel().
 */
#define	SD_LABEL_IS_VALID		0
#define	SD_LABEL_IS_INVALID		1

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
#define	WD_NODE			7	/* the whole disk minor */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
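
/*
 * Typical usage (a sketch; SD_STATE_SUSPENDED is one of the un_state
 * values from sddef.h):
 *
 *	New_state(un, SD_STATE_SUSPENDED);
 *	...do the work that requires the new state...
 *	Restore_state(un);
 *
 * Only one level of prior state is saved in un_last_state, so nested
 * New_state()/Restore_state() pairs will not unwind correctly.
 */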

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
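
/*
 * As the initializers above suggest, each row pairs a CDB group (and
 * hence CDB length) with its opcode group code and the largest LBA and
 * block count that CDB format can address: e.g. a 21-bit LBA and 8-bit
 * count for Group 0, a 32-bit LBA and 16-bit count for Group 1. See
 * struct sd_cdbinfo in sddef.h for the field definitions.
 */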

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed before a device is declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties
#define	sd_validate_geometry		ssd_validate_geometry

#if defined(_SUNOS_VTOC_16)
#define	sd_convert_geometry		ssd_convert_geometry
#endif

#define	sd_resync_geom_caches		ssd_resync_geom_caches
#define	sd_read_fdisk			ssd_read_fdisk
#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_swap_efi_gpt			ssd_swap_efi_gpt
#define	sd_swap_efi_gpe			ssd_swap_efi_gpe
#define	sd_validate_efi			ssd_validate_efi
#define	sd_use_efi			ssd_use_efi
#define	sd_uselabel			ssd_uselabel
#define	sd_build_default_label		ssd_build_default_label
#define	sd_has_max_chs_vals		ssd_has_max_chs_vals
#define	sd_inq_fill			ssd_inq_fill
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid_block		ssd_get_devid_block
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_minor_nodes		ssd_create_minor_nodes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_extract_sense_info_descr	ssd_extract_sense_info_descr
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_uscsi_ioctl			ssd_uscsi_ioctl
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_dkio_get_geometry		ssd_dkio_get_geometry
#define	sd_dkio_set_geometry		ssd_dkio_set_geometry
#define	sd_dkio_get_partition		ssd_dkio_get_partition
#define	sd_dkio_set_partition		ssd_dkio_set_partition
#define	sd_dkio_partition		ssd_dkio_partition
#define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
#define	sd_dkio_get_efi			ssd_dkio_get_efi
#define	sd_build_user_vtoc		ssd_build_user_vtoc
#define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
#define	sd_dkio_set_efi			ssd_dkio_set_efi
#define	sd_build_label_vtoc		ssd_build_label_vtoc
#define	sd_write_label			ssd_write_label
#define	sd_clear_vtoc			ssd_clear_vtoc
#define	sd_clear_efi			ssd_clear_efi
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_dkio_get_mboot		ssd_dkio_get_mboot
#define	sd_dkio_set_mboot		ssd_dkio_set_mboot
#define	sd_setup_default_geometry	ssd_setup_default_geometry
#define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
#endif /* _LP64 */
static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);
static int  sd_validate_geometry(struct sd_lun *un, int path_flag);

#if defined(_SUNOS_VTOC_16)
static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
#endif

static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
	int path_flag);
static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
	int path_flag);
static void sd_get_physical_geometry(struct sd_lun *un,
	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
	int lbasize);
static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
static void sd_swap_efi_gpt(efi_gpt_t *);
static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
static int sd_validate_efi(efi_gpt_t *);
static int sd_use_efi(struct sd_lun *, int);
static void sd_build_default_label(struct sd_lun *un);

#if defined(_FIRMWARE_NEEDS_FDISK)
static int  sd_has_max_chs_vals(struct ipart *fdp);
#endif
static void sd_inq_fill(char *p, int l, char *s);


static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static daddr_t  sd_get_devid_block(struct sd_lun *un);
static int  sd_get_devid(struct sd_lun *un);
static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1

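/*
 * A sketch of the sd_cache_control() calling convention (not a quote of
 * an actual call site): to turn on the write cache while leaving the
 * read cache setting alone, a caller would use
 *
 *	(void) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 *
 * where rcd_flag drives the RCD (read cache disable) bit and wce_flag
 * the WCE (write cache enable) bit of the caching mode page.
 */
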
static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static dev_t sd_make_device(dev_info_t *devi);

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(struct sd_lun *un);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
#if defined(__i386) || defined(__amd64)
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
#endif /* defined(__i386) || defined(__amd64) */

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
	int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);
static diskaddr_t sd_extract_sense_info_descr(
	struct scsi_descr_sense_hdr *sdsp);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t asc, uint8_t ascq,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	int sense_key, uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	int sense_key,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen);
static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, char feature);
static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag);
#define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
	path_flag)
#define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
	path_flag)

static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
	uint16_t buflen, uchar_t page_code, uchar_t page_control,
	uint16_t param_ptr, int path_flag);

static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
static void sd_free_rqs(struct sd_lun *un);

static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
	uchar_t *data, int len, int fmt);
static void sd_panic_for_res_conflict(struct sd_lun *un);

/*
 * Disk Ioctl Function Prototypes
 */
static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
	int geom_validated);
static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
	int geom_validated);
static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1473 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1474 	int geom_validated);
1475 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1476 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1477 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1478 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1479 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1480 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1481 static int sd_write_label(dev_t dev);
1482 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1483 static void sd_clear_vtoc(struct sd_lun *un);
1484 static void sd_clear_efi(struct sd_lun *un);
1485 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1486 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1487 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1488 static void sd_setup_default_geometry(struct sd_lun *un);
1489 #if defined(__i386) || defined(__amd64)
1490 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1491 #endif
1492 
1493 /*
1494  * Multi-host Ioctl Prototypes
1495  */
1496 static int sd_check_mhd(dev_t dev, int interval);
1497 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1498 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1499 static char *sd_sname(uchar_t status);
1500 static void sd_mhd_resvd_recover(void *arg);
1501 static void sd_resv_reclaim_thread();
1502 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1503 static int sd_reserve_release(dev_t dev, int cmd);
1504 static void sd_rmv_resv_reclaim_req(dev_t dev);
1505 static void sd_mhd_reset_notify_cb(caddr_t arg);
1506 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1507 	mhioc_inkeys_t *usrp, int flag);
1508 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1509 	mhioc_inresvs_t *usrp, int flag);
1510 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1511 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1512 static int sd_mhdioc_release(dev_t dev);
1513 static int sd_mhdioc_register_devid(dev_t dev);
1514 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1515 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1516 
1517 /*
1518  * SCSI removable prototypes
1519  */
1520 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1521 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1522 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1523 static int sr_pause_resume(dev_t dev, int mode);
1524 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1525 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1526 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1527 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1528 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1529 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1530 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1531 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1532 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1533 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1534 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1535 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1536 static int sr_eject(dev_t dev);
1537 static void sr_ejected(register struct sd_lun *un);
1538 static int sr_check_wp(dev_t dev);
1539 static int sd_check_media(dev_t dev, enum dkio_state state);
1540 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1541 static void sd_delayed_cv_broadcast(void *arg);
1542 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1543 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1544 
1545 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1546 
1547 /*
1548  * Function prototypes for non-512 byte sector size support (DVDRAM, MO, etc.).
1549  */
1550 static void sd_check_for_writable_cd(struct sd_lun *un);
1551 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1552 static void sd_wm_cache_destructor(void *wm, void *un);
1553 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1554 	daddr_t endb, ushort_t typ);
1555 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1556 	daddr_t endb);
1557 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1558 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1559 static void sd_read_modify_write_task(void *arg);
1560 static int sddump_do_read_of_rmw(struct sd_lun *un,
1561 	uint64_t blkno, uint64_t nblk,
1562 	struct buf **bpp);
1563 
1564 
1565 /*
1566  * Function prototypes for failfast support.
1567  */
1568 static void sd_failfast_flushq(struct sd_lun *un);
1569 static int sd_failfast_flushq_callback(struct buf *bp);
1570 
1571 /*
1572  * Function prototypes to check for LSI devices
1573  */
1574 static void sd_is_lsi(struct sd_lun *un);
1575 
1576 /*
1577  * Function prototypes for x86 support
1578  */
1579 #if defined(__i386) || defined(__amd64)
1580 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1581 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1582 #endif
1583 
1584 /*
1585  * Constants for failfast support:
1586  *
1587  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1588  * failfast processing being performed.
1589  *
1590  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1591  * failfast processing on all bufs with B_FAILFAST set.
1592  */
1593 
1594 #define	SD_FAILFAST_INACTIVE		0
1595 #define	SD_FAILFAST_ACTIVE		1
1596 
1597 /*
1598  * Bitmask to control behavior of buf(9S) flushes when a transition to
1599  * the failfast state occurs. Optional bits include:
1600  *
1601  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1602  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1603  * be flushed.
1604  *
1605  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1606  * driver, in addition to the regular wait queue. This includes the xbuf
1607  * queues. When clear, only the driver's wait queue will be flushed.
1608  */
1609 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1610 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1611 
1612 /*
1613  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1614  * to flush all queues within the driver.
1615  */
1616 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
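
/*
 * Example (illustrative only): the default above can be overridden at boot
 * via /etc/system.  For instance, to flush ALL bufs and ALL queues on a
 * failfast transition, OR the two flag bits together:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * where 0x3 is SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES.
 */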
1617 
1618 
1619 /*
1620  * SD Testing Fault Injection
1621  */
1622 #ifdef SD_FAULT_INJECTION
1623 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1624 static void sd_faultinjection(struct scsi_pkt *pktp);
1625 static void sd_injection_log(char *buf, struct sd_lun *un);
1626 #endif
1627 
1628 /*
1629  * Device driver ops vector
1630  */
1631 static struct cb_ops sd_cb_ops = {
1632 	sdopen,			/* open */
1633 	sdclose,		/* close */
1634 	sdstrategy,		/* strategy */
1635 	nodev,			/* print */
1636 	sddump,			/* dump */
1637 	sdread,			/* read */
1638 	sdwrite,		/* write */
1639 	sdioctl,		/* ioctl */
1640 	nodev,			/* devmap */
1641 	nodev,			/* mmap */
1642 	nodev,			/* segmap */
1643 	nochpoll,		/* poll */
1644 	sd_prop_op,		/* cb_prop_op */
1645 	0,			/* streamtab  */
1646 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1647 	CB_REV,			/* cb_rev */
1648 	sdaread, 		/* async I/O read entry point */
1649 	sdawrite		/* async I/O write entry point */
1650 };
1651 
1652 static struct dev_ops sd_ops = {
1653 	DEVO_REV,		/* devo_rev, */
1654 	0,			/* refcnt  */
1655 	sdinfo,			/* info */
1656 	nulldev,		/* identify */
1657 	sdprobe,		/* probe */
1658 	sdattach,		/* attach */
1659 	sddetach,		/* detach */
1660 	nodev,			/* reset */
1661 	&sd_cb_ops,		/* driver operations */
1662 	NULL,			/* bus operations */
1663 	sdpower			/* power */
1664 };
1665 
1666 
1667 /*
1668  * This is the loadable module wrapper.
1669  */
1670 #include <sys/modctl.h>
1671 
1672 static struct modldrv modldrv = {
1673 	&mod_driverops,		/* Type of module. This one is a driver */
1674 	SD_MODULE_NAME,		/* Module name. */
1675 	&sd_ops			/* driver ops */
1676 };
1677 
1678 
1679 static struct modlinkage modlinkage = {
1680 	MODREV_1,
1681 	&modldrv,
1682 	NULL
1683 };
1684 
1685 
1686 static struct scsi_asq_key_strings sd_additional_codes[] = {
1687 	0x81, 0, "Logical Unit is Reserved",
1688 	0x85, 0, "Audio Address Not Valid",
1689 	0xb6, 0, "Media Load Mechanism Failed",
1690 	0xb9, 0, "Audio Play Operation Aborted",
1691 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1692 	0x53, 2, "Medium removal prevented",
1693 	0x6f, 0, "Authentication failed during key exchange",
1694 	0x6f, 1, "Key not present",
1695 	0x6f, 2, "Key not established",
1696 	0x6f, 3, "Read without proper authentication",
1697 	0x6f, 4, "Mismatched region to this logical unit",
1698 	0x6f, 5, "Region reset count error",
1699 	0xffff, 0x0, NULL
1700 };
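
/*
 * Note: sd_additional_codes supplements the generic sense-key/ASC/ASCQ
 * tables when logging sense data.  A sketch of how such a table is
 * typically passed to scsi_vu_errmsg(9F) follows; the argument names here
 * are illustrative only:
 *
 *	scsi_vu_errmsg(devp, pktp, sd_label, severity, request_blkno,
 *	    err_blkno, cmdlist, sensep, sd_additional_codes, NULL);
 */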
1701 
1702 
1703 /*
1704  * Struct for passing printing information for sense data messages
1705  */
1706 struct sd_sense_info {
1707 	int	ssi_severity;
1708 	int	ssi_pfa_flag;
1709 };
1710 
1711 /*
1712  * Table of function pointers for iostart-side routines. Separate "chains"
1713  * of layered function calls are formed by placing the function pointers
1714  * sequentially in the desired order. Functions are called according to an
1715  * incrementing table index ordering. The last function in each chain must
1716  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1717  * in the sd_iodone_chain[] array.
1718  *
1719  * Note: It may seem more natural to organize both the iostart and iodone
1720  * functions together, into an array of structures (or some similar
1721  * organization) with a common index, rather than two separate arrays which
1722  * must be maintained in synchronization. The purpose of this division is
1723  * to achieve improved performance: individual arrays allow for more
1724  * effective cache line utilization on certain platforms.
1725  */
1726 
1727 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1728 
1729 
1730 static sd_chain_t sd_iostart_chain[] = {
1731 
1732 	/* Chain for buf IO for disk drive targets (PM enabled) */
1733 	sd_mapblockaddr_iostart,	/* Index: 0 */
1734 	sd_pm_iostart,			/* Index: 1 */
1735 	sd_core_iostart,		/* Index: 2 */
1736 
1737 	/* Chain for buf IO for disk drive targets (PM disabled) */
1738 	sd_mapblockaddr_iostart,	/* Index: 3 */
1739 	sd_core_iostart,		/* Index: 4 */
1740 
1741 	/* Chain for buf IO for removable-media targets (PM enabled) */
1742 	sd_mapblockaddr_iostart,	/* Index: 5 */
1743 	sd_mapblocksize_iostart,	/* Index: 6 */
1744 	sd_pm_iostart,			/* Index: 7 */
1745 	sd_core_iostart,		/* Index: 8 */
1746 
1747 	/* Chain for buf IO for removable-media targets (PM disabled) */
1748 	sd_mapblockaddr_iostart,	/* Index: 9 */
1749 	sd_mapblocksize_iostart,	/* Index: 10 */
1750 	sd_core_iostart,		/* Index: 11 */
1751 
1752 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1753 	sd_mapblockaddr_iostart,	/* Index: 12 */
1754 	sd_checksum_iostart,		/* Index: 13 */
1755 	sd_pm_iostart,			/* Index: 14 */
1756 	sd_core_iostart,		/* Index: 15 */
1757 
1758 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1759 	sd_mapblockaddr_iostart,	/* Index: 16 */
1760 	sd_checksum_iostart,		/* Index: 17 */
1761 	sd_core_iostart,		/* Index: 18 */
1762 
1763 	/* Chain for USCSI commands (all targets) */
1764 	sd_pm_iostart,			/* Index: 19 */
1765 	sd_core_iostart,		/* Index: 20 */
1766 
1767 	/* Chain for checksumming USCSI commands (all targets) */
1768 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1769 	sd_pm_iostart,			/* Index: 22 */
1770 	sd_core_iostart,		/* Index: 23 */
1771 
1772 	/* Chain for "direct" USCSI commands (all targets) */
1773 	sd_core_iostart,		/* Index: 24 */
1774 
1775 	/* Chain for "direct priority" USCSI commands (all targets) */
1776 	sd_core_iostart,		/* Index: 25 */
1777 };
1778 
1779 /*
1780  * Macros to locate the first function of each iostart chain in the
1781  * sd_iostart_chain[] array. These are located by the index in the array.
1782  */
1783 #define	SD_CHAIN_DISK_IOSTART			0
1784 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1785 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1786 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1787 #define	SD_CHAIN_CHKSUM_IOSTART			12
1788 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1789 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1790 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1791 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1792 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1793 
1794 
1795 /*
1796  * Table of function pointers for the iodone-side routines for the driver-
1797  * internal layering mechanism.  The calling sequence for iodone routines
1798  * uses a decrementing table index, so the last routine called in a chain
1799  * must be at the lowest array index location for that chain.  The last
1800  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1801  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1802  * of the functions in an iodone side chain must correspond to the ordering
1803  * of the iostart routines for that chain.  Note that there is no iodone
1804  * side routine that corresponds to sd_core_iostart(), so there is no
1805  * entry in the table for this.
1806  */
1807 
1808 static sd_chain_t sd_iodone_chain[] = {
1809 
1810 	/* Chain for buf IO for disk drive targets (PM enabled) */
1811 	sd_buf_iodone,			/* Index: 0 */
1812 	sd_mapblockaddr_iodone,		/* Index: 1 */
1813 	sd_pm_iodone,			/* Index: 2 */
1814 
1815 	/* Chain for buf IO for disk drive targets (PM disabled) */
1816 	sd_buf_iodone,			/* Index: 3 */
1817 	sd_mapblockaddr_iodone,		/* Index: 4 */
1818 
1819 	/* Chain for buf IO for removable-media targets (PM enabled) */
1820 	sd_buf_iodone,			/* Index: 5 */
1821 	sd_mapblockaddr_iodone,		/* Index: 6 */
1822 	sd_mapblocksize_iodone,		/* Index: 7 */
1823 	sd_pm_iodone,			/* Index: 8 */
1824 
1825 	/* Chain for buf IO for removable-media targets (PM disabled) */
1826 	sd_buf_iodone,			/* Index: 9 */
1827 	sd_mapblockaddr_iodone,		/* Index: 10 */
1828 	sd_mapblocksize_iodone,		/* Index: 11 */
1829 
1830 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1831 	sd_buf_iodone,			/* Index: 12 */
1832 	sd_mapblockaddr_iodone,		/* Index: 13 */
1833 	sd_checksum_iodone,		/* Index: 14 */
1834 	sd_pm_iodone,			/* Index: 15 */
1835 
1836 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1837 	sd_buf_iodone,			/* Index: 16 */
1838 	sd_mapblockaddr_iodone,		/* Index: 17 */
1839 	sd_checksum_iodone,		/* Index: 18 */
1840 
1841 	/* Chain for USCSI commands (non-checksum targets) */
1842 	sd_uscsi_iodone,		/* Index: 19 */
1843 	sd_pm_iodone,			/* Index: 20 */
1844 
1845 	/* Chain for USCSI commands (checksum targets) */
1846 	sd_uscsi_iodone,		/* Index: 21 */
1847 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1848 	sd_pm_iodone,			/* Index: 23 */
1849 
1850 	/* Chain for "direct" USCSI commands (all targets) */
1851 	sd_uscsi_iodone,		/* Index: 24 */
1852 
1853 	/* Chain for "direct priority" USCSI commands (all targets) */
1854 	sd_uscsi_iodone,		/* Index: 25 */
1855 };
1856 
1857 
1858 /*
1859  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1860  * each iodone-side chain. These are located by the array index, but as the
1861  * iodone side functions are called in a decrementing-index order, the
1862  * highest index number in each chain must be specified (as these correspond
1863  * to the first function in the iodone chain that will be called by the core
1864  * at IO completion time).
1865  */
1866 
1867 #define	SD_CHAIN_DISK_IODONE			2
1868 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1869 #define	SD_CHAIN_RMMEDIA_IODONE			8
1870 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1871 #define	SD_CHAIN_CHKSUM_IODONE			15
1872 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1873 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1874 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
1875 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1876 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1877 
1878 
1879 
1880 
1881 /*
1882  * Array to map a layering chain index to the appropriate initpkt routine.
1883  * The redundant entries are present so that the index used for accessing
1884  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1885  * with this table as well.
1886  */
1887 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1888 
1889 static sd_initpkt_t	sd_initpkt_map[] = {
1890 
1891 	/* Chain for buf IO for disk drive targets (PM enabled) */
1892 	sd_initpkt_for_buf,		/* Index: 0 */
1893 	sd_initpkt_for_buf,		/* Index: 1 */
1894 	sd_initpkt_for_buf,		/* Index: 2 */
1895 
1896 	/* Chain for buf IO for disk drive targets (PM disabled) */
1897 	sd_initpkt_for_buf,		/* Index: 3 */
1898 	sd_initpkt_for_buf,		/* Index: 4 */
1899 
1900 	/* Chain for buf IO for removable-media targets (PM enabled) */
1901 	sd_initpkt_for_buf,		/* Index: 5 */
1902 	sd_initpkt_for_buf,		/* Index: 6 */
1903 	sd_initpkt_for_buf,		/* Index: 7 */
1904 	sd_initpkt_for_buf,		/* Index: 8 */
1905 
1906 	/* Chain for buf IO for removable-media targets (PM disabled) */
1907 	sd_initpkt_for_buf,		/* Index: 9 */
1908 	sd_initpkt_for_buf,		/* Index: 10 */
1909 	sd_initpkt_for_buf,		/* Index: 11 */
1910 
1911 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1912 	sd_initpkt_for_buf,		/* Index: 12 */
1913 	sd_initpkt_for_buf,		/* Index: 13 */
1914 	sd_initpkt_for_buf,		/* Index: 14 */
1915 	sd_initpkt_for_buf,		/* Index: 15 */
1916 
1917 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1918 	sd_initpkt_for_buf,		/* Index: 16 */
1919 	sd_initpkt_for_buf,		/* Index: 17 */
1920 	sd_initpkt_for_buf,		/* Index: 18 */
1921 
1922 	/* Chain for USCSI commands (non-checksum targets) */
1923 	sd_initpkt_for_uscsi,		/* Index: 19 */
1924 	sd_initpkt_for_uscsi,		/* Index: 20 */
1925 
1926 	/* Chain for USCSI commands (checksum targets) */
1927 	sd_initpkt_for_uscsi,		/* Index: 21 */
1928 	sd_initpkt_for_uscsi,		/* Index: 22 */
1929 	sd_initpkt_for_uscsi,		/* Index: 23 */
1930 
1931 	/* Chain for "direct" USCSI commands (all targets) */
1932 	sd_initpkt_for_uscsi,		/* Index: 24 */
1933 
1934 	/* Chain for "direct priority" USCSI commands (all targets) */
1935 	sd_initpkt_for_uscsi,		/* Index: 25 */
1936 
1937 };
1938 
1939 
1940 /*
1941  * Array to map a layering chain index to the appropriate destroypkt routine.
1942  * The redundant entries are present so that the index used for accessing
1943  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1944  * with this table as well.
1945  */
1946 typedef void (*sd_destroypkt_t)(struct buf *);
1947 
1948 static sd_destroypkt_t	sd_destroypkt_map[] = {
1949 
1950 	/* Chain for buf IO for disk drive targets (PM enabled) */
1951 	sd_destroypkt_for_buf,		/* Index: 0 */
1952 	sd_destroypkt_for_buf,		/* Index: 1 */
1953 	sd_destroypkt_for_buf,		/* Index: 2 */
1954 
1955 	/* Chain for buf IO for disk drive targets (PM disabled) */
1956 	sd_destroypkt_for_buf,		/* Index: 3 */
1957 	sd_destroypkt_for_buf,		/* Index: 4 */
1958 
1959 	/* Chain for buf IO for removable-media targets (PM enabled) */
1960 	sd_destroypkt_for_buf,		/* Index: 5 */
1961 	sd_destroypkt_for_buf,		/* Index: 6 */
1962 	sd_destroypkt_for_buf,		/* Index: 7 */
1963 	sd_destroypkt_for_buf,		/* Index: 8 */
1964 
1965 	/* Chain for buf IO for removable-media targets (PM disabled) */
1966 	sd_destroypkt_for_buf,		/* Index: 9 */
1967 	sd_destroypkt_for_buf,		/* Index: 10 */
1968 	sd_destroypkt_for_buf,		/* Index: 11 */
1969 
1970 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1971 	sd_destroypkt_for_buf,		/* Index: 12 */
1972 	sd_destroypkt_for_buf,		/* Index: 13 */
1973 	sd_destroypkt_for_buf,		/* Index: 14 */
1974 	sd_destroypkt_for_buf,		/* Index: 15 */
1975 
1976 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1977 	sd_destroypkt_for_buf,		/* Index: 16 */
1978 	sd_destroypkt_for_buf,		/* Index: 17 */
1979 	sd_destroypkt_for_buf,		/* Index: 18 */
1980 
1981 	/* Chain for USCSI commands (non-checksum targets) */
1982 	sd_destroypkt_for_uscsi,	/* Index: 19 */
1983 	sd_destroypkt_for_uscsi,	/* Index: 20 */
1984 
1985 	/* Chain for USCSI commands (checksum targets) */
1986 	sd_destroypkt_for_uscsi,	/* Index: 21 */
1987 	sd_destroypkt_for_uscsi,	/* Index: 22 */
1988 	sd_destroypkt_for_uscsi,	/* Index: 23 */
1989 
1990 	/* Chain for "direct" USCSI commands (all targets) */
1991 	sd_destroypkt_for_uscsi,	/* Index: 24 */
1992 
1993 	/* Chain for "direct priority" USCSI commands (all targets) */
1994 	sd_destroypkt_for_uscsi,	/* Index: 25 */
1995 
1996 };
1997 
1998 
1999 
2000 /*
2001  * Array to map a layering chain index to the appropriate chain "type".
2002  * The chain type indicates a specific property/usage of the chain.
2003  * The redundant entries are present so that the index used for accessing
2004  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2005  * with this table as well.
2006  */
2007 
2008 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2009 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2010 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2011 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2012 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2013 						/* (for error recovery) */
2014 
2015 static int sd_chain_type_map[] = {
2016 
2017 	/* Chain for buf IO for disk drive targets (PM enabled) */
2018 	SD_CHAIN_BUFIO,			/* Index: 0 */
2019 	SD_CHAIN_BUFIO,			/* Index: 1 */
2020 	SD_CHAIN_BUFIO,			/* Index: 2 */
2021 
2022 	/* Chain for buf IO for disk drive targets (PM disabled) */
2023 	SD_CHAIN_BUFIO,			/* Index: 3 */
2024 	SD_CHAIN_BUFIO,			/* Index: 4 */
2025 
2026 	/* Chain for buf IO for removable-media targets (PM enabled) */
2027 	SD_CHAIN_BUFIO,			/* Index: 5 */
2028 	SD_CHAIN_BUFIO,			/* Index: 6 */
2029 	SD_CHAIN_BUFIO,			/* Index: 7 */
2030 	SD_CHAIN_BUFIO,			/* Index: 8 */
2031 
2032 	/* Chain for buf IO for removable-media targets (PM disabled) */
2033 	SD_CHAIN_BUFIO,			/* Index: 9 */
2034 	SD_CHAIN_BUFIO,			/* Index: 10 */
2035 	SD_CHAIN_BUFIO,			/* Index: 11 */
2036 
2037 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2038 	SD_CHAIN_BUFIO,			/* Index: 12 */
2039 	SD_CHAIN_BUFIO,			/* Index: 13 */
2040 	SD_CHAIN_BUFIO,			/* Index: 14 */
2041 	SD_CHAIN_BUFIO,			/* Index: 15 */
2042 
2043 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2044 	SD_CHAIN_BUFIO,			/* Index: 16 */
2045 	SD_CHAIN_BUFIO,			/* Index: 17 */
2046 	SD_CHAIN_BUFIO,			/* Index: 18 */
2047 
2048 	/* Chain for USCSI commands (non-checksum targets) */
2049 	SD_CHAIN_USCSI,			/* Index: 19 */
2050 	SD_CHAIN_USCSI,			/* Index: 20 */
2051 
2052 	/* Chain for USCSI commands (checksum targets) */
2053 	SD_CHAIN_USCSI,			/* Index: 21 */
2054 	SD_CHAIN_USCSI,			/* Index: 22 */
2055 	SD_CHAIN_USCSI,			/* Index: 23 */
2056 
2057 	/* Chain for "direct" USCSI commands (all targets) */
2058 	SD_CHAIN_DIRECT,		/* Index: 24 */
2059 
2060 	/* Chain for "direct priority" USCSI commands (all targets) */
2061 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2062 };
2063 
2064 
2065 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2066 #define	SD_IS_BUFIO(xp)			\
2067 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2068 
2069 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2070 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2071 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2072 
2073 
2074 
2075 /*
2076  * Struct, array, and macros to map a specific chain to the appropriate
2077  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2078  *
2079  * The sd_chain_index_map[] array is used at attach time to set the various
2080  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2081  * chain to be used with the instance. This allows different instances to use
2082  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2083  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2084  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2085  * dynamically & without the use of locking; and (2) a layer may update the
2086  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2087  * to allow for deferred processing of an IO within the same chain from a
2088  * different execution context.
2089  */
2090 
2091 struct sd_chain_index {
2092 	int	sci_iostart_index;
2093 	int	sci_iodone_index;
2094 };
2095 
2096 static struct sd_chain_index	sd_chain_index_map[] = {
2097 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2098 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2099 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2100 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2101 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2102 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2103 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2104 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2105 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2106 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2107 };
2108 
2109 
2110 /*
2111  * The following are indexes into the sd_chain_index_map[] array.
2112  */
2113 
2114 /* un->un_buf_chain_type must be set to one of these */
2115 #define	SD_CHAIN_INFO_DISK		0
2116 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2117 #define	SD_CHAIN_INFO_RMMEDIA		2
2118 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2119 #define	SD_CHAIN_INFO_CHKSUM		4
2120 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2121 
2122 /* un->un_uscsi_chain_type must be set to one of these */
2123 #define	SD_CHAIN_INFO_USCSI_CMD		6
2124 /* USCSI with PM disabled is the same as DIRECT */
2125 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2126 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2127 
2128 /* un->un_direct_chain_type must be set to one of these */
2129 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2130 
2131 /* un->un_priority_chain_type must be set to one of these */
2132 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
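
/*
 * A sketch (illustrative, not the literal attach-time code) of how these
 * indexes tie the pieces together: attach records the chain type in the
 * soft state, and sd_xbuf init resolves it through sd_chain_index_map[]:
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
 *	...
 *	xp->xb_chain_iostart =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iostart_index;
 *	xp->xb_chain_iodone =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iodone_index;
 */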
2133 
2134 /* size for devid inquiries */
2135 #define	MAX_INQUIRY_SIZE		0xF0
2136 
2137 /*
2138  * Macros used by functions to pass a given buf(9S) struct along to the
2139  * next function in the layering chain for further processing.
2140  *
2141  * In the following macros, passing more than three arguments to the called
2142  * routines causes the optimizer for the SPARC compiler to stop doing tail
2143  * call elimination, which results in significant performance degradation.
2144  */
2145 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2146 	((*(sd_iostart_chain[index]))(index, un, bp))
2147 
2148 #define	SD_BEGIN_IODONE(index, un, bp)	\
2149 	((*(sd_iodone_chain[index]))(index, un, bp))
2150 
2151 #define	SD_NEXT_IOSTART(index, un, bp)				\
2152 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2153 
2154 #define	SD_NEXT_IODONE(index, un, bp)				\
2155 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
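
/*
 * A minimal sketch of a pass-through layer built on these macros (the
 * function names are hypothetical; the real layers appear later in this
 * file).  Each iostart routine does its setup and hands the buf to the
 * next layer; each iodone routine unwinds in the reverse order:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(per-layer setup of bp goes here)
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 *	static void
 *	sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(per-layer completion handling goes here)
 *		SD_NEXT_IODONE(index, un, bp);
 *	}
 */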
2156 
2157 /*
2158  *    Function: _init
2159  *
2160  * Description: This is the driver _init(9E) entry point.
2161  *
2162  * Return Code: Returns the value from mod_install(9F) or
2163  *		ddi_soft_state_init(9F) as appropriate.
2164  *
2165  *     Context: Called when driver module loaded.
2166  */
2167 
2168 int
2169 _init(void)
2170 {
2171 	int	err;
2172 
2173 	/* establish driver name from module name */
2174 	sd_label = mod_modname(&modlinkage);
2175 
2176 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2177 		SD_MAXUNIT);
2178 
2179 	if (err != 0) {
2180 		return (err);
2181 	}
2182 
2183 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2184 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2185 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2186 
2187 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2188 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2189 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2190 
2191 	/*
2192 	 * it's ok to init the probe cache here, even for fibre devices
2193 	 */
2194 	sd_scsi_probe_cache_init();
2195 
2196 	/*
2197 	 * Creating taskq before mod_install ensures that all callers (threads)
2198 	 * that enter the module after a successful mod_install encounter
2199 	 * a valid taskq.
2200 	 */
2201 	sd_taskq_create();
2202 
2203 	err = mod_install(&modlinkage);
2204 	if (err != 0) {
2205 		/* delete taskq if install fails */
2206 		sd_taskq_delete();
2207 
2208 		mutex_destroy(&sd_detach_mutex);
2209 		mutex_destroy(&sd_log_mutex);
2210 		mutex_destroy(&sd_label_mutex);
2211 
2212 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2213 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2214 		cv_destroy(&sd_tr.srq_inprocess_cv);
2215 
2216 		sd_scsi_probe_cache_fini();
2217 
2218 		ddi_soft_state_fini(&sd_state);
2219 		return (err);
2220 	}
2221 
2222 	return (err);
2223 }
2224 
2225 
2226 /*
2227  *    Function: _fini
2228  *
2229  * Description: This is the driver _fini(9E) entry point.
2230  *
2231  * Return Code: Returns the value from mod_remove(9F)
2232  *
2233  *     Context: Called when driver module is unloaded.
2234  */
2235 
2236 int
2237 _fini(void)
2238 {
2239 	int err;
2240 
2241 	if ((err = mod_remove(&modlinkage)) != 0) {
2242 		return (err);
2243 	}
2244 
2245 	sd_taskq_delete();
2246 
2247 	mutex_destroy(&sd_detach_mutex);
2248 	mutex_destroy(&sd_log_mutex);
2249 	mutex_destroy(&sd_label_mutex);
2250 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2251 
2252 	sd_scsi_probe_cache_fini();
2253 
2254 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2255 	cv_destroy(&sd_tr.srq_inprocess_cv);
2256 
2257 	ddi_soft_state_fini(&sd_state);
2258 
2259 	return (err);
2260 }
2261 
2262 
2263 /*
2264  *    Function: _info
2265  *
2266  * Description: This is the driver _info(9E) entry point.
2267  *
2268  *   Arguments: modinfop - pointer to the driver modinfo structure
2269  *
2270  * Return Code: Returns the value from mod_info(9F).
2271  *
2272  *     Context: Kernel thread context
2273  */
2274 
2275 int
2276 _info(struct modinfo *modinfop)
2277 {
2278 	return (mod_info(&modlinkage, modinfop));
2279 }
2280 
2281 
2282 /*
2283  * The following routines implement the driver message logging facility.
2284  * They provide component- and level-based debug output filtering.
2285  * Output may also be restricted to messages for a single instance by
2286  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2287  * to NULL, then messages for all instances are printed.
2288  *
2289  * These routines have been cloned from each other due to the language
2290  * constraints of macros and variable argument list processing.
2291  */
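
/*
 * For example (illustrative values only), a developer chasing a problem on
 * a single instance might set the following via /etc/system or a kernel
 * debugger before reproducing it:
 *
 *	sd_component_mask = 0xffffffff;	(all components)
 *	sd_level_mask = SD_LOGMASK_ERROR | SD_LOGMASK_INFO;
 *	sd_debug_un = <soft state pointer of the instance of interest>;
 */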
2292 
2293 
2294 /*
2295  *    Function: sd_log_err
2296  *
2297  * Description: This routine is called by the SD_ERROR macro for debug
2298  *		logging of error conditions.
2299  *
2300  *   Arguments: comp - driver component being logged
2301  *		un   - pointer to driver soft state (unit) structure
2302  *		fmt  - error string and format to be logged
2303  */
2304 
2305 static void
2306 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2307 {
2308 	va_list		ap;
2309 	dev_info_t	*dev;
2310 
2311 	ASSERT(un != NULL);
2312 	dev = SD_DEVINFO(un);
2313 	ASSERT(dev != NULL);
2314 
2315 	/*
2316 	 * Filter messages based on the global component and level masks.
2317 	 * Also print if un matches the value of sd_debug_un, or if
2318 	 * sd_debug_un is set to NULL.
2319 	 */
2320 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2321 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2322 		mutex_enter(&sd_log_mutex);
2323 		va_start(ap, fmt);
2324 		(void) vsprintf(sd_log_buf, fmt, ap);
2325 		va_end(ap);
2326 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2327 		mutex_exit(&sd_log_mutex);
2328 	}
2329 #ifdef SD_FAULT_INJECTION
2330 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2331 	if (un->sd_injection_mask & comp) {
2332 		mutex_enter(&sd_log_mutex);
2333 		va_start(ap, fmt);
2334 		(void) vsprintf(sd_log_buf, fmt, ap);
2335 		va_end(ap);
2336 		sd_injection_log(sd_log_buf, un);
2337 		mutex_exit(&sd_log_mutex);
2338 	}
2339 #endif
2340 }
2341 
2342 
2343 /*
2344  *    Function: sd_log_info
2345  *
2346  * Description: This routine is called by the SD_INFO macro for debug
2347  *		logging of general purpose informational conditions.
2348  *
2349  *   Arguments: comp - driver component being logged
2350  *		un   - pointer to driver soft state (unit) structure
2351  *		fmt  - info string and format to be logged
2352  */
2353 
2354 static void
2355 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2356 {
2357 	va_list		ap;
2358 	dev_info_t	*dev;
2359 
2360 	ASSERT(un != NULL);
2361 	dev = SD_DEVINFO(un);
2362 	ASSERT(dev != NULL);
2363 
2364 	/*
2365 	 * Filter messages based on the global component and level masks.
2366 	 * Also print if un matches the value of sd_debug_un, or if
2367 	 * sd_debug_un is set to NULL.
2368 	 */
2369 	if ((sd_component_mask & component) &&
2370 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2371 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2372 		mutex_enter(&sd_log_mutex);
2373 		va_start(ap, fmt);
2374 		(void) vsprintf(sd_log_buf, fmt, ap);
2375 		va_end(ap);
2376 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2377 		mutex_exit(&sd_log_mutex);
2378 	}
2379 #ifdef SD_FAULT_INJECTION
2380 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2381 	if (un->sd_injection_mask & component) {
2382 		mutex_enter(&sd_log_mutex);
2383 		va_start(ap, fmt);
2384 		(void) vsprintf(sd_log_buf, fmt, ap);
2385 		va_end(ap);
2386 		sd_injection_log(sd_log_buf, un);
2387 		mutex_exit(&sd_log_mutex);
2388 	}
2389 #endif
2390 }
2391 
2392 
2393 /*
2394  *    Function: sd_log_trace
2395  *
2396  * Description: This routine is called by the SD_TRACE macro for debug
2397  *		logging of trace conditions (i.e. function entry/exit).
2398  *
2399  *   Arguments: comp - driver component being logged
2400  *		un   - pointer to driver soft state (unit) structure
2401  *		fmt  - trace string and format to be logged
2402  */
2403 
2404 static void
2405 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2406 {
2407 	va_list		ap;
2408 	dev_info_t	*dev;
2409 
2410 	ASSERT(un != NULL);
2411 	dev = SD_DEVINFO(un);
2412 	ASSERT(dev != NULL);
2413 
2414 	/*
2415 	 * Filter messages based on the global component and level masks.
2416 	 * Also print if un matches the value of sd_debug_un, or if
2417 	 * sd_debug_un is set to NULL.
2418 	 */
2419 	if ((sd_component_mask & component) &&
2420 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2421 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2422 		mutex_enter(&sd_log_mutex);
2423 		va_start(ap, fmt);
2424 		(void) vsprintf(sd_log_buf, fmt, ap);
2425 		va_end(ap);
2426 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2427 		mutex_exit(&sd_log_mutex);
2428 	}
2429 #ifdef SD_FAULT_INJECTION
2430 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2431 	if (un->sd_injection_mask & component) {
2432 		mutex_enter(&sd_log_mutex);
2433 		va_start(ap, fmt);
2434 		(void) vsprintf(sd_log_buf, fmt, ap);
2435 		va_end(ap);
2436 		sd_injection_log(sd_log_buf, un);
2437 		mutex_exit(&sd_log_mutex);
2438 	}
2439 #endif
2440 }
2441 
2442 
2443 /*
2444  *    Function: sdprobe
2445  *
2446  * Description: This is the driver probe(9e) entry point function.
2447  *
2448  *   Arguments: devi - opaque device info handle
2449  *
2450  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2451  *              DDI_PROBE_FAILURE: If the probe failed.
2452  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2453  *				   but may be present in the future.
2454  */
2455 
2456 static int
2457 sdprobe(dev_info_t *devi)
2458 {
2459 	struct scsi_device	*devp;
2460 	int			rval;
2461 	int			instance;
2462 
2463 	/*
2464 	 * if it wasn't for pln, sdprobe could actually be nulldev
2465 	 * in the "__fibre" case.
2466 	 */
2467 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2468 		return (DDI_PROBE_DONTCARE);
2469 	}
2470 
2471 	devp = ddi_get_driver_private(devi);
2472 
2473 	if (devp == NULL) {
2474 		/* Ooops... nexus driver is mis-configured... */
2475 		return (DDI_PROBE_FAILURE);
2476 	}
2477 
2478 	instance = ddi_get_instance(devi);
2479 
2480 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2481 		return (DDI_PROBE_PARTIAL);
2482 	}
2483 
2484 	/*
2485 	 * Call the SCSA utility probe routine to see if we actually
2486 	 * have a target at this SCSI nexus.
2487 	 */
2488 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2489 	case SCSIPROBE_EXISTS:
2490 		switch (devp->sd_inq->inq_dtype) {
2491 		case DTYPE_DIRECT:
2492 			rval = DDI_PROBE_SUCCESS;
2493 			break;
2494 		case DTYPE_RODIRECT:
2495 			/* CDs etc. Can be removable media */
2496 			rval = DDI_PROBE_SUCCESS;
2497 			break;
2498 		case DTYPE_OPTICAL:
2499 			/*
2500 			 * Rewritable optical drive, e.g. HP115AA.
2501 			 * Can also be removable media
2502 			 */
2503 
2504 			/*
2505 			 * Do not attempt to bind to DTYPE_OPTICAL if
2506 			 * pre-Solaris 9 SPARC sd behavior is required.
2507 			 *
2508 			 * If first time through and sd_dtype_optical_bind
2509 			 * has not been set in /etc/system, check properties.
2510 			 */
2511 
2512 			if (sd_dtype_optical_bind < 0) {
2513 				sd_dtype_optical_bind =
2514 				    ddi_prop_get_int(DDI_DEV_T_ANY,
2515 				    devi, 0, "optical-device-bind", 1);
2516 			}
2517 
2518 			if (sd_dtype_optical_bind == 0) {
2519 				rval = DDI_PROBE_FAILURE;
2520 			} else {
2521 				rval = DDI_PROBE_SUCCESS;
2522 			}
2523 			break;
2524 
2525 		case DTYPE_NOTPRESENT:
2526 		default:
2527 			rval = DDI_PROBE_FAILURE;
2528 			break;
2529 		}
2530 		break;
2531 	default:
2532 		rval = DDI_PROBE_PARTIAL;
2533 		break;
2534 	}
2535 
2536 	/*
2537 	 * This routine checks for resource allocation prior to freeing,
2538 	 * so it will take care of the "smart probing" case where a
2539 	 * scsi_probe() may or may not have been issued and will *not*
2540 	 * free previously-freed resources.
2541 	 */
2542 	scsi_unprobe(devp);
2543 	return (rval);
2544 }
2545 
2546 
2547 /*
2548  *    Function: sdinfo
2549  *
2550  * Description: This is the driver getinfo(9e) entry point function.
2551  * 		Given the device number, return the devinfo pointer from
2552  *		the scsi_device structure or the instance number
2553  *		associated with the dev_t.
2554  *
2555  *   Arguments: dip     - pointer to device info structure
2556  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2557  *			  DDI_INFO_DEVT2INSTANCE)
2558  *		arg     - driver dev_t
2559  *		resultp - user buffer for request response
2560  *
2561  * Return Code: DDI_SUCCESS
2562  *              DDI_FAILURE
2563  */
2564 /* ARGSUSED */
2565 static int
2566 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2567 {
2568 	struct sd_lun	*un;
2569 	dev_t		dev;
2570 	int		instance;
2571 	int		error;
2572 
2573 	switch (infocmd) {
2574 	case DDI_INFO_DEVT2DEVINFO:
2575 		dev = (dev_t)arg;
2576 		instance = SDUNIT(dev);
2577 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2578 			return (DDI_FAILURE);
2579 		}
2580 		*result = (void *) SD_DEVINFO(un);
2581 		error = DDI_SUCCESS;
2582 		break;
2583 	case DDI_INFO_DEVT2INSTANCE:
2584 		dev = (dev_t)arg;
2585 		instance = SDUNIT(dev);
2586 		*result = (void *)(uintptr_t)instance;
2587 		error = DDI_SUCCESS;
2588 		break;
2589 	default:
2590 		error = DDI_FAILURE;
2591 	}
2592 	return (error);
2593 }
2594 
2595 /*
2596  *    Function: sd_prop_op
2597  *
2598  * Description: This is the driver prop_op(9e) entry point function.
2599  *		Return the number of blocks for the partition in question
2600  *		or forward the request to the property facilities.
2601  *
2602  *   Arguments: dev       - device number
2603  *		dip       - pointer to device info structure
2604  *		prop_op   - property operator
2605  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2606  *		name      - pointer to property name
2607  *		valuep    - pointer or address of the user buffer
2608  *		lengthp   - property length
2609  *
2610  * Return Code: DDI_PROP_SUCCESS
2611  *              DDI_PROP_NOT_FOUND
2612  *              DDI_PROP_UNDEFINED
2613  *              DDI_PROP_NO_MEMORY
2614  *              DDI_PROP_BUF_TOO_SMALL
2615  */
2616 
2617 static int
2618 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2619 	char *name, caddr_t valuep, int *lengthp)
2620 {
2621 	int		instance = ddi_get_instance(dip);
2622 	struct sd_lun	*un;
2623 	uint64_t	nblocks64;
2624 
2625 	/*
2626 	 * Our dynamic properties are all device specific and size oriented.
2627 	 * Requests issued under conditions where size is valid are passed
2628 	 * to ddi_prop_op_nblocks with the size information; otherwise the
2629 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2630 	 */
2631 	un = ddi_get_soft_state(sd_state, instance);
2632 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2633 	    (un->un_f_geometry_is_valid == FALSE)) {
2634 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2635 		    name, valuep, lengthp));
2636 	} else {
2637 		/* get nblocks value */
2638 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2639 		mutex_enter(SD_MUTEX(un));
2640 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2641 		mutex_exit(SD_MUTEX(un));
2642 
2643 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2644 		    name, valuep, lengthp, nblocks64));
2645 	}
2646 }
2647 
2648 /*
2649  * The following functions are for smart probing:
2650  * sd_scsi_probe_cache_init()
2651  * sd_scsi_probe_cache_fini()
2652  * sd_scsi_clear_probe_cache()
2653  * sd_scsi_probe_with_cache()
2654  */
2655 
2656 /*
2657  *    Function: sd_scsi_probe_cache_init
2658  *
2659  * Description: Initializes the probe response cache mutex and head pointer.
2660  *
2661  *     Context: Kernel thread context
2662  */
2663 
2664 static void
2665 sd_scsi_probe_cache_init(void)
2666 {
2667 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2668 	sd_scsi_probe_cache_head = NULL;
2669 }
2670 
2671 
2672 /*
2673  *    Function: sd_scsi_probe_cache_fini
2674  *
2675  * Description: Frees all resources associated with the probe response cache.
2676  *
2677  *     Context: Kernel thread context
2678  */
2679 
2680 static void
2681 sd_scsi_probe_cache_fini(void)
2682 {
2683 	struct sd_scsi_probe_cache *cp;
2684 	struct sd_scsi_probe_cache *ncp;
2685 
2686 	/* Clean up our smart probing linked list */
2687 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2688 		ncp = cp->next;
2689 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2690 	}
2691 	sd_scsi_probe_cache_head = NULL;
2692 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2693 }
2694 
2695 
2696 /*
2697  *    Function: sd_scsi_clear_probe_cache
2698  *
2699  * Description: This routine clears the probe response cache. This is
2700  *		done when open() returns ENXIO so that when deferred
2701  *		attach is attempted (possibly after a device has been
2702  *		turned on) we will retry the probe. Since we don't know
2703  *		which target we failed to open, we just clear the
2704  *		entire cache.
2705  *
2706  *     Context: Kernel thread context
2707  */
2708 
2709 static void
2710 sd_scsi_clear_probe_cache(void)
2711 {
2712 	struct sd_scsi_probe_cache	*cp;
2713 	int				i;
2714 
2715 	mutex_enter(&sd_scsi_probe_cache_mutex);
2716 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2717 		/*
2718 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2719 		 * force probing to be performed the next time
2720 		 * sd_scsi_probe_with_cache is called.
2721 		 */
2722 		for (i = 0; i < NTARGETS_WIDE; i++) {
2723 			cp->cache[i] = SCSIPROBE_EXISTS;
2724 		}
2725 	}
2726 	mutex_exit(&sd_scsi_probe_cache_mutex);
2727 }
2728 
2729 
2730 /*
2731  *    Function: sd_scsi_probe_with_cache
2732  *
2733  * Description: This routine implements support for a scsi device probe
2734  *		with cache. The driver maintains a cache of the target
2735  *		responses to scsi probes. If we get no response from a
2736  *		target during a probe inquiry, we remember that, and we
2737  *		avoid additional calls to scsi_probe on non-zero LUNs
2738  *		on the same target until the cache is cleared. By doing
2739  *		so we avoid the 1/4 sec selection timeout for nonzero
2740  *		LUNs. lun0 of a target is always probed.
2741  *
2742  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2743  *              waitfn   - indicates what the allocator routines should
2744  *			   do when resources are not available. This value
2745  *			   is passed on to scsi_probe() when that routine
2746  *			   is called.
2747  *
2748  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2749  *		otherwise the value returned by scsi_probe(9F).
2750  *
2751  *     Context: Kernel thread context
2752  */
2753 
2754 static int
2755 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2756 {
2757 	struct sd_scsi_probe_cache	*cp;
2758 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2759 	int		lun, tgt;
2760 
2761 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2762 	    SCSI_ADDR_PROP_LUN, 0);
2763 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2764 	    SCSI_ADDR_PROP_TARGET, -1);
2765 
2766 	/* Make sure caching enabled and target in range */
2767 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2768 		/* do it the old way (no cache) */
2769 		return (scsi_probe(devp, waitfn));
2770 	}
2771 
2772 	mutex_enter(&sd_scsi_probe_cache_mutex);
2773 
2774 	/* Find the cache for this scsi bus instance */
2775 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2776 		if (cp->pdip == pdip) {
2777 			break;
2778 		}
2779 	}
2780 
2781 	/* If we can't find a cache for this pdip, create one */
2782 	if (cp == NULL) {
2783 		int i;
2784 
2785 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2786 		    KM_SLEEP);
2787 		cp->pdip = pdip;
2788 		cp->next = sd_scsi_probe_cache_head;
2789 		sd_scsi_probe_cache_head = cp;
2790 		for (i = 0; i < NTARGETS_WIDE; i++) {
2791 			cp->cache[i] = SCSIPROBE_EXISTS;
2792 		}
2793 	}
2794 
2795 	mutex_exit(&sd_scsi_probe_cache_mutex);
2796 
2797 	/* Recompute the cache for this target if LUN zero */
2798 	if (lun == 0) {
2799 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2800 	}
2801 
2802 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2803 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2804 		return (SCSIPROBE_NORESP);
2805 	}
2806 
2807 	/* Do the actual probe; save & return the result */
2808 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2809 }
2810 
2811 
2812 /*
2813  *    Function: sd_spin_up_unit
2814  *
2815  * Description: Issues the following commands to spin up the device:
2816  *		START STOP UNIT, and INQUIRY.
2817  *
2818  *   Arguments: un - driver soft state (unit) structure
2819  *
2820  * Return Code: 0 - success
2821  *		EIO - failure
2822  *		EACCES - reservation conflict
2823  *
2824  *     Context: Kernel thread context
2825  */
2826 
2827 static int
2828 sd_spin_up_unit(struct sd_lun *un)
2829 {
2830 	size_t	resid		= 0;
2831 	int	has_conflict	= FALSE;
2832 	uchar_t *bufaddr;
2833 
2834 	ASSERT(un != NULL);
2835 
2836 	/*
2837 	 * Send a throwaway START UNIT command.
2838 	 *
2839 	 * If we fail on this, we don't care presently what precisely
2840 	 * is wrong.  EMC's arrays will also fail this with a check
2841 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2842 	 * we don't want to fail the attach because it may become
2843 	 * "active" later.
2844 	 */
2845 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2846 	    == EACCES)
2847 		has_conflict = TRUE;
2848 
2849 	/*
2850 	 * Send another INQUIRY command to the target. This is necessary for
2851 	 * non-removable media direct access devices because their INQUIRY data
2852 	 * may not be fully qualified until they are spun up (perhaps via the
2853 	 * START command above).  (Note: This seems to be needed for some
2854 	 * legacy devices only.) The INQUIRY command should succeed even if a
2855 	 * Reservation Conflict is present.
2856 	 */
2857 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2858 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2859 		kmem_free(bufaddr, SUN_INQSIZE);
2860 		return (EIO);
2861 	}
2862 
2863 	/*
2864 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2865 	 * Note that this routine does not return a failure here even if the
2866 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2867 	 */
2868 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2869 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2870 	}
2871 
2872 	kmem_free(bufaddr, SUN_INQSIZE);
2873 
2874 	/* If we hit a reservation conflict above, tell the caller. */
2875 	if (has_conflict == TRUE) {
2876 		return (EACCES);
2877 	}
2878 
2879 	return (0);
2880 }
2881 
2882 #ifdef _LP64
2883 /*
2884  *    Function: sd_enable_descr_sense
2885  *
2886  * Description: This routine attempts to select descriptor sense format
2887  *		using the Control mode page.  Devices that support 64 bit
2888  *		LBAs (for >2TB luns) should also implement descriptor
2889  *		sense data so we will call this function whenever we see
2890  *		a lun larger than 2TB.  If for some reason the device
2891  *		supports 64 bit LBAs but doesn't support descriptor sense
2892  *		presumably the mode select will fail.  Everything will
2893  *		continue to work normally except that we will not get
2894  *		complete sense data for commands that fail with an LBA
2895  *		larger than 32 bits.
2896  *
2897  *   Arguments: un - driver soft state (unit) structure
2898  *
2899  *     Context: Kernel thread context only
2900  */
2901 
2902 static void
2903 sd_enable_descr_sense(struct sd_lun *un)
2904 {
2905 	uchar_t			*header;
2906 	struct mode_control_scsi3 *ctrl_bufp;
2907 	size_t			buflen;
2908 	size_t			bd_len;
2909 
2910 	/*
2911 	 * Read MODE SENSE page 0xA, Control Mode Page
2912 	 */
2913 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2914 	    sizeof (struct mode_control_scsi3);
2915 	header = kmem_zalloc(buflen, KM_SLEEP);
2916 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
2917 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
2918 		SD_ERROR(SD_LOG_COMMON, un,
2919 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2920 		goto eds_exit;
2921 	}
2922 
2923 	/*
2924 	 * Determine size of Block Descriptors in order to locate
2925 	 * the mode page data. ATAPI devices return 0; SCSI devices
2926 	 * should return MODE_BLK_DESC_LENGTH.
2927 	 */
2928 	bd_len  = ((struct mode_header *)header)->bdesc_length;
2929 
2930 	ctrl_bufp = (struct mode_control_scsi3 *)
2931 	    (header + MODE_HEADER_LENGTH + bd_len);
2932 
2933 	/*
2934 	 * Clear PS bit for MODE SELECT
2935 	 */
2936 	ctrl_bufp->mode_page.ps = 0;
2937 
2938 	/*
2939 	 * Set D_SENSE to enable descriptor sense format.
2940 	 */
2941 	ctrl_bufp->d_sense = 1;
2942 
2943 	/*
2944 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2945 	 */
2946 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
2947 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
2948 		SD_INFO(SD_LOG_COMMON, un,
2949 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2950 		goto eds_exit;
2951 	}
2952 
2953 eds_exit:
2954 	kmem_free(header, buflen);
2955 }
2956 #endif /* _LP64 */
2957 
2958 
2959 /*
2960  *    Function: sd_set_mmc_caps
2961  *
2962  * Description: This routine determines if the device is MMC compliant and if
2963  *		the device supports CDDA via a mode sense of the CDVD
2964  *		capabilities mode page. Also checks if the device is a
2965  *		dvdram writable device.
2966  *
2967  *   Arguments: un - driver soft state (unit) structure
2968  *
2969  *     Context: Kernel thread context only
2970  */
2971 
2972 static void
2973 sd_set_mmc_caps(struct sd_lun *un)
2974 {
2975 	struct mode_header_grp2		*sense_mhp;
2976 	uchar_t				*sense_page;
2977 	caddr_t				buf;
2978 	int				bd_len;
2979 	int				status;
2980 	struct uscsi_cmd		com;
2981 	int				rtn;
2982 	uchar_t				*out_data_rw, *out_data_hd;
2983 	uchar_t				*rqbuf_rw, *rqbuf_hd;
2984 
2985 	ASSERT(un != NULL);
2986 
2987 	/*
2988 	 * The flags which will be set in this function are: mmc compliant,
2989 	 * dvdram writable device, and cdda support. Initialize them to
2990 	 * FALSE; if a capability is detected, the flag will be set to TRUE.
2991 	 */
2992 	un->un_f_mmc_cap = FALSE;
2993 	un->un_f_dvdram_writable_device = FALSE;
2994 	un->un_f_cfg_cdda = FALSE;
2995 
2996 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
2997 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
2998 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
2999 
3000 	if (status != 0) {
3001 		/* command failed; just return */
3002 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3003 		return;
3004 	}
3005 	/*
3006 	 * If the mode sense request for the CDROM CAPABILITIES
3007 	 * page (0x2A) succeeds, the device is assumed to be MMC.
3008 	 */
3009 	un->un_f_mmc_cap = TRUE;
3010 
3011 	/* Get to the page data */
3012 	sense_mhp = (struct mode_header_grp2 *)buf;
3013 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3014 	    sense_mhp->bdesc_length_lo;
3015 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3016 		/*
3017 		 * We did not get back the expected block descriptor
3018 		 * length so we cannot determine if the device supports
3019 		 * CDDA. However, we still indicate the device is MMC
3020 		 * according to the successful response to the page
3021 		 * 0x2A mode sense request.
3022 		 */
3023 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3024 		    "sd_set_mmc_caps: Mode Sense returned "
3025 		    "invalid block descriptor length\n");
3026 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3027 		return;
3028 	}
3029 
3030 	/* See if read CDDA is supported */
3031 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3032 	    bd_len);
3033 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3034 
3035 	/* See if writing DVD RAM is supported. */
3036 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3037 	if (un->un_f_dvdram_writable_device == TRUE) {
3038 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3039 		return;
3040 	}
3041 
3042 	/*
3043 	 * If the device presents DVD or CD capabilities in the mode
3044 	 * page, we can return here since a RRD will not have
3045 	 * these capabilities.
3046 	 */
3047 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3048 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3049 		return;
3050 	}
3051 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3052 
3053 	/*
3054 	 * If un->un_f_dvdram_writable_device is still FALSE,
3055 	 * check for a Removable Rigid Disk (RRD).  A RRD
3056 	 * device is identified by the features RANDOM_WRITABLE and
3057 	 * HARDWARE_DEFECT_MANAGEMENT.
3058 	 */
3059 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3060 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3061 
3062 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3063 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3064 	    RANDOM_WRITABLE);
3065 	if (rtn != 0) {
3066 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3067 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3068 		return;
3069 	}
3070 
3071 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3072 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3073 
3074 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3075 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3076 	    HARDWARE_DEFECT_MANAGEMENT);
3077 	if (rtn == 0) {
3078 		/*
3079 		 * We have good information, check for random writable
3080 		 * and hardware defect features.
3081 		 */
3082 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3083 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3084 			un->un_f_dvdram_writable_device = TRUE;
3085 		}
3086 	}
3087 
3088 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3089 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3090 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3091 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3092 }
3093 
3094 /*
3095  *    Function: sd_check_for_writable_cd
3096  *
3097  * Description: This routine determines if the media in the device is
3098  * Description: This routine determines if the media in the device is
3099  *		writable or not, using the get configuration command
3100  *		(0x46).
3101  *   Arguments: un - driver soft state (unit) structure
3102  *
3103  *     Context: Never called at interrupt context.
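 *
 *		Note: Bytes 6-7 of the GET CONFIGURATION response header
 *		hold the current profile, so the check below for
 *		out_data[6] == 0 and out_data[7] == 0x12 matches MMC
 *		profile 0x0012 (DVD-RAM), which implies writable media.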
3104  */
3105 
3106 static void
3107 sd_check_for_writable_cd(struct sd_lun *un)
3108 {
3109 	struct uscsi_cmd		com;
3110 	uchar_t				*out_data;
3111 	uchar_t				*rqbuf;
3112 	int				rtn;
3113 	uchar_t				*out_data_rw, *out_data_hd;
3114 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3115 	struct mode_header_grp2		*sense_mhp;
3116 	uchar_t				*sense_page;
3117 	caddr_t				buf;
3118 	int				bd_len;
3119 	int				status;
3120 
3121 	ASSERT(un != NULL);
3122 	ASSERT(mutex_owned(SD_MUTEX(un)));
3123 
3124 	/*
3125 	 * Initialize writable media to FALSE; it is set to TRUE only if
3126 	 * the configuration info tells us the media is writable.
3127 	 */
3128 	un->un_f_mmc_writable_media = FALSE;
3129 	mutex_exit(SD_MUTEX(un));
3130 
3131 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3132 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3133 
3134 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3135 	    out_data, SD_PROFILE_HEADER_LEN);
3136 
3137 	mutex_enter(SD_MUTEX(un));
3138 	if (rtn == 0) {
3139 		/*
3140 		 * We have good information, check for writable DVD.
3141 		 */
3142 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3143 			un->un_f_mmc_writable_media = TRUE;
3144 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3145 			kmem_free(rqbuf, SENSE_LENGTH);
3146 			return;
3147 		}
3148 	}
3149 
3150 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3151 	kmem_free(rqbuf, SENSE_LENGTH);
3152 
3153 	/*
3154 	 * Determine if this is a RRD type device.
3155 	 */
3156 	mutex_exit(SD_MUTEX(un));
3157 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3158 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3159 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3160 	mutex_enter(SD_MUTEX(un));
3161 	if (status != 0) {
3162 		/* command failed; just return */
3163 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3164 		return;
3165 	}
3166 
3167 	/* Get to the page data */
3168 	sense_mhp = (struct mode_header_grp2 *)buf;
3169 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3170 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3171 		/*
3172 		 * We did not get back the expected block descriptor length so
3173 		 * we cannot check the mode page.
3174 		 */
3175 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3176 		    "sd_check_for_writable_cd: Mode Sense returned "
3177 		    "invalid block descriptor length\n");
3178 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3179 		return;
3180 	}
3181 
3182 	/*
3183 	 * If the device presents DVD or CD capabilities in the mode
3184 	 * page, we can return here since a RRD device will not have
3185 	 * these capabilities.
3186 	 */
3187 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3188 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3189 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3190 		return;
3191 	}
3192 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3193 
3194 	/*
3195 	 * If un->un_f_mmc_writable_media is still FALSE,
3196 	 * check for RRD type media.  A RRD device is identified
3197 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3198 	 */
3199 	mutex_exit(SD_MUTEX(un));
3200 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3201 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3202 
3203 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3204 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3205 	    RANDOM_WRITABLE);
3206 	if (rtn != 0) {
3207 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3208 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3209 		mutex_enter(SD_MUTEX(un));
3210 		return;
3211 	}
3212 
3213 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3214 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3215 
3216 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3217 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3218 	    HARDWARE_DEFECT_MANAGEMENT);
3219 	mutex_enter(SD_MUTEX(un));
3220 	if (rtn == 0) {
3221 		/*
3222 		 * We have good information, check for random writable
3223 		 * and hardware defect features as current.
3224 		 */
3225 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3226 		    (out_data_rw[10] & 0x1) &&
3227 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3228 		    (out_data_hd[10] & 0x1)) {
3229 			un->un_f_mmc_writable_media = TRUE;
3230 		}
3231 	}
3232 
3233 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3234 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3235 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3236 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3237 }
3238 
3239 /*
3240  *    Function: sd_read_unit_properties
3241  *
3242  * Description: The following implements a property lookup mechanism.
3243  *		Properties for particular disks (keyed on vendor, model
3244  *		and rev numbers) are sought in the sd.conf file via
3245  *		sd_process_sdconf_file(), and if not found there, are
3246  *		looked for in a list hardcoded in this driver via
3247  *		sd_process_sdconf_table(). Once located, the properties
3248  *		are used to update the driver unit structure.
3249  *
3250  *   Arguments: un - driver soft state (unit) structure
3251  */
3252 
3253 static void
3254 sd_read_unit_properties(struct sd_lun *un)
3255 {
3256 	/*
3257 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3258 	 * the "sd-config-list" property (from the sd.conf file) or if
3259 	 * there was not a match for the inquiry vid/pid. If this event
3260 	 * occurs the static driver configuration table is searched for
3261 	 * a match.
3262 	 */
3263 	ASSERT(un != NULL);
3264 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3265 		sd_process_sdconf_table(un);
3266 	}
3267 
3268 	/* check for LSI device */
3269 	sd_is_lsi(un);
3272 }
3273 
3274 
3275 /*
3276  *    Function: sd_process_sdconf_file
3277  *
3278  * Description: Use ddi_getlongprop to obtain the properties from the
3279  *		driver's config file (ie, sd.conf) and update the driver
3280  *		soft state structure accordingly.
3281  *
3282  *   Arguments: un - driver soft state (unit) structure
3283  *
3284  * Return Code: SD_SUCCESS - The properties were successfully set according
3285  *			     to the driver configuration file.
3286  *		SD_FAILURE - The driver config list was not obtained or
3287  *			     there was no vid/pid match. This indicates that
3288  *			     the static config table should be used.
3289  *
3290  * The config file has a property, "sd-config-list", which consists of
3291  * one or more duplets as follows:
3292  *
3293  *  sd-config-list=
3294  *	<duplet>,
3295  *	[<duplet>,]
3296  *	[<duplet>];
3297  *
3298  * The structure of each duplet is as follows:
3299  *
3300  *  <duplet>:= <vid+pid>,<data-property-name_list>
3301  *
3302  * The first entry of the duplet is the device ID string (the concatenated
3303  * vid & pid; not to be confused with a device_id).  This is defined in
3304  * the same way as in the sd_disk_table.
3305  *
3306  * The second part of the duplet is a string that identifies a
3307  * data-property-name-list. The data-property-name-list is defined as
3308  * follows:
3309  *
3310  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3311  *
3312  * The syntax of <data-property-name> depends on the <version> field.
3313  *
3314  * If version = SD_CONF_VERSION_1 we have the following syntax:
3315  *
3316  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3317  *
3318  * where the prop0 value will be used to set prop0 if bit0 set in the
3319  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
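 *
 * For illustration only, a hypothetical sd.conf fragment (the vid/pid
 * and property name are made up) might look like:
 *
 *  sd-config-list= "SEAGATE ST32550W", "seagate-data";
 *  seagate-data= 1,0x1,32,0,0,0,0,0,0,0;
 *
 * where the leading 1 is <version> (SD_CONF_VERSION_1) and 0x1 is the
 * <flags> word with only bit0 set, so prop0 (32) is applied and the
 * remaining property slots are ignored.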
3320  *
3321  */
3322 
3323 static int
3324 sd_process_sdconf_file(struct sd_lun *un)
3325 {
3326 	char	*config_list = NULL;
3327 	int	config_list_len;
3328 	int	len;
3329 	int	dupletlen = 0;
3330 	char	*vidptr;
3331 	int	vidlen;
3332 	char	*dnlist_ptr;
3333 	char	*dataname_ptr;
3334 	int	dnlist_len;
3335 	int	dataname_len;
3336 	int	*data_list;
3337 	int	data_list_len;
3338 	int	rval = SD_FAILURE;
3339 	int	i;
3340 
3341 	ASSERT(un != NULL);
3342 
3343 	/* Obtain the configuration list associated with the .conf file */
3344 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3345 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3346 	    != DDI_PROP_SUCCESS) {
3347 		return (SD_FAILURE);
3348 	}
3349 
3350 	/*
3351 	 * Compare vids in each duplet to the inquiry vid - if a match is
3352 	 * made, get the data value and update the soft state structure
3353 	 * accordingly.
3354 	 *
3355 	 * Note: This algorithm is complex and difficult to maintain. It should
3356 	 * be replaced with a more robust implementation.
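	 *
	 * The raw property value is a sequence of NUL-separated strings;
	 * each duplet is stored as "<vid+pid>\0<data-property-name-list>\0",
	 * e.g. (hypothetical entries):
	 *
	 *   "SEAGATE ST32550W\0seagate-data\0IBM     DDRS\0ibm-data\0"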
3357 	 */
3358 	for (len = config_list_len, vidptr = config_list; len > 0;
3359 	    vidptr += dupletlen, len -= dupletlen) {
3360 		/*
3361 		 * Note: The assumption here is that each vid entry is on
3362 		 * Note: The assumption here is that each vid entry is a
3363 		 * separate NUL-terminated string from the rest of its duplet.
3364 		vidlen = dupletlen = (int)strlen(vidptr);
3365 		if ((vidlen == 0) ||
3366 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3367 			dupletlen++;
3368 			continue;
3369 		}
3370 
3371 		/*
3372 		 * dnlist contains 1 or more blank separated
3373 		 * data-property-name entries
3374 		 */
3375 		dnlist_ptr = vidptr + vidlen + 1;
3376 		dnlist_len = (int)strlen(dnlist_ptr);
3377 		dupletlen += dnlist_len + 2;
3378 
3379 		/*
3380 		 * Set a pointer for the first data-property-name
3381 		 * entry in the list
3382 		 */
3383 		dataname_ptr = dnlist_ptr;
3384 		dataname_len = 0;
3385 
3386 		/*
3387 		 * Loop through all data-property-name entries in the
3388 		 * data-property-name-list setting the properties for each.
3389 		 */
3390 		while (dataname_len < dnlist_len) {
3391 			int version;
3392 
3393 			/*
3394 			 * Determine the length of the current
3395 			 * data-property-name entry by indexing until a
3396 			 * blank or NULL is encountered. When a space is
3397 			 * encountered, reset it to NULL for compliance
3398 			 * with ddi_getlongprop().
3399 			 */
3400 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3401 			    (dataname_ptr[i] != '\0')); i++) {
3402 				;
3403 			}
3404 
3405 			dataname_len += i;
3406 			/* If not null terminated, make it so */
3407 			if (dataname_ptr[i] == ' ') {
3408 				dataname_ptr[i] = '\0';
3409 			}
3410 			dataname_len++;
3411 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3412 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3413 			    vidptr, dataname_ptr);
3414 
3415 			/* Get the data list */
3416 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3417 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3418 			    != DDI_PROP_SUCCESS) {
3419 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3420 				    "sd_process_sdconf_file: data property (%s)"
3421 				    " has no value\n", dataname_ptr);
3422 				dataname_ptr = dnlist_ptr + dataname_len;
3423 				continue;
3424 			}
3425 
3426 			version = data_list[0];
3427 
3428 			if (version == SD_CONF_VERSION_1) {
3429 				sd_tunables values;
3430 
3431 				/* Set the properties */
3432 				if (sd_chk_vers1_data(un, data_list[1],
3433 				    &data_list[2], data_list_len, dataname_ptr)
3434 				    == SD_SUCCESS) {
3435 					sd_get_tunables_from_conf(un,
3436 					    data_list[1], &data_list[2],
3437 					    &values);
3438 					sd_set_vers1_properties(un,
3439 					    data_list[1], &values);
3440 					rval = SD_SUCCESS;
3441 				} else {
3442 					rval = SD_FAILURE;
3443 				}
3444 			} else {
3445 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3446 				    "data property %s version 0x%x is invalid.",
3447 				    dataname_ptr, version);
3448 				rval = SD_FAILURE;
3449 			}
3450 			kmem_free(data_list, data_list_len);
3451 			dataname_ptr = dnlist_ptr + dataname_len;
3452 		}
3453 	}
3454 
3455 	/* free up the memory allocated by ddi_getlongprop */
3456 	if (config_list) {
3457 		kmem_free(config_list, config_list_len);
3458 	}
3459 
3460 	return (rval);
3461 }
3462 
3463 /*
3464  *    Function: sd_get_tunables_from_conf()
3465  *
3466  *
3467  *    This function reads the data list from the sd.conf file and pulls
3468  *    the values that can have numeric values as arguments, placing
3469  *    each value in the appropriate sd_tunables member.
3470  *    Since the order of the data list members varies across platforms,
3471  *    this function reads them from the data list in a platform-specific
3472  *    order and places them into the correct sd_tunables member, which
3473  *    is consistent across all platforms.
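 *
 *    For illustration (hypothetical flags word): with flags = 0x5,
 *    bits 0 and 2 are set, so data_list[0] feeds the member selected
 *    by bit0 (the throttle, per the case order below) and data_list[2]
 *    feeds the member selected by bit2, while data_list[1] is skipped
 *    since bit1 is clear.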
3474  */
3475 static void
3476 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3477     sd_tunables *values)
3478 {
3479 	int i;
3480 	int mask;
3481 
3482 	bzero(values, sizeof (sd_tunables));
3483 
3484 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3485 
3486 		mask = 1 << i;
3487 		if (mask > flags) {
3488 			break;
3489 		}
3490 
3491 		switch (mask & flags) {
3492 		case 0:	/* This mask bit not set in flags */
3493 			continue;
3494 		case SD_CONF_BSET_THROTTLE:
3495 			values->sdt_throttle = data_list[i];
3496 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3497 			    "sd_get_tunables_from_conf: throttle = %d\n",
3498 			    values->sdt_throttle);
3499 			break;
3500 		case SD_CONF_BSET_CTYPE:
3501 			values->sdt_ctype = data_list[i];
3502 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3503 			    "sd_get_tunables_from_conf: ctype = %d\n",
3504 			    values->sdt_ctype);
3505 			break;
3506 		case SD_CONF_BSET_NRR_COUNT:
3507 			values->sdt_not_rdy_retries = data_list[i];
3508 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3509 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3510 			    values->sdt_not_rdy_retries);
3511 			break;
3512 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3513 			values->sdt_busy_retries = data_list[i];
3514 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3515 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3516 			    values->sdt_busy_retries);
3517 			break;
3518 		case SD_CONF_BSET_RST_RETRIES:
3519 			values->sdt_reset_retries = data_list[i];
3520 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3521 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3522 			    values->sdt_reset_retries);
3523 			break;
3524 		case SD_CONF_BSET_RSV_REL_TIME:
3525 			values->sdt_reserv_rel_time = data_list[i];
3526 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3527 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3528 			    values->sdt_reserv_rel_time);
3529 			break;
3530 		case SD_CONF_BSET_MIN_THROTTLE:
3531 			values->sdt_min_throttle = data_list[i];
3532 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3533 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3534 			    values->sdt_min_throttle);
3535 			break;
3536 		case SD_CONF_BSET_DISKSORT_DISABLED:
3537 			values->sdt_disk_sort_dis = data_list[i];
3538 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3539 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3540 			    values->sdt_disk_sort_dis);
3541 			break;
3542 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3543 			values->sdt_lun_reset_enable = data_list[i];
3544 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3545 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3546 			    "\n", values->sdt_lun_reset_enable);
3547 			break;
3548 		}
3549 	}
3550 }
3551 
3552 /*
3553  *    Function: sd_process_sdconf_table
3554  *
3555  * Description: Search the static configuration table for a match on the
3556  *		inquiry vid/pid and update the driver soft state structure
3557  *		according to the table property values for the device.
3558  *
3559  *		The form of a configuration table entry is:
3560  *		  <vid+pid>,<flags>,<property-data>
3561  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3562  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3563  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3564  *
3565  *   Arguments: un - driver soft state (unit) structure
3566  */
3567 
3568 static void
3569 sd_process_sdconf_table(struct sd_lun *un)
3570 {
3571 	char	*id = NULL;
3572 	int	table_index;
3573 	int	idlen;
3574 
3575 	ASSERT(un != NULL);
3576 	for (table_index = 0; table_index < sd_disk_table_size;
3577 	    table_index++) {
3578 		id = sd_disk_table[table_index].device_id;
3579 		idlen = strlen(id);
3580 		if (idlen == 0) {
3581 			continue;
3582 		}
3583 
3584 		/*
3585 		 * The static configuration table currently does not
3586 		 * implement version 10 properties. Additionally,
3587 		 * multiple data-property-name entries are not
3588 		 * implemented in the static configuration table.
3589 		 */
3590 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3591 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3592 			    "sd_process_sdconf_table: disk %s\n", id);
3593 			sd_set_vers1_properties(un,
3594 			    sd_disk_table[table_index].flags,
3595 			    sd_disk_table[table_index].properties);
3596 			break;
3597 		}
3598 	}
3599 }
3600 
3601 
3602 /*
3603  *    Function: sd_sdconf_id_match
3604  *
3605  * Description: This local function implements a case insensitive vid/pid
3606  *		comparison as well as the boundary cases of wild card and
3607  *		multiple blanks.
3608  *
3609  *		Note: An implicit assumption made here is that the scsi
3610  *		inquiry structure will always keep the vid, pid and
3611  *		revision strings in consecutive sequence, so they can be
3612  *		read as a single string. If this assumption is not the
3613  *		case, a separate string, to be used for the check, needs
3614  *		to be built with these strings concatenated.
3615  *
3616  *   Arguments: un - driver soft state (unit) structure
3617  *		id - table or config file vid/pid
3618  *		idlen  - length of the vid/pid (bytes)
3619  *
3620  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3621  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3622  */
3623 
3624 static int
3625 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3626 {
3627 	struct scsi_inquiry	*sd_inq;
3628 	int 			rval = SD_SUCCESS;
3629 
3630 	ASSERT(un != NULL);
3631 	sd_inq = un->un_sd->sd_inq;
3632 	ASSERT(id != NULL);
3633 
3634 	/*
3635 	 * We use the inq_vid as a pointer to a buffer containing the
3636 	 * vid and pid and use the entire vid/pid length of the table
3637 	 * entry for the comparison. This works because the inq_pid
3638 	 * data member follows inq_vid in the scsi_inquiry structure.
3639 	 */
3640 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3641 		/*
3642 		 * The user id string is compared to the inquiry vid/pid
3643 		 * using a case insensitive comparison and ignoring
3644 		 * multiple spaces.
3645 		 */
3646 		rval = sd_blank_cmp(un, id, idlen);
3647 		if (rval != SD_SUCCESS) {
3648 			/*
3649 			 * User id strings that start and end with a "*"
3650 			 * are a special case. These do not have a
3651 			 * specific vendor, and the product string can
3652 			 * appear anywhere in the 16 byte PID portion of
3653 			 * the inquiry data. This is a simple strstr()
3654 			 * type search for the user id in the inquiry data.
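			 * For example, a hypothetical entry of "*CD-ROM*"
			 * would match any device whose inquiry PID contains
			 * the substring "CD-ROM", regardless of vendor.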
3655 			 */
3656 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3657 				char	*pidptr = &id[1];
3658 				int	i;
3659 				int	j;
3660 				int	pidstrlen = idlen - 2;
3661 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3662 				    pidstrlen;
3663 
3664 				if (j < 0) {
3665 					return (SD_FAILURE);
3666 				}
3667 				for (i = 0; i < j; i++) {
3668 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3669 					    pidptr, pidstrlen) == 0) {
3670 						rval = SD_SUCCESS;
3671 						break;
3672 					}
3673 				}
3674 			}
3675 		}
3676 	}
3677 	return (rval);
3678 }
3679 
3680 
3681 /*
3682  *    Function: sd_blank_cmp
3683  *
3684  * Description: If the id string starts and ends with a space, treat
3685  *		multiple consecutive spaces as equivalent to a single
3686  *		space. For example, this causes a sd_disk_table entry
3687  *		of " NEC CDROM " to match a device's id string of
3688  *		"NEC       CDROM".
3689  *
3690  *		Note: The success exit condition for this routine is that
3691  *		the table entry pointer has reached its '\0' terminator and
3692  *		the remaining inquiry length (cnt) is zero. This will happen
3693  *		if the inquiry string returned by the device is padded with
3694  *		spaces to exactly 24 bytes in length (8 byte vid + 16 byte
3695  *		pid), as the SCSI spec requires the inquiry string to be
3696  *		padded with spaces.
3697  *
3698  *   Arguments: un - driver soft state (unit) structure
3699  *		id - table or config file vid/pid
3700  *		idlen  - length of the vid/pid (bytes)
3701  *
3702  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3703  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3704  */
3705 
3706 static int
3707 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3708 {
3709 	char		*p1;
3710 	char		*p2;
3711 	int		cnt;
3712 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3713 	    sizeof (SD_INQUIRY(un)->inq_pid);
3714 
3715 	ASSERT(un != NULL);
3716 	p2 = un->un_sd->sd_inq->inq_vid;
3717 	ASSERT(id != NULL);
3718 	p1 = id;
3719 
3720 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3721 		/*
3722 		 * Note: string p1 is terminated by a NUL but string p2
3723 		 * isn't.  The end of p2 is determined by cnt.
3724 		 */
3725 		for (;;) {
3726 			/* skip over any extra blanks in both strings */
3727 			while ((*p1 != '\0') && (*p1 == ' ')) {
3728 				p1++;
3729 			}
3730 			while ((cnt != 0) && (*p2 == ' ')) {
3731 				p2++;
3732 				cnt--;
3733 			}
3734 
3735 			/* compare the two strings */
3736 			if ((cnt == 0) ||
3737 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3738 				break;
3739 			}
3740 			while ((cnt > 0) &&
3741 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3742 				p1++;
3743 				p2++;
3744 				cnt--;
3745 			}
3746 		}
3747 	}
3748 
3749 	/* return SD_SUCCESS if both strings match */
3750 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3751 }
3752 
3753 
3754 /*
3755  *    Function: sd_chk_vers1_data
3756  *
3757  * Description: Verify the version 1 device properties provided by the
3758  *		user via the configuration file
3759  *
3760  *   Arguments: un	     - driver soft state (unit) structure
3761  *		flags	     - integer mask indicating properties to be set
3762  *		prop_list    - integer list of property values
3763  *		list_len     - length of user provided data
3764  *
3765  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3766  *		SD_FAILURE - Indicates the user provided data is invalid
3767  */
3768 
3769 static int
3770 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3771     int list_len, char *dataname_ptr)
3772 {
3773 	int i;
3774 	int mask = 1;
3775 	int index = 0;
3776 
3777 	ASSERT(un != NULL);
3778 
3779 	/* Check for a NULL property name and list */
3780 	if (dataname_ptr == NULL) {
3781 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3782 		    "sd_chk_vers1_data: NULL data property name.");
3783 		return (SD_FAILURE);
3784 	}
3785 	if (prop_list == NULL) {
3786 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3787 		    "sd_chk_vers1_data: %s NULL data property list.",
3788 		    dataname_ptr);
3789 		return (SD_FAILURE);
3790 	}
3791 
3792 	/* Display a warning if undefined bits are set in the flags */
3793 	if (flags & ~SD_CONF_BIT_MASK) {
3794 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3795 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3796 		    "Properties not set.",
3797 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3798 		return (SD_FAILURE);
3799 	}
3800 
3801 	/*
3802 	 * Verify the length of the list by identifying the highest bit set
3803 	 * in the flags and validating that the property list has a length
3804 	 * up to the index of this bit.
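	 * For illustration (hypothetical flags word): with flags = 0x5
	 * the highest set bit is bit2, so index becomes 3 and the check
	 * below requires at least index + 2 = 5 integers (the version,
	 * the flagword, and values through prop2).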
3805 	 */
3806 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3807 		if (flags & mask) {
3808 			index = i + 1;
3809 		}
3810 		mask <<= 1;
3811 	}
3812 	if ((list_len / sizeof (int)) < (index + 2)) {
3813 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3814 		    "sd_chk_vers1_data: "
3815 		    "Data property list %s size is incorrect. "
3816 		    "Properties not set.", dataname_ptr);
3817 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3818 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3819 		return (SD_FAILURE);
3820 	}
3821 	return (SD_SUCCESS);
3822 }
3823 
3824 
3825 /*
3826  *    Function: sd_set_vers1_properties
3827  *
3828  * Description: Set version 1 device properties based on a property list
3829  *		retrieved from the driver configuration file or static
3830  *		configuration table. Version 1 properties have the format:
3831  *
3832  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3833  *
3834  *		where the prop0 value will be used to set prop0 if bit0
3835  *		is set in the flags
3836  *
3837  *   Arguments: un	     - driver soft state (unit) structure
3838  *		flags	     - integer mask indicating properties to be set
3839  *		prop_list    - integer list of property values
3840  */
3841 
3842 static void
3843 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3844 {
3845 	ASSERT(un != NULL);
3846 
3847 	/*
3848 	 * Set the flag to indicate cache is to be disabled. An attempt
3849 	 * to disable the cache via sd_cache_control() will be made
3850 	 * later during attach once the basic initialization is complete.
3851 	 */
3852 	if (flags & SD_CONF_BSET_NOCACHE) {
3853 		un->un_f_opt_disable_cache = TRUE;
3854 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3855 		    "sd_set_vers1_properties: caching disabled flag set\n");
3856 	}
3857 
3858 	/* CD-specific configuration parameters */
3859 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3860 		un->un_f_cfg_playmsf_bcd = TRUE;
3861 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3862 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3863 	}
3864 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3865 		un->un_f_cfg_readsub_bcd = TRUE;
3866 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3867 		    "sd_set_vers1_properties: readsub_bcd set\n");
3868 	}
3869 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3870 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3871 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3872 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3873 	}
3874 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3875 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3876 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3877 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3878 	}
3879 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3880 		un->un_f_cfg_no_read_header = TRUE;
3881 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3882 		    "sd_set_vers1_properties: no_read_header set\n");
3883 	}
3884 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3885 		un->un_f_cfg_read_cd_xd4 = TRUE;
3886 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3887 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3888 	}
3889 
3890 	/* Support for devices which do not have valid/unique serial numbers */
3891 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3892 		un->un_f_opt_fab_devid = TRUE;
3893 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3894 		    "sd_set_vers1_properties: fab_devid bit set\n");
3895 	}
3896 
3897 	/* Support for user throttle configuration */
3898 	if (flags & SD_CONF_BSET_THROTTLE) {
3899 		ASSERT(prop_list != NULL);
3900 		un->un_saved_throttle = un->un_throttle =
3901 		    prop_list->sdt_throttle;
3902 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3903 		    "sd_set_vers1_properties: throttle set to %d\n",
3904 		    prop_list->sdt_throttle);
3905 	}
3906 
3907 	/* Set the per disk retry count according to the conf file or table. */
3908 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3909 		ASSERT(prop_list != NULL);
3910 		if (prop_list->sdt_not_rdy_retries) {
3911 			un->un_notready_retry_count =
3912 				prop_list->sdt_not_rdy_retries;
3913 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3914 			    "sd_set_vers1_properties: not ready retry count"
3915 			    " set to %d\n", un->un_notready_retry_count);
3916 		}
3917 	}
3918 
3919 	/* The controller type is reported for generic disk driver ioctls */
3920 	if (flags & SD_CONF_BSET_CTYPE) {
3921 		ASSERT(prop_list != NULL);
3922 		switch (prop_list->sdt_ctype) {
3923 		case CTYPE_CDROM:
3924 			un->un_ctype = prop_list->sdt_ctype;
3925 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3926 			    "sd_set_vers1_properties: ctype set to "
3927 			    "CTYPE_CDROM\n");
3928 			break;
3929 		case CTYPE_CCS:
3930 			un->un_ctype = prop_list->sdt_ctype;
3931 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3932 			    "sd_set_vers1_properties: ctype set to "
3933 			    "CTYPE_CCS\n");
3934 			break;
3935 		case CTYPE_ROD:		/* RW optical */
3936 			un->un_ctype = prop_list->sdt_ctype;
3937 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3938 			    "sd_set_vers1_properties: ctype set to "
3939 			    "CTYPE_ROD\n");
3940 			break;
3941 		default:
3942 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3943 			    "sd_set_vers1_properties: Could not set "
3944 			    "invalid ctype value (%d)",
3945 			    prop_list->sdt_ctype);
3946 		}
3947 	}
3948 
3949 	/* Purple failover timeout */
3950 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
3951 		ASSERT(prop_list != NULL);
3952 		un->un_busy_retry_count =
3953 			prop_list->sdt_busy_retries;
3954 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3955 		    "sd_set_vers1_properties: "
3956 		    "busy retry count set to %d\n",
3957 		    un->un_busy_retry_count);
3958 	}
3959 
3960 	/* Purple reset retry count */
3961 	if (flags & SD_CONF_BSET_RST_RETRIES) {
3962 		ASSERT(prop_list != NULL);
3963 		un->un_reset_retry_count =
3964 			prop_list->sdt_reset_retries;
3965 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3966 		    "sd_set_vers1_properties: "
3967 		    "reset retry count set to %d\n",
3968 		    un->un_reset_retry_count);
3969 	}
3970 
3971 	/* Purple reservation release timeout */
3972 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
3973 		ASSERT(prop_list != NULL);
3974 		un->un_reserve_release_time =
3975 			prop_list->sdt_reserv_rel_time;
3976 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3977 		    "sd_set_vers1_properties: "
3978 		    "reservation release timeout set to %d\n",
3979 		    un->un_reserve_release_time);
3980 	}
3981 
3982 	/*
3983 	 * Flag telling the driver to verify that no commands are pending
3984 	 * for a device before issuing a Test Unit Ready. This is a workaround
3985 	 * for a firmware bug in some Seagate eliteI drives.
3986 	 */
3987 	if (flags & SD_CONF_BSET_TUR_CHECK) {
3988 		un->un_f_cfg_tur_check = TRUE;
3989 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3990 		    "sd_set_vers1_properties: tur queue check set\n");
3991 	}
3992 
3993 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
3994 		un->un_min_throttle = prop_list->sdt_min_throttle;
3995 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3996 		    "sd_set_vers1_properties: min throttle set to %d\n",
3997 		    un->un_min_throttle);
3998 	}
3999 
4000 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4001 		un->un_f_disksort_disabled =
4002 		    (prop_list->sdt_disk_sort_dis != 0) ?
4003 		    TRUE : FALSE;
4004 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4005 		    "sd_set_vers1_properties: disksort disabled "
4006 		    "flag set to %d\n",
4007 		    prop_list->sdt_disk_sort_dis);
4008 	}
4009 
4010 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4011 		un->un_f_lun_reset_enabled =
4012 		    (prop_list->sdt_lun_reset_enable != 0) ?
4013 		    TRUE : FALSE;
4014 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4015 		    "sd_set_vers1_properties: lun reset enabled "
4016 		    "flag set to %d\n",
4017 		    prop_list->sdt_lun_reset_enable);
4018 	}
4019 
4020 	/*
4021 	 * Validate the throttle values.
4022 	 * If any of the numbers are invalid, set everything to defaults.
4023 	 */
4024 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4025 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4026 	    (un->un_min_throttle > un->un_throttle)) {
4027 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4028 		un->un_min_throttle = sd_min_throttle;
4029 	}
4030 }
4031 
4032 /*
4033  *   Function: sd_is_lsi()
4034  *
4035  *   Description: Check for LSI devices, stepping through the static
4036  *	device table to match the vid/pid.
4037  *
4038  *   Args: un - ptr to sd_lun
4039  *
4040  *   Notes:  When creating a new LSI property, it must also be added
4041  *		to this function.
4042  */
4043 static void
4044 sd_is_lsi(struct sd_lun *un)
4045 {
4046 	char	*id = NULL;
4047 	int	table_index;
4048 	int	idlen;
4049 	void	*prop;
4050 
4051 	ASSERT(un != NULL);
4052 	for (table_index = 0; table_index < sd_disk_table_size;
4053 	    table_index++) {
4054 		id = sd_disk_table[table_index].device_id;
4055 		idlen = strlen(id);
4056 		if (idlen == 0) {
4057 			continue;
4058 		}
4059 
4060 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4061 			prop = sd_disk_table[table_index].properties;
4062 			if (prop == &lsi_properties ||
4063 			    prop == &lsi_oem_properties ||
4064 			    prop == &lsi_properties_scsi ||
4065 			    prop == &symbios_properties) {
4066 				un->un_f_cfg_is_lsi = TRUE;
4067 			}
4068 			break;
4069 		}
4070 	}
4071 }
4072 
4073 
4074 /*
4075  * The following routines support reading and interpretation of disk labels,
4076  * including Solaris BE (8-slice) vtoc's, Solaris LE (16-slice) vtoc's, and
4077  * fdisk tables.
4078  */
4079 
4080 /*
4081  *    Function: sd_validate_geometry
4082  *
4083  * Description: Read the label from the disk (if present). Update the unit's
4084  *		geometry and vtoc information from the data in the label.
4085  *		Verify that the label is valid.
4086  *
4087  *   Arguments: un - driver soft state (unit) structure
4088  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4089  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4090  *			to use the USCSI "direct" chain and bypass the normal
4091  *			command waitq.
4092  *
4093  * Return Code: 0 - Successful completion
4094  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4095  *			  un->un_blockcount; or label on disk is corrupted
4096  *			  or unreadable.
4097  *		EACCES  - Reservation conflict at the device.
4098  *		ENOMEM  - Resource allocation error
4099  *		ENOTSUP - geometry not applicable
4100  *
4101  *     Context: Kernel thread only (can sleep).
4102  */
4103 
4104 static int
4105 sd_validate_geometry(struct sd_lun *un, int path_flag)
4106 {
4107 	static	char		labelstring[128];
4108 	static	char		buf[256];
4109 	char	*label		= NULL;
4110 	int	label_error	= 0;
4111 	int	gvalid		= un->un_f_geometry_is_valid;
4112 	int	lbasize;
4113 	uint_t	capacity;
4114 	int	count;
4115 
4116 	ASSERT(un != NULL);
4117 	ASSERT(mutex_owned(SD_MUTEX(un)));
4118 
4119 	/*
4120 	 * If the required values are not valid, then try getting them
4121 	 * once via read capacity. If that fails, then fail this call.
4122 	 * This is necessary with the new mpxio failover behavior in
4123 	 * the T300 where we can get an attach for the inactive path
4124 	 * before the active path. The inactive path fails commands with
4125 	 * sense data of 02,04,88; this happens to the read capacity issued
4126 	 * before mpxio has had sufficient knowledge to know if it should
4127 	 * force a fail over or not (which it won't do at attach anyhow).
4128 	 * If the read capacity at attach time fails, un_tgt_blocksize and
4129 	 * un_blockcount won't be valid.
4130 	 */
4131 	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4132 	    (un->un_f_blockcount_is_valid != TRUE)) {
4133 		uint64_t	cap;
4134 		uint32_t	lbasz;
4135 		int		rval;
4136 
4137 		mutex_exit(SD_MUTEX(un));
4138 		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
4139 		    &lbasz, SD_PATH_DIRECT);
4140 		mutex_enter(SD_MUTEX(un));
4141 		if (rval == 0) {
4142 			/*
4143 			 * The following relies on
4144 			 * sd_send_scsi_READ_CAPACITY never
4145 			 * returning 0 for capacity and/or lbasize.
4146 			 */
4147 			sd_update_block_info(un, lbasz, cap);
4148 		}
4149 
4150 		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4151 		    (un->un_f_blockcount_is_valid != TRUE)) {
4152 			return (EINVAL);
4153 		}
4154 	}
4155 
4156 	/*
4157 	 * Copy the lbasize and capacity so that if they're reset while we're
4158 	 * not holding the SD_MUTEX, we will continue to use valid values
4159 	 * after the SD_MUTEX is reacquired. (4119659)
4160 	 */
4161 	lbasize  = un->un_tgt_blocksize;
4162 	capacity = un->un_blockcount;
4163 
4164 #if defined(_SUNOS_VTOC_16)
4165 	/*
4166 	 * Set up the "whole disk" fdisk partition; this should always
4167 	 * exist, regardless of whether the disk contains an fdisk table
4168 	 * or vtoc.
4169 	 */
4170 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
4171 	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
4172 #endif
4173 
4174 	/*
4175 	 * Refresh the logical and physical geometry caches.
4176 	 * (data from MODE SENSE format/rigid disk geometry pages,
4177 	 * and scsi_ifgetcap("geometry").
4178 	 */
4179 	sd_resync_geom_caches(un, capacity, lbasize, path_flag);
4180 
4181 	label_error = sd_use_efi(un, path_flag);
4182 	if (label_error == 0) {
4183 		/* found a valid EFI label */
4184 		SD_TRACE(SD_LOG_IO_PARTITION, un,
4185 			"sd_validate_geometry: found EFI label\n");
4186 		un->un_solaris_offset = 0;
4187 		un->un_solaris_size = capacity;
4188 		return (ENOTSUP);
4189 	}
4190 	if (un->un_blockcount > DK_MAX_BLOCKS) {
4191 		if (label_error == ESRCH) {
4192 			/*
4193 			 * they've configured a LUN over 1TB, but used
4194 			 * format.dat to restrict format's view of the
4195 			 * capacity to be under 1TB
4196 			 */
4197 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4198 "is >1TB and has a VTOC label: use format(1M) to either decrease the");
4199 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
4200 "size to be < 1TB or relabel the disk with an EFI label");
4201 		} else {
4202 			/* unlabeled disk over 1TB */
4203 			return (ENOTSUP);
4204 		}
4205 	}
4206 	label_error = 0;
4207 
4208 	/*
4209 	 * at this point it is either labeled with a VTOC or it is
4210 	 * under 1TB
4211 	 */
4212 	if (un->un_f_vtoc_label_supported) {
4213 		struct	dk_label *dkl;
4214 		offset_t dkl1;
4215 		offset_t label_addr, real_addr;
4216 		int	rval;
4217 		size_t	buffer_size;
4218 
4219 		/*
4220 		 * Note: This will set up un->un_solaris_size and
4221 		 * un->un_solaris_offset.
4222 		 */
4223 		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
4224 		case SD_CMD_RESERVATION_CONFLICT:
4225 			ASSERT(mutex_owned(SD_MUTEX(un)));
4226 			return (EACCES);
4227 		case SD_CMD_FAILURE:
4228 			ASSERT(mutex_owned(SD_MUTEX(un)));
4229 			return (ENOMEM);
4230 		}
4231 
4232 		if (un->un_solaris_size <= DK_LABEL_LOC) {
4233 			/*
4234 			 * Found fdisk table but no Solaris partition entry,
4235 			 * so don't call sd_uselabel() and don't create
4236 			 * a default label.
4237 			 */
4238 			label_error = 0;
4239 			un->un_f_geometry_is_valid = TRUE;
4240 			goto no_solaris_partition;
4241 		}
4242 		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);
4243 
4244 		/*
4245 		 * sys_blocksize != tgt_blocksize, need to re-adjust
4246 		 * blkno and save the index to beginning of dk_label
4247 		 */
4248 		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
4249 		buffer_size = SD_REQBYTES2TGTBYTES(un,
4250 		    sizeof (struct dk_label));
4251 
4252 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
4253 		    "label_addr: 0x%x allocation size: 0x%x\n",
4254 		    label_addr, buffer_size);
4255 		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
4256 		if (dkl == NULL) {
4257 			return (ENOMEM);
4258 		}
4259 
4260 		mutex_exit(SD_MUTEX(un));
4261 		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
4262 		    path_flag);
4263 		mutex_enter(SD_MUTEX(un));
4264 
4265 		switch (rval) {
4266 		case 0:
4267 			/*
4268 			 * sd_uselabel will establish that the geometry
4269 			 * is valid.
4270 			 * For sys_blocksize != tgt_blocksize, need
4271 			 * to index into the beginning of dk_label
4272 			 */
4273 			dkl1 = (daddr_t)dkl
4274 				+ SD_TGTBYTEOFFSET(un, label_addr, real_addr);
4275 			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
4276 			    path_flag) != SD_LABEL_IS_VALID) {
4277 				label_error = EINVAL;
4278 			}
4279 			break;
4280 		case EACCES:
4281 			label_error = EACCES;
4282 			break;
4283 		default:
4284 			label_error = EINVAL;
4285 			break;
4286 		}
4287 
4288 		kmem_free(dkl, buffer_size);
4289 
4290 #if defined(_SUNOS_VTOC_8)
4291 		label = (char *)un->un_asciilabel;
4292 #elif defined(_SUNOS_VTOC_16)
4293 		label = (char *)un->un_vtoc.v_asciilabel;
4294 #else
4295 #error "No VTOC format defined."
4296 #endif
4297 	}
4298 
4299 	/*
4300 	 * If a valid label was not found, AND if no reservation conflict
4301 	 * was detected, then go ahead and create a default label (4069506).
4302 	 */
4303 
4304 	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
4305 		if (un->un_f_geometry_is_valid == FALSE) {
4306 			sd_build_default_label(un);
4307 		}
4308 		label_error = 0;
4309 	}
4310 
4311 no_solaris_partition:
4312 	if ((!un->un_f_has_removable_media ||
4313 	    (un->un_f_has_removable_media &&
4314 		un->un_mediastate == DKIO_EJECTED)) &&
4315 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
4316 		/*
4317 		 * Print out a message indicating who and what we are.
4318 		 * We do this only when we happen to really validate the
4319 		 * geometry. We may call sd_validate_geometry() at other
4320 		 * times, e.g., ioctl()'s like Get VTOC in which case we
4321 		 * don't want to print the label.
4322 		 * If the geometry is valid, print the label string,
4323 		 * else print vendor and product info, if available
4324 		 */
4325 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
4326 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
4327 		} else {
4328 			mutex_enter(&sd_label_mutex);
4329 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
4330 			    labelstring);
4331 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
4332 			    &labelstring[64]);
4333 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
4334 			    labelstring, &labelstring[64]);
4335 			if (un->un_f_blockcount_is_valid == TRUE) {
4336 				(void) sprintf(&buf[strlen(buf)],
4337 				    ", %llu %u byte blocks\n",
4338 				    (longlong_t)un->un_blockcount,
4339 				    un->un_tgt_blocksize);
4340 			} else {
4341 				(void) sprintf(&buf[strlen(buf)],
4342 				    ", (unknown capacity)\n");
4343 			}
4344 			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
4345 			mutex_exit(&sd_label_mutex);
4346 		}
4347 	}
4348 
4349 #if defined(_SUNOS_VTOC_16)
4350 	/*
4351 	 * If we have valid geometry, set up the remaining fdisk partitions.
4352 	 * Note that dkl_cylno is not used for the fdisk map entries, so
4353 	 * we set it to an entirely bogus value.
4354 	 */
4355 	for (count = 0; count < FD_NUMPART; count++) {
4356 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
4357 		un->un_map[FDISK_P1 + count].dkl_nblk =
4358 		    un->un_fmap[count].fmap_nblk;
4359 
4360 		un->un_offset[FDISK_P1 + count] =
4361 		    un->un_fmap[count].fmap_start;
4362 	}
4363 #endif
4364 
4365 	for (count = 0; count < NDKMAP; count++) {
4366 #if defined(_SUNOS_VTOC_8)
4367 		struct dk_map *lp  = &un->un_map[count];
4368 		un->un_offset[count] =
4369 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
4370 #elif defined(_SUNOS_VTOC_16)
4371 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
4372 
4373 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
4374 #else
4375 #error "No VTOC format defined."
4376 #endif
4377 	}
4378 
4379 	return (label_error);
4380 }
4381 
4382 
4383 #if defined(_SUNOS_VTOC_16)
4384 /*
4385  * Macro: MAX_BLKS
4386  *
4387  *	This macro is used for table entries where we need to have the largest
4388  *	possible sector value for that head & SPT (sectors per track)
4389  *	combination.  Other entries for some smaller disk sizes are set by
4390  *	convention to match those used by X86 BIOS usage.
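 *
 *	For example, MAX_BLKS(255, 63) expands to the three initializer
 *	values 0xFFFF * 255 * 63 (= 1,052,819,775 blocks), 255, and 63;
 *	at 512 bytes per block that is the "502.02GB or smaller" entry
 *	in the table below.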
4391  */
4392 #define	MAX_BLKS(heads, spt)	(UINT16_MAX * (heads) * (spt)), (heads), (spt)
4393 
4394 /*
4395  *    Function: sd_convert_geometry
4396  *
4397  * Description: Convert physical geometry into a dk_geom structure. In
4398  *		other words, make sure we don't wrap 16-bit values.
4399  *		e.g. converting from geom_cache to dk_geom
4400  *
4401  *     Context: Kernel thread only
4402  */
4403 static void
4404 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4405 {
4406 	int i;
4407 	static const struct chs_values {
4408 		uint_t max_cap;		/* Max Capacity for this HS. */
4409 		uint_t nhead;		/* Heads to use. */
4410 		uint_t nsect;		/* SPT to use. */
4411 	} CHS_values[] = {
4412 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4413 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4414 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4415 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4416 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4417 	};
4418 
4419 	/* Unlabeled SCSI floppy device */
4420 	if (capacity <= 0x1000) {
4421 		un_g->dkg_nhead = 2;
4422 		un_g->dkg_ncyl = 80;
4423 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4424 		return;
4425 	}
4426 
4427 	/*
4428 	 * For all devices we calculate cylinders using the
4429 	 * heads and sectors we assign based on capacity of the
4430 	 * device.  The table is designed to be compatible with the
4431 	 * way other operating systems lay out fdisk tables for X86
4432 	 * and to ensure that the cylinders never exceed 65535 to
4433 	 * prevent problems with X86 ioctls that report geometry.
4434 	 * We use SPT values that are multiples of 63, since other OSes
4435 	 * that are not limited to 16 bits for cylinders stop at 63 SPT;
4436 	 * we make do by using multiples of 63 SPT.
4437 	 *
4438 	 * Note that capacities greater than or equal to 1TB will simply
4439 	 * get the largest geometry from the table. This should be okay
4440 	 * since disks this large shouldn't be using CHS values anyway.
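	 *
	 * For example, a capacity of 0x00200000 blocks (1GB at 512 bytes
	 * per block) selects the first table entry (64 heads, 32 SPT);
	 * deriving cylinders as capacity / (nhead * nsect) then yields
	 * 0x200000 / 2048 = 1024, well under the 65535 limit.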
4441 	 */
4442 	for (i = 0; CHS_values[i].max_cap < capacity &&
4443 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4444 		;
4445 
4446 	un_g->dkg_nhead = CHS_values[i].nhead;
4447 	un_g->dkg_nsect = CHS_values[i].nsect;
4448 }
4449 #endif
4450 
4451 
4452 /*
4453  *    Function: sd_resync_geom_caches
4454  *
4455  * Description: (Re)initialize both geometry caches: the virtual geometry
4456  *		information is extracted from the HBA (the "geometry"
4457  *		capability), and the physical geometry cache data is
4458  *		generated by issuing MODE SENSE commands.
4459  *
4460  *   Arguments: un - driver soft state (unit) structure
4461  *		capacity - disk capacity in #blocks
4462  *		lbasize - disk block size in bytes
4463  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4464  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4465  *			to use the USCSI "direct" chain and bypass the normal
4466  *			command waitq.
4467  *
4468  *     Context: Kernel thread only (can sleep).
4469  */
4470 
4471 static void
4472 sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4473 	int path_flag)
4474 {
4475 	struct 	geom_cache 	pgeom;
4476 	struct 	geom_cache	*pgeom_p = &pgeom;
4477 	int 	spc;
4478 	unsigned short nhead;
4479 	unsigned short nsect;
4480 
4481 	ASSERT(un != NULL);
4482 	ASSERT(mutex_owned(SD_MUTEX(un)));
4483 
4484 	/*
4485 	 * Ask the controller for its logical geometry.
4486 	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4487 	 * then the lgeom cache will be invalid.
4488 	 */
4489 	sd_get_virtual_geometry(un, capacity, lbasize);
4490 
4491 	/*
4492 	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4493 	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4494 	 */
4495 	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4496 		/*
4497 		 * Note: Perhaps this needs to be more adaptive? The rationale
4498 		 * is that, if there's no HBA geometry from the HBA driver, any
4499 		 * guess is good, since this is the physical geometry. If MODE
4500 		 * SENSE fails, this gives a max cylinder size for non-LBA access.
4501 		 */
4502 		nhead = 255;
4503 		nsect = 63;
4504 	} else {
4505 		nhead = un->un_lgeom.g_nhead;
4506 		nsect = un->un_lgeom.g_nsect;
4507 	}
4508 
4509 	if (ISCD(un)) {
4510 		pgeom_p->g_nhead = 1;
4511 		pgeom_p->g_nsect = nsect * nhead;
4512 	} else {
4513 		pgeom_p->g_nhead = nhead;
4514 		pgeom_p->g_nsect = nsect;
4515 	}
4516 
4517 	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4518 	pgeom_p->g_capacity = capacity;
4519 	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4520 	pgeom_p->g_acyl = 0;
4521 
4522 	/*
4523 	 * Retrieve fresh geometry data from the hardware, stash it
4524 	 * here temporarily before we rebuild the incore label.
4525 	 *
4526 	 * We want to use the MODE SENSE commands to derive the
4527 	 * physical geometry of the device, but if either command
4528 	 * fails, the logical geometry is used as the fallback for
4529 	 * disk label geometry.
4530 	 */
4531 	mutex_exit(SD_MUTEX(un));
4532 	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4533 	mutex_enter(SD_MUTEX(un));
4534 
4535 	/*
4536 	 * Now update the real copy while holding the mutex. This
4537 	 * way the global copy is never in an inconsistent state.
4538 	 */
4539 	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4540 
4541 	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4542 	    "(cached from lgeom)\n");
4543 	SD_INFO(SD_LOG_COMMON, un,
4544 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4545 	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4546 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4547 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4548 	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4549 	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4550 	    un->un_pgeom.g_rpm);
4551 }
4552 
4553 
4554 /*
4555  *    Function: sd_read_fdisk
4556  *
4557  * Description: utility routine to read the fdisk table.
4558  *
4559  *   Arguments: un - driver soft state (unit) structure
4560  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4561  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4562  *			to use the USCSI "direct" chain and bypass the normal
4563  *			command waitq.
4564  *
4565  * Return Code: SD_CMD_SUCCESS
4566  *		SD_CMD_FAILURE
4567  *
4568  *     Context: Kernel thread only (can sleep).
4569  */
4570 /* ARGSUSED */
4571 static int
4572 sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
4573 {
4574 #if defined(_NO_FDISK_PRESENT)
4575 
4576 	un->un_solaris_offset = 0;
4577 	un->un_solaris_size = capacity;
4578 	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4579 	return (SD_CMD_SUCCESS);
4580 
4581 #elif defined(_FIRMWARE_NEEDS_FDISK)
4582 
4583 	struct ipart	*fdp;
4584 	struct mboot	*mbp;
4585 	struct ipart	fdisk[FD_NUMPART];
4586 	int		i;
4587 	char		sigbuf[2];
4588 	caddr_t		bufp;
4589 	int		uidx;
4590 	int		rval;
4591 	int		lba = 0;
4592 	uint_t		solaris_offset;	/* offset to solaris part. */
4593 	daddr_t		solaris_size;	/* size of solaris partition */
4594 	uint32_t	blocksize;
4595 
4596 	ASSERT(un != NULL);
4597 	ASSERT(mutex_owned(SD_MUTEX(un)));
4598 	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);
4599 
4600 	blocksize = un->un_tgt_blocksize;
4601 
4602 	/*
4603 	 * Start off assuming no fdisk table
4604 	 */
4605 	solaris_offset = 0;
4606 	solaris_size   = capacity;
4607 
4608 	mutex_exit(SD_MUTEX(un));
4609 	bufp = kmem_zalloc(blocksize, KM_SLEEP);
4610 	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
4611 	mutex_enter(SD_MUTEX(un));
4612 
4613 	if (rval != 0) {
4614 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4615 		    "sd_read_fdisk: fdisk read err\n");
4616 		kmem_free(bufp, blocksize);
4617 		return (SD_CMD_FAILURE);
4618 	}
4619 
4620 	mbp = (struct mboot *)bufp;
4621 
4622 	/*
4623 	 * The fdisk table does not begin on a 4-byte boundary within the
4624 	 * master boot record, so we copy it to an aligned structure to avoid
4625 	 * alignment exceptions on some processors.
4626 	 */
4627 	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
4628 
4629 	/*
4630 	 * Check for lba support before verifying sig; sig might not be
4631 	 * there, say on a blank disk, but the max_chs mark may still
4632 	 * be present.
4633 	 *
4634 	 * Note: LBA support and BEFs are an x86-only concept but this
4635 	 * code should work OK on SPARC as well.
4636 	 */
4637 
4638 	/*
4639 	 * First, check for lba-access-ok on root node (or prom root node)
4640 	 * if present there, don't need to search fdisk table.
4641 	 */
4642 	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
4643 	    "lba-access-ok", 0) != 0) {
4644 		/* All drives do LBA; don't search fdisk table */
4645 		lba = 1;
4646 	} else {
4647 		/* Okay, look for mark in fdisk table */
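		/*
		 * A partition that ends beyond what CHS addressing can
		 * describe is marked by fdisk with its CHS fields pegged
		 * at the maximum encodable values (see sd_has_max_chs_vals()
		 * below), so finding such a mark implies the drive is
		 * LBA-addressed.
		 */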
4648 		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4649 			/* accumulate "lba" value from all partitions */
4650 			lba = (lba || sd_has_max_chs_vals(fdp));
4651 		}
4652 	}
4653 
4654 	if (lba != 0) {
4655 		dev_t dev = sd_make_device(SD_DEVINFO(un));
4656 
4657 		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
4658 		    "lba-access-ok", 0) == 0) {
4659 			/* not found; create it */
4660 			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
4661 			    "lba-access-ok", (caddr_t)NULL, 0) !=
4662 			    DDI_PROP_SUCCESS) {
4663 				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4664 				    "sd_read_fdisk: Can't create lba property "
4665 				    "for instance %d\n",
4666 				    ddi_get_instance(SD_DEVINFO(un)));
4667 			}
4668 		}
4669 	}
4670 
4671 	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));
4672 
4673 	/*
4674 	 * Endian-independent signature check
4675 	 */
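	/*
	 * MBB_MAGIC is the 16-bit value 0xAA55, stored little-endian in
	 * the last two bytes of the boot sector, so checking the two
	 * bytes individually avoids both endianness assumptions and a
	 * possibly misaligned 16-bit load.
	 */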
4676 	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
4677 	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
4678 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4679 		    "sd_read_fdisk: no fdisk\n");
4680 		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4681 		rval = SD_CMD_SUCCESS;
4682 		goto done;
4683 	}
4684 
4685 #ifdef SDDEBUG
4686 	if (sd_level_mask & SD_LOGMASK_INFO) {
4687 		fdp = fdisk;
4688 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
4689 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
4690 		    "numsect         sysid       bootid\n");
4691 		for (i = 0; i < FD_NUMPART; i++, fdp++) {
4692 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4693 			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
4694 			    i, fdp->relsect, fdp->numsect,
4695 			    fdp->systid, fdp->bootid);
4696 		}
4697 	}
4698 #endif
4699 
4700 	/*
4701 	 * Try to find the unix partition
4702 	 */
4703 	uidx = -1;
4704 	solaris_offset = 0;
4705 	solaris_size   = 0;
4706 
4707 	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4708 		int	relsect;
4709 		int	numsect;
4710 
4711 		if (fdp->numsect == 0) {
4712 			un->un_fmap[i].fmap_start = 0;
4713 			un->un_fmap[i].fmap_nblk  = 0;
4714 			continue;
4715 		}
4716 
4717 		/*
4718 		 * Data in the fdisk table is little-endian.
4719 		 */
4720 		relsect = LE_32(fdp->relsect);
4721 		numsect = LE_32(fdp->numsect);
4722 
4723 		un->un_fmap[i].fmap_start = relsect;
4724 		un->un_fmap[i].fmap_nblk  = numsect;
4725 
4726 		if (fdp->systid != SUNIXOS &&
4727 		    fdp->systid != SUNIXOS2 &&
4728 		    fdp->systid != EFI_PMBR) {
4729 			continue;
4730 		}
4731 
4732 		/*
4733 		 * Use the last active Solaris partition id found
4734 		 * (there should be only one active partition id).
4735 		 *
4736 		 * If there is no active Solaris partition id,
4737 		 * then use the first inactive Solaris partition id found.
4738 		 */
4739 		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
4740 			uidx = i;
4741 			solaris_offset = relsect;
4742 			solaris_size   = numsect;
4743 		}
4744 	}
4745 
4746 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
4747 	    un->un_solaris_offset, un->un_solaris_size);
4748 
4749 	rval = SD_CMD_SUCCESS;
4750 
4751 done:
4752 
4753 	/*
4754 	 * Clear the VTOC info, only if the Solaris partition entry
4755 	 * has moved, changed size, been deleted, or if the size of
4756 	 * the partition is too small to even fit the label sector.
4757 	 */
4758 	if ((un->un_solaris_offset != solaris_offset) ||
4759 	    (un->un_solaris_size != solaris_size) ||
4760 	    (solaris_size <= DK_LABEL_LOC)) {
4761 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
4762 			solaris_offset, solaris_size);
4763 		bzero(&un->un_g, sizeof (struct dk_geom));
4764 		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
4765 		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
4766 		un->un_f_geometry_is_valid = FALSE;
4767 	}
4768 	un->un_solaris_offset = solaris_offset;
4769 	un->un_solaris_size = solaris_size;
4770 	kmem_free(bufp, blocksize);
4771 	return (rval);
4772 
4773 #else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
4774 #error "fdisk table presence undetermined for this platform."
4775 #endif	/* #if defined(_NO_FDISK_PRESENT) */
4776 }
4777 
4778 
4779 /*
4780  *    Function: sd_get_physical_geometry
4781  *
4782  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4783  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4784  *		target, and use this information to initialize the physical
4785  *		geometry cache specified by pgeom_p.
4786  *
4787  *		MODE SENSE is an optional command, so failure in this case
4788  *		does not necessarily denote an error. We want to use the
4789  *		MODE SENSE commands to derive the physical geometry of the
4790  *		device, but if either command fails, the logical geometry is
4791  *		used as the fallback for disk label geometry.
4792  *
4793  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4794  *		have already been initialized for the current target and
4795  *		that the current values be passed as args so that we don't
4796  *		end up ever trying to use -1 as a valid value. This could
4797  *		happen if either value is reset while we're not holding
4798  *		the mutex.
4799  *
4800  *   Arguments: un - driver soft state (unit) structure
4801  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4802  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4803  *			to use the USCSI "direct" chain and bypass the normal
4804  *			command waitq.
4805  *
4806  *     Context: Kernel thread only (can sleep).
4807  */
4808 
4809 static void
4810 sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
4811 	int capacity, int lbasize, int path_flag)
4812 {
4813 	struct	mode_format	*page3p;
4814 	struct	mode_geometry	*page4p;
4815 	struct	mode_header	*headerp;
4816 	int	sector_size;
4817 	int	nsect;
4818 	int	nhead;
4819 	int	ncyl;
4820 	int	intrlv;
4821 	int	spc;
4822 	int	modesense_capacity;
4823 	int	rpm;
4824 	int	bd_len;
4825 	int	mode_header_length;
4826 	uchar_t	*p3bufp;
4827 	uchar_t	*p4bufp;
4828 	int	cdbsize;
4829 
4830 	ASSERT(un != NULL);
4831 	ASSERT(!(mutex_owned(SD_MUTEX(un))));
4832 
4833 	if (un->un_f_blockcount_is_valid != TRUE) {
4834 		return;
4835 	}
4836 
4837 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
4838 		return;
4839 	}
4840 
4841 	if (lbasize == 0) {
4842 		if (ISCD(un)) {
4843 			lbasize = 2048;
4844 		} else {
4845 			lbasize = un->un_sys_blocksize;
4846 		}
4847 	}
4848 	pgeom_p->g_secsize = (unsigned short)lbasize;
4849 
4850 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4851 
4852 	/*
4853 	 * Retrieve MODE SENSE page 3 - Format Device Page
4854 	 */
4855 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4856 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4857 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4858 	    != 0) {
4859 		SD_ERROR(SD_LOG_COMMON, un,
4860 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4861 		goto page3_exit;
4862 	}
4863 
4864 	/*
4865 	 * Determine size of Block Descriptors in order to locate the mode
4866 	 * page data.  ATAPI devices return 0, SCSI devices should return
4867 	 * MODE_BLK_DESC_LENGTH.
4868 	 */
4869 	headerp = (struct mode_header *)p3bufp;
4870 	if (un->un_f_cfg_is_atapi == TRUE) {
4871 		struct mode_header_grp2 *mhp =
4872 		    (struct mode_header_grp2 *)headerp;
4873 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4874 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4875 	} else {
4876 		mode_header_length = MODE_HEADER_LENGTH;
4877 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4878 	}
4879 
4880 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4881 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4882 		    "received unexpected bd_len of %d, page3\n", bd_len);
4883 		goto page3_exit;
4884 	}
4885 
4886 	page3p = (struct mode_format *)
4887 	    ((caddr_t)headerp + mode_header_length + bd_len);
4888 
4889 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4890 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4891 		    "mode sense pg3 code mismatch %d\n",
4892 		    page3p->mode_page.code);
4893 		goto page3_exit;
4894 	}
4895 
4896 	/*
4897 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4898 	 * complete successfully; otherwise, revert to the logical geometry.
4899 	 * So, we need to save everything in temporary variables.
4900 	 */
4901 	sector_size = BE_16(page3p->data_bytes_sect);
4902 
4903 	/*
4904 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4905 	 */
4906 	if (sector_size == 0) {
4907 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
4908 	} else {
4909 		sector_size &= ~(un->un_sys_blocksize - 1);
4910 	}
4911 
4912 	nsect  = BE_16(page3p->sect_track);
4913 	intrlv = BE_16(page3p->interleave);
4914 
4915 	SD_INFO(SD_LOG_COMMON, un,
4916 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4917 	SD_INFO(SD_LOG_COMMON, un,
4918 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4919 	    page3p->mode_page.code, nsect, sector_size);
4920 	SD_INFO(SD_LOG_COMMON, un,
4921 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4922 	    BE_16(page3p->track_skew),
4923 	    BE_16(page3p->cylinder_skew));
4924 
4925 
4926 	/*
4927 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4928 	 */
4929 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4930 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4931 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4932 	    != 0) {
4933 		SD_ERROR(SD_LOG_COMMON, un,
4934 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4935 		goto page4_exit;
4936 	}
4937 
4938 	/*
4939 	 * Determine size of Block Descriptors in order to locate the mode
4940 	 * page data.  ATAPI devices return 0, SCSI devices should return
4941 	 * MODE_BLK_DESC_LENGTH.
4942 	 */
4943 	headerp = (struct mode_header *)p4bufp;
4944 	if (un->un_f_cfg_is_atapi == TRUE) {
4945 		struct mode_header_grp2 *mhp =
4946 		    (struct mode_header_grp2 *)headerp;
4947 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4948 	} else {
4949 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4950 	}
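	/*
	 * Note: mode_header_length retains the value computed during the
	 * page 3 parse above; we can only get here if that parse succeeded,
	 * and the same cdbsize (hence the same header format) is used for
	 * both requests.
	 */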
4951 
4952 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4953 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4954 		    "received unexpected bd_len of %d, page4\n", bd_len);
4955 		goto page4_exit;
4956 	}
4957 
4958 	page4p = (struct mode_geometry *)
4959 	    ((caddr_t)headerp + mode_header_length + bd_len);
4960 
4961 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4962 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4963 		    "mode sense pg4 code mismatch %d\n",
4964 		    page4p->mode_page.code);
4965 		goto page4_exit;
4966 	}
4967 
4968 	/*
4969 	 * Stash the data now, after we know that both commands completed.
4970 	 */
4971 
4972 	mutex_enter(SD_MUTEX(un));
4973 
4974 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4975 	spc   = nhead * nsect;
4976 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4977 	rpm   = BE_16(page4p->rpm);
4978 
4979 	modesense_capacity = spc * ncyl;
4980 
4981 	SD_INFO(SD_LOG_COMMON, un,
4982 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4983 	SD_INFO(SD_LOG_COMMON, un,
4984 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4985 	SD_INFO(SD_LOG_COMMON, un,
4986 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4987 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4988 	    (void *)pgeom_p, capacity);
4989 
4990 	/*
4991 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4992 	 * the product of C * H * S returned by MODE SENSE >= that returned
4993 	 * by read capacity. This is an idiosyncrasy of the original x86
4994 	 * disk subsystem.
4995 	 */
4996 	if (modesense_capacity >= capacity) {
4997 		SD_INFO(SD_LOG_COMMON, un,
4998 		    "sd_get_physical_geometry: adjusting acyl; "
4999 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5000 		    (modesense_capacity - capacity + spc - 1) / spc);
5001 		if (sector_size != 0) {
5002 			/* 1243403: NEC D38x7 drives don't support sec size */
5003 			pgeom_p->g_secsize = (unsigned short)sector_size;
5004 		}
5005 		pgeom_p->g_nsect    = (unsigned short)nsect;
5006 		pgeom_p->g_nhead    = (unsigned short)nhead;
5007 		pgeom_p->g_capacity = capacity;
5008 		pgeom_p->g_acyl	    =
5009 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5010 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
5011 	}
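	/*
	 * For example, if MODE SENSE reports 1020 cylinders, 16 heads and
	 * 63 sectors per track (1028160 blocks) while READ CAPACITY reports
	 * 1024000 blocks, then spc = 1008 and
	 * g_acyl = (1028160 - 1024000 + 1007) / 1008 = 5, leaving
	 * g_ncyl = 1015 data cylinders.
	 */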
5012 
5013 	pgeom_p->g_rpm    = (unsigned short)rpm;
5014 	pgeom_p->g_intrlv = (unsigned short)intrlv;
5015 
5016 	SD_INFO(SD_LOG_COMMON, un,
5017 	    "sd_get_physical_geometry: mode sense geometry:\n");
5018 	SD_INFO(SD_LOG_COMMON, un,
5019 	    "   nsect: %d; sector size: %d; interlv: %d\n",
5020 	    nsect, sector_size, intrlv);
5021 	SD_INFO(SD_LOG_COMMON, un,
5022 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5023 	    nhead, ncyl, rpm, modesense_capacity);
5024 	SD_INFO(SD_LOG_COMMON, un,
5025 	    "sd_get_physical_geometry: (cached)\n");
5026 	SD_INFO(SD_LOG_COMMON, un,
5027 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5028 	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
5029 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
5030 	SD_INFO(SD_LOG_COMMON, un,
5031 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5032 	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
5033 	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);
5034 
5035 	mutex_exit(SD_MUTEX(un));
5036 
5037 page4_exit:
5038 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5039 page3_exit:
5040 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5041 }
5042 
5043 
5044 /*
5045  *    Function: sd_get_virtual_geometry
5046  *
5047  * Description: Ask the controller to tell us about the target device.
5048  *
5049  *   Arguments: un - pointer to softstate
5050  *		capacity - disk capacity in #blocks
5051  *		lbasize - disk block size in bytes
5052  *
5053  *     Context: Kernel thread only
5054  */
5055 
5056 static void
5057 sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5058 {
5059 	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5060 	uint_t	geombuf;
5061 	int	spc;
5062 
5063 	ASSERT(un != NULL);
5064 	ASSERT(mutex_owned(SD_MUTEX(un)));
5065 
5066 	mutex_exit(SD_MUTEX(un));
5067 
5068 	/* Set sector size, and total number of sectors */
5069 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5070 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5071 
5072 	/* Let the HBA tell us its geometry */
5073 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
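	/*
	 * The "geometry" capability packs nhead into the upper 16 bits of
	 * the returned value and nsect into the lower 16 bits, as unpacked
	 * below.
	 */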
5074 
5075 	mutex_enter(SD_MUTEX(un));
5076 
5077 	/* A value of -1 indicates an undefined "geometry" property */
5078 	if (geombuf == (-1)) {
5079 		return;
5080 	}
5081 
5082 	/* Initialize the logical geometry cache. */
5083 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5084 	lgeom_p->g_nsect   = geombuf & 0xffff;
5085 	lgeom_p->g_secsize = un->un_sys_blocksize;
5086 
5087 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5088 
5089 	/*
5090 	 * Note: The driver originally converted the capacity value from
5091 	 * target blocks to system blocks. However, the capacity value passed
5092 	 * to this routine is already in terms of system blocks (this scaling
5093 	 * is done when the READ CAPACITY command is issued and processed).
5094 	 * This 'error' may have gone undetected because the usage of g_ncyl
5095 	 * (which is based upon g_capacity) is very limited within the driver
5096 	 */
5097 	lgeom_p->g_capacity = capacity;
5098 
5099 	/*
5100 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value.
5101 	 * The hba may return zero values if the device has been removed.
5102 	 */
5103 	if (spc == 0) {
5104 		lgeom_p->g_ncyl = 0;
5105 	} else {
5106 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5107 	}
5108 	lgeom_p->g_acyl = 0;
5109 
5110 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5111 	SD_INFO(SD_LOG_COMMON, un,
5112 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5113 	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5114 	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5115 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5116 	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5117 	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5118 }
5119 
5120 
5121 /*
5122  *    Function: sd_update_block_info
5123  *
5124  * Description: Record the new target sector size and capacity in the
5125  *		soft state, marking each value as valid when non-zero.
5126  *
5127  *   Arguments: un: unit struct.
5128  *		lbasize: new target sector size
5129  *		capacity: new target capacity, ie. block count
5130  *
5131  *     Context: Kernel thread context
5132  */
5133 
5134 static void
5135 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5136 {
5137 	if (lbasize != 0) {
5138 		un->un_tgt_blocksize = lbasize;
5139 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5140 	}
5141 
5142 	if (capacity != 0) {
5143 		un->un_blockcount		= capacity;
5144 		un->un_f_blockcount_is_valid	= TRUE;
5145 	}
5146 }
5147 
5148 
5149 static void
5150 sd_swap_efi_gpt(efi_gpt_t *e)
5151 {
5152 	_NOTE(ASSUMING_PROTECTED(*e))
5153 	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5154 	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5155 	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5156 	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5157 	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5158 	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5159 	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5160 	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5161 	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5162 	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5163 	e->efi_gpt_NumberOfPartitionEntries =
5164 	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5165 	e->efi_gpt_SizeOfPartitionEntry =
5166 	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5167 	e->efi_gpt_PartitionEntryArrayCRC32 =
5168 	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5169 }
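/*
 * On-disk GPT structures are always little-endian per the EFI
 * specification, so the LE_*() conversions above and below are no-ops on
 * little-endian hosts and byte swaps on big-endian (e.g. SPARC) hosts.
 */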
5170 
5171 static void
5172 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5173 {
5174 	int i;
5175 
5176 	_NOTE(ASSUMING_PROTECTED(*p))
5177 	for (i = 0; i < nparts; i++) {
5178 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5179 		    p[i].efi_gpe_PartitionTypeGUID);
5180 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5181 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5182 		/* PartitionAttrs */
5183 	}
5184 }
5185 
5186 static int
5187 sd_validate_efi(efi_gpt_t *labp)
5188 {
5189 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5190 		return (EINVAL);
5191 	/* at least 96 bytes in this version of the spec. */
5192 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5193 	    labp->efi_gpt_HeaderSize)
5194 		return (EINVAL);
5195 	/* this should be 128 bytes */
5196 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5197 		return (EINVAL);
5198 	return (0);
5199 }
5200 
5201 static int
5202 sd_use_efi(struct sd_lun *un, int path_flag)
5203 {
5204 	int		i;
5205 	int		rval = 0;
5206 	efi_gpe_t	*partitions;
5207 	uchar_t		*buf;
5208 	uint_t		lbasize;
5209 	uint64_t	cap;
5210 	uint_t		nparts;
5211 	diskaddr_t	gpe_lba;
5212 
5213 	ASSERT(mutex_owned(SD_MUTEX(un)));
5214 	lbasize = un->un_tgt_blocksize;
	/*
	 * Initialize cap from the current blockcount so that
	 * un_solaris_size is well-defined below even when the primary
	 * label validates and the READ CAPACITY in the backup path is
	 * never issued.
	 */
	cap = un->un_blockcount;
5215 
5216 	mutex_exit(SD_MUTEX(un));
5217 
5218 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5219 
5220 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5221 		rval = EINVAL;
5222 		goto done_err;
5223 	}
5224 
5225 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5226 	if (rval) {
5227 		goto done_err;
5228 	}
5229 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5230 		/* not ours */
5231 		rval = ESRCH;
5232 		goto done_err;
5233 	}
5234 
5235 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5236 	if (rval) {
5237 		goto done_err;
5238 	}
5239 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5240 
5241 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5242 		/*
5243 		 * Couldn't read the primary, try the backup.  Our
5244 		 * capacity at this point could be based on CHS, so
5245 		 * check what the device reports.
5246 		 */
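		/*
		 * The backup GPT header lives in the last addressable
		 * block of the disk, hence the read of LBA (cap - 1)
		 * below once the true capacity is known.
		 */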
5247 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5248 		    path_flag);
5249 		if (rval) {
5250 			goto done_err;
5251 		}
5252 
5253 		/*
5254 		 * The MMC standard allows READ CAPACITY to be
5255 		 * inaccurate by a bounded amount (in the interest of
5256 		 * response latency).  As a result, failed READs are
5257 		 * commonplace (due to the reading of metadata and not
5258 		 * data). Depending on the per-Vendor/drive Sense data,
5259 		 * the failed READ can cause many (unnecessary) retries.
5260 		 */
5261 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5262 		    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5263 		    path_flag)) != 0) {
5264 			goto done_err;
5265 		}
5266 
5267 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5268 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5269 			goto done_err;
5270 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5271 		    "primary label corrupt; using backup\n");
5272 	}
5273 
5274 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5275 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5276 
5277 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5278 	    path_flag);
5279 	if (rval) {
5280 		goto done_err;
5281 	}
5282 	partitions = (efi_gpe_t *)buf;
5283 
5284 	if (nparts > MAXPART) {
5285 		nparts = MAXPART;
5286 	}
5287 	sd_swap_efi_gpe(nparts, partitions);
5288 
5289 	mutex_enter(SD_MUTEX(un));
5290 
5291 	/* Fill in partition table. */
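	/*
	 * For EFI labels, addressing is purely block-based: dkl_cylno is
	 * used here to hold a partition's starting LBA rather than a
	 * cylinder number, and dkl_nblk holds its length in blocks.
	 */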
5292 	for (i = 0; i < nparts; i++) {
5293 		if (partitions->efi_gpe_StartingLBA != 0 ||
5294 		    partitions->efi_gpe_EndingLBA != 0) {
5295 			un->un_map[i].dkl_cylno =
5296 			    partitions->efi_gpe_StartingLBA;
5297 			un->un_map[i].dkl_nblk =
5298 			    partitions->efi_gpe_EndingLBA -
5299 			    partitions->efi_gpe_StartingLBA + 1;
5300 			un->un_offset[i] =
5301 			    partitions->efi_gpe_StartingLBA;
5302 		}
5303 		if (i == WD_NODE) {
5304 			/*
5305 			 * minor number 7 corresponds to the whole disk
5306 			 */
5307 			un->un_map[i].dkl_cylno = 0;
5308 			un->un_map[i].dkl_nblk = un->un_blockcount;
5309 			un->un_offset[i] = 0;
5310 		}
5311 		partitions++;
5312 	}
5313 	un->un_solaris_offset = 0;
5314 	un->un_solaris_size = cap;
5315 	un->un_f_geometry_is_valid = TRUE;
5316 
5317 	/* clear the vtoc label */
5318 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5319 
5320 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5321 	return (0);
5322 
5323 done_err:
5324 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5325 	mutex_enter(SD_MUTEX(un));
5326 	/*
5327 	 * if we didn't find something that could look like a VTOC
5328 	 * and the disk is over 1TB, we know there isn't a valid label.
5329 	 * Otherwise let sd_uselabel decide what to do.  We only
5330 	 * want to invalidate this if we're certain the label isn't
5331 	 * valid because sd_prop_op will now fail, which in turn
5332 	 * causes things like opens and stats on the partition to fail.
5333 	 */
5334 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5335 		un->un_f_geometry_is_valid = FALSE;
5336 	}
5337 	return (rval);
5338 }
5339 
5340 
5341 /*
5342  *    Function: sd_uselabel
5343  *
5344  * Description: Validate the disk label and update the relevant data (geometry,
5345  *		partition, vtoc, and capacity data) in the sd_lun struct.
5346  *		Marks the geometry of the unit as being valid.
5347  *
5348  *   Arguments: un: unit struct.
5349  *		dk_label: disk label
5350  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5351  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5352  *			to use the USCSI "direct" chain and bypass the normal
5353  *			command waitq.
5354  *
5355  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5356  *		partition, vtoc, and capacity data are good.
5357  *
5358  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5359  *		label; or computed capacity does not jibe with capacity
5360  *		reported from the READ CAPACITY command.
5361  *
5362  *     Context: Kernel thread only (can sleep).
5363  */
5364 
5365 static int
5366 sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
5367 {
5368 	short	*sp;
5369 	short	sum;
5370 	short	count;
5371 	int	label_error = SD_LABEL_IS_VALID;
5372 	int	i;
5373 	int	capacity;
5374 	int	part_end;
5375 	int	track_capacity;
5376 	int	err;
5377 #if defined(_SUNOS_VTOC_16)
5378 	struct	dkl_partition	*vpartp;
5379 #endif
5380 	ASSERT(un != NULL);
5381 	ASSERT(mutex_owned(SD_MUTEX(un)));
5382 
5383 	/* Validate the magic number of the label. */
5384 	if (labp->dkl_magic != DKL_MAGIC) {
5385 #if defined(__sparc)
5386 		if ((un->un_state == SD_STATE_NORMAL) &&
5387 		    un->un_f_vtoc_errlog_supported) {
5388 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5389 			    "Corrupt label; wrong magic number\n");
5390 		}
5391 #endif
5392 		return (SD_LABEL_IS_INVALID);
5393 	}
5394 
5395 	/* Validate the checksum of the label. */
5396 	sp  = (short *)labp;
5397 	sum = 0;
5398 	count = sizeof (struct dk_label) / sizeof (short);
5399 	while (count--)	 {
5400 		sum ^= *sp++;
5401 	}
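	/*
	 * dkl_cksum is written so that the XOR of every 16-bit word in the
	 * label, the checksum itself included, is zero; any nonzero sum
	 * here therefore indicates a corrupted label.
	 */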
5402 
5403 	if (sum != 0) {
5404 #if	defined(_SUNOS_VTOC_16)
5405 		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
5406 #elif defined(_SUNOS_VTOC_8)
5407 		if ((un->un_state == SD_STATE_NORMAL) &&
5408 		    un->un_f_vtoc_errlog_supported) {
5409 #endif
5410 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5411 			    "Corrupt label - label checksum failed\n");
5412 		}
5413 		return (SD_LABEL_IS_INVALID);
5414 	}
5415 
5416 
5417 	/*
5418 	 * Fill in geometry structure with data from label.
5419 	 */
5420 	bzero(&un->un_g, sizeof (struct dk_geom));
5421 	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
5422 	un->un_g.dkg_acyl   = labp->dkl_acyl;
5423 	un->un_g.dkg_bcyl   = 0;
5424 	un->un_g.dkg_nhead  = labp->dkl_nhead;
5425 	un->un_g.dkg_nsect  = labp->dkl_nsect;
5426 	un->un_g.dkg_intrlv = labp->dkl_intrlv;
5427 
5428 #if defined(_SUNOS_VTOC_8)
5429 	un->un_g.dkg_gap1   = labp->dkl_gap1;
5430 	un->un_g.dkg_gap2   = labp->dkl_gap2;
5431 	un->un_g.dkg_bhead  = labp->dkl_bhead;
5432 #endif
5433 #if defined(_SUNOS_VTOC_16)
5434 	un->un_dkg_skew = labp->dkl_skew;
5435 #endif
5436 
5437 #if defined(__i386) || defined(__amd64)
5438 	un->un_g.dkg_apc = labp->dkl_apc;
5439 #endif
5440 
5441 	/*
5442 	 * Currently we rely on the values in the label being accurate. If
5443 	 * dkl_rpm or dkl_pcyl are zero in the label, use a default value.
5444 	 *
5445 	 * Note: In the future a MODE SENSE may be used to retrieve this data,
5446 	 * although this command is optional in SCSI-2.
5447 	 */
5448 	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
5449 	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
5450 	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);
5451 
5452 	/*
5453 	 * The Read and Write reinstruct values may not be valid
5454 	 * for older disks.
5455 	 */
5456 	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
5457 	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;
5458 
5459 	/* Fill in partition table. */
5460 #if defined(_SUNOS_VTOC_8)
5461 	for (i = 0; i < NDKMAP; i++) {
5462 		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
5463 		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
5464 	}
5465 #endif
5466 #if  defined(_SUNOS_VTOC_16)
5467 	vpartp		= labp->dkl_vtoc.v_part;
5468 	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;
5469 
5470 	/* Prevent divide by zero */
5471 	if (track_capacity == 0) {
5472 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5473 		    "Corrupt label - zero nhead or nsect value\n");
5474 
5475 		return (SD_LABEL_IS_INVALID);
5476 	}
5477 
5478 	for (i = 0; i < NDKMAP; i++, vpartp++) {
5479 		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
5480 		un->un_map[i].dkl_nblk  = vpartp->p_size;
5481 	}
5482 #endif
5483 
5484 	/* Fill in VTOC Structure. */
5485 	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
5486 #if defined(_SUNOS_VTOC_8)
5487 	/*
5488 	 * The 8-slice vtoc does not include the ascii label; save it into
5489 	 * the device's soft state structure here.
5490 	 */
5491 	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
5492 #endif
5493 
5494 	/* Now look for a valid capacity. */
5495 	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
5496 	capacity	= (un->un_g.dkg_ncyl  * track_capacity);
5497 
5498 	if (un->un_g.dkg_acyl) {
5499 #if defined(__i386) || defined(__amd64)
5500 		/* we may have > 1 alts cylinder */
5501 		capacity += (track_capacity * un->un_g.dkg_acyl);
5502 #else
5503 		capacity += track_capacity;
5504 #endif
5505 	}
5506 
5507 	/*
5508 	 * Force check here to ensure the computed capacity is valid.
5509 	 * If capacity is zero, it indicates an invalid label and
5510 	 * we should abort updating the relevant data then.
5511 	 */
5512 	if (capacity == 0) {
5513 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5514 		    "Corrupt label - no valid capacity could be retrieved\n");
5515 
5516 		return (SD_LABEL_IS_INVALID);
5517 	}
5518 
5519 	/* Mark the geometry as valid. */
5520 	un->un_f_geometry_is_valid = TRUE;
5521 
5522 	/*
5523 	 * At this point, un->un_blockcount should contain valid data from
5524 	 * the READ CAPACITY command.
5525 	 */
5526 	if (un->un_f_blockcount_is_valid != TRUE) {
5527 		/*
5528 		 * We have a situation where the target didn't give us a good
5529 		 * READ CAPACITY value, yet there appears to be a valid label.
5530 		 * In this case, we'll fake the capacity.
5531 		 */
5532 		un->un_blockcount = capacity;
5533 		un->un_f_blockcount_is_valid = TRUE;
5534 		goto done;
5535 	}
5536 
5537 
5538 	if ((capacity <= un->un_blockcount) ||
5539 	    (un->un_state != SD_STATE_NORMAL)) {
5540 #if defined(_SUNOS_VTOC_8)
5541 		/*
5542 		 * We can't let this happen on drives that are subdivided
5543 		 * into logical disks (i.e., that have an fdisk table).
5544 		 * The un_blockcount field should always hold the full media
5545 		 * size in sectors, period.  This code would overwrite
5546 		 * un_blockcount with the size of the Solaris fdisk partition.
5547 		 */
5548 		SD_ERROR(SD_LOG_COMMON, un,
5549 		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
5550 		    capacity, un->un_blockcount);
5551 		un->un_blockcount = capacity;
5552 		un->un_f_blockcount_is_valid = TRUE;
5553 #endif	/* defined(_SUNOS_VTOC_8) */
5554 		goto done;
5555 	}
5556 
5557 	if (ISCD(un)) {
5558 		/* For CDROMs, we trust that the data in the label is OK. */
5559 #if defined(_SUNOS_VTOC_8)
5560 		for (i = 0; i < NDKMAP; i++) {
5561 			part_end = labp->dkl_nhead * labp->dkl_nsect *
5562 			    labp->dkl_map[i].dkl_cylno +
5563 			    labp->dkl_map[i].dkl_nblk  - 1;
5564 
5565 			if ((labp->dkl_map[i].dkl_nblk) &&
5566 			    (part_end > un->un_blockcount)) {
5567 				un->un_f_geometry_is_valid = FALSE;
5568 				break;
5569 			}
5570 		}
5571 #endif
5572 #if defined(_SUNOS_VTOC_16)
5573 		vpartp = &(labp->dkl_vtoc.v_part[0]);
5574 		for (i = 0; i < NDKMAP; i++, vpartp++) {
5575 			part_end = vpartp->p_start + vpartp->p_size;
5576 			if ((vpartp->p_size > 0) &&
5577 			    (part_end > un->un_blockcount)) {
5578 				un->un_f_geometry_is_valid = FALSE;
5579 				break;
5580 			}
5581 		}
5582 #endif
5583 	} else {
5584 		uint64_t t_capacity;
5585 		uint32_t t_lbasize;
5586 
5587 		mutex_exit(SD_MUTEX(un));
5588 		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
5589 		    path_flag);
5590 		ASSERT(t_capacity <= DK_MAX_BLOCKS);
5591 		mutex_enter(SD_MUTEX(un));
5592 
5593 		if (err == 0) {
5594 			sd_update_block_info(un, t_lbasize, t_capacity);
5595 		}
5596 
5597 		if (capacity > un->un_blockcount) {
5598 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5599 			    "Corrupt label - bad geometry\n");
5600 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
5601 			    "Label says %u blocks; Drive says %llu blocks\n",
5602 			    capacity, (unsigned long long)un->un_blockcount);
5603 			un->un_f_geometry_is_valid = FALSE;
5604 			label_error = SD_LABEL_IS_INVALID;
5605 		}
5606 	}
5607 
5608 done:
5609 
5610 	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
5611 	SD_INFO(SD_LOG_COMMON, un,
5612 	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
5613 	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
5614 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5615 	SD_INFO(SD_LOG_COMMON, un,
5616 	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
5617 	    un->un_tgt_blocksize, un->un_blockcount,
5618 	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
5619 	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
5620 	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);
5621 
5622 	ASSERT(mutex_owned(SD_MUTEX(un)));
5623 
5624 	return (label_error);
5625 }
5626 
5627 
5628 /*
5629  *    Function: sd_build_default_label
5630  *
5631  * Description: Generate a default label for those devices that do not have
5632  *		one, e.g., new media, removable cartridges, etc..
5633  *
5634  *     Context: Kernel thread only
5635  */
5636 
5637 static void
5638 sd_build_default_label(struct sd_lun *un)
5639 {
5640 #if defined(_SUNOS_VTOC_16)
5641 	uint_t	phys_spc;
5642 	uint_t	disksize;
5643 	struct	dk_geom un_g;
5644 #endif
5645 
5646 	ASSERT(un != NULL);
5647 	ASSERT(mutex_owned(SD_MUTEX(un)));
5648 
5649 #if defined(_SUNOS_VTOC_8)
5650 	/*
5651 	 * Note: This is a legacy check for non-removable devices on VTOC_8
5652 	 * only. This may be a valid check for VTOC_16 as well.
5653 	 * Once we understand why there is this difference between SPARC and
5654 	 * x86 platforms, we could remove this legacy check.
5655 	 */
5656 	ASSERT(un->un_f_default_vtoc_supported);
5657 #endif
5658 
5659 	bzero(&un->un_g, sizeof (struct dk_geom));
5660 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5661 	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
5662 
5663 #if defined(_SUNOS_VTOC_8)
5664 
5665 	/*
5666 	 * It's REMOVABLE media, therefore no label (on sparc, anyway).
5667 	 * But it is still necessary to set up various geometry information,
5668 	 * and we are doing this here.
5669 	 */
5670 
5671 	/*
5672 	 * For the rpm, we use the minimum for the disk.  For the heads and
5673 	 * sectors per track: if the capacity is <= 1GB use head = 64 and
5674 	 * sect = 32, else use head = 255 and sect = 63.  Note: the capacity
5675 	 * should equal the C*H*S product, so this causes some truncation of
5676 	 * size due to round-off errors.  For CD-ROMs this truncation can have
5677 	 * adverse side effects, so ncyl and nhead are returned as 1; nsect
5678 	 * would overflow for most CD-ROMs, as nsect is of type ushort. (4190569)
5679 	 */
5680 	if (ISCD(un)) {
5681 		/*
5682 		 * Preserve the old behavior for non-writable
5683 		 * media.  Since dkg_nsect is a ushort, it
5684 		 * will lose bits, as cdroms can have more than
5685 		 * 65536 sectors, so if we recalculated the
5686 		 * capacity it would become much smaller.
5687 		 * The dkg_* information is not used for
5688 		 * CDROMs, so that is OK.  But for writable
5689 		 * CDs we need this information to be valid
5690 		 * (for newfs, say), so we make nsect and
5691 		 * nhead > 1; that way nsect can still stay
5692 		 * within the ushort limit without losing
5693 		 * any bits.
5694 		 */
5695 		if (un->un_f_mmc_writable_media == TRUE) {
5696 			un->un_g.dkg_nhead = 64;
5697 			un->un_g.dkg_nsect = 32;
5698 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
5699 			un->un_blockcount = un->un_g.dkg_ncyl *
5700 			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5701 		} else {
5702 			un->un_g.dkg_ncyl  = 1;
5703 			un->un_g.dkg_nhead = 1;
5704 			un->un_g.dkg_nsect = un->un_blockcount;
5705 		}
5706 	} else {
5707 		if (un->un_blockcount <= 0x1000) {
5708 			/* unlabeled SCSI floppy device */
5709 			un->un_g.dkg_nhead = 2;
5710 			un->un_g.dkg_ncyl = 80;
5711 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
5712 		} else if (un->un_blockcount <= 0x200000) {
5713 			un->un_g.dkg_nhead = 64;
5714 			un->un_g.dkg_nsect = 32;
5715 			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
5716 		} else {
5717 			un->un_g.dkg_nhead = 255;
5718 			un->un_g.dkg_nsect = 63;
5719 			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
5720 		}
5721 		un->un_blockcount =
5722 		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5723 	}
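	/*
	 * For example, a 2097152-block (1GB) disk falls in the middle case
	 * above and gets nhead = 64, nsect = 32 and
	 * ncyl = 2097152 / 2048 = 1024; un_blockcount is then recomputed as
	 * ncyl * nhead * nsect, so any blocks that do not fill a whole
	 * cylinder are dropped.
	 */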
5724 
5725 	un->un_g.dkg_acyl	= 0;
5726 	un->un_g.dkg_bcyl	= 0;
5727 	un->un_g.dkg_rpm	= 200;
5728 	un->un_asciilabel[0]	= '\0';
5729 	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;
5730 
5731 	un->un_map[0].dkl_cylno = 0;
5732 	un->un_map[0].dkl_nblk  = un->un_blockcount;
5733 	un->un_map[2].dkl_cylno = 0;
5734 	un->un_map[2].dkl_nblk  = un->un_blockcount;
5735 
5736 #elif defined(_SUNOS_VTOC_16)
5737 
5738 	if (un->un_solaris_size == 0) {
5739 		/*
5740 		 * Got an fdisk table but no Solaris entry, therefore
5741 		 * don't create a default label.
5742 		 */
5743 		un->un_f_geometry_is_valid = TRUE;
5744 		return;
5745 	}
5746 
5747 	/*
5748 	 * For CDs we continue to use the physical geometry to calculate
5749 	 * number of cylinders. All other devices must convert the
5750 	 * physical geometry (geom_cache) to values that will fit
5751 	 * in a dk_geom structure.
5752 	 */
5753 	if (ISCD(un)) {
5754 		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
5755 	} else {
5756 		/* Convert physical geometry to disk geometry */
5757 		bzero(&un_g, sizeof (struct dk_geom));
5758 		sd_convert_geometry(un->un_blockcount, &un_g);
5759 		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
5760 		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5761 	}
5762 
5763 	ASSERT(phys_spc != 0);
5764 	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
5765 	un->un_g.dkg_acyl = DK_ACYL;
5766 	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
5767 	disksize = un->un_g.dkg_ncyl * phys_spc;
5768 
5769 	if (ISCD(un)) {
5770 		/*
5771 		 * CDs don't use the "heads * sectors * cyls"-type of
5772 		 * geometry, but instead use the entire capacity of the media.
5773 		 */
5774 		disksize = un->un_solaris_size;
5775 		un->un_g.dkg_nhead = 1;
5776 		un->un_g.dkg_nsect = 1;
5777 		un->un_g.dkg_rpm =
5778 		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;
5779 
5780 		un->un_vtoc.v_part[0].p_start = 0;
5781 		un->un_vtoc.v_part[0].p_size  = disksize;
5782 		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
5783 		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;
5784 
5785 		un->un_map[0].dkl_cylno = 0;
5786 		un->un_map[0].dkl_nblk  = disksize;
5787 		un->un_offset[0] = 0;
5788 
5789 	} else {
5790 		/*
5791 		 * Hard disks and removable media cartridges
5792 		 */
5793 		un->un_g.dkg_rpm =
5794 		    (un->un_pgeom.g_rpm == 0) ? 3600 : un->un_pgeom.g_rpm;
5795 		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;
5796 
5797 		/* Add boot slice */
5798 		un->un_vtoc.v_part[8].p_start = 0;
5799 		un->un_vtoc.v_part[8].p_size  = phys_spc;
5800 		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
5801 		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;
5802 
5803 		un->un_map[8].dkl_cylno = 0;
5804 		un->un_map[8].dkl_nblk  = phys_spc;
5805 		un->un_offset[8] = 0;
5806 	}
5807 
5808 	un->un_g.dkg_apc = 0;
5809 	un->un_vtoc.v_nparts = V_NUMPAR;
5810 	un->un_vtoc.v_version = V_VERSION;
5811 
5812 	/* Add backup slice */
5813 	un->un_vtoc.v_part[2].p_start = 0;
5814 	un->un_vtoc.v_part[2].p_size  = disksize;
5815 	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
5816 	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;
5817 
5818 	un->un_map[2].dkl_cylno = 0;
5819 	un->un_map[2].dkl_nblk  = disksize;
5820 	un->un_offset[2] = 0;
5821 
5822 	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
5823 	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
5824 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5825 
5826 #else
5827 #error "No VTOC format defined."
5828 #endif
5829 
5830 	un->un_g.dkg_read_reinstruct  = 0;
5831 	un->un_g.dkg_write_reinstruct = 0;
5832 
5833 	un->un_g.dkg_intrlv = 1;
5834 
5835 	un->un_vtoc.v_sanity  = VTOC_SANE;
5836 
5837 	un->un_f_geometry_is_valid = TRUE;
5838 
5839 	SD_INFO(SD_LOG_COMMON, un,
5840 	    "sd_build_default_label: Default label created: "
5841 	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
5842 	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
5843 	    un->un_g.dkg_nsect, un->un_blockcount);
5844 }
5845 
5846 
5847 #if defined(_FIRMWARE_NEEDS_FDISK)
5848 /*
5849  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5850  */
5851 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5852 #define	LBA_MAX_CYL	(1022 & 0xFF)
5853 #define	LBA_MAX_HEAD	(254)
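/*
 * In the packed CHS encoding, the sector byte carries the sector number
 * in its low six bits and bits 8-9 of the cylinder number in its top two
 * bits, which is why LBA_MAX_SECT folds the high bits of 1022 in with 63.
 */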
5854 
5855 
5856 /*
5857  *    Function: sd_has_max_chs_vals
5858  *
5859  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
5860  *
5861  *   Arguments: fdp - ptr to CHS info
5862  *
5863  * Return Code: True or false
5864  *
5865  *     Context: Any.
5866  */
5867 
5868 static int
5869 sd_has_max_chs_vals(struct ipart *fdp)
5870 {
5871 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
5872 	    (fdp->beghead == LBA_MAX_HEAD)	&&
5873 	    (fdp->begsect == LBA_MAX_SECT)	&&
5874 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
5875 	    (fdp->endhead == LBA_MAX_HEAD)	&&
5876 	    (fdp->endsect == LBA_MAX_SECT));
5877 }
5878 #endif
5879 
5880 
5881 /*
5882  *    Function: sd_inq_fill
5883  *
5884  * Description: Copy a piece of inquiry data into the supplied buffer,
5885  *		cleaned up for non-printable characters and stopping at the
5886  *		first space character after the beginning of the passed string.
5887  *
5888  *   Arguments: p - source string
5889  *		l - maximum length to copy
5890  *		s - destination string
5891  *
5892  *     Context: Any.
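 *
 *		For example, a vendor field of "SEAGATE " is copied as
 *		"SEAGATE", and non-printable bytes are replaced with '*'.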
5893  */
5894 
5895 static void
5896 sd_inq_fill(char *p, int l, char *s)
5897 {
5898 	unsigned i = 0;
5899 	char c;
5900 
5901 	while (i++ < l) {
5902 		if ((c = *p++) < ' ' || c >= 0x7F) {
5903 			c = '*';
5904 		} else if (i != 1 && c == ' ') {
5905 			break;
5906 		}
5907 		*s++ = c;
5908 	}
5909 	*s++ = 0;
5910 }
5911 
5912 
5913 /*
5914  *    Function: sd_register_devid
5915  *
5916  * Description: This routine will obtain the device id information from the
5917  *		target, obtain the serial number, and register the device
5918  *		id with the ddi framework.
5919  *
5920  *   Arguments: devi - the system's dev_info_t for the device.
5921  *		un - driver soft state (unit) structure
5922  *		reservation_flag - indicates if a reservation conflict
5923  *		occurred during attach
5924  *
5925  *     Context: Kernel Thread
5926  */
5927 static void
5928 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
5929 {
5930 	int		rval		= 0;
5931 	uchar_t		*inq80		= NULL;
5932 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
5933 	size_t		inq80_resid	= 0;
5934 	uchar_t		*inq83		= NULL;
5935 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
5936 	size_t		inq83_resid	= 0;
5937 
5938 	ASSERT(un != NULL);
5939 	ASSERT(mutex_owned(SD_MUTEX(un)));
5940 	ASSERT((SD_DEVINFO(un)) == devi);
5941 
5942 	/*
5943 	 * This is the case of antiquated Sun disk drives that have the
5944 	 * FAB_DEVID property set in the disk_table.  These drives
5945 	 * manage the devids by storing them in the last 2 available sectors
5946 	 * on the drive and have them fabricated by the ddi layer by calling
5947 	 * ddi_devid_init and passing the DEVID_FAB flag.
5948 	 */
5949 	if (un->un_f_opt_fab_devid == TRUE) {
5950 		/*
5951 		 * Depending on EINVAL isn't reliable, since a reserved disk
5952 		 * may result in invalid geometry, so check to make sure a
5953 		 * reservation conflict did not occur during attach.
5954 		 */
5955 		if ((sd_get_devid(un) == EINVAL) &&
5956 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
5957 			/*
5958 			 * The devid is invalid AND there is no reservation
5959 			 * conflict.  Fabricate a new devid.
5960 			 */
5961 			(void) sd_create_devid(un);
5962 		}
5963 
5964 		/* Register the devid if it exists */
5965 		if (un->un_devid != NULL) {
5966 			(void) ddi_devid_register(SD_DEVINFO(un),
5967 			    un->un_devid);
5968 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5969 			    "sd_register_devid: Devid Fabricated\n");
5970 		}
5971 		return;
5972 	}
5973 
5974 	/*
5975 	 * We check the availability of the World Wide Name (0x83) and Unit
5976 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
5977 	 * un_vpd_page_mask from it, we decide which way to get the WWN.  If
5978 	 * 0x83 is available, that is the best choice.  Our next choice is
5979 	 * 0x80.  If neither is available, we munge the devid from the device
5980 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
5981 	 * to fabricate a devid for non-Sun qualified disks.
5982 	 */
5983 	if (sd_check_vpd_page_support(un) == 0) {
5984 		/* collect page 80 data if available */
5985 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
5986 
5987 			mutex_exit(SD_MUTEX(un));
5988 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
5989 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
5990 			    0x01, 0x80, &inq80_resid);
5991 
5992 			if (rval != 0) {
5993 				kmem_free(inq80, inq80_len);
5994 				inq80 = NULL;
5995 				inq80_len = 0;
5996 			}
5997 			mutex_enter(SD_MUTEX(un));
5998 		}
5999 
6000 		/* collect page 83 data if available */
6001 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
6002 			mutex_exit(SD_MUTEX(un));
6003 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
6004 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
6005 			    0x01, 0x83, &inq83_resid);
6006 
6007 			if (rval != 0) {
6008 				kmem_free(inq83, inq83_len);
6009 				inq83 = NULL;
6010 				inq83_len = 0;
6011 			}
6012 			mutex_enter(SD_MUTEX(un));
6013 		}
6014 	}
6015 
6016 	/* encode best devid possible based on data available */
6017 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
6018 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
6019 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
6020 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
6021 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
6022 
6023 		/* devid successfully encoded, register devid */
6024 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
6025 
6026 	} else {
6027 		/*
6028 		 * Unable to encode a devid based on data available.
6029 		 * This is not a Sun qualified disk.  Older Sun disk
6030 		 * drives that have the SD_FAB_DEVID property
6031 		 * set in the disk_table and non-Sun qualified
6032 		 * disks are treated in the same manner.  These
6033 		 * drives manage the devids by storing them in
6034 		 * the last 2 available sectors on the drive and
6035 		 * have them fabricated by the ddi layer by
6036 		 * calling ddi_devid_init and passing the
6037 		 * DEVID_FAB flag.
6038 		 * Create a fabricated devid only if one does
6039 		 * not already exist.
6040 		 */
6041 		if (sd_get_devid(un) == EINVAL) {
6042 			(void) sd_create_devid(un);
6043 			un->un_f_opt_fab_devid = TRUE;
6044 		}
6045 
6046 		/* Register the devid if it exists */
6047 		if (un->un_devid != NULL) {
6048 			(void) ddi_devid_register(SD_DEVINFO(un),
6049 			    un->un_devid);
6050 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6051 			    "sd_register_devid: devid fabricated using "
6052 			    "ddi framework\n");
6053 		}
6054 	}
6055 
6056 	/* clean up resources */
6057 	if (inq80 != NULL) {
6058 		kmem_free(inq80, inq80_len);
6059 	}
6060 	if (inq83 != NULL) {
6061 		kmem_free(inq83, inq83_len);
6062 	}
6063 }
6064 
6065 static daddr_t
6066 sd_get_devid_block(struct sd_lun *un)
6067 {
6068 	daddr_t			spc, blk, head, cyl;
6069 
6070 	if (un->un_blockcount <= DK_MAX_BLOCKS) {
6071 		/* this geometry doesn't allow us to write a devid */
6072 		if (un->un_g.dkg_acyl < 2) {
6073 			return (-1);
6074 		}
6075 
6076 		/*
6077 		 * Subtracting 2 guarantees that the next-to-last
6078 		 * cylinder is used
6079 		 */
6080 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6081 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6082 		head = un->un_g.dkg_nhead - 1;
6083 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6084 		    (head * un->un_g.dkg_nsect) + 1;
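		/*
		 * Example: with ncyl = 1000, acyl = 2, nhead = 16,
		 * nsect = 63 and dkg_apc = 0, this selects cylinder 1000
		 * (the first alternate), the last head, and block 1 of
		 * that track: blk = 1000 * 1008 + 15 * 63 + 1 = 1008946.
		 */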
6085 	} else {
6086 		if (un->un_reserved != -1) {
6087 			blk = un->un_map[un->un_reserved].dkl_cylno + 1;
6088 		} else {
6089 			return (-1);
6090 		}
6091 	}
6092 	return (blk);
6093 }
6094 
6095 /*
6096  *    Function: sd_get_devid
6097  *
6098  * Description: This routine will return 0 if a valid device id has been
6099  *		obtained from the target and stored in the soft state. If a
6100  *		valid device id has not been previously read and stored, a
6101  *		read attempt will be made.
6102  *
6103  *   Arguments: un - driver soft state (unit) structure
6104  *
6105  * Return Code: 0 if we successfully get the device id
6106  *
6107  *     Context: Kernel Thread
6108  */
6109 
6110 static int
6111 sd_get_devid(struct sd_lun *un)
6112 {
6113 	struct dk_devid		*dkdevid;
6114 	ddi_devid_t		tmpid;
6115 	uint_t			*ip;
6116 	size_t			sz;
6117 	daddr_t			blk;
6118 	int			status;
6119 	int			chksum;
6120 	int			i;
6121 	size_t			buffer_size;
6122 
6123 	ASSERT(un != NULL);
6124 	ASSERT(mutex_owned(SD_MUTEX(un)));
6125 
6126 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6127 	    un);
6128 
6129 	if (un->un_devid != NULL) {
6130 		return (0);
6131 	}
6132 
6133 	blk = sd_get_devid_block(un);
6134 	if (blk < 0)
6135 		return (EINVAL);
6136 
6137 	/*
6138 	 * Read and verify the device id, stored in the reserved cylinders at
6139 	 * the end of the disk.  The backup label is on the odd sectors of the
6140 	 * last track of the last cylinder, and the device id is on a track of
6141 	 * the next-to-last cylinder.
6142 	 */
6143 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6144 	mutex_exit(SD_MUTEX(un));
6145 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6146 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6147 	    SD_PATH_DIRECT);
6148 	if (status != 0) {
6149 		goto error;
6150 	}
6151 
6152 	/* Validate the revision */
6153 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6154 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6155 		status = EINVAL;
6156 		goto error;
6157 	}
6158 
6159 	/* Calculate the checksum */
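	/*
	 * The on-disk checksum is the XOR of every 32-bit word in the
	 * sector except the last, which holds the checksum itself, so
	 * recomputing the XOR here must reproduce the stored value.
	 */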
6160 	chksum = 0;
6161 	ip = (uint_t *)dkdevid;
6162 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6163 	    i++) {
6164 		chksum ^= ip[i];
6165 	}
6166 
6167 	/* Compare the checksums */
6168 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6169 		status = EINVAL;
6170 		goto error;
6171 	}
6172 
6173 	/* Validate the device id */
6174 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6175 		status = EINVAL;
6176 		goto error;
6177 	}
6178 
6179 	/*
6180 	 * Store the device id in the driver soft state
6181 	 */
6182 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6183 	tmpid = kmem_alloc(sz, KM_SLEEP);
6184 
6185 	mutex_enter(SD_MUTEX(un));
6186 
6187 	un->un_devid = tmpid;
6188 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6189 
6190 	kmem_free(dkdevid, buffer_size);
6191 
6192 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6193 
6194 	return (status);
6195 error:
6196 	mutex_enter(SD_MUTEX(un));
6197 	kmem_free(dkdevid, buffer_size);
6198 	return (status);
6199 }
6200 
6201 
6202 /*
6203  *    Function: sd_create_devid
6204  *
6205  * Description: This routine will fabricate the device id and write it
6206  *		to the disk.
6207  *
6208  *   Arguments: un - driver soft state (unit) structure
6209  *
6210  * Return Code: value of the fabricated device id
6211  *
6212  *     Context: Kernel Thread
6213  */
6214 
6215 static ddi_devid_t
6216 sd_create_devid(struct sd_lun *un)
6217 {
6218 	ASSERT(un != NULL);
6219 
6220 	/* Fabricate the devid */
6221 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6222 	    == DDI_FAILURE) {
6223 		return (NULL);
6224 	}
6225 
6226 	/* Write the devid to disk */
6227 	if (sd_write_deviceid(un) != 0) {
6228 		ddi_devid_free(un->un_devid);
6229 		un->un_devid = NULL;
6230 	}
6231 
6232 	return (un->un_devid);
6233 }
6234 
6235 
6236 /*
6237  *    Function: sd_write_deviceid
6238  *
6239  * Description: This routine will write the device id to the disk
6240  *		reserved sector.
6241  *
6242  *   Arguments: un - driver soft state (unit) structure
6243  *
6244  * Return Code: EINVAL
6245  *		value returned by sd_send_scsi_cmd
6246  *
6247  *     Context: Kernel Thread
6248  */
6249 
6250 static int
6251 sd_write_deviceid(struct sd_lun *un)
6252 {
6253 	struct dk_devid		*dkdevid;
6254 	daddr_t			blk;
6255 	uint_t			*ip, chksum;
6256 	int			status;
6257 	int			i;
6258 
6259 	ASSERT(mutex_owned(SD_MUTEX(un)));
6260 
6261 	blk = sd_get_devid_block(un);
6262 	if (blk < 0)
6263 		return (-1);
6264 	mutex_exit(SD_MUTEX(un));
6265 
6266 	/* Allocate the buffer */
6267 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6268 
6269 	/* Fill in the revision */
6270 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6271 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6272 
6273 	/* Copy in the device id */
6274 	mutex_enter(SD_MUTEX(un));
6275 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6276 	    ddi_devid_sizeof(un->un_devid));
6277 	mutex_exit(SD_MUTEX(un));
6278 
6279 	/* Calculate the checksum */
6280 	chksum = 0;
6281 	ip = (uint_t *)dkdevid;
6282 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6283 	    i++) {
6284 		chksum ^= ip[i];
6285 	}
6286 
6287 	/* Fill-in checksum */
6288 	DKD_FORMCHKSUM(chksum, dkdevid);
6289 
6290 	/* Write the reserved sector */
6291 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6292 	    SD_PATH_DIRECT);
6293 
6294 	kmem_free(dkdevid, un->un_sys_blocksize);
6295 
6296 	mutex_enter(SD_MUTEX(un));
6297 	return (status);
6298 }
6299 
6300 
6301 /*
6302  *    Function: sd_check_vpd_page_support
6303  *
6304  * Description: This routine sends an inquiry command with the EVPD bit set and
6305  *		a page code of 0x00 to the device. It is used to determine which
6306  *		vital product pages are available to find the devid.  We are
6307  *		looking for pages 0x83 or 0x80.  If we return -1, the
6308  *		device does not support that command.
6309  *
6310  *   Arguments: un  - driver soft state (unit) structure
6311  *
6312  * Return Code: 0 - success
6313  *		-1 - the device does not support VPD pages
6314  *
6315  *     Context: This routine can sleep.
6316  */
6317 
6318 static int
6319 sd_check_vpd_page_support(struct sd_lun *un)
6320 {
6321 	uchar_t	*page_list	= NULL;
6322 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6323 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6324 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6325 	int    	rval		= 0;
6326 	int	counter;
6327 
6328 	ASSERT(un != NULL);
6329 	ASSERT(mutex_owned(SD_MUTEX(un)));
6330 
6331 	mutex_exit(SD_MUTEX(un));
6332 
6333 	/*
6334 	 * We'll set the page length to the maximum to save figuring it out
6335 	 * with an additional call.
6336 	 */
6337 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6338 
6339 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6340 	    page_code, NULL);
6341 
6342 	mutex_enter(SD_MUTEX(un));
6343 
6344 	/*
6345 	 * Now we must validate that the device accepted the command, as some
6346 	 * drives do not support it.  If the drive does support it, we will
6347 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6348 	 * not, we return -1.
6349 	 */
6350 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6351 		/* Loop to find one of the 2 pages we need */
6352 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6353 
6354 		/*
6355 		 * Pages are returned in ascending order, and 0x83 is what we
6356 		 * are hoping for.
6357 		 */
6358 		while ((page_list[counter] <= 0x83) &&
6359 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6360 		    VPD_HEAD_OFFSET))) {
6361 			/*
6362 			 * page_list[3] counts the page codes following it;
6363 			 * VPD_HEAD_OFFSET + count is the last valid index.
6364 			 */
6365 
6366 			switch (page_list[counter]) {
6367 			case 0x00:
6368 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6369 				break;
6370 			case 0x80:
6371 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6372 				break;
6373 			case 0x81:
6374 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6375 				break;
6376 			case 0x82:
6377 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6378 				break;
6379 			case 0x83:
6380 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6381 				break;
6382 			}
6383 			counter++;
6384 		}
6385 
6386 	} else {
6387 		rval = -1;
6388 
6389 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6390 		    "sd_check_vpd_page_support: This drive does not implement "
6391 		    "VPD pages.\n");
6392 	}
6393 
6394 	kmem_free(page_list, page_length);
6395 
6396 	return (rval);
6397 }
6398 
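
/*
 * Illustrative sketch only (never compiled into the driver; the
 * SD_EXAMPLE_SKETCHES guard is not defined anywhere): this shows the layout
 * of the VPD page 0x00 payload that sd_check_vpd_page_support() walks above.
 * Byte 1 is the page code, byte 3 is the number of page codes that follow,
 * and the page-code list itself starts at byte 4. The helper name is
 * hypothetical.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static int
sd_example_vpd_has_page(uchar_t *page_list, uchar_t wanted)
{
	int	i;
	int	npages = page_list[3];	/* count of page codes after byte 4 */

	for (i = 0; i < npages; i++) {
		if (page_list[4 + i] == wanted) {
			return (1);	/* e.g. wanted == 0x83 (devid page) */
		}
	}
	return (0);
}
#endif	/* SD_EXAMPLE_SKETCHES */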
6399 
6400 /*
6401  *    Function: sd_setup_pm
6402  *
6403  * Description: Initialize Power Management on the device
6404  *
6405  *     Context: Kernel thread context
6406  */
6407 
6408 static void
6409 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6410 {
6411 	uint_t	log_page_size;
6412 	uchar_t	*log_page_data;
6413 	int	rval;
6414 
6415 	/*
6416 	 * Since we are called from attach, holding a mutex for
6417 	 * un is unnecessary. Because some of the routines called
6418 	 * from here require SD_MUTEX to not be held, assert this
6419 	 * right up front.
6420 	 */
6421 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6422 	/*
6423 	 * Since the sd device does not have the 'reg' property,
6424 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6425 	 * The following code is to tell cpr that this device
6426 	 * DOES need to be suspended and resumed.
6427 	 */
6428 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6429 	    "pm-hardware-state", "needs-suspend-resume");
6430 
6431 	/*
6432 	 * This complies with the new power management framework
6433 	 * for certain desktop machines. Create the pm_components
6434 	 * property as a string array property.
6435 	 */
6436 	if (un->un_f_pm_supported) {
6437 		/*
6438 		 * Not all devices have a motor, so probe for one first.
6439 		 * Some devices may return ILLEGAL REQUEST and some
6440 		 * may hang.
6441 		 * The following START_STOP_UNIT is used to check whether
6442 		 * the target device has a motor.
6443 		 */
6444 		un->un_f_start_stop_supported = TRUE;
6445 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6446 		    SD_PATH_DIRECT) != 0) {
6447 			un->un_f_start_stop_supported = FALSE;
6448 		}
6449 
6450 		/*
6451 		 * Create the pm properties anyway; otherwise the parent
6452 		 * can't go to sleep.
6453 		 */
6454 		(void) sd_create_pm_components(devi, un);
6455 		un->un_f_pm_is_enabled = TRUE;
6456 		return;
6457 	}
6458 
6459 	if (!un->un_f_log_sense_supported) {
6460 		un->un_power_level = SD_SPINDLE_ON;
6461 		un->un_f_pm_is_enabled = FALSE;
6462 		return;
6463 	}
6464 
6465 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6466 
6467 #ifdef	SDDEBUG
6468 	if (sd_force_pm_supported) {
6469 		/* Force a successful result */
6470 		rval = 1;
6471 	}
6472 #endif
6473 
6474 	/*
6475 	 * If the start-stop cycle counter log page is not supported
6476 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6477 	 * then we should not create the pm_components property.
6478 	 */
6479 	if (rval == -1) {
6480 		/*
6481 		 * Error.
6482 		 * Reading log sense failed, most likely this is
6483 		 * an older drive that does not support log sense.
6484 		 * If this fails auto-pm is not supported.
6485 		 */
6486 		un->un_power_level = SD_SPINDLE_ON;
6487 		un->un_f_pm_is_enabled = FALSE;
6488 
6489 	} else if (rval == 0) {
6490 		/*
6491 		 * Page not found.
6492 		 * The start/stop cycle counter is implemented as page
6493 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
6494 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6495 		 */
6496 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6497 			/*
6498 			 * Page found, use this one.
6499 			 */
6500 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6501 			un->un_f_pm_is_enabled = TRUE;
6502 		} else {
6503 			/*
6504 			 * Error or page not found.
6505 			 * auto-pm is not supported for this device.
6506 			 */
6507 			un->un_power_level = SD_SPINDLE_ON;
6508 			un->un_f_pm_is_enabled = FALSE;
6509 		}
6510 	} else {
6511 		/*
6512 		 * Page found, use it.
6513 		 */
6514 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6515 		un->un_f_pm_is_enabled = TRUE;
6516 	}
6517 
6518 
6519 	if (un->un_f_pm_is_enabled == TRUE) {
6520 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6521 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6522 
6523 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6524 		    log_page_size, un->un_start_stop_cycle_page,
6525 		    0x01, 0, SD_PATH_DIRECT);
6526 #ifdef	SDDEBUG
6527 		if (sd_force_pm_supported) {
6528 			/* Force a successful result */
6529 			rval = 0;
6530 		}
6531 #endif
6532 
6533 		/*
6534 		 * If the LOG SENSE for the start/stop cycle counter page
6535 		 * succeeds, then power management is supported and we can
6536 		 * enable auto-pm.
6537 		 */
6538 		if (rval == 0)  {
6539 			(void) sd_create_pm_components(devi, un);
6540 		} else {
6541 			un->un_power_level = SD_SPINDLE_ON;
6542 			un->un_f_pm_is_enabled = FALSE;
6543 		}
6544 
6545 		kmem_free(log_page_data, log_page_size);
6546 	}
6547 }
6548 
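
/*
 * Illustrative sketch only (never compiled; SD_EXAMPLE_SKETCHES is not
 * defined): this condenses the page-selection logic of sd_setup_pm() above.
 * Newer disks report the start/stop cycle counter on log page 0xE; older
 * disks use the vendor-unique page 0x31, which is probed as a fallback.
 * The probe callback stands in for sd_log_page_supported(); the helper
 * name is hypothetical.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static int
sd_example_pick_cycle_page(int (*page_supported)(int))
{
	if (page_supported(0xE) == 1) {
		return (0xE);		/* START_STOP_CYCLE_PAGE */
	}
	if (page_supported(0x31) == 1) {
		return (0x31);		/* START_STOP_CYCLE_VU_PAGE */
	}
	return (-1);			/* auto-pm not supported */
}
#endif	/* SD_EXAMPLE_SKETCHES */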
6549 
6550 /*
6551  *    Function: sd_create_pm_components
6552  *
6553  * Description: Initialize PM property.
6554  *
6555  *     Context: Kernel thread context
6556  */
6557 
6558 static void
6559 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6560 {
6561 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6562 
6563 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6564 
6565 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6566 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6567 		/*
6568 		 * When components are initially created they are idle,
6569 		 * power up any non-removables.
6570 		 * Note: the return value of pm_raise_power can't be used
6571 		 * for determining if PM should be enabled for this device.
6572 		 * Even if you check the return values and remove this
6573 		 * property created above, the PM framework will not honor the
6574 		 * change after the first call to pm_raise_power. Hence,
6575 		 * removal of that property does not help if pm_raise_power
6576 		 * fails. In the case of removable media, the start/stop
6577 		 * will fail if the media is not present.
6578 		 */
6579 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6580 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6581 			mutex_enter(SD_MUTEX(un));
6582 			un->un_power_level = SD_SPINDLE_ON;
6583 			mutex_enter(&un->un_pm_mutex);
6584 			/* Set to on and not busy. */
6585 			un->un_pm_count = 0;
6586 		} else {
6587 			mutex_enter(SD_MUTEX(un));
6588 			un->un_power_level = SD_SPINDLE_OFF;
6589 			mutex_enter(&un->un_pm_mutex);
6590 			/* Set to off. */
6591 			un->un_pm_count = -1;
6592 		}
6593 		mutex_exit(&un->un_pm_mutex);
6594 		mutex_exit(SD_MUTEX(un));
6595 	} else {
6596 		un->un_power_level = SD_SPINDLE_ON;
6597 		un->un_f_pm_is_enabled = FALSE;
6598 	}
6599 }
6600 
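
/*
 * Illustrative sketch only (never compiled; SD_EXAMPLE_SKETCHES is not
 * defined): the "pm-components" property created above is a NULL-terminated
 * string array naming one component and its power levels, lowest first. A
 * device with a hypothetical intermediate standby level would declare it as
 * below; the count passed to ddi_prop_update_string_array() excludes the
 * NULL terminator.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static char *sd_example_pm_comp[] = {
	"NAME=spindle-motor",	/* component 0 */
	"0=off",		/* lowest power level */
	"1=standby",		/* hypothetical intermediate level */
	"2=on",			/* full power */
	NULL
};
#endif	/* SD_EXAMPLE_SKETCHES */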
6601 
6602 /*
6603  *    Function: sd_ddi_suspend
6604  *
6605  * Description: Performs system power-down operations. This includes
6606  *		setting the drive state to indicate it is suspended so
6607  *		that no new commands will be accepted. It also waits for
6608  *		all commands that are in transport or queued to a timer
6609  *		for retry to complete. All timeout threads are cancelled.
6610  *
6611  * Return Code: DDI_FAILURE or DDI_SUCCESS
6612  *
6613  *     Context: Kernel thread context
6614  */
6615 
6616 static int
6617 sd_ddi_suspend(dev_info_t *devi)
6618 {
6619 	struct	sd_lun	*un;
6620 	clock_t		wait_cmds_complete;
6621 
6622 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6623 	if (un == NULL) {
6624 		return (DDI_FAILURE);
6625 	}
6626 
6627 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6628 
6629 	mutex_enter(SD_MUTEX(un));
6630 
6631 	/* Return success if the device is already suspended. */
6632 	if (un->un_state == SD_STATE_SUSPENDED) {
6633 		mutex_exit(SD_MUTEX(un));
6634 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6635 		    "device already suspended, exiting\n");
6636 		return (DDI_SUCCESS);
6637 	}
6638 
6639 	/* Return failure if the device is being used by HA */
6640 	if (un->un_resvd_status &
6641 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6642 		mutex_exit(SD_MUTEX(un));
6643 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6644 		    "device in use by HA, exiting\n");
6645 		return (DDI_FAILURE);
6646 	}
6647 
6648 	/*
6649 	 * Return failure if the device is in a resource wait
6650 	 * or power changing state.
6651 	 */
6652 	if ((un->un_state == SD_STATE_RWAIT) ||
6653 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6654 		mutex_exit(SD_MUTEX(un));
6655 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6656 		    "device in resource wait state, exiting\n");
6657 		return (DDI_FAILURE);
6658 	}
6659 
6660 
6661 	un->un_save_state = un->un_last_state;
6662 	New_state(un, SD_STATE_SUSPENDED);
6663 
6664 	/*
6665 	 * Wait for all commands that are in transport or queued to a timer
6666 	 * for retry to complete.
6667 	 *
6668 	 * While waiting, no new commands will be accepted or sent because of
6669 	 * the new state we set above.
6670 	 *
6671 	 * Wait till current operation has completed. If we are in the resource
6672 	 * wait state (with an intr outstanding) then we need to wait till the
6673 	 * intr completes and starts the next cmd. We want to wait at most
6674 	 * sd_wait_cmds_complete seconds before failing the DDI_SUSPEND.
6675 	 */
6676 	wait_cmds_complete = ddi_get_lbolt() +
6677 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6678 
6679 	while (un->un_ncmds_in_transport != 0) {
6680 		/*
6681 		 * Fail if commands do not finish in the specified time.
6682 		 */
6683 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6684 		    wait_cmds_complete) == -1) {
6685 			/*
6686 			 * Undo the state changes made above. Everything
6687 			 * must go back to its original value.
6688 			 */
6689 			Restore_state(un);
6690 			un->un_last_state = un->un_save_state;
6691 			/* Wake up any threads that might be waiting. */
6692 			cv_broadcast(&un->un_suspend_cv);
6693 			mutex_exit(SD_MUTEX(un));
6694 			SD_ERROR(SD_LOG_IO_PM, un,
6695 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6696 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6697 			return (DDI_FAILURE);
6698 		}
6699 	}
6700 
6701 	/*
6702 	 * Cancel SCSI watch thread and timeouts, if any are active
6703 	 */
6704 
6705 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6706 		opaque_t temp_token = un->un_swr_token;
6707 		mutex_exit(SD_MUTEX(un));
6708 		scsi_watch_suspend(temp_token);
6709 		mutex_enter(SD_MUTEX(un));
6710 	}
6711 
6712 	if (un->un_reset_throttle_timeid != NULL) {
6713 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6714 		un->un_reset_throttle_timeid = NULL;
6715 		mutex_exit(SD_MUTEX(un));
6716 		(void) untimeout(temp_id);
6717 		mutex_enter(SD_MUTEX(un));
6718 	}
6719 
6720 	if (un->un_dcvb_timeid != NULL) {
6721 		timeout_id_t temp_id = un->un_dcvb_timeid;
6722 		un->un_dcvb_timeid = NULL;
6723 		mutex_exit(SD_MUTEX(un));
6724 		(void) untimeout(temp_id);
6725 		mutex_enter(SD_MUTEX(un));
6726 	}
6727 
6728 	mutex_enter(&un->un_pm_mutex);
6729 	if (un->un_pm_timeid != NULL) {
6730 		timeout_id_t temp_id = un->un_pm_timeid;
6731 		un->un_pm_timeid = NULL;
6732 		mutex_exit(&un->un_pm_mutex);
6733 		mutex_exit(SD_MUTEX(un));
6734 		(void) untimeout(temp_id);
6735 		mutex_enter(SD_MUTEX(un));
6736 	} else {
6737 		mutex_exit(&un->un_pm_mutex);
6738 	}
6739 
6740 	if (un->un_retry_timeid != NULL) {
6741 		timeout_id_t temp_id = un->un_retry_timeid;
6742 		un->un_retry_timeid = NULL;
6743 		mutex_exit(SD_MUTEX(un));
6744 		(void) untimeout(temp_id);
6745 		mutex_enter(SD_MUTEX(un));
6746 	}
6747 
6748 	if (un->un_direct_priority_timeid != NULL) {
6749 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6750 		un->un_direct_priority_timeid = NULL;
6751 		mutex_exit(SD_MUTEX(un));
6752 		(void) untimeout(temp_id);
6753 		mutex_enter(SD_MUTEX(un));
6754 	}
6755 
6756 	if (un->un_f_is_fibre == TRUE) {
6757 		/*
6758 		 * Remove callbacks for insert and remove events
6759 		 */
6760 		if (un->un_insert_event != NULL) {
6761 			mutex_exit(SD_MUTEX(un));
6762 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6763 			mutex_enter(SD_MUTEX(un));
6764 			un->un_insert_event = NULL;
6765 		}
6766 
6767 		if (un->un_remove_event != NULL) {
6768 			mutex_exit(SD_MUTEX(un));
6769 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6770 			mutex_enter(SD_MUTEX(un));
6771 			un->un_remove_event = NULL;
6772 		}
6773 	}
6774 
6775 	mutex_exit(SD_MUTEX(un));
6776 
6777 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6778 
6779 	return (DDI_SUCCESS);
6780 }
6781 
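
/*
 * Illustrative sketch only (never compiled; SD_EXAMPLE_SKETCHES is not
 * defined): the command-drain loop in sd_ddi_suspend() above uses the
 * absolute-deadline form of cv_timedwait(9F): compute the deadline in lbolt
 * ticks once, then re-wait on spurious wakeups without extending the total
 * timeout. cv_timedwait() returns -1 when the deadline passes. The helper
 * and its parameters are hypothetical.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static int
sd_example_drain(kmutex_t *mp, kcondvar_t *cv, volatile int *ncmds,
    int max_wait_seconds)
{
	clock_t deadline = ddi_get_lbolt() +
	    (max_wait_seconds * drv_usectohz(1000000));

	/* Caller holds mp; *ncmds is decremented under mp elsewhere. */
	while (*ncmds != 0) {
		if (cv_timedwait(cv, mp, deadline) == -1) {
			return (-1);	/* timed out with commands pending */
		}
	}
	return (0);			/* all commands drained */
}
#endif	/* SD_EXAMPLE_SKETCHES */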
6782 
6783 /*
6784  *    Function: sd_ddi_pm_suspend
6785  *
6786  * Description: Set the drive state to low power.
6787  *		Someone else is required to actually change the drive
6788  *		power level.
6789  *
6790  *   Arguments: un - driver soft state (unit) structure
6791  *
6792  * Return Code: DDI_FAILURE or DDI_SUCCESS
6793  *
6794  *     Context: Kernel thread context
6795  */
6796 
6797 static int
6798 sd_ddi_pm_suspend(struct sd_lun *un)
6799 {
6800 	ASSERT(un != NULL);
6801 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6802 
6803 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6804 	mutex_enter(SD_MUTEX(un));
6805 
6806 	/*
6807 	 * Exit if power management is not enabled for this device, or if
6808 	 * the device is being used by HA.
6809 	 */
6810 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6811 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6812 		mutex_exit(SD_MUTEX(un));
6813 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6814 		return (DDI_SUCCESS);
6815 	}
6816 
6817 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6818 	    un->un_ncmds_in_driver);
6819 
6820 	/*
6821 	 * See if the device is not busy, ie.:
6822 	 *    - we have no commands in the driver for this device
6823 	 *    - not waiting for resources
6824 	 */
6825 	if ((un->un_ncmds_in_driver == 0) &&
6826 	    (un->un_state != SD_STATE_RWAIT)) {
6827 		/*
6828 		 * The device is not busy, so it is OK to go to low power state.
6829 		 * Indicate low power, but rely on someone else to actually
6830 		 * change it.
6831 		 */
6832 		mutex_enter(&un->un_pm_mutex);
6833 		un->un_pm_count = -1;
6834 		mutex_exit(&un->un_pm_mutex);
6835 		un->un_power_level = SD_SPINDLE_OFF;
6836 	}
6837 
6838 	mutex_exit(SD_MUTEX(un));
6839 
6840 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6841 
6842 	return (DDI_SUCCESS);
6843 }
6844 
6845 
6846 /*
6847  *    Function: sd_ddi_resume
6848  *
6849  * Description: Performs system power-up operations.
6850  *
6851  * Return Code: DDI_SUCCESS
6852  *		DDI_FAILURE
6853  *
6854  *     Context: Kernel thread context
6855  */
6856 
6857 static int
6858 sd_ddi_resume(dev_info_t *devi)
6859 {
6860 	struct	sd_lun	*un;
6861 
6862 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6863 	if (un == NULL) {
6864 		return (DDI_FAILURE);
6865 	}
6866 
6867 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6868 
6869 	mutex_enter(SD_MUTEX(un));
6870 	Restore_state(un);
6871 
6872 	/*
6873 	 * Restore the saved state so that un_last_state
6874 	 * holds the right value.
6875 	 */
6876 	un->un_last_state = un->un_save_state;
6877 	/*
6878 	 * Note: throttle comes back at full.
6879 	 * Also note: this MUST be done before calling pm_raise_power,
6880 	 * otherwise the system can hang in biowait. The scenario where
6881 	 * this happens is under cpr suspend. Writing of the system
6882 	 * state goes through sddump, which writes 0 to un_throttle. If
6883 	 * writing the system state then fails, for example if the partition
6884 	 * is too small, then cpr attempts a resume. If throttle isn't restored
6885 	 * from the saved value until after calling pm_raise_power then
6886 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6887 	 * in biowait.
6888 	 */
6889 	un->un_throttle = un->un_saved_throttle;
6890 
6891 	/*
6892 	 * The chance of failure is very rare, as the only command issued in
6893 	 * the power entry point is START, when transitioning from 0->1 or
6894 	 * unknown->1. Put the device in the SPINDLE ON state irrespective of
6895 	 * the state at which suspend was done. Ignore the return value, as
6896 	 * the resume should not be failed. In the case of removable media,
6897 	 * the media need not be inserted, so there is a chance that raising
6898 	 * power will fail with media not present.
6899 	 */
6900 	if (un->un_f_attach_spinup) {
6901 		mutex_exit(SD_MUTEX(un));
6902 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
6903 		mutex_enter(SD_MUTEX(un));
6904 	}
6905 
6906 	/*
6907 	 * Don't broadcast to the suspend cv and therefore possibly
6908 	 * start I/O until after power has been restored.
6909 	 */
6910 	cv_broadcast(&un->un_suspend_cv);
6911 	cv_broadcast(&un->un_state_cv);
6912 
6913 	/* restart thread */
6914 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6915 		scsi_watch_resume(un->un_swr_token);
6916 	}
6917 
6918 #if (defined(__fibre))
6919 	if (un->un_f_is_fibre == TRUE) {
6920 		/*
6921 		 * Add callbacks for insert and remove events
6922 		 */
6923 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6924 			sd_init_event_callbacks(un);
6925 		}
6926 	}
6927 #endif
6928 
6929 	/*
6930 	 * Transport any pending commands to the target.
6931 	 *
6932 	 * If this is a low-activity device, commands in the queue will have to
6933 	 * wait until new commands come in, which may take a while. Also, we
6934 	 * specifically don't check un_ncmds_in_transport because we know that
6935 	 * there really are no commands in progress after the unit was
6936 	 * suspended and we could have reached the throttle level, been
6937 	 * suspended, and have no new commands coming in for a while. Highly
6938 	 * unlikely, but so is the low-activity disk scenario.
6939 	 */
6940 	ddi_xbuf_dispatch(un->un_xbuf_attr);
6941 
6942 	sd_start_cmds(un, NULL);
6943 	mutex_exit(SD_MUTEX(un));
6944 
6945 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6946 
6947 	return (DDI_SUCCESS);
6948 }
6949 
6950 
6951 /*
6952  *    Function: sd_ddi_pm_resume
6953  *
6954  * Description: Set the drive state to powered on.
6955  *		Someone else is required to actually change the drive
6956  *		power level.
6957  *
6958  *   Arguments: un - driver soft state (unit) structure
6959  *
6960  * Return Code: DDI_SUCCESS
6961  *
6962  *     Context: Kernel thread context
6963  */
6964 
6965 static int
6966 sd_ddi_pm_resume(struct sd_lun *un)
6967 {
6968 	ASSERT(un != NULL);
6969 
6970 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6971 	mutex_enter(SD_MUTEX(un));
6972 	un->un_power_level = SD_SPINDLE_ON;
6973 
6974 	ASSERT(!mutex_owned(&un->un_pm_mutex));
6975 	mutex_enter(&un->un_pm_mutex);
6976 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
6977 		un->un_pm_count++;
6978 		ASSERT(un->un_pm_count == 0);
6979 		/*
6980 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
6981 		 * un_suspend_cv is for a system resume, not a power management
6982 		 * device resume. (4297749)
6983 		 *	 cv_broadcast(&un->un_suspend_cv);
6984 		 */
6985 	}
6986 	mutex_exit(&un->un_pm_mutex);
6987 	mutex_exit(SD_MUTEX(un));
6988 
6989 	return (DDI_SUCCESS);
6990 }
6991 
6992 
6993 /*
6994  *    Function: sd_pm_idletimeout_handler
6995  *
6996  * Description: A timer routine that's active only while a device is busy.
6997  *		The purpose is to extend slightly the pm framework's busy
6998  *		view of the device to prevent busy/idle thrashing for
6999  *		back-to-back commands. Do this by comparing the current time
7000  *		to the time at which the last command completed and when the
7001  *		difference is greater than sd_pm_idletime, call
7002  *		pm_idle_component. In addition to indicating idle to the pm
7003  *		framework, update the chain type to again use the internal pm
7004  *		layers of the driver.
7005  *
7006  *   Arguments: arg - driver soft state (unit) structure
7007  *
7008  *     Context: Executes in a timeout(9F) thread context
7009  */
7010 
7011 static void
7012 sd_pm_idletimeout_handler(void *arg)
7013 {
7014 	struct sd_lun *un = arg;
7015 
7016 	time_t	now;
7017 
7018 	mutex_enter(&sd_detach_mutex);
7019 	if (un->un_detach_count != 0) {
7020 		/* Abort if the instance is detaching */
7021 		mutex_exit(&sd_detach_mutex);
7022 		return;
7023 	}
7024 	mutex_exit(&sd_detach_mutex);
7025 
7026 	now = ddi_get_time();
7027 	/*
7028 	 * Grab both mutexes, in the proper order, since we're accessing
7029 	 * both PM and softstate variables.
7030 	 */
7031 	mutex_enter(SD_MUTEX(un));
7032 	mutex_enter(&un->un_pm_mutex);
7033 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7034 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7035 		/*
7036 		 * Update the chain types.
7037 		 * This takes effect on the next new command received.
7038 		 */
7039 		if (un->un_f_non_devbsize_supported) {
7040 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7041 		} else {
7042 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7043 		}
7044 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7045 
7046 		SD_TRACE(SD_LOG_IO_PM, un,
7047 		    "sd_pm_idletimeout_handler: idling device\n");
7048 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7049 		un->un_pm_idle_timeid = NULL;
7050 	} else {
7051 		un->un_pm_idle_timeid =
7052 		    timeout(sd_pm_idletimeout_handler, un,
7053 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
7054 	}
7055 	mutex_exit(&un->un_pm_mutex);
7056 	mutex_exit(SD_MUTEX(un));
7057 }
7058 
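
/*
 * Illustrative sketch only (never compiled; SD_EXAMPLE_SKETCHES is not
 * defined): sd_pm_idletimeout_handler() above is a self-rearming timeout.
 * Each pass either declares the device idle and clears its own timeout id,
 * or re-schedules itself for another short interval. Clearing the id under
 * the lock is what lets a detach path untimeout() it safely. The structure
 * and handler names are hypothetical.
 */
#ifdef	SD_EXAMPLE_SKETCHES
struct sd_example_poll {
	kmutex_t	lock;
	timeout_id_t	tid;
	int		busy;
};

static void
sd_example_poll_handler(void *arg)
{
	struct sd_example_poll *p = arg;

	mutex_enter(&p->lock);
	if (p->busy) {
		/* Still busy: re-arm ourselves for ~300 ms from now. */
		p->tid = timeout(sd_example_poll_handler, p,
		    drv_usectohz(300000));
	} else {
		/* Idle: stop polling; leave no dangling timeout id. */
		p->tid = NULL;
	}
	mutex_exit(&p->lock);
}
#endif	/* SD_EXAMPLE_SKETCHES */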
7059 
7060 /*
7061  *    Function: sd_pm_timeout_handler
7062  *
7063  * Description: Callback to tell framework we are idle.
7064  *
7065  *     Context: timeout(9f) thread context.
7066  */
7067 
7068 static void
7069 sd_pm_timeout_handler(void *arg)
7070 {
7071 	struct sd_lun *un = arg;
7072 
7073 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7074 	mutex_enter(&un->un_pm_mutex);
7075 	un->un_pm_timeid = NULL;
7076 	mutex_exit(&un->un_pm_mutex);
7077 }
7078 
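
/*
 * Illustrative sketch only (never compiled; SD_EXAMPLE_SKETCHES is not
 * defined): sdpower() below extracts two 32-bit counters from the
 * start/stop cycle counter log page by assembling big-endian bytes, e.g.
 * the lifetime maximum from bytes 0x1c-0x1f and the accumulated count from
 * bytes 0x24-0x27. A generic helper for that byte assembly would look like
 * this; the name is hypothetical.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static uint_t
sd_example_get_be32(uchar_t *buf, int offset)
{
	/* SCSI log page fields are big-endian (MSB first). */
	return (((uint_t)buf[offset] << 24) |
	    ((uint_t)buf[offset + 1] << 16) |
	    ((uint_t)buf[offset + 2] << 8) |
	    (uint_t)buf[offset + 3]);
}
#endif	/* SD_EXAMPLE_SKETCHES */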
7079 
7080 /*
7081  *    Function: sdpower
7082  *
7083  * Description: PM entry point.
7084  *
7085  * Return Code: DDI_SUCCESS
7086  *		DDI_FAILURE
7087  *
7088  *     Context: Kernel thread context
7089  */
7090 
7091 static int
7092 sdpower(dev_info_t *devi, int component, int level)
7093 {
7094 	struct sd_lun	*un;
7095 	int		instance;
7096 	int		rval = DDI_SUCCESS;
7097 	uint_t		i, log_page_size, maxcycles, ncycles;
7098 	uchar_t		*log_page_data;
7099 	int		log_sense_page;
7100 	int		medium_present;
7101 	time_t		intvlp;
7102 	dev_t		dev;
7103 	struct pm_trans_data	sd_pm_tran_data;
7104 	uchar_t		save_state;
7105 	int		sval;
7106 	uchar_t		state_before_pm;
7107 	int		got_semaphore_here;
7108 
7109 	instance = ddi_get_instance(devi);
7110 
7111 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7112 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7113 	    component != 0) {
7114 		return (DDI_FAILURE);
7115 	}
7116 
7117 	dev = sd_make_device(SD_DEVINFO(un));
7118 
7119 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7120 
7121 	/*
7122 	 * Must synchronize power down with close.
7123 	 * Attempt to decrement/acquire the open/close semaphore,
7124 	 * but do NOT wait on it. If it's not greater than zero,
7125 	 * ie. it can't be decremented without waiting, then
7126 	 * someone else, either open or close, already has it
7127 	 * and the try returns 0. Use that knowledge here to determine
7128 	 * if it's OK to change the device power level.
7129 	 * Also, only increment it on exit if it was decremented, ie. gotten,
7130 	 * here.
7131 	 */
7132 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7133 
7134 	mutex_enter(SD_MUTEX(un));
7135 
7136 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7137 	    un->un_ncmds_in_driver);
7138 
7139 	/*
7140 	 * If un_ncmds_in_driver is non-zero, commands are already being
7141 	 * processed in the driver; if the semaphore was not gotten here,
7142 	 * an open or close is being processed. In either case a concurrent
7143 	 * request to go to low power cannot be honored, so we need to
7144 	 * return failure.
7145 	 */
7146 	if ((level == SD_SPINDLE_OFF) &&
7147 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7148 		mutex_exit(SD_MUTEX(un));
7149 
7150 		if (got_semaphore_here != 0) {
7151 			sema_v(&un->un_semoclose);
7152 		}
7153 		SD_TRACE(SD_LOG_IO_PM, un,
7154 		    "sdpower: exit, device has queued cmds.\n");
7155 		return (DDI_FAILURE);
7156 	}
7157 
7158 	/*
7159 	 * If the state is OFFLINE, the disk is completely dead. In our case
7160 	 * we change the power level by sending commands to the disk, and
7161 	 * those would fail anyway, so just return here.
7162 	 *
7163 	 * Power changes to a device that's OFFLINE or SUSPENDED
7164 	 * are not allowed.
7165 	 */
7166 	if ((un->un_state == SD_STATE_OFFLINE) ||
7167 	    (un->un_state == SD_STATE_SUSPENDED)) {
7168 		mutex_exit(SD_MUTEX(un));
7169 
7170 		if (got_semaphore_here != 0) {
7171 			sema_v(&un->un_semoclose);
7172 		}
7173 		SD_TRACE(SD_LOG_IO_PM, un,
7174 		    "sdpower: exit, device is off-line.\n");
7175 		return (DDI_FAILURE);
7176 	}
7177 
7178 	/*
7179 	 * Change the device's state to indicate its power level
7180 	 * is being changed. Do this to prevent a power off in the
7181 	 * middle of commands, which is especially bad on devices
7182 	 * that are really powered off instead of just spun down.
7183 	 */
7184 	state_before_pm = un->un_state;
7185 	un->un_state = SD_STATE_PM_CHANGING;
7186 
7187 	mutex_exit(SD_MUTEX(un));
7188 
7189 	/*
7190 	 * If the "pm-capable" property is set to TRUE by the HBA driver,
7191 	 * bypass the following check; otherwise, check the log sense
7192 	 * information for this device.
7193 	 */
7194 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
7195 		/*
7196 		 * Get the log sense information to understand whether
7197 		 * the power-cycle counts have gone beyond the threshold.
7198 		 */
7199 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7200 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7201 
7202 		mutex_enter(SD_MUTEX(un));
7203 		log_sense_page = un->un_start_stop_cycle_page;
7204 		mutex_exit(SD_MUTEX(un));
7205 
7206 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7207 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7208 #ifdef	SDDEBUG
7209 		if (sd_force_pm_supported) {
7210 			/* Force a successful result */
7211 			rval = 0;
7212 		}
7213 #endif
7214 		if (rval != 0) {
7215 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7216 			    "Log Sense Failed\n");
7217 			kmem_free(log_page_data, log_page_size);
7218 			/* Cannot support power management on those drives */
7219 
7220 			if (got_semaphore_here != 0) {
7221 				sema_v(&un->un_semoclose);
7222 			}
7223 			/*
7224 			 * On exit put the state back to its original value
7225 			 * and broadcast to anyone waiting for the power
7226 			 * change completion.
7227 			 */
7228 			mutex_enter(SD_MUTEX(un));
7229 			un->un_state = state_before_pm;
7230 			cv_broadcast(&un->un_suspend_cv);
7231 			mutex_exit(SD_MUTEX(un));
7232 			SD_TRACE(SD_LOG_IO_PM, un,
7233 			    "sdpower: exit, Log Sense Failed.\n");
7234 			return (DDI_FAILURE);
7235 		}
7236 
7237 		/*
7238 		 * From the page data - Convert the essential information to
7239 		 * pm_trans_data
7240 		 */
7241 		maxcycles =
7242 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7243 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7244 
7245 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7246 
7247 		ncycles =
7248 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7249 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7250 
7251 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7252 
7253 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7254 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7255 			    log_page_data[8+i];
7256 		}
7257 
7258 		kmem_free(log_page_data, log_page_size);
7259 
7260 		/*
7261 		 * Call the pm_trans_check routine to get the OK from
7262 		 * the global policy.
7263 		 */
7264 
7265 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7266 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7267 
7268 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7269 #ifdef	SDDEBUG
7270 		if (sd_force_pm_supported) {
7271 			/* Force a successful result */
7272 			rval = 1;
7273 		}
7274 #endif
7275 		switch (rval) {
7276 		case 0:
7277 			/*
7278 			 * Not Ok to Power cycle or error in parameters passed
7279 			 * Would have given the advised time to consider power
7280 			 * cycle. Based on the new intvlp parameter we are
7281 			 * supposed to pretend we are busy so that pm framework
7282 			 * will never call our power entry point. Because of
7283 			 * that install a timeout handler and wait for the
7284 			 * recommended time to elapse so that power management
7285 			 * can be effective again.
7286 			 *
7287 			 * To effect this behavior, call pm_busy_component to
7288 			 * indicate to the framework this device is busy.
7289 			 * By not adjusting un_pm_count the rest of PM in
7290 			 * the driver will function normally, and independant
7291 			 * of this but because the framework is told the device
7292 			 * is busy it won't attempt powering down until it gets
7293 			 * a matching idle. The timeout handler sends this.
7294 			 * Note: sd_pm_entry can't be called here to do this
7295 			 * because sdpower may have been called as a result
7296 			 * of a call to pm_raise_power from within sd_pm_entry.
7297 			 *
7298 			 * If a timeout handler is already active then
7299 			 * don't install another.
7300 			 */
7301 			mutex_enter(&un->un_pm_mutex);
7302 			if (un->un_pm_timeid == NULL) {
7303 				un->un_pm_timeid =
7304 				    timeout(sd_pm_timeout_handler,
7305 				    un, intvlp * drv_usectohz(1000000));
7306 				mutex_exit(&un->un_pm_mutex);
7307 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7308 			} else {
7309 				mutex_exit(&un->un_pm_mutex);
7310 			}
7311 			if (got_semaphore_here != 0) {
7312 				sema_v(&un->un_semoclose);
7313 			}
7314 			/*
7315 			 * On exit put the state back to its original value
7316 			 * and broadcast to anyone waiting for the power
7317 			 * change completion.
7318 			 */
7319 			mutex_enter(SD_MUTEX(un));
7320 			un->un_state = state_before_pm;
7321 			cv_broadcast(&un->un_suspend_cv);
7322 			mutex_exit(SD_MUTEX(un));
7323 
7324 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7325 			    "trans check Failed, not ok to power cycle.\n");
7326 			return (DDI_FAILURE);
7327 
7328 		case -1:
7329 			if (got_semaphore_here != 0) {
7330 				sema_v(&un->un_semoclose);
7331 			}
7332 			/*
7333 			 * On exit put the state back to its original value
7334 			 * and broadcast to anyone waiting for the power
7335 			 * change completion.
7336 			 */
7337 			mutex_enter(SD_MUTEX(un));
7338 			un->un_state = state_before_pm;
7339 			cv_broadcast(&un->un_suspend_cv);
7340 			mutex_exit(SD_MUTEX(un));
7341 			SD_TRACE(SD_LOG_IO_PM, un,
7342 			    "sdpower: exit, trans check command Failed.\n");
7343 			return (DDI_FAILURE);
7344 		}
7345 	}
7346 
7347 	if (level == SD_SPINDLE_OFF) {
7348 		/*
7349 		 * Save the last state; if the STOP fails we need it
7350 		 * for restoring.
7351 		 */
7352 		mutex_enter(SD_MUTEX(un));
7353 		save_state = un->un_last_state;
7354 		/*
7355 		 * There must not be any cmds getting processed
7356 		 * in the driver when we get here. Power to the
7357 		 * device is potentially going off.
7358 		 */
7359 		ASSERT(un->un_ncmds_in_driver == 0);
7360 		mutex_exit(SD_MUTEX(un));
7361 
7362 		/*
7363 		 * For now suspend the device completely before spindle is
7364 		 * turned off
7365 		 */
7366 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7367 			if (got_semaphore_here != 0) {
7368 				sema_v(&un->un_semoclose);
7369 			}
7370 			/*
7371 			 * On exit put the state back to its original value
7372 			 * and broadcast to anyone waiting for the power
7373 			 * change completion.
7374 			 */
7375 			mutex_enter(SD_MUTEX(un));
7376 			un->un_state = state_before_pm;
7377 			cv_broadcast(&un->un_suspend_cv);
7378 			mutex_exit(SD_MUTEX(un));
7379 			SD_TRACE(SD_LOG_IO_PM, un,
7380 			    "sdpower: exit, PM suspend Failed.\n");
7381 			return (DDI_FAILURE);
7382 		}
7383 	}
7384 
7385 	/*
7386 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7387 	 * close, or strategy. Dump no longer uses this routine; it uses its
7388 	 * own code so it can be done in polled mode.
7389 	 */
7390 
7391 	medium_present = TRUE;
7392 
7393 	/*
7394 	 * When powering up, issue a TUR in case the device is at unit
7395 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7396 	 * a deadlock on un_pm_busy_cv will occur.
7397 	 */
7398 	if (level == SD_SPINDLE_ON) {
7399 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7400 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7401 	}
7402 
7403 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7404 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7405 
7406 	sval = sd_send_scsi_START_STOP_UNIT(un,
7407 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7408 	    SD_PATH_DIRECT);
7409 	/* Command failed, check for media present. */
7410 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7411 		medium_present = FALSE;
7412 	}
7413 
7414 	/*
7415 	 * The conditions of interest here are:
7416 	 *   if a spindle off with media present fails,
7417 	 *	then restore the state and return an error.
7418 	 *   else if a spindle on fails,
7419 	 *	then return an error (there's no state to restore).
7420 	 * In all other cases we setup for the new state
7421 	 * and return success.
7422 	 */
7423 	switch (level) {
7424 	case SD_SPINDLE_OFF:
7425 		if ((medium_present == TRUE) && (sval != 0)) {
7426 			/* The stop command from above failed */
7427 			rval = DDI_FAILURE;
7428 			/*
7429 			 * The stop command failed, and we have media
7430 			 * present. Put the level back by calling
7431 			 * sd_ddi_pm_resume() and set the state back to
7432 			 * its previous value.
7433 			 */
7434 			(void) sd_ddi_pm_resume(un);
7435 			mutex_enter(SD_MUTEX(un));
7436 			un->un_last_state = save_state;
7437 			mutex_exit(SD_MUTEX(un));
7438 			break;
7439 		}
7440 		/*
7441 		 * The stop command from above succeeded.
7442 		 */
7443 		if (un->un_f_monitor_media_state) {
7444 			/*
7445 			 * Terminate watch thread in case of removable media
7446 			 * devices going into low power state. This is as per
7447 			 * the requirements of the pm framework; otherwise commands
7448 			 * will be generated for the device (through watch
7449 			 * thread), even when the device is in low power state.
7450 			 */
7451 			mutex_enter(SD_MUTEX(un));
7452 			un->un_f_watcht_stopped = FALSE;
7453 			if (un->un_swr_token != NULL) {
7454 				opaque_t temp_token = un->un_swr_token;
7455 				un->un_f_watcht_stopped = TRUE;
7456 				un->un_swr_token = NULL;
7457 				mutex_exit(SD_MUTEX(un));
7458 				(void) scsi_watch_request_terminate(temp_token,
7459 				    SCSI_WATCH_TERMINATE_WAIT);
7460 			} else {
7461 				mutex_exit(SD_MUTEX(un));
7462 			}
7463 		}
7464 		break;
7465 
7466 	default:	/* The level requested is spindle on... */
7467 		/*
7468 		 * Legacy behavior: return success on a failed spinup
7469 		 * if there is no media in the drive.
7470 		 * Do this by looking at medium_present here.
7471 		 */
7472 		if ((sval != 0) && medium_present) {
7473 			/* The start command from above failed */
7474 			rval = DDI_FAILURE;
7475 			break;
7476 		}
7477 		/*
7478 		 * The start command from above succeeded.
7479 		 * Resume the device now that we have
7480 		 * started the disk.
7481 		 */
7482 		(void) sd_ddi_pm_resume(un);
7483 
7484 		/*
7485 		 * Resume the watch thread since it was suspended
7486 		 * when the device went into low power mode.
7487 		 */
7488 		if (un->un_f_monitor_media_state) {
7489 			mutex_enter(SD_MUTEX(un));
7490 			if (un->un_f_watcht_stopped == TRUE) {
7491 				opaque_t temp_token;
7492 
7493 				un->un_f_watcht_stopped = FALSE;
7494 				mutex_exit(SD_MUTEX(un));
7495 				temp_token = scsi_watch_request_submit(
7496 				    SD_SCSI_DEVP(un),
7497 				    sd_check_media_time,
7498 				    SENSE_LENGTH, sd_media_watch_cb,
7499 				    (caddr_t)dev);
7500 				mutex_enter(SD_MUTEX(un));
7501 				un->un_swr_token = temp_token;
7502 			}
7503 			mutex_exit(SD_MUTEX(un));
7504 		}
7505 	}
7506 	if (got_semaphore_here != 0) {
7507 		sema_v(&un->un_semoclose);
7508 	}
7509 	/*
7510 	 * On exit put the state back to its original value
7511 	 * and broadcast to anyone waiting for the power
7512 	 * change completion.
7513 	 */
7514 	mutex_enter(SD_MUTEX(un));
7515 	un->un_state = state_before_pm;
7516 	cv_broadcast(&un->un_suspend_cv);
7517 	mutex_exit(SD_MUTEX(un));
7518 
7519 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7520 
7521 	return (rval);
7522 }
7523 
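
/*
 * Illustrative sketch only (never compiled; SD_EXAMPLE_SKETCHES is not
 * defined): sdpower() above synchronizes with open/close using a
 * non-blocking semaphore acquire. sema_tryp(9F) returns non-zero only if
 * the semaphore was decremented, and only then must sema_v(9F) be issued
 * on every exit path. The shape of that pattern, with hypothetical names:
 */
#ifdef	SD_EXAMPLE_SKETCHES
static int
sd_example_try_locked_work(ksema_t *sp, int (*work)(void))
{
	int	got_it = sema_tryp(sp);	/* non-zero means we hold it */
	int	rv;

	if (got_it == 0) {
		return (-1);		/* open/close in progress; back off */
	}
	rv = work();
	sema_v(sp);			/* release only because we got it */
	return (rv);
}
#endif	/* SD_EXAMPLE_SKETCHES */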
7524 
7525 
7526 /*
7527  *    Function: sdattach
7528  *
7529  * Description: Driver's attach(9e) entry point function.
7530  *
7531  *   Arguments: devi - opaque device info handle
7532  *		cmd  - attach  type
7533  *
7534  * Return Code: DDI_SUCCESS
7535  *		DDI_FAILURE
7536  *
7537  *     Context: Kernel thread context
7538  */
7539 
7540 static int
7541 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7542 {
7543 	switch (cmd) {
7544 	case DDI_ATTACH:
7545 		return (sd_unit_attach(devi));
7546 	case DDI_RESUME:
7547 		return (sd_ddi_resume(devi));
7548 	default:
7549 		break;
7550 	}
7551 	return (DDI_FAILURE);
7552 }
7553 
7554 
7555 /*
7556  *    Function: sddetach
7557  *
7558  * Description: Driver's detach(9E) entry point function.
7559  *
7560  *   Arguments: devi - opaque device info handle
7561  *		cmd  - detach  type
7562  *
7563  * Return Code: DDI_SUCCESS
7564  *		DDI_FAILURE
7565  *
7566  *     Context: Kernel thread context
7567  */
7568 
7569 static int
7570 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7571 {
7572 	switch (cmd) {
7573 	case DDI_DETACH:
7574 		return (sd_unit_detach(devi));
7575 	case DDI_SUSPEND:
7576 		return (sd_ddi_suspend(devi));
7577 	default:
7578 		break;
7579 	}
7580 	return (DDI_FAILURE);
7581 }
7582 
7583 
7584 /*
7585  *     Function: sd_sync_with_callback
7586  *
7587  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7588  *		 state while the callback routine is active.
7589  *
7590  *    Arguments: un: softstate structure for the instance
7591  *
7592  *	Context: Kernel thread context
7593  */
7594 
7595 static void
7596 sd_sync_with_callback(struct sd_lun *un)
7597 {
7598 	ASSERT(un != NULL);
7599 
7600 	mutex_enter(SD_MUTEX(un));
7601 
7602 	ASSERT(un->un_in_callback >= 0);
7603 
7604 	while (un->un_in_callback > 0) {
7605 		mutex_exit(SD_MUTEX(un));
7606 		delay(2);
7607 		mutex_enter(SD_MUTEX(un));
7608 	}
7609 
7610 	mutex_exit(SD_MUTEX(un));
7611 }
7612 
7613 /*
7614  *    Function: sd_unit_attach
7615  *
7616  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7617  *		the soft state structure for the device and performs
7618  *		all necessary structure and device initializations.
7619  *
7620  *   Arguments: devi: the system's dev_info_t for the device.
7621  *
7622  * Return Code: DDI_SUCCESS if attach is successful.
7623  *		DDI_FAILURE if any part of the attach fails.
7624  *
7625  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7626  *		Kernel thread context only.  Can sleep.
7627  */
7628 
7629 static int
7630 sd_unit_attach(dev_info_t *devi)
7631 {
7632 	struct	scsi_device	*devp;
7633 	struct	sd_lun		*un;
7634 	char			*variantp;
7635 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7636 	int	instance;
7637 	int	rval;
7638 	int	wc_enabled;
7639 	uint64_t	capacity;
7640 	uint_t		lbasize;
7641 
7642 	/*
7643 	 * Retrieve the target driver's private data area. This was set
7644 	 * up by the HBA.
7645 	 */
7646 	devp = ddi_get_driver_private(devi);
7647 
7648 	/*
7649 	 * Since we have no idea what state things were left in by the last
7650 	 * user of the device, set up some 'default' settings, ie. turn 'em
7651 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7652 	 * Do this before the scsi_probe, which sends an inquiry.
7653 	 * This is a fix for bug (4430280).
7654 	 * Of special importance is wide-xfer. The drive could have been left
7655 	 * in wide transfer mode by the last driver to communicate with it,
7656 	 * this includes us. If that's the case, and if the following is not
7657 	 * setup properly or we don't re-negotiate with the drive prior to
7658 	 * transferring data to/from the drive, it causes bus parity errors,
7659 	 * data overruns, and unexpected interrupts. This first occurred when
7660 	 * the fix for bug (4378686) was made.
7661 	 */
7662 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7663 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7664 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7665 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7666 
7667 	/*
7668 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7669 	 * This call will allocate and fill in the scsi_inquiry structure
7670 	 * and point the sd_inq member of the scsi_device structure to it.
7671 	 * If the attach succeeds, then this memory will not be de-allocated
7672 	 * (via scsi_unprobe()) until the instance is detached.
7673 	 */
7674 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7675 		goto probe_failed;
7676 	}
7677 
7678 	/*
7679 	 * Check the device type as specified in the inquiry data and
7680 	 * claim it if it is of a type that we support.
7681 	 */
7682 	switch (devp->sd_inq->inq_dtype) {
7683 	case DTYPE_DIRECT:
7684 		break;
7685 	case DTYPE_RODIRECT:
7686 		break;
7687 	case DTYPE_OPTICAL:
7688 		break;
7689 	case DTYPE_NOTPRESENT:
7690 	default:
7691 		/* Unsupported device type; fail the attach. */
7692 		goto probe_failed;
7693 	}
7694 
7695 	/*
7696 	 * Allocate the soft state structure for this unit.
7697 	 *
7698 	 * We rely upon this memory being set to all zeroes by
7699 	 * ddi_soft_state_zalloc().  We assume that any member of the
7700 	 * soft state structure that is not explicitly initialized by
7701 	 * this routine will have a value of zero.
7702 	 */
7703 	instance = ddi_get_instance(devp->sd_dev);
7704 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7705 		goto probe_failed;
7706 	}
7707 
7708 	/*
7709 	 * Retrieve a pointer to the newly-allocated soft state.
7710 	 *
7711 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7712 	 * was successful, unless something has gone horribly wrong and the
7713 	 * ddi's soft state internals are corrupt (in which case it is
7714 	 * probably better to halt here than just fail the attach....)
7715 	 */
7716 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7717 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7718 		    instance);
7719 		/*NOTREACHED*/
7720 	}
7721 
7722 	/*
7723 	 * Link the back ptr of the driver soft state to the scsi_device
7724 	 * struct for this lun.
7725 	 * Save a pointer to the softstate in the driver-private area of
7726 	 * the scsi_device struct.
7727 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7728 	 * we first set un->un_sd below.
7729 	 */
7730 	un->un_sd = devp;
7731 	devp->sd_private = (opaque_t)un;
7732 
7733 	/*
7734 	 * The following must be after devp is stored in the soft state struct.
7735 	 */
7736 #ifdef SDDEBUG
7737 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7738 	    "%s_unit_attach: un:0x%p instance:%d\n",
7739 	    ddi_driver_name(devi), un, instance);
7740 #endif
7741 
7742 	/*
7743 	 * Set up the device type and node type (for the minor nodes).
7744 	 * By default we assume that the device can at least support the
7745 	 * Common Command Set. Call it a CD-ROM if it reports itself
7746 	 * as a RODIRECT device.
7747 	 */
7748 	switch (devp->sd_inq->inq_dtype) {
7749 	case DTYPE_RODIRECT:
7750 		un->un_node_type = DDI_NT_CD_CHAN;
7751 		un->un_ctype	 = CTYPE_CDROM;
7752 		break;
7753 	case DTYPE_OPTICAL:
7754 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7755 		un->un_ctype	 = CTYPE_ROD;
7756 		break;
7757 	default:
7758 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7759 		un->un_ctype	 = CTYPE_CCS;
7760 		break;
7761 	}
7762 
7763 	/*
7764 	 * Try to read the interconnect type from the HBA.
7765 	 *
7766 	 * Note: This driver is currently compiled as two binaries, a parallel
7767 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7768 	 * differences are determined at compile time. In the future a single
7769 	 * binary will be provided and the interconnect type will be used to
7770 	 * differentiate between fibre and parallel scsi behaviors. At that time
7771 	 * it will be necessary for all fibre channel HBAs to support this
7772 	 * property.
7773 	 *
7774 	 * Set un_f_is_fibre to TRUE (default fibre).
7775 	 */
7776 	un->un_f_is_fibre = TRUE;
7777 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7778 	case INTERCONNECT_SSA:
7779 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7780 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7781 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7782 		break;
7783 	case INTERCONNECT_PARALLEL:
7784 		un->un_f_is_fibre = FALSE;
7785 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7786 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7787 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7788 		break;
7789 	case INTERCONNECT_FIBRE:
7790 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7791 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7792 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7793 		break;
7794 	case INTERCONNECT_FABRIC:
7795 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7796 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7797 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7798 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7799 		break;
7800 	default:
7801 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7802 		/*
7803 		 * The HBA does not support the "interconnect-type" property
7804 		 * (or did not provide a recognized type).
7805 		 *
7806 		 * Note: This will be obsoleted when a single fibre channel
7807 		 * and parallel scsi driver is delivered. In the meantime the
7808 	 * interconnect type will be set to the platform default. If that
7809 		 * type is not parallel SCSI, it means that we should be
7810 		 * assuming "ssd" semantics. However, here this also means that
7811 		 * the FC HBA is not supporting the "interconnect-type" property
7812 		 * like we expect it to, so log this occurrence.
7813 		 */
7814 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7815 		if (!SD_IS_PARALLEL_SCSI(un)) {
7816 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7817 			    "sd_unit_attach: un:0x%p Assuming "
7818 			    "INTERCONNECT_FIBRE\n", un);
7819 		} else {
7820 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7821 			    "sd_unit_attach: un:0x%p Assuming "
7822 			    "INTERCONNECT_PARALLEL\n", un);
7823 			un->un_f_is_fibre = FALSE;
7824 		}
7825 #else
7826 		/*
7827 		 * Note: This source will be implemented when a single fibre
7828 		 * channel and parallel scsi driver is delivered. The default
7829 		 * will be to assume that if a device does not support the
7830 		 * "interconnect-type" property it is a parallel SCSI HBA and
7831 		 * we will set the interconnect type for parallel scsi.
7832 		 */
7833 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7834 		un->un_f_is_fibre = FALSE;
7835 #endif
7836 		break;
7837 	}
7838 
7839 	if (un->un_f_is_fibre == TRUE) {
7840 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7841 			SCSI_VERSION_3) {
7842 			switch (un->un_interconnect_type) {
7843 			case SD_INTERCONNECT_FIBRE:
7844 			case SD_INTERCONNECT_SSA:
7845 				un->un_node_type = DDI_NT_BLOCK_WWN;
7846 				break;
7847 			default:
7848 				break;
7849 			}
7850 		}
7851 	}
7852 
7853 	/*
7854 	 * Initialize the Request Sense command for the target
7855 	 */
7856 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7857 		goto alloc_rqs_failed;
7858 	}
7859 
7860 	/*
7861 	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for Sparc,
7862 	 * which has separate binaries for sd and ssd.
7863 	 *
7864 	 * x86 has one binary, so un_retry_count is set based on the
7865 	 * interconnect type. These hardcoded values will go away when
7866 	 * Sparc uses one binary for sd and ssd, and they need to match
7867 	 * SD_RETRY_COUNT in sddef.h.
7868 	 * The value used is based on the interconnect type:
7869 	 * fibre = 3, parallel = 5.
7870 	 */
7871 #if defined(__i386) || defined(__amd64)
7872 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7873 #else
7874 	un->un_retry_count = SD_RETRY_COUNT;
7875 #endif
7876 
7877 	/*
7878 	 * Set the per disk retry count to the default number of retries
7879 	 * for disks and CDROMs. This value can be overridden by the
7880 	 * disk property list or an entry in sd.conf.
7881 	 */
7882 	un->un_notready_retry_count =
7883 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7884 			: DISK_NOT_READY_RETRY_COUNT(un);
7885 
7886 	/*
7887 	 * Set the busy retry count to the default value of un_retry_count.
7888 	 * This can be overridden by entries in sd.conf or the device
7889 	 * config table.
7890 	 */
7891 	un->un_busy_retry_count = un->un_retry_count;
7892 
7893 	/*
7894 	 * Init the reset threshold for retries.  This number determines
7895 	 * how many retries must be performed before a reset can be issued
7896 	 * (for certain error conditions). This can be overridden by entries
7897 	 * in sd.conf or the device config table.
7898 	 */
7899 	un->un_reset_retry_count = (un->un_retry_count / 2);
7900 
7901 	/*
7902 	 * Set the victim_retry_count to twice the default un_retry_count.
7903 	 */
7904 	un->un_victim_retry_count = (2 * un->un_retry_count);
7905 
7906 	/*
7907 	 * Set the reservation release timeout to the default value of
7908 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7909 	 * device config table.
7910 	 */
7911 	un->un_reserve_release_time = 5;
7912 
7913 	/*
7914 	 * Set up the default maximum transfer size. Note that this may
7915 	 * get updated later in the attach, when setting up default wide
7916 	 * operations for disks.
7917 	 */
7918 #if defined(__i386) || defined(__amd64)
7919 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7920 #else
7921 	un->un_max_xfer_size = (uint_t)maxphys;
7922 #endif
7923 
7924 	/*
7925 	 * Get "allow bus device reset" property (defaults to "enabled" if
7926 	 * the property was not defined). This is to disable bus resets for
7927 	 * certain kinds of error recovery. Note: In the future when a run-time
7928 	 * fibre check is available the soft state flag should default to
7929 	 * enabled.
7930 	 */
7931 	if (un->un_f_is_fibre == TRUE) {
7932 		un->un_f_allow_bus_device_reset = TRUE;
7933 	} else {
7934 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7935 			"allow-bus-device-reset", 1) != 0) {
7936 			un->un_f_allow_bus_device_reset = TRUE;
7937 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7938 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
7939 				un);
7940 		} else {
7941 			un->un_f_allow_bus_device_reset = FALSE;
7942 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7943 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
7944 				un);
7945 		}
7946 	}
7947 
7948 	/*
7949 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7950 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7951 	 *
7952 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7953 	 * property. The new "variant" property with a value of "atapi" has been
7954 	 * introduced so that future 'variants' of standard SCSI behavior (like
7955 	 * atapi) could be specified by the underlying HBA drivers by supplying
7956 	 * a new value for the "variant" property, instead of having to define a
7957 	 * new property.
7958 	 */
7959 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7960 		un->un_f_cfg_is_atapi = TRUE;
7961 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7962 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7963 	}
7964 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7965 	    &variantp) == DDI_PROP_SUCCESS) {
7966 		if (strcmp(variantp, "atapi") == 0) {
7967 			un->un_f_cfg_is_atapi = TRUE;
7968 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7969 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7970 		}
7971 		ddi_prop_free(variantp);
7972 	}
7973 
7974 	un->un_cmd_timeout	= SD_IO_TIME;
7975 
7976 	/* Info on current states, statuses, etc. (Updated frequently) */
7977 	un->un_state		= SD_STATE_NORMAL;
7978 	un->un_last_state	= SD_STATE_NORMAL;
7979 
7980 	/* Control & status info for command throttling */
7981 	un->un_throttle		= sd_max_throttle;
7982 	un->un_saved_throttle	= sd_max_throttle;
7983 	un->un_min_throttle	= sd_min_throttle;
7984 
7985 	if (un->un_f_is_fibre == TRUE) {
7986 		un->un_f_use_adaptive_throttle = TRUE;
7987 	} else {
7988 		un->un_f_use_adaptive_throttle = FALSE;
7989 	}
7990 
7991 	/* Removable media support. */
7992 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7993 	un->un_mediastate		= DKIO_NONE;
7994 	un->un_specified_mediastate	= DKIO_NONE;
7995 
7996 	/* CVs for suspend/resume (PM or DR) */
7997 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7998 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7999 
8000 	/* Power management support. */
8001 	un->un_power_level = SD_SPINDLE_UNINIT;
8002 
8003 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8004 	un->un_f_wcc_inprog = 0;
8005 
8006 	/*
8007 	 * The open/close semaphore is used to serialize threads executing
8008 	 * in the driver's open & close entry point routines for a given
8009 	 * instance.
8010 	 */
8011 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
8012 
8013 	/*
8014 	 * The conf file entry and softstate variable are a forceful override,
8015 	 * meaning a non-zero value must be entered to change the default.
8016 	 */
8017 	un->un_f_disksort_disabled = FALSE;
8018 
8019 	/*
8020 	 * Retrieve the properties from the static driver table or the driver
8021 	 * configuration file (.conf) for this unit and update the soft state
8022 	 * for the device as needed for the indicated properties.
8023 	 * Note: the property configuration needs to occur here as some of the
8024 	 * following routines may have dependencies on soft state flags set
8025 	 * as part of the driver property configuration.
8026 	 */
8027 	sd_read_unit_properties(un);
8028 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8029 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8030 
8031 	/*
8032 	 * A device is treated as hotpluggable only if it has the
8033 	 * "hotpluggable" property. Otherwise, it is regarded as
8034 	 * non-hotpluggable.
8035 	 */
8036 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8037 	    -1) != -1) {
8038 		un->un_f_is_hotpluggable = TRUE;
8039 	}
8040 
8041 	/*
8042 	 * Set the unit's attributes (flags) according to "hotpluggable" and
8043 	 * the RMB bit in the INQUIRY data.
8044 	 */
8045 	sd_set_unit_attributes(un, devi);
8046 
8047 	/*
8048 	 * By default, we mark the capacity, lbasize, and geometry
8049 	 * as invalid. Only if we successfully read a valid capacity
8050 	 * will we update the un_blockcount and un_tgt_blocksize with the
8051 	 * valid values (the geometry will be validated later).
8052 	 */
8053 	un->un_f_blockcount_is_valid	= FALSE;
8054 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8055 	un->un_f_geometry_is_valid	= FALSE;
8056 
8057 	/*
8058 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8059 	 * otherwise.
8060 	 */
8061 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8062 	un->un_blockcount = 0;
8063 
8064 	/*
8065 	 * Set up the per-instance info needed to determine the correct
8066 	 * CDBs and other info for issuing commands to the target.
8067 	 */
8068 	sd_init_cdb_limits(un);
8069 
8070 	/*
8071 	 * Set up the IO chains to use, based upon the target type.
8072 	 */
8073 	if (un->un_f_non_devbsize_supported) {
8074 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8075 	} else {
8076 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8077 	}
8078 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8079 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8080 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8081 
8082 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8083 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8084 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8085 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
8086 
8087 
8088 	if (ISCD(un)) {
8089 		un->un_additional_codes = sd_additional_codes;
8090 	} else {
8091 		un->un_additional_codes = NULL;
8092 	}
8093 
8094 	/*
8095 	 * Create the kstats here so they can be available for attach-time
8096 	 * routines that send commands to the unit (either polled or via
8097 	 * sd_send_scsi_cmd).
8098 	 *
8099 	 * Note: This is a critical sequence that needs to be maintained:
8100 	 *	1) Instantiate the kstats here, before any routines using the
8101 	 *	   iopath (i.e. sd_send_scsi_cmd).
8102 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8103 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8104 	 *	   sd_register_devid(), and sd_cache_control().
8105 	 */
8106 
8107 	un->un_stats = kstat_create(sd_label, instance,
8108 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8109 	if (un->un_stats != NULL) {
8110 		un->un_stats->ks_lock = SD_MUTEX(un);
8111 		kstat_install(un->un_stats);
8112 	}
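
	/*
	 * For reference, a minimal sketch of how an I/O kstat such as the
	 * one created above is typically updated in a driver's I/O path
	 * (simplified; the actual updates occur in the queueing code while
	 * holding SD_MUTEX, which is the ks_lock assigned above):
	 *
	 *	kstat_io_t *kip = KSTAT_IO_PTR(un->un_stats);
	 *	kstat_waitq_enter(kip);		(buf placed on wait queue)
	 *	kstat_waitq_to_runq(kip);	(command issued to target)
	 *	kstat_runq_exit(kip);		(command completed)
	 */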
8113 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8114 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8115 
8116 	sd_create_errstats(un, instance);
8117 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8118 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8119 
8120 	/*
8121 	 * The following if/else code was relocated here from below as part
8122 	 * of the fix for bug (4430280). However, with the default setup added
8123 	 * on entry to this routine, it's no longer absolutely necessary for
8124 	 * this to be before the call to sd_spin_up_unit.
8125 	 */
8126 	if (SD_IS_PARALLEL_SCSI(un)) {
8127 		/*
8128 		 * If SCSI-2 tagged queueing is supported by the target
8129 		 * and by the host adapter then we will enable it.
8130 		 */
8131 		un->un_tagflags = 0;
8132 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8133 		    (devp->sd_inq->inq_cmdque) &&
8134 		    (un->un_f_arq_enabled == TRUE)) {
8135 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8136 			    1, 1) == 1) {
8137 				un->un_tagflags = FLAG_STAG;
8138 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8139 				    "sd_unit_attach: un:0x%p tag queueing "
8140 				    "enabled\n", un);
8141 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8142 			    "untagged-qing", 0) == 1) {
8143 				un->un_f_opt_queueing = TRUE;
8144 				un->un_saved_throttle = un->un_throttle =
8145 				    min(un->un_throttle, 3);
8146 			} else {
8147 				un->un_f_opt_queueing = FALSE;
8148 				un->un_saved_throttle = un->un_throttle = 1;
8149 			}
8150 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8151 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8152 			/* The Host Adapter supports internal queueing. */
8153 			un->un_f_opt_queueing = TRUE;
8154 			un->un_saved_throttle = un->un_throttle =
8155 			    min(un->un_throttle, 3);
8156 		} else {
8157 			un->un_f_opt_queueing = FALSE;
8158 			un->un_saved_throttle = un->un_throttle = 1;
8159 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8160 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8161 		}
8162 
8163 
8164 		/* Set up or tear down default wide operations for disks */
8165 
8166 		/*
8167 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8168 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8169 		 * system and be set to different values. In the future this
8170 		 * code may need to be updated when the ssd module is
8171 		 * obsoleted and removed from the system. (4299588)
8172 		 */
8173 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8174 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8175 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8176 			    1, 1) == 1) {
8177 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8178 				    "sd_unit_attach: un:0x%p Wide Transfer "
8179 				    "enabled\n", un);
8180 			}
8181 
8182 			/*
8183 			 * If tagged queuing has also been enabled, then
8184 			 * enable large xfers
8185 			 */
8186 			if (un->un_saved_throttle == sd_max_throttle) {
8187 				un->un_max_xfer_size =
8188 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8189 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8190 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8191 				    "sd_unit_attach: un:0x%p max transfer "
8192 				    "size=0x%x\n", un, un->un_max_xfer_size);
8193 			}
8194 		} else {
8195 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8196 			    0, 1) == 1) {
8197 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8198 				    "sd_unit_attach: un:0x%p "
8199 				    "Wide Transfer disabled\n", un);
8200 			}
8201 		}
8202 	} else {
8203 		un->un_tagflags = FLAG_STAG;
8204 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8205 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8206 	}
8207 
8208 	/*
8209 	 * If this target supports LUN reset, try to enable it.
8210 	 */
8211 	if (un->un_f_lun_reset_enabled) {
8212 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8213 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8214 			    "un:0x%p lun_reset capability set\n", un);
8215 		} else {
8216 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8217 			    "un:0x%p lun-reset capability not set\n", un);
8218 		}
8219 	}
8220 
8221 	/*
8222 	 * At this point in the attach, we have enough info in the
8223 	 * soft state to be able to issue commands to the target.
8224 	 *
8225 	 * All command paths used below MUST issue their commands as
8226 	 * SD_PATH_DIRECT. This is important as intermediate layers
8227 	 * are not all initialized yet (such as PM).
8228 	 */
8229 
8230 	/*
8231 	 * Send a TEST UNIT READY command to the device. This should clear
8232 	 * any outstanding UNIT ATTENTION that may be present.
8233 	 *
8234 	 * Note: Don't check for success; just track whether there is a
8235 	 * reservation. This is a throw-away command to clear unit attentions.
8236 	 *
8237 	 * Note: This MUST be the first command issued to the target during
8238 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8239 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8240 	 * with attempts at spinning up a device with no media.
8241 	 */
8242 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8243 		reservation_flag = SD_TARGET_IS_RESERVED;
8244 	}
8245 
8246 	/*
8247 	 * If the device is NOT a removable media device, attempt to spin
8248 	 * it up (using the START_STOP_UNIT command) and read its capacity
8249 	 * (using the READ CAPACITY command).  Note, however, that either
8250 	 * of these could fail and in some cases we would continue with
8251 	 * the attach despite the failure (see below).
8252 	 */
8253 	if (un->un_f_descr_format_supported) {
8254 		switch (sd_spin_up_unit(un)) {
8255 		case 0:
8256 			/*
8257 			 * Spin-up was successful; now try to read the
8258 			 * capacity.  If successful then save the results
8259 			 * and mark the capacity & lbasize as valid.
8260 			 */
8261 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8262 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8263 
8264 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8265 			    &lbasize, SD_PATH_DIRECT)) {
8266 			case 0: {
8267 				if (capacity > DK_MAX_BLOCKS) {
8268 #ifdef _LP64
8269 					/*
8270 					 * Enable descriptor format sense data
8271 					 * so that we can get 64 bit sense
8272 					 * data fields.
8273 					 */
8274 					sd_enable_descr_sense(un);
8275 #else
8276 					/* 32-bit kernels can't handle this */
8277 					scsi_log(SD_DEVINFO(un),
8278 					    sd_label, CE_WARN,
8279 					    "disk has %llu blocks, which "
8280 					    "is too large for a 32-bit "
8281 					    "kernel", capacity);
8282 					goto spinup_failed;
8283 #endif
8284 				}
8285 
8286 				/*
8287 				 * It is not necessary here to check whether
8288 				 * the device capacity exceeds what the max
8289 				 * HBA CDB can support, because
8290 				 * sd_send_scsi_READ_CAPACITY retrieves the
8291 				 * capacity via a USCSI command, which is
8292 				 * itself constrained by the max HBA CDB.
8293 				 * sd_send_scsi_READ_CAPACITY returns EINVAL
8294 				 * when a bigger CDB than the supported CDB
8295 				 * length would be required; that case is
8296 				 * handled below in "case EINVAL".
8297 				 */
8298 
8299 				/*
8300 				 * The following relies on
8301 				 * sd_send_scsi_READ_CAPACITY never
8302 				 * returning 0 for capacity and/or lbasize.
8303 				 */
8304 				sd_update_block_info(un, lbasize, capacity);
8305 
8306 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8307 				    "sd_unit_attach: un:0x%p capacity = %ld "
8308 				    "blocks; lbasize= %ld.\n", un,
8309 				    un->un_blockcount, un->un_tgt_blocksize);
8310 
8311 				break;
8312 			}
8313 			case EINVAL:
8314 				/*
8315 				 * In the case where the max-cdb-length property
8316 				 * is smaller than the required CDB length for
8317 				 * a SCSI device, a target driver can fail to
8318 				 * attach to that device.
8319 				 */
8320 				scsi_log(SD_DEVINFO(un),
8321 				    sd_label, CE_WARN,
8322 				    "disk capacity is too large "
8323 				    "for current cdb length");
8324 				goto spinup_failed;
8325 			case EACCES:
8326 				/*
8327 				 * Should never get here if the spin-up
8328 				 * succeeded, but code it in anyway.
8329 				 * From here, just continue with the attach...
8330 				 */
8331 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8332 				    "sd_unit_attach: un:0x%p "
8333 				    "sd_send_scsi_READ_CAPACITY "
8334 				    "returned reservation conflict\n", un);
8335 				reservation_flag = SD_TARGET_IS_RESERVED;
8336 				break;
8337 			default:
8338 				/*
8339 				 * Likewise, should never get here if the
8340 				 * spin-up succeeded. Just continue with
8341 				 * the attach...
8342 				 */
8343 				break;
8344 			}
8345 			break;
8346 		case EACCES:
8347 			/*
8348 			 * Device is reserved by another host.  In this case
8349 			 * we could not spin it up or read the capacity, but
8350 			 * we continue with the attach anyway.
8351 			 */
8352 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8353 			    "sd_unit_attach: un:0x%p spin-up reservation "
8354 			    "conflict.\n", un);
8355 			reservation_flag = SD_TARGET_IS_RESERVED;
8356 			break;
8357 		default:
8358 			/* Fail the attach if the spin-up failed. */
8359 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8360 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8361 			goto spinup_failed;
8362 		}
8363 	}
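
	/*
	 * For reference, sd_update_block_info() conceptually records the
	 * READ CAPACITY results along these lines (simplified sketch, not
	 * the actual implementation):
	 *
	 *	un->un_tgt_blocksize = lbasize;
	 *	un->un_f_tgt_blocksize_is_valid = TRUE;
	 *	un->un_blockcount = capacity;
	 *	un->un_f_blockcount_is_valid = TRUE;
	 */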
8364 
8365 	/*
8366 	 * Check to see if this is an MMC drive
8367 	 */
8368 	if (ISCD(un)) {
8369 		sd_set_mmc_caps(un);
8370 	}
8371 
8372 	/*
8373 	 * Create the minor nodes for the device.
8374 	 * Note: If we want to support fdisk on both sparc and intel, this will
8375 	 * have to separate out the notion that VTOC8 is always sparc, and
8376 	 * VTOC16 is always intel (though these can be the defaults).  The vtoc
8377 	 * type will have to be determined at run-time, and the fdisk
8378 	 * partitioning will have to have been read & set up before we
8379 	 * create the minor nodes. (any other inits (such as kstats) that
8380 	 * also ought to be done before creating the minor nodes?) (Doesn't
8381 	 * setting up the minor nodes kind of imply that we're ready to
8382 	 * handle an open from userland?)
8383 	 */
8384 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8385 		goto create_minor_nodes_failed;
8386 	}
8387 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8388 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8389 
8390 	/*
8391 	 * Add a zero-length attribute to tell the world we support
8392 	 * kernel ioctls (for layered drivers)
8393 	 */
8394 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8395 	    DDI_KERNEL_IOCTL, NULL, 0);
8396 
8397 	/*
8398 	 * Add a boolean property to tell the world we support
8399 	 * the B_FAILFAST flag (for layered drivers)
8400 	 */
8401 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8402 	    "ddi-failfast-supported", NULL, 0);
8403 
8404 	/*
8405 	 * Initialize power management
8406 	 */
8407 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8408 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8409 	sd_setup_pm(un, devi);
8410 	if (un->un_f_pm_is_enabled == FALSE) {
8411 		/*
8412 		 * For performance, point to a jump table that does
8413 		 * not include pm.
8414 		 * The direct and priority chains don't change with PM.
8415 		 *
8416 		 * Note: this is currently done based on individual device
8417 		 * capabilities. When an interface for determining system
8418 		 * power enabled state becomes available, or when additional
8419 		 * layers are added to the command chain, these values will
8420 		 * have to be re-evaluated for correctness.
8421 		 */
8422 		if (un->un_f_non_devbsize_supported) {
8423 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8424 		} else {
8425 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8426 		}
8427 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8428 	}
8429 
8430 	/*
8431 	 * This property is set to 0 by HA software to avoid retries
8432 	 * on a reserved disk. (The preferred property name is
8433 	 * "retry-on-reservation-conflict") (1189689)
8434 	 *
8435 	 * Note: The use of a global here can have unintended consequences. A
8436 	 * per instance variable is preferable to match the capabilities of
8437 	 * different underlying hba's (4402600)
8438 	 */
8439 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8440 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8441 	    sd_retry_on_reservation_conflict);
8442 	if (sd_retry_on_reservation_conflict != 0) {
8443 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8444 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8445 		    sd_retry_on_reservation_conflict);
8446 	}
8447 
8448 	/* Set up options for QFULL handling. */
8449 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8450 	    "qfull-retries", -1)) != -1) {
8451 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8452 		    rval, 1);
8453 	}
8454 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8455 	    "qfull-retry-interval", -1)) != -1) {
8456 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8457 		    rval, 1);
8458 	}
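
	/*
	 * Both values may be supplied from the driver's .conf file, e.g.
	 * (illustrative values only):
	 *
	 *	qfull-retries=20;
	 *	qfull-retry-interval=100;
	 */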
8459 
8460 	/*
8461 	 * This just prints a message that announces the existence of the
8462 	 * device. The message is always printed in the system logfile, but
8463 	 * only appears on the console if the system is booted with the
8464 	 * -v (verbose) argument.
8465 	 */
8466 	ddi_report_dev(devi);
8467 
8468 	/*
8469 	 * The framework calls driver attach routines single-threaded
8470 	 * for a given instance.  However, we still acquire SD_MUTEX here
8471 	 * because it is required for calling the sd_validate_geometry()
8472 	 * and sd_register_devid() functions.
8473 	 */
8474 	mutex_enter(SD_MUTEX(un));
8475 	un->un_f_geometry_is_valid = FALSE;
8476 	un->un_mediastate = DKIO_NONE;
8477 	un->un_reserved = -1;
8478 
8479 	/*
8480 	 * Read and validate the device's geometry (i.e., disk label).
8481 	 * A new unformatted drive will not have a valid geometry, but
8482 	 * the driver needs to successfully attach to this device so
8483 	 * the drive can be formatted via ioctls.
8484 	 */
8485 	if ((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8486 	    ENOTSUP) &&
8487 	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8488 		/*
8489 		 * We found a small disk with an EFI label on it;
8490 		 * we need to fix up the minor nodes accordingly.
8491 		 */
8492 		ddi_remove_minor_node(devi, "h");
8493 		ddi_remove_minor_node(devi, "h,raw");
8494 		(void) ddi_create_minor_node(devi, "wd",
8495 		    S_IFBLK,
8496 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8497 		    un->un_node_type, NULL);
8498 		(void) ddi_create_minor_node(devi, "wd,raw",
8499 		    S_IFCHR,
8500 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8501 		    un->un_node_type, NULL);
8502 	}
8503 
8504 	/*
8505 	 * Read and initialize the devid for the unit.
8506 	 */
8507 	ASSERT(un->un_errstats != NULL);
8508 	if (un->un_f_devid_supported) {
8509 		sd_register_devid(un, devi, reservation_flag);
8510 	}
8511 	mutex_exit(SD_MUTEX(un));
8512 
8513 #if (defined(__fibre))
8514 	/*
8515 	 * Register callbacks for fibre only.  You can't do this solely
8516 	 * on the basis of the devid_type because this is hba specific.
8517 	 * We need to query our hba capabilities to find out whether to
8518 	 * register or not.
8519 	 */
8520 	if (un->un_f_is_fibre) {
8521 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8522 			sd_init_event_callbacks(un);
8523 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8524 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8525 		}
8526 	}
8527 #endif
8528 
8529 	if (un->un_f_opt_disable_cache == TRUE) {
8530 		/*
8531 		 * Disable both read cache and write cache.  This is
8532 		 * the historic behavior of the keywords in the config file.
8533 		 */
8534 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8535 		    0) {
8536 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8537 			    "sd_unit_attach: un:0x%p Could not disable "
8538 			    "caching", un);
8539 			goto devid_failed;
8540 		}
8541 	}
8542 
8543 	/*
8544 	 * Check the value of the WCE bit now and
8545 	 * set un_f_write_cache_enabled accordingly.
8546 	 */
8547 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8548 	mutex_enter(SD_MUTEX(un));
8549 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8550 	mutex_exit(SD_MUTEX(un));
8551 
8552 	/*
8553 	 * Set the pstat and error stat values here, so data obtained by the
8554 	 * preceding attach-time routines is available.
8555 	 *
8556 	 * Note: This is a critical sequence that needs to be maintained:
8557 	 *	1) Instantiate the kstats before any routines using the iopath
8558 	 *	   (i.e. sd_send_scsi_cmd).
8559 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8560 	 *	   stats (sd_set_pstats) here, following sd_validate_geometry(),
8561 	 *	   sd_register_devid(), and sd_cache_control().
8562 	 */
8563 	if (un->un_f_pkstats_enabled) {
8564 		sd_set_pstats(un);
8565 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8566 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8567 	}
8568 
8569 	sd_set_errstats(un);
8570 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8571 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8572 
8573 	/*
8574 	 * Find out what type of reservation this disk supports.
8575 	 */
8576 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8577 	case 0:
8578 		/*
8579 		 * SCSI-3 reservations are supported.
8580 		 */
8581 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8582 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8583 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8584 		break;
8585 	case ENOTSUP:
8586 		/*
8587 		 * The PERSISTENT RESERVE IN command would not be recognized by
8588 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8589 		 */
8590 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8591 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8592 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8593 		break;
8594 	default:
8595 		/*
8596 		 * default to SCSI-3 reservations
8597 		 */
8598 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8599 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8600 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8601 		break;
8602 	}
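
	/*
	 * For reference: SD_SCSI2_RESERVATION implies the legacy
	 * RESERVE(6)/RELEASE(6) commands, while SD_SCSI3_RESERVATION
	 * implies the PERSISTENT RESERVE IN/OUT service actions
	 * (read keys, read reservation, register, reserve, ...).
	 */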
8603 
8604 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8605 	    "sd_unit_attach: un:0x%p exit success\n", un);
8606 
8607 	return (DDI_SUCCESS);
8608 
8609 	/*
8610 	 * An error occurred during the attach; clean up & return failure.
8611 	 */
8612 
8613 devid_failed:
8614 
8615 setup_pm_failed:
8616 	ddi_remove_minor_node(devi, NULL);
8617 
8618 create_minor_nodes_failed:
8619 	/*
8620 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8621 	 */
8622 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8623 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8624 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8625 
8626 	if (un->un_f_is_fibre == FALSE) {
8627 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8628 	}
8629 
8630 spinup_failed:
8631 
8632 	mutex_enter(SD_MUTEX(un));
8633 
8634 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8635 	if (un->un_direct_priority_timeid != NULL) {
8636 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8637 		un->un_direct_priority_timeid = NULL;
8638 		mutex_exit(SD_MUTEX(un));
8639 		(void) untimeout(temp_id);
8640 		mutex_enter(SD_MUTEX(un));
8641 	}
8642 
8643 	/* Cancel any pending start/stop timeouts */
8644 	if (un->un_startstop_timeid != NULL) {
8645 		timeout_id_t temp_id = un->un_startstop_timeid;
8646 		un->un_startstop_timeid = NULL;
8647 		mutex_exit(SD_MUTEX(un));
8648 		(void) untimeout(temp_id);
8649 		mutex_enter(SD_MUTEX(un));
8650 	}
8651 
8652 	/* Cancel any pending reset-throttle timeouts */
8653 	if (un->un_reset_throttle_timeid != NULL) {
8654 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8655 		un->un_reset_throttle_timeid = NULL;
8656 		mutex_exit(SD_MUTEX(un));
8657 		(void) untimeout(temp_id);
8658 		mutex_enter(SD_MUTEX(un));
8659 	}
8660 
8661 	/* Cancel any pending retry timeouts */
8662 	if (un->un_retry_timeid != NULL) {
8663 		timeout_id_t temp_id = un->un_retry_timeid;
8664 		un->un_retry_timeid = NULL;
8665 		mutex_exit(SD_MUTEX(un));
8666 		(void) untimeout(temp_id);
8667 		mutex_enter(SD_MUTEX(un));
8668 	}
8669 
8670 	/* Cancel any pending delayed cv broadcast timeouts */
8671 	if (un->un_dcvb_timeid != NULL) {
8672 		timeout_id_t temp_id = un->un_dcvb_timeid;
8673 		un->un_dcvb_timeid = NULL;
8674 		mutex_exit(SD_MUTEX(un));
8675 		(void) untimeout(temp_id);
8676 		mutex_enter(SD_MUTEX(un));
8677 	}
8678 
8679 	mutex_exit(SD_MUTEX(un));
8680 
8681 	/* There should be no in-progress I/O, so ASSERT that here. */
8682 	ASSERT(un->un_ncmds_in_transport == 0);
8683 	ASSERT(un->un_ncmds_in_driver == 0);
8684 
8685 	/* Do not free the softstate if the callback routine is active */
8686 	sd_sync_with_callback(un);
8687 
8688 	/*
8689 	 * Partition stats apparently are not used with removables. These would
8690 	 * not have been created during attach, so no need to clean them up...
8691 	 */
8692 	if (un->un_stats != NULL) {
8693 		kstat_delete(un->un_stats);
8694 		un->un_stats = NULL;
8695 	}
8696 	if (un->un_errstats != NULL) {
8697 		kstat_delete(un->un_errstats);
8698 		un->un_errstats = NULL;
8699 	}
8700 
8701 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8702 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8703 
8704 	ddi_prop_remove_all(devi);
8705 	sema_destroy(&un->un_semoclose);
8706 	cv_destroy(&un->un_state_cv);
8707 
8708 getrbuf_failed:
8709 
8710 	sd_free_rqs(un);
8711 
8712 alloc_rqs_failed:
8713 
8714 	devp->sd_private = NULL;
8715 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8716 
8717 get_softstate_failed:
8718 	/*
8719 	 * Note: the man pages are unclear as to whether or not doing a
8720 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8721 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8722 	 * ddi_get_soft_state() fails.  The implication seems to be
8723 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8724 	 */
8725 	ddi_soft_state_free(sd_state, instance);
8726 
8727 probe_failed:
8728 	scsi_unprobe(devp);
8729 #ifdef SDDEBUG
8730 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8731 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8732 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8733 		    (void *)un);
8734 	}
8735 #endif
8736 	return (DDI_FAILURE);
8737 }
8738 
8739 
8740 /*
8741  *    Function: sd_unit_detach
8742  *
8743  * Description: Performs DDI_DETACH processing for sddetach().
8744  *
8745  * Return Code: DDI_SUCCESS
8746  *		DDI_FAILURE
8747  *
8748  *     Context: Kernel thread context
8749  */
8750 
8751 static int
8752 sd_unit_detach(dev_info_t *devi)
8753 {
8754 	struct scsi_device	*devp;
8755 	struct sd_lun		*un;
8756 	int			i;
8757 	dev_t			dev;
8758 	int			instance = ddi_get_instance(devi);
8759 
8760 	mutex_enter(&sd_detach_mutex);
8761 
8762 	/*
8763 	 * Fail the detach for any of the following:
8764 	 *  - Unable to get the sd_lun struct for the instance
8765 	 *  - A layered driver has an outstanding open on the instance
8766 	 *  - Another thread is already detaching this instance
8767 	 *  - Another thread is currently performing an open
8768 	 */
8769 	devp = ddi_get_driver_private(devi);
8770 	if ((devp == NULL) ||
8771 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8772 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8773 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8774 		mutex_exit(&sd_detach_mutex);
8775 		return (DDI_FAILURE);
8776 	}
8777 
8778 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8779 
8780 	/*
8781 	 * Mark this instance as currently in a detach, to inhibit any
8782 	 * opens from a layered driver.
8783 	 */
8784 	un->un_detach_count++;
8785 	mutex_exit(&sd_detach_mutex);
8786 
8787 	dev = sd_make_device(SD_DEVINFO(un));
8788 
8789 	_NOTE(COMPETING_THREADS_NOW);
8790 
8791 	mutex_enter(SD_MUTEX(un));
8792 
8793 	/*
8794 	 * Fail the detach if there are any outstanding layered
8795 	 * opens on this device.
8796 	 */
8797 	for (i = 0; i < NDKMAP; i++) {
8798 		if (un->un_ocmap.lyropen[i] != 0) {
8799 			goto err_notclosed;
8800 		}
8801 	}
8802 
8803 	/*
8804 	 * Verify there are NO outstanding commands issued to this device.
8805 	 * i.e., un_ncmds_in_transport == 0.
8806 	 * It's possible to have outstanding commands through the physio
8807 	 * code path, even though everything's closed.
8808 	 */
8809 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8810 	    (un->un_direct_priority_timeid != NULL) ||
8811 	    (un->un_state == SD_STATE_RWAIT)) {
8812 		mutex_exit(SD_MUTEX(un));
8813 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8814 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8815 		goto err_stillbusy;
8816 	}
8817 
8818 	/*
8819 	 * If we have the device reserved, release the reservation.
8820 	 */
8821 	if ((un->un_resvd_status & SD_RESERVE) &&
8822 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8823 		mutex_exit(SD_MUTEX(un));
8824 		/*
8825 		 * Note: sd_reserve_release sends a command to the device
8826 		 * via the sd_ioctlcmd() path, and can sleep.
8827 		 */
8828 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8829 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8830 			    "sd_dr_detach: Cannot release reservation \n");
8831 		}
8832 	} else {
8833 		mutex_exit(SD_MUTEX(un));
8834 	}
8835 
8836 	/*
8837 	 * Untimeout any reserve recover, throttle reset, restart unit
8838 	 * and delayed broadcast timeout threads. Protect the timeout pointer
8839 	 * from getting nulled by their callback functions.
8840 	 */
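	/*
	 * Each cancellation below follows the same sketch, shown here
	 * with a hypothetical un_foo_timeid field:
	 *
	 *	timeout_id_t tid = un->un_foo_timeid;
	 *	un->un_foo_timeid = NULL;	(cleared under SD_MUTEX)
	 *	mutex_exit(SD_MUTEX(un));	(untimeout(9F) may wait for a
	 *	(void) untimeout(tid);		callback that needs SD_MUTEX,
	 *	mutex_enter(SD_MUTEX(un));	so the mutex is dropped first)
	 */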
8841 	mutex_enter(SD_MUTEX(un));
8842 	if (un->un_resvd_timeid != NULL) {
8843 		timeout_id_t temp_id = un->un_resvd_timeid;
8844 		un->un_resvd_timeid = NULL;
8845 		mutex_exit(SD_MUTEX(un));
8846 		(void) untimeout(temp_id);
8847 		mutex_enter(SD_MUTEX(un));
8848 	}
8849 
8850 	if (un->un_reset_throttle_timeid != NULL) {
8851 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8852 		un->un_reset_throttle_timeid = NULL;
8853 		mutex_exit(SD_MUTEX(un));
8854 		(void) untimeout(temp_id);
8855 		mutex_enter(SD_MUTEX(un));
8856 	}
8857 
8858 	if (un->un_startstop_timeid != NULL) {
8859 		timeout_id_t temp_id = un->un_startstop_timeid;
8860 		un->un_startstop_timeid = NULL;
8861 		mutex_exit(SD_MUTEX(un));
8862 		(void) untimeout(temp_id);
8863 		mutex_enter(SD_MUTEX(un));
8864 	}
8865 
8866 	if (un->un_dcvb_timeid != NULL) {
8867 		timeout_id_t temp_id = un->un_dcvb_timeid;
8868 		un->un_dcvb_timeid = NULL;
8869 		mutex_exit(SD_MUTEX(un));
8870 		(void) untimeout(temp_id);
8871 	} else {
8872 		mutex_exit(SD_MUTEX(un));
8873 	}
8874 
8875 	/* Remove any pending reservation reclaim requests for this device */
8876 	sd_rmv_resv_reclaim_req(dev);
8877 
8878 	mutex_enter(SD_MUTEX(un));
8879 
8880 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8881 	if (un->un_direct_priority_timeid != NULL) {
8882 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8883 		un->un_direct_priority_timeid = NULL;
8884 		mutex_exit(SD_MUTEX(un));
8885 		(void) untimeout(temp_id);
8886 		mutex_enter(SD_MUTEX(un));
8887 	}
8888 
8889 	/* Cancel any active multi-host disk watch thread requests */
8890 	if (un->un_mhd_token != NULL) {
8891 		mutex_exit(SD_MUTEX(un));
8892 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8893 		if (scsi_watch_request_terminate(un->un_mhd_token,
8894 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8895 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8896 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8897 			/*
8898 			 * Note: We are returning here after having removed
8899 			 * some driver timeouts above. This is consistent with
8900 			 * the legacy implementation but perhaps the watch
8901 			 * terminate call should be made with the wait flag set.
8902 			 */
8903 			goto err_stillbusy;
8904 		}
8905 		mutex_enter(SD_MUTEX(un));
8906 		un->un_mhd_token = NULL;
8907 	}
8908 
8909 	if (un->un_swr_token != NULL) {
8910 		mutex_exit(SD_MUTEX(un));
8911 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8912 		if (scsi_watch_request_terminate(un->un_swr_token,
8913 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8914 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8915 			    "sd_dr_detach: Cannot cancel swr watch request\n");
8916 			/*
8917 			 * Note: We are returning here after having removed
8918 			 * some driver timeouts above. This is consistent with
8919 			 * the legacy implementation but perhaps the watch
8920 			 * terminate call should be made with the wait flag set.
8921 			 */
8922 			goto err_stillbusy;
8923 		}
8924 		mutex_enter(SD_MUTEX(un));
8925 		un->un_swr_token = NULL;
8926 	}
8927 
8928 	mutex_exit(SD_MUTEX(un));
8929 
8930 	/*
8931 	 * Clear any scsi_reset_notifies. We clear the reset notifies
8932 	 * even if we have not registered one.
8933 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8934 	 */
8935 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8936 	    sd_mhd_reset_notify_cb, (caddr_t)un);
8937 
8938 	/*
8939 	 * Protect the timeout pointers from getting nulled by
8940 	 * their callback functions during the cancellation process.
8941 	 * In such a scenario, untimeout can be invoked with a null value.
8942 	 */
8943 	_NOTE(NO_COMPETING_THREADS_NOW);
8944 
8945 	mutex_enter(&un->un_pm_mutex);
8946 	if (un->un_pm_idle_timeid != NULL) {
8947 		timeout_id_t temp_id = un->un_pm_idle_timeid;
8948 		un->un_pm_idle_timeid = NULL;
8949 		mutex_exit(&un->un_pm_mutex);
8950 
8951 		/*
8952 		 * Timeout is active; cancel it.
8953 		 * Note that it'll never be active on a device
8954 		 * that does not support PM; therefore, we don't
8955 		 * have to check before calling pm_idle_component.
8956 		 */
8957 		(void) untimeout(temp_id);
8958 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8959 		mutex_enter(&un->un_pm_mutex);
8960 	}
8961 
8962 	/*
8963 	 * Check whether there is already a timeout scheduled for power
8964 	 * management. If so, don't lower the power here; that's
8965 	 * the timeout handler's job.
8966 	 */
8967 	if (un->un_pm_timeid != NULL) {
8968 		timeout_id_t temp_id = un->un_pm_timeid;
8969 		un->un_pm_timeid = NULL;
8970 		mutex_exit(&un->un_pm_mutex);
8971 		/*
8972 		 * Timeout is active; cancel it.
8973 		 * Note that it'll never be active on a device
8974 		 * that does not support PM; therefore, we don't
8975 		 * have to check before calling pm_idle_component.
8976 		 */
8977 		(void) untimeout(temp_id);
8978 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8979 
8980 	} else {
8981 		mutex_exit(&un->un_pm_mutex);
8982 		if ((un->un_f_pm_is_enabled == TRUE) &&
8983 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
8984 		    DDI_SUCCESS)) {
8985 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8986 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8987 			/*
8988 			 * Fix for bug: 4297749, item # 13
8989 			 * The above test now includes a check to see if PM is
8990 			 * supported by this device before calling
8991 			 * pm_lower_power().
8992 			 * Note, the following is not dead code. The call to
8993 			 * pm_lower_power above will generate a call back into
8994 			 * our sdpower routine which might result in a timeout
8995 			 * handler getting activated. Therefore the following
8996 			 * code is valid and necessary.
8997 			 */
8998 			mutex_enter(&un->un_pm_mutex);
8999 			if (un->un_pm_timeid != NULL) {
9000 				timeout_id_t temp_id = un->un_pm_timeid;
9001 				un->un_pm_timeid = NULL;
9002 				mutex_exit(&un->un_pm_mutex);
9003 				(void) untimeout(temp_id);
9004 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9005 			} else {
9006 				mutex_exit(&un->un_pm_mutex);
9007 			}
9008 		}
9009 	}
9010 
9011 	/*
9012 	 * Cleanup from the scsi_ifsetcap() calls (437868)
9013 	 * Relocated here from above to be after the call to
9014 	 * pm_lower_power, which was getting errors.
9015 	 */
9016 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9017 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9018 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9019 
9020 	if (un->un_f_is_fibre == FALSE) {
9021 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9022 	}
9023 
9024 	/*
9025 	 * Remove any event callbacks, fibre only
9026 	 */
9027 	if (un->un_f_is_fibre == TRUE) {
9028 		if ((un->un_insert_event != NULL) &&
9029 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
9030 		    DDI_SUCCESS)) {
9031 			/*
9032 			 * Note: We are returning here after having done
9033 			 * substantial cleanup above. This is consistent
9034 			 * with the legacy implementation but this may not
9035 			 * be the right thing to do.
9036 			 */
9037 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9038 				"sd_dr_detach: Cannot cancel insert event\n");
9039 			goto err_remove_event;
9040 		}
9041 		un->un_insert_event = NULL;
9042 
9043 		if ((un->un_remove_event != NULL) &&
9044 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
9045 		    DDI_SUCCESS)) {
9046 			/*
9047 			 * Note: We are returning here after having done
9048 			 * substantial cleanup above. This is consistent
9049 			 * with the legacy implementation but this may not
9050 			 * be the right thing to do.
9051 			 */
9052 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9053 				"sd_dr_detach: Cannot cancel remove event\n");
9054 			goto err_remove_event;
9055 		}
9056 		un->un_remove_event = NULL;
9057 	}
9058 
9059 	/* Do not free the softstate if the callback routine is active */
9060 	sd_sync_with_callback(un);
9061 
9062 	/*
9063 	 * Hold the detach mutex here, to make sure that no other thread can
9064 	 * ever access a (partially) freed soft state structure.
9065 	 */
9066 	mutex_enter(&sd_detach_mutex);
9067 
9068 	/*
9069 	 * Clean up the soft state struct.
9070 	 * Cleanup is done in reverse order of allocs/inits.
9071 	 * At this point there should be no competing threads anymore.
9072 	 */
9073 
9074 	/* Unregister and free device id. */
9075 	ddi_devid_unregister(devi);
9076 	if (un->un_devid) {
9077 		ddi_devid_free(un->un_devid);
9078 		un->un_devid = NULL;
9079 	}
9080 
9081 	/*
9082 	 * Destroy wmap cache if it exists.
9083 	 */
9084 	if (un->un_wm_cache != NULL) {
9085 		kmem_cache_destroy(un->un_wm_cache);
9086 		un->un_wm_cache = NULL;
9087 	}
9088 
9089 	/* Remove minor nodes */
9090 	ddi_remove_minor_node(devi, NULL);
9091 
9092 	/*
9093 	 * kstat cleanup is done in detach for all device types (4363169).
9094 	 * We do not want to fail detach if the device kstats are not deleted
9095 	 * since there is confusion about the devo_refcnt for the device.
9096 	 * We just delete the kstats and let detach complete successfully.
9097 	 */
9098 	if (un->un_stats != NULL) {
9099 		kstat_delete(un->un_stats);
9100 		un->un_stats = NULL;
9101 	}
9102 	if (un->un_errstats != NULL) {
9103 		kstat_delete(un->un_errstats);
9104 		un->un_errstats = NULL;
9105 	}
9106 
9107 	/* Remove partition stats */
9108 	if (un->un_f_pkstats_enabled) {
9109 		for (i = 0; i < NSDMAP; i++) {
9110 			if (un->un_pstats[i] != NULL) {
9111 				kstat_delete(un->un_pstats[i]);
9112 				un->un_pstats[i] = NULL;
9113 			}
9114 		}
9115 	}
9116 
9117 	/* Remove xbuf registration */
9118 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9119 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9120 
9121 	/* Remove driver properties */
9122 	ddi_prop_remove_all(devi);
9123 
9124 	mutex_destroy(&un->un_pm_mutex);
9125 	cv_destroy(&un->un_pm_busy_cv);
9126 
9127 	cv_destroy(&un->un_wcc_cv);
9128 
9129 	/* Open/close semaphore */
9130 	sema_destroy(&un->un_semoclose);
9131 
9132 	/* Removable media condvar. */
9133 	cv_destroy(&un->un_state_cv);
9134 
9135 	/* Suspend/resume condvar. */
9136 	cv_destroy(&un->un_suspend_cv);
9137 	cv_destroy(&un->un_disk_busy_cv);
9138 
9139 	sd_free_rqs(un);
9140 
9141 	/* Free up soft state */
9142 	devp->sd_private = NULL;
9143 	bzero(un, sizeof (struct sd_lun));
9144 	ddi_soft_state_free(sd_state, instance);
9145 
9146 	mutex_exit(&sd_detach_mutex);
9147 
9148 	/* This frees up the INQUIRY data associated with the device. */
9149 	scsi_unprobe(devp);
9150 
9151 	return (DDI_SUCCESS);
9152 
9153 err_notclosed:
9154 	mutex_exit(SD_MUTEX(un));
9155 
9156 err_stillbusy:
9157 	_NOTE(NO_COMPETING_THREADS_NOW);
9158 
9159 err_remove_event:
9160 	mutex_enter(&sd_detach_mutex);
9161 	un->un_detach_count--;
9162 	mutex_exit(&sd_detach_mutex);
9163 
9164 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9165 	return (DDI_FAILURE);
9166 }
9167 
9168 
9169 /*
9170  * Driver minor node structure and data table
9171  */
9172 struct driver_minor_data {
9173 	char	*name;
9174 	minor_t	minor;
9175 	int	type;
9176 };
9177 
9178 static struct driver_minor_data sd_minor_data[] = {
9179 	{"a", 0, S_IFBLK},
9180 	{"b", 1, S_IFBLK},
9181 	{"c", 2, S_IFBLK},
9182 	{"d", 3, S_IFBLK},
9183 	{"e", 4, S_IFBLK},
9184 	{"f", 5, S_IFBLK},
9185 	{"g", 6, S_IFBLK},
9186 	{"h", 7, S_IFBLK},
9187 #if defined(_SUNOS_VTOC_16)
9188 	{"i", 8, S_IFBLK},
9189 	{"j", 9, S_IFBLK},
9190 	{"k", 10, S_IFBLK},
9191 	{"l", 11, S_IFBLK},
9192 	{"m", 12, S_IFBLK},
9193 	{"n", 13, S_IFBLK},
9194 	{"o", 14, S_IFBLK},
9195 	{"p", 15, S_IFBLK},
9196 #endif			/* defined(_SUNOS_VTOC_16) */
9197 #if defined(_FIRMWARE_NEEDS_FDISK)
9198 	{"q", 16, S_IFBLK},
9199 	{"r", 17, S_IFBLK},
9200 	{"s", 18, S_IFBLK},
9201 	{"t", 19, S_IFBLK},
9202 	{"u", 20, S_IFBLK},
9203 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9204 	{"a,raw", 0, S_IFCHR},
9205 	{"b,raw", 1, S_IFCHR},
9206 	{"c,raw", 2, S_IFCHR},
9207 	{"d,raw", 3, S_IFCHR},
9208 	{"e,raw", 4, S_IFCHR},
9209 	{"f,raw", 5, S_IFCHR},
9210 	{"g,raw", 6, S_IFCHR},
9211 	{"h,raw", 7, S_IFCHR},
9212 #if defined(_SUNOS_VTOC_16)
9213 	{"i,raw", 8, S_IFCHR},
9214 	{"j,raw", 9, S_IFCHR},
9215 	{"k,raw", 10, S_IFCHR},
9216 	{"l,raw", 11, S_IFCHR},
9217 	{"m,raw", 12, S_IFCHR},
9218 	{"n,raw", 13, S_IFCHR},
9219 	{"o,raw", 14, S_IFCHR},
9220 	{"p,raw", 15, S_IFCHR},
9221 #endif			/* defined(_SUNOS_VTOC_16) */
9222 #if defined(_FIRMWARE_NEEDS_FDISK)
9223 	{"q,raw", 16, S_IFCHR},
9224 	{"r,raw", 17, S_IFCHR},
9225 	{"s,raw", 18, S_IFCHR},
9226 	{"t,raw", 19, S_IFCHR},
9227 	{"u,raw", 20, S_IFCHR},
9228 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9229 	{0}
9230 };
9231 
9232 static struct driver_minor_data sd_minor_data_efi[] = {
9233 	{"a", 0, S_IFBLK},
9234 	{"b", 1, S_IFBLK},
9235 	{"c", 2, S_IFBLK},
9236 	{"d", 3, S_IFBLK},
9237 	{"e", 4, S_IFBLK},
9238 	{"f", 5, S_IFBLK},
9239 	{"g", 6, S_IFBLK},
9240 	{"wd", 7, S_IFBLK},
9241 #if defined(_FIRMWARE_NEEDS_FDISK)
9242 	{"q", 16, S_IFBLK},
9243 	{"r", 17, S_IFBLK},
9244 	{"s", 18, S_IFBLK},
9245 	{"t", 19, S_IFBLK},
9246 	{"u", 20, S_IFBLK},
9247 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9248 	{"a,raw", 0, S_IFCHR},
9249 	{"b,raw", 1, S_IFCHR},
9250 	{"c,raw", 2, S_IFCHR},
9251 	{"d,raw", 3, S_IFCHR},
9252 	{"e,raw", 4, S_IFCHR},
9253 	{"f,raw", 5, S_IFCHR},
9254 	{"g,raw", 6, S_IFCHR},
9255 	{"wd,raw", 7, S_IFCHR},
9256 #if defined(_FIRMWARE_NEEDS_FDISK)
9257 	{"q,raw", 16, S_IFCHR},
9258 	{"r,raw", 17, S_IFCHR},
9259 	{"s,raw", 18, S_IFCHR},
9260 	{"t,raw", 19, S_IFCHR},
9261 	{"u,raw", 20, S_IFCHR},
9262 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9263 	{0}
9264 };
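
/*
 * For reference, an entry such as {"a", 0, S_IFBLK} for instance N yields
 * minor number (N << SDUNIT_SHIFT) | 0 and surfaces in /devices as the
 * ":a" block node; devfsadm then links it as /dev/dsk/cXtYdZs0, with the
 * ",raw" entries backing the matching /dev/rdsk nodes (the cX/tY/dZ
 * values here are illustrative).
 */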
9265 
9266 
9267 /*
9268  *    Function: sd_create_minor_nodes
9269  *
9270  * Description: Create the minor device nodes for the instance.
9271  *
9272  *   Arguments: un - driver soft state (unit) structure
9273  *		devi - pointer to device info structure
9274  *
9275  * Return Code: DDI_SUCCESS
9276  *		DDI_FAILURE
9277  *
9278  *     Context: Kernel thread context
9279  */
9280 
9281 static int
9282 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9283 {
9284 	struct driver_minor_data	*dmdp;
9285 	struct scsi_device		*devp;
9286 	int				instance;
9287 	char				name[48];
9288 
9289 	ASSERT(un != NULL);
9290 	devp = ddi_get_driver_private(devi);
9291 	instance = ddi_get_instance(devp->sd_dev);
9292 
9293 	/*
9294 	 * Create all the minor nodes for this target.
9295 	 */
9296 	if (un->un_blockcount > DK_MAX_BLOCKS)
9297 		dmdp = sd_minor_data_efi;
9298 	else
9299 		dmdp = sd_minor_data;
9300 	while (dmdp->name != NULL) {
9301 
9302 		(void) sprintf(name, "%s", dmdp->name);
9303 
9304 		if (ddi_create_minor_node(devi, name, dmdp->type,
9305 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9306 		    un->un_node_type, NULL) == DDI_FAILURE) {
9307 			/*
9308 			 * Clean up any nodes that may have been created, in
9309 			 * case this fails in the middle of the loop.
9310 			 */
9311 			ddi_remove_minor_node(devi, NULL);
9312 			return (DDI_FAILURE);
9313 		}
9314 		dmdp++;
9315 	}
9316 
9317 	return (DDI_SUCCESS);
9318 }
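
/*
 * A minimal sketch of how such a minor number is decoded on the way back
 * in; this mirrors the (instance << SDUNIT_SHIFT) | minor encoding above
 * and is shown expanded for illustration:
 *
 *	instance  = getminor(dev) >> SDUNIT_SHIFT;
 *	partition = getminor(dev) & ((1 << SDUNIT_SHIFT) - 1);
 */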
9319 
9320 
9321 /*
9322  *    Function: sd_create_errstats
9323  *
9324  * Description: This routine instantiates the device error stats.
9325  *
9326  *		Note: During attach the stats are instantiated first so they are
9327  *		available for attach-time routines that utilize the driver
9328  *		iopath to send commands to the device. The stats are initialized
9329  *		separately so data obtained during some attach-time routines is
9330  *		available. (4362483)
9331  *
9332  *   Arguments: un - driver soft state (unit) structure
9333  *		instance - driver instance
9334  *
9335  *     Context: Kernel thread context
9336  */
9337 
9338 static void
9339 sd_create_errstats(struct sd_lun *un, int instance)
9340 {
9341 	struct	sd_errstats	*stp;
9342 	char	kstatmodule_err[KSTAT_STRLEN];
9343 	char	kstatname[KSTAT_STRLEN];
9344 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9345 
9346 	ASSERT(un != NULL);
9347 
9348 	if (un->un_errstats != NULL) {
9349 		return;
9350 	}
9351 
9352 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9353 	    "%serr", sd_label);
9354 	(void) snprintf(kstatname, sizeof (kstatname),
9355 	    "%s%d,err", sd_label, instance);
9356 
9357 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9358 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9359 
9360 	if (un->un_errstats == NULL) {
9361 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9362 		    "sd_create_errstats: Failed kstat_create\n");
9363 		return;
9364 	}
9365 
9366 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9367 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9368 	    KSTAT_DATA_UINT32);
9369 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9370 	    KSTAT_DATA_UINT32);
9371 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9372 	    KSTAT_DATA_UINT32);
9373 	kstat_named_init(&stp->sd_vid,		"Vendor",
9374 	    KSTAT_DATA_CHAR);
9375 	kstat_named_init(&stp->sd_pid,		"Product",
9376 	    KSTAT_DATA_CHAR);
9377 	kstat_named_init(&stp->sd_revision,	"Revision",
9378 	    KSTAT_DATA_CHAR);
9379 	kstat_named_init(&stp->sd_serial,	"Serial No",
9380 	    KSTAT_DATA_CHAR);
9381 	kstat_named_init(&stp->sd_capacity,	"Size",
9382 	    KSTAT_DATA_ULONGLONG);
9383 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9384 	    KSTAT_DATA_UINT32);
9385 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9386 	    KSTAT_DATA_UINT32);
9387 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9388 	    KSTAT_DATA_UINT32);
9389 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9390 	    KSTAT_DATA_UINT32);
9391 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9392 	    KSTAT_DATA_UINT32);
9393 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9394 	    KSTAT_DATA_UINT32);
9395 
9396 	un->un_errstats->ks_private = un;
9397 	un->un_errstats->ks_update  = nulldev;
9398 
9399 	kstat_install(un->un_errstats);
9400 }
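
/*
 * For reference, the named error kstat created above can be inspected
 * from userland, e.g. (illustrative invocations):
 *
 *	$ iostat -En
 *	$ kstat -m sderr
 */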
9401 
9402 
9403 /*
9404  *    Function: sd_set_errstats
9405  *
9406  * Description: This routine sets the value of the vendor id, product id,
9407  *		revision, serial number, and capacity device error stats.
9408  *
9409  *		Note: During attach the stats are instantiated first so they are
9410  *		available for attach-time routines that utilize the driver
9411  *		iopath to send commands to the device. The stats are initialized
9412  *		separately so data obtained during some attach-time routines is
9413  *		available. (4362483)
9414  *
9415  *   Arguments: un - driver soft state (unit) structure
9416  *
9417  *     Context: Kernel thread context
9418  */
9419 
9420 static void
9421 sd_set_errstats(struct sd_lun *un)
9422 {
9423 	struct	sd_errstats	*stp;
9424 
9425 	ASSERT(un != NULL);
9426 	ASSERT(un->un_errstats != NULL);
9427 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9428 	ASSERT(stp != NULL);
9429 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9430 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9431 	(void) strncpy(stp->sd_revision.value.c,
9432 	    un->un_sd->sd_inq->inq_revision, 4);
9433 
9434 	/*
9435 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9436 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9437 	 * (4376302))
9438 	 */
9439 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9440 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9441 		    sizeof (SD_INQUIRY(un)->inq_serial));
9442 	}
9443 
9444 	if (un->un_f_blockcount_is_valid != TRUE) {
9445 		/*
9446 		 * Set capacity error stat to 0 for no media. This ensures
9447 		 * a valid capacity is displayed in response to 'iostat -E'
9448 		 * when no media is present in the device.
9449 		 */
9450 		stp->sd_capacity.value.ui64 = 0;
9451 	} else {
9452 		/*
9453 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9454 		 * capacity.
9455 		 *
9456 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9457 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9458 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9459 		 */
9460 		stp->sd_capacity.value.ui64 = (uint64_t)
9461 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9462 	}
9463 }
9464 
9465 
9466 /*
9467  *    Function: sd_set_pstats
9468  *
9469  * Description: This routine instantiates and initializes the partition
9470  *              stats for each partition with more than zero blocks.
9471  *		(4363169)
9472  *
9473  *   Arguments: un - driver soft state (unit) structure
9474  *
9475  *     Context: Kernel thread context
9476  */
9477 
9478 static void
9479 sd_set_pstats(struct sd_lun *un)
9480 {
9481 	char	kstatname[KSTAT_STRLEN];
9482 	int	instance;
9483 	int	i;
9484 
9485 	ASSERT(un != NULL);
9486 
9487 	instance = ddi_get_instance(SD_DEVINFO(un));
9488 
9489 	/* Note: x86: is this a VTOC8/VTOC16 difference? */
9490 	for (i = 0; i < NSDMAP; i++) {
9491 		if ((un->un_pstats[i] == NULL) &&
9492 		    (un->un_map[i].dkl_nblk != 0)) {
9493 			(void) snprintf(kstatname, sizeof (kstatname),
9494 			    "%s%d,%s", sd_label, instance,
9495 			    sd_minor_data[i].name);
9496 			un->un_pstats[i] = kstat_create(sd_label,
9497 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9498 			    1, KSTAT_FLAG_PERSISTENT);
9499 			if (un->un_pstats[i] != NULL) {
9500 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9501 				kstat_install(un->un_pstats[i]);
9502 			}
9503 		}
9504 	}
9505 }
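
/*
 * For reference: with instance 0 and slice "a", the loop above yields an
 * I/O kstat named "sd0,a" in class "partition"; such stats back e.g.
 * `iostat -xp' output (illustrative usage).
 */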
9506 
9507 
9508 #if (defined(__fibre))
9509 /*
9510  *    Function: sd_init_event_callbacks
9511  *
9512  * Description: This routine initializes the insertion and removal event
9513  *		callbacks. (fibre only)
9514  *
9515  *   Arguments: un - driver soft state (unit) structure
9516  *
9517  *     Context: Kernel thread context
9518  */
9519 
9520 static void
9521 sd_init_event_callbacks(struct sd_lun *un)
9522 {
9523 	ASSERT(un != NULL);
9524 
9525 	if ((un->un_insert_event == NULL) &&
9526 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9527 	    &un->un_insert_event) == DDI_SUCCESS)) {
9528 		/*
9529 		 * Add the callback for an insertion event
9530 		 */
9531 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9532 		    un->un_insert_event, sd_event_callback, (void *)un,
9533 		    &(un->un_insert_cb_id));
9534 	}
9535 
9536 	if ((un->un_remove_event == NULL) &&
9537 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9538 	    &un->un_remove_event) == DDI_SUCCESS)) {
9539 		/*
9540 		 * Add the callback for a removal event
9541 		 */
9542 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9543 		    un->un_remove_event, sd_event_callback, (void *)un,
9544 		    &(un->un_remove_cb_id));
9545 	}
9546 }
9547 
9548 
9549 /*
9550  *    Function: sd_event_callback
9551  *
9552  * Description: This routine handles insert/remove events (photon). The
9553  *		state is changed to OFFLINE, which can be used to suppress
9554  *		error msgs. (fibre only)
9555  *
9556  *   Arguments: un - driver soft state (unit) structure
9557  *
9558  *     Context: Callout thread context
9559  */
9560 /* ARGSUSED */
9561 static void
9562 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9563     void *bus_impldata)
9564 {
9565 	struct sd_lun *un = (struct sd_lun *)arg;
9566 
9567 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9568 	if (event == un->un_insert_event) {
9569 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9570 		mutex_enter(SD_MUTEX(un));
9571 		if (un->un_state == SD_STATE_OFFLINE) {
9572 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9573 				un->un_state = un->un_last_state;
9574 			} else {
9575 				/*
9576 				 * We have gone through SUSPEND/RESUME while
9577 				 * we were offline. Restore the last state
9578 				 */
9579 				un->un_state = un->un_save_state;
9580 			}
9581 		}
9582 		mutex_exit(SD_MUTEX(un));
9583 
9584 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9585 	} else if (event == un->un_remove_event) {
9586 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9587 		mutex_enter(SD_MUTEX(un));
9588 		/*
9589 		 * We need to handle an event callback that occurs during
9590 		 * the suspend operation, since we don't prevent it.
9591 		 */
9592 		if (un->un_state != SD_STATE_OFFLINE) {
9593 			if (un->un_state != SD_STATE_SUSPENDED) {
9594 				New_state(un, SD_STATE_OFFLINE);
9595 			} else {
9596 				un->un_last_state = SD_STATE_OFFLINE;
9597 			}
9598 		}
9599 		mutex_exit(SD_MUTEX(un));
9600 	} else {
9601 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9602 		    "!Unknown event\n");
9603 	}
9604 
9605 }
9606 #endif
9607 
9608 /*
9609  *    Function: sd_cache_control()
9610  *
9611  * Description: This routine is the driver entry point for setting
9612  *		read and write caching by modifying the WCE (write cache
9613  *		enable) and RCD (read cache disable) bits of mode
9614  *		page 8 (MODEPAGE_CACHING).
9615  *
9616  *   Arguments: un - driver soft state (unit) structure
9617  *		rcd_flag - flag for controlling the read cache
9618  *		wce_flag - flag for controlling the write cache
9619  *
9620  * Return Code: EIO
9621  *		code returned by sd_send_scsi_MODE_SENSE and
9622  *		sd_send_scsi_MODE_SELECT
9623  *
9624  *     Context: Kernel Thread
9625  */
9626 
9627 static int
9628 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
9629 {
9630 	struct mode_caching	*mode_caching_page;
9631 	uchar_t			*header;
9632 	size_t			buflen;
9633 	int			hdrlen;
9634 	int			bd_len;
9635 	int			rval = 0;
9636 	struct mode_header_grp2	*mhp;
9637 
9638 	ASSERT(un != NULL);
9639 
9640 	/*
9641 	 * Do a test unit ready; otherwise, a mode sense may not work if this
9642 	 * is the first command sent to the device after boot.
9643 	 */
9644 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9645 
9646 	if (un->un_f_cfg_is_atapi == TRUE) {
9647 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9648 	} else {
9649 		hdrlen = MODE_HEADER_LENGTH;
9650 	}
9651 
9652 	/*
9653 	 * Allocate memory for the retrieved mode page and its headers.  Set
9654 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
9655 	 * we get all of the mode sense data; otherwise, the mode select
9656 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9657 	 */
9658 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9659 	    sizeof (struct mode_cache_scsi3);
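
	/*
	 * Resulting buffer layout (sketch; the sizes shown are for the
	 * non-ATAPI CDB_GROUP0 case):
	 *
	 *	+-------------------------+  header
	 *	| mode header       (4)   |
	 *	+-------------------------+  header + hdrlen
	 *	| block descriptor  (8)   |
	 *	+-------------------------+  header + hdrlen + bd_len
	 *	| caching mode page       |
	 *	+-------------------------+
	 */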
9660 
9661 	header = kmem_zalloc(buflen, KM_SLEEP);
9662 
9663 	/* Get the information from the device. */
9664 	if (un->un_f_cfg_is_atapi == TRUE) {
9665 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9666 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9667 	} else {
9668 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9669 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9670 	}
9671 	if (rval != 0) {
9672 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9673 		    "sd_cache_control: Mode Sense Failed\n");
9674 		kmem_free(header, buflen);
9675 		return (rval);
9676 	}
9677 
9678 	/*
9679 	 * Determine size of Block Descriptors in order to locate
9680 	 * the mode page data. ATAPI devices return 0; SCSI devices
9681 	 * should return MODE_BLK_DESC_LENGTH.
9682 	 */
9683 	if (un->un_f_cfg_is_atapi == TRUE) {
9684 		mhp	= (struct mode_header_grp2 *)header;
9685 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9686 	} else {
9687 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9688 	}
9689 
9690 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9691 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9692 		    "sd_cache_control: Mode Sense returned invalid "
9693 		    "block descriptor length\n");
9694 		kmem_free(header, buflen);
9695 		return (EIO);
9696 	}
9697 
9698 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9699 
9700 	/* Check the relevant bits on successful mode sense. */
9701 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9702 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9703 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9704 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9705 
9706 		size_t sbuflen;
9707 		uchar_t save_pg;
9708 
9709 		/*
9710 		 * Construct select buffer length based on the
9711 		 * length of the sense data returned.
9712 		 */
9713 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
9714 		    sizeof (struct mode_page) +
9715 		    (int)mode_caching_page->mode_page.length;
9716 
9717 		/*
9718 		 * Set the caching bits as requested.
9719 		 */
9720 		if (rcd_flag == SD_CACHE_ENABLE)
9721 			mode_caching_page->rcd = 0;
9722 		else if (rcd_flag == SD_CACHE_DISABLE)
9723 			mode_caching_page->rcd = 1;
9724 
9725 		if (wce_flag == SD_CACHE_ENABLE)
9726 			mode_caching_page->wce = 1;
9727 		else if (wce_flag == SD_CACHE_DISABLE)
9728 			mode_caching_page->wce = 0;
9729 
9730 		/*
9731 		 * Save the page if the mode sense says the
9732 		 * drive supports it.
9733 		 */
9734 		save_pg = mode_caching_page->mode_page.ps ?
9735 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9736 
9737 		/* Clear reserved bits before mode select. */
9738 		mode_caching_page->mode_page.ps = 0;
9739 
9740 		/*
9741 		 * Clear out mode header for mode select.
9742 		 * The rest of the retrieved page will be reused.
9743 		 */
9744 		bzero(header, hdrlen);
9745 
9746 		if (un->un_f_cfg_is_atapi == TRUE) {
9747 			mhp = (struct mode_header_grp2 *)header;
9748 			mhp->bdesc_length_hi = bd_len >> 8;
9749 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9750 		} else {
9751 			((struct mode_header *)header)->bdesc_length = bd_len;
9752 		}
9753 
9754 		/* Issue mode select to change the cache settings */
9755 		if (un->un_f_cfg_is_atapi == TRUE) {
9756 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
9757 			    sbuflen, save_pg, SD_PATH_DIRECT);
9758 		} else {
9759 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
9760 			    sbuflen, save_pg, SD_PATH_DIRECT);
9761 		}
9762 	}
9763 
9764 	kmem_free(header, buflen);
9765 	return (rval);
9766 }
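
/*
 * Illustrative sketch (not part of the driver): a minimal, self-contained
 * model of how the caching mode page is located in a MODE SENSE response,
 * as done above.  The page data follows the mode header and any block
 * descriptors, so its offset is hdrlen + bd_len.  The struct below is a
 * simplified stand-in for the 4-byte header used with the 6-byte (group 0)
 * MODE SENSE command; the field names are assumptions for illustration.
 */
#include <stdint.h>
#include <stddef.h>

struct ex_mode_header6 {
	uint8_t	data_length;	/* length of data following this byte */
	uint8_t	medium_type;
	uint8_t	device_specific;
	uint8_t	bdesc_length;	/* block descriptor bytes that follow */
};

/*
 * Return a pointer to the start of the mode page data within a group 0
 * MODE SENSE buffer, or NULL if the offset falls outside the buffer.
 */
static uint8_t *
ex_locate_mode_page(uint8_t *buf, size_t buflen)
{
	struct ex_mode_header6 *hdr = (struct ex_mode_header6 *)buf;
	size_t off = sizeof (*hdr) + hdr->bdesc_length;

	return ((off < buflen) ? (buf + off) : NULL);
}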
9767 
9768 
9769 /*
9770  *    Function: sd_get_write_cache_enabled()
9771  *
9772  * Description: This routine is the driver entry point for determining if
9773  *		write caching is enabled.  It examines the WCE (write cache
9774  *		enable) bit of mode page 8 (MODEPAGE_CACHING).
9775  *
9776  *   Arguments: un - driver soft state (unit) structure
9777  *   		is_enabled - pointer to int where write cache enabled state
9778  *   			is returned (non-zero -> write cache enabled)
9779  *
9780  *
9781  * Return Code: EIO
9782  *		code returned by sd_send_scsi_MODE_SENSE
9783  *
9784  *     Context: Kernel Thread
9785  *
9786  * NOTE: If ioctl is added to disable write cache, this sequence should
9787  * be followed so that no locking is required for accesses to
9788  * un->un_f_write_cache_enabled:
9789  * 	do mode select to clear wce
9790  * 	do synchronize cache to flush cache
9791  * 	set un->un_f_write_cache_enabled = FALSE
9792  *
9793  * Conversely, an ioctl to enable the write cache should be done
9794  * in this order:
9795  * 	set un->un_f_write_cache_enabled = TRUE
9796  * 	do mode select to set wce
9797  */
9798 
9799 static int
9800 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
9801 {
9802 	struct mode_caching	*mode_caching_page;
9803 	uchar_t			*header;
9804 	size_t			buflen;
9805 	int			hdrlen;
9806 	int			bd_len;
9807 	int			rval = 0;
9808 
9809 	ASSERT(un != NULL);
9810 	ASSERT(is_enabled != NULL);
9811 
9812 	/* in case of error, flag as enabled */
9813 	*is_enabled = TRUE;
9814 
9815 	/*
9816 	 * Do a test unit ready, otherwise a mode sense may not work if this
9817 	 * is the first command sent to the device after boot.
9818 	 */
9819 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9820 
9821 	if (un->un_f_cfg_is_atapi == TRUE) {
9822 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9823 	} else {
9824 		hdrlen = MODE_HEADER_LENGTH;
9825 	}
9826 
9827 	/*
9828 	 * Allocate memory for the retrieved mode page and its headers.  Set
9829 	 * a pointer to the page itself.
9830 	 */
9831 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
9832 	header = kmem_zalloc(buflen, KM_SLEEP);
9833 
9834 	/* Get the information from the device. */
9835 	if (un->un_f_cfg_is_atapi == TRUE) {
9836 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9837 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9838 	} else {
9839 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9840 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9841 	}
9842 	if (rval != 0) {
9843 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9844 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
9845 		kmem_free(header, buflen);
9846 		return (rval);
9847 	}
9848 
9849 	/*
9850 	 * Determine size of Block Descriptors in order to locate
9851 	 * the mode page data. ATAPI devices return 0, SCSI devices
9852 	 * should return MODE_BLK_DESC_LENGTH.
9853 	 */
9854 	if (un->un_f_cfg_is_atapi == TRUE) {
9855 		struct mode_header_grp2	*mhp;
9856 		mhp	= (struct mode_header_grp2 *)header;
9857 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9858 	} else {
9859 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9860 	}
9861 
9862 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9863 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9864 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
9865 		    "block descriptor length\n");
9866 		kmem_free(header, buflen);
9867 		return (EIO);
9868 	}
9869 
9870 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9871 	*is_enabled = mode_caching_page->wce;
9872 
9873 	kmem_free(header, buflen);
9874 	return (0);
9875 }
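
/*
 * Illustrative sketch (not part of the driver): reading WCE/RCD from the
 * raw bytes of the caching mode page (0x08).  Per SBC, byte 2 of the page
 * carries WCE in bit 2 and RCD in bit 0, which is what the mode_caching
 * bitfields used above encode; treat the offsets here as an assumption
 * for illustration.
 */
#include <stdint.h>

#define	EX_CACHE_WCE(pg)	(((pg)[2] >> 2) & 1)	/* write cache enable */
#define	EX_CACHE_RCD(pg)	((pg)[2] & 1)		/* read cache disable */

static int
ex_write_cache_enabled(const uint8_t *page)
{
	return (EX_CACHE_WCE(page));
}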
9876 
9877 
9878 /*
9879  *    Function: sd_make_device
9880  *
9881  * Description: Utility routine to return the Solaris device number from
9882  *		the data in the device's dev_info structure.
9883  *
9884  * Return Code: The Solaris device number
9885  *
9886  *     Context: Any
9887  */
9888 
9889 static dev_t
9890 sd_make_device(dev_info_t *devi)
9891 {
9892 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
9893 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9894 }
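
/*
 * Illustrative sketch (not part of the driver): the minor-number layout
 * assumed by sd_make_device() above, with the instance in the high bits
 * (shifted by SDUNIT_SHIFT) and the partition in the low bits.  The shift
 * value below is an assumption for illustration; the real value comes
 * from the driver's headers.
 */
#define	EX_UNIT_SHIFT	3			/* assumed SDUNIT_SHIFT */
#define	EX_PART_MASK	((1 << EX_UNIT_SHIFT) - 1)

#define	EX_MINOR(inst, part)	(((inst) << EX_UNIT_SHIFT) | (part))
#define	EX_UNIT(minor)		((minor) >> EX_UNIT_SHIFT)	/* SDUNIT */
#define	EX_PART(minor)		((minor) & EX_PART_MASK)	/* SDPART */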
9895 
9896 
9897 /*
9898  *    Function: sd_pm_entry
9899  *
9900  * Description: Called at the start of a new command to manage power
9901  *		and busy status of a device. This includes determining whether
9902  *		the current power state of the device is sufficient for
9903  *		performing the command or whether it must be changed.
9904  *		The PM framework is notified appropriately.
9905  *		Only with a return status of DDI_SUCCESS will the
9906  *		component be busy to the framework.
9907  *
9908  *		All callers of sd_pm_entry must check the return status
9909  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
9910  *		of DDI_FAILURE indicates the device failed to power up.
9911  *		In this case un_pm_count has been adjusted so the result
9912  *		on exit is still powered down, i.e. count is less than 0.
9913  *		Calling sd_pm_exit with this count value hits an ASSERT.
9914  *
9915  * Return Code: DDI_SUCCESS or DDI_FAILURE
9916  *
9917  *     Context: Kernel thread context.
9918  */
9919 
9920 static int
9921 sd_pm_entry(struct sd_lun *un)
9922 {
9923 	int return_status = DDI_SUCCESS;
9924 
9925 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9926 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9927 
9928 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9929 
9930 	if (un->un_f_pm_is_enabled == FALSE) {
9931 		SD_TRACE(SD_LOG_IO_PM, un,
9932 		    "sd_pm_entry: exiting, PM not enabled\n");
9933 		return (return_status);
9934 	}
9935 
9936 	/*
9937 	 * Just increment a counter if PM is enabled. On the transition from
9938 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9939 	 * the count with each IO and mark the device as idle when the count
9940 	 * hits 0.
9941 	 *
9942 	 * If the count is less than 0 the device is powered down. If a powered
9943 	 * down device is successfully powered up then the count must be
9944 	 * incremented to reflect the power up. Note that it'll get incremented
9945 	 * a second time to become busy.
9946 	 *
9947 	 * Because the following has the potential to change the device state
9948 	 * and must release the un_pm_mutex to do so, only one thread can be
9949 	 * allowed through at a time.
9950 	 */
9951 
9952 	mutex_enter(&un->un_pm_mutex);
9953 	while (un->un_pm_busy == TRUE) {
9954 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
9955 	}
9956 	un->un_pm_busy = TRUE;
9957 
9958 	if (un->un_pm_count < 1) {
9959 
9960 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
9961 
9962 		/*
9963 		 * Indicate we are now busy so the framework won't attempt to
9964 		 * power down the device. This call will only fail if either
9965 		 * we passed a bad component number or the device has no
9966 		 * components. Neither of these should ever happen.
9967 		 */
9968 		mutex_exit(&un->un_pm_mutex);
9969 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
9970 		ASSERT(return_status == DDI_SUCCESS);
9971 
9972 		mutex_enter(&un->un_pm_mutex);
9973 
9974 		if (un->un_pm_count < 0) {
9975 			mutex_exit(&un->un_pm_mutex);
9976 
9977 			SD_TRACE(SD_LOG_IO_PM, un,
9978 			    "sd_pm_entry: power up component\n");
9979 
9980 			/*
9981 			 * pm_raise_power will cause sdpower to be called
9982 			 * which brings the device power level to the
9983 			 * desired state, ON in this case. If successful,
9984 			 * un_pm_count and un_power_level will be updated
9985 			 * appropriately.
9986 			 */
9987 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
9988 			    SD_SPINDLE_ON);
9989 
9990 			mutex_enter(&un->un_pm_mutex);
9991 
9992 			if (return_status != DDI_SUCCESS) {
9993 				/*
9994 				 * Power up failed.
9995 				 * Idle the device and adjust the count
9996 				 * so the result on exit is that we're
9997 				 * still powered down, i.e. count is less than 0.
9998 				 */
9999 				SD_TRACE(SD_LOG_IO_PM, un,
10000 				    "sd_pm_entry: power up failed,"
10001 				    " idle the component\n");
10002 
10003 				(void) pm_idle_component(SD_DEVINFO(un), 0);
10004 				un->un_pm_count--;
10005 			} else {
10006 				/*
10007 				 * Device is powered up, verify the
10008 				 * count is non-negative.
10009 				 * This is debug only.
10010 				 */
10011 				ASSERT(un->un_pm_count == 0);
10012 			}
10013 		}
10014 
10015 		if (return_status == DDI_SUCCESS) {
10016 			/*
10017 			 * For performance, now that the device has been tagged
10018 			 * as busy, and it's known to be powered up, update the
10019 			 * chain types to use jump tables that do not include
10020 			 * pm. This significantly lowers the overhead and
10021 			 * therefore improves performance.
10022 			 */
10023 
10024 			mutex_exit(&un->un_pm_mutex);
10025 			mutex_enter(SD_MUTEX(un));
10026 			SD_TRACE(SD_LOG_IO_PM, un,
10027 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10028 			    un->un_uscsi_chain_type);
10029 
10030 			if (un->un_f_non_devbsize_supported) {
10031 				un->un_buf_chain_type =
10032 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10033 			} else {
10034 				un->un_buf_chain_type =
10035 				    SD_CHAIN_INFO_DISK_NO_PM;
10036 			}
10037 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10038 
10039 			SD_TRACE(SD_LOG_IO_PM, un,
10040 			    "             changed  uscsi_chain_type to   %d\n",
10041 			    un->un_uscsi_chain_type);
10042 			mutex_exit(SD_MUTEX(un));
10043 			mutex_enter(&un->un_pm_mutex);
10044 
10045 			if (un->un_pm_idle_timeid == NULL) {
10046 				/* 300 ms. */
10047 				un->un_pm_idle_timeid =
10048 				    timeout(sd_pm_idletimeout_handler, un,
10049 				    (drv_usectohz((clock_t)300000)));
10050 				/*
10051 				 * Include an extra call to busy which keeps the
10052 				 * device busy with respect to the PM layer
10053 				 * until the timer fires, at which time it'll
10054 				 * get the extra idle call.
10055 				 */
10056 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10057 			}
10058 		}
10059 	}
10060 	un->un_pm_busy = FALSE;
10061 	/* Next... */
10062 	cv_signal(&un->un_pm_busy_cv);
10063 
10064 	un->un_pm_count++;
10065 
10066 	SD_TRACE(SD_LOG_IO_PM, un,
10067 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10068 
10069 	mutex_exit(&un->un_pm_mutex);
10070 
10071 	return (return_status);
10072 }
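
/*
 * Illustrative sketch (not part of the driver): a user-space pthreads
 * model of the un_pm_count discipline implemented by sd_pm_entry() and
 * sd_pm_exit() above.  count < 0 means powered down, the 0 -> 1
 * transition marks the component busy, and the iodone side decrements,
 * idling the component when the count returns to 0.  The pm_* calls are
 * stubs, and the un_pm_busy serialization and idle-timeout handling are
 * omitted; this models the protocol only.
 */
#include <pthread.h>

struct ex_pm {
	pthread_mutex_t	lock;
	int		count;			/* models un_pm_count */
};

static void ex_busy_component(void) { }		/* pm_busy_component() */
static void ex_idle_component(void) { }		/* pm_idle_component() */
static int  ex_raise_power(void) { return (0); }	/* 0 == success */

static int
ex_pm_entry(struct ex_pm *pm)
{
	int rv = 0;

	pthread_mutex_lock(&pm->lock);
	if (pm->count < 1) {
		ex_busy_component();		/* 0 -> 1: mark busy */
		if (pm->count < 0) {
			if ((rv = ex_raise_power()) != 0) {
				ex_idle_component();
				pm->count--;	/* still powered down */
				pthread_mutex_unlock(&pm->lock);
				return (rv);
			}
			pm->count = 0;	/* models sdpower's count update */
		}
	}
	pm->count++;
	pthread_mutex_unlock(&pm->lock);
	return (0);
}

static void
ex_pm_exit(struct ex_pm *pm)
{
	pthread_mutex_lock(&pm->lock);
	if (--pm->count == 0)
		ex_idle_component();		/* last I/O: may idle */
	pthread_mutex_unlock(&pm->lock);
}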
10073 
10074 
10075 /*
10076  *    Function: sd_pm_exit
10077  *
10078  * Description: Called at the completion of a command to manage busy
10079  *		status for the device. If the device becomes idle the
10080  *		PM framework is notified.
10081  *
10082  *     Context: Kernel thread context
10083  */
10084 
10085 static void
10086 sd_pm_exit(struct sd_lun *un)
10087 {
10088 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10089 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10090 
10091 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10092 
10093 	/*
10094 	 * After attach the following flag is only read, so don't
10095 	 * take the penalty of acquiring a mutex for it.
10096 	 */
10097 	if (un->un_f_pm_is_enabled == TRUE) {
10098 
10099 		mutex_enter(&un->un_pm_mutex);
10100 		un->un_pm_count--;
10101 
10102 		SD_TRACE(SD_LOG_IO_PM, un,
10103 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10104 
10105 		ASSERT(un->un_pm_count >= 0);
10106 		if (un->un_pm_count == 0) {
10107 			mutex_exit(&un->un_pm_mutex);
10108 
10109 			SD_TRACE(SD_LOG_IO_PM, un,
10110 			    "sd_pm_exit: idle component\n");
10111 
10112 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10113 
10114 		} else {
10115 			mutex_exit(&un->un_pm_mutex);
10116 		}
10117 	}
10118 
10119 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10120 }
10121 
10122 
10123 /*
10124  *    Function: sdopen
10125  *
10126  * Description: Driver's open(9e) entry point function.
10127  *
10128  *   Arguments: dev_p   - pointer to device number
10129  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10130  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10131  *		cred_p  - user credential pointer
10132  *
10133  * Return Code: EINVAL
10134  *		ENXIO
10135  *		EIO
10136  *		EROFS
10137  *		EBUSY
10138  *
10139  *     Context: Kernel thread context
10140  */
10141 /* ARGSUSED */
10142 static int
10143 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10144 {
10145 	struct sd_lun	*un;
10146 	int		nodelay;
10147 	int		part;
10148 	uint64_t	partmask;
10149 	int		instance;
10150 	dev_t		dev;
10151 	int		rval = EIO;
10152 
10153 	/* Validate the open type */
10154 	if (otyp >= OTYPCNT) {
10155 		return (EINVAL);
10156 	}
10157 
10158 	dev = *dev_p;
10159 	instance = SDUNIT(dev);
10160 	mutex_enter(&sd_detach_mutex);
10161 
10162 	/*
10163 	 * Fail the open if there is no softstate for the instance, or
10164 	 * if another thread somewhere is trying to detach the instance.
10165 	 */
10166 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10167 	    (un->un_detach_count != 0)) {
10168 		mutex_exit(&sd_detach_mutex);
10169 		/*
10170 		 * The probe cache only needs to be cleared when open (9e) fails
10171 		 * with ENXIO (4238046).
10172 		 */
10173 		/*
10174 		 * Unconditionally clearing the probe cache is fine with
10175 		 * separate sd/ssd binaries; on the x86 platform, where
10176 		 * parallel SCSI and fibre can share one binary, it may
10177 		 * be an issue.
10178 		 */
10179 		sd_scsi_clear_probe_cache();
10180 		return (ENXIO);
10181 	}
10182 
10183 	/*
10184 	 * The un_layer_count is to prevent another thread in specfs from
10185 	 * trying to detach the instance, which can happen when we are
10186 	 * called from a higher-layer driver instead of thru specfs.
10187 	 * This will not be needed when DDI provides a layered driver
10188 	 * interface that allows specfs to know that an instance is in
10189 	 * use by a layered driver & should not be detached.
10190 	 *
10191 	 * Note: the semantics for layered driver opens are exactly one
10192 	 * close for every open.
10193 	 */
10194 	if (otyp == OTYP_LYR) {
10195 		un->un_layer_count++;
10196 	}
10197 
10198 	/*
10199 	 * Keep a count of the current # of opens in progress. This is because
10200 	 * some layered drivers try to call us as a regular open. This can
10201 	 * cause problems that we cannot prevent; however, by keeping this count
10202 	 * we can at least keep our open and detach routines from racing against
10203 	 * each other under such conditions.
10204 	 */
10205 	un->un_opens_in_progress++;
10206 	mutex_exit(&sd_detach_mutex);
10207 
10208 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10209 	part	 = SDPART(dev);
10210 	partmask = 1 << part;
10211 
10212 	/*
10213 	 * We use a semaphore here in order to serialize
10214 	 * open and close requests on the device.
10215 	 */
10216 	sema_p(&un->un_semoclose);
10217 
10218 	mutex_enter(SD_MUTEX(un));
10219 
10220 	/*
10221 	 * All device accesses go thru sdstrategy() where we check
10222 	 * on suspend status but there could be a scsi_poll command,
10223 	 * which bypasses sdstrategy(), so we need to check pm
10224 	 * status.
10225 	 */
10226 
10227 	if (!nodelay) {
10228 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10229 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10230 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10231 		}
10232 
10233 		mutex_exit(SD_MUTEX(un));
10234 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10235 			rval = EIO;
10236 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10237 			    "sdopen: sd_pm_entry failed\n");
10238 			goto open_failed_with_pm;
10239 		}
10240 		mutex_enter(SD_MUTEX(un));
10241 	}
10242 
10243 	/* check for previous exclusive open */
10244 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10245 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10246 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10247 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10248 
10249 	if (un->un_exclopen & (partmask)) {
10250 		goto excl_open_fail;
10251 	}
10252 
10253 	if (flag & FEXCL) {
10254 		int i;
10255 		if (un->un_ocmap.lyropen[part]) {
10256 			goto excl_open_fail;
10257 		}
10258 		for (i = 0; i < (OTYPCNT - 1); i++) {
10259 			if (un->un_ocmap.regopen[i] & (partmask)) {
10260 				goto excl_open_fail;
10261 			}
10262 		}
10263 	}
10264 
10265 	/*
10266 	 * Check the write permission if this is a removable media device,
10267 	 * NDELAY has not been set, and writable permission is requested.
10268 	 *
10269 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10270 	 * attempt will fail with EIO as part of the I/O processing. This is a
10271 	 * more permissive implementation that allows the open to succeed and
10272 	 * WRITE attempts to fail when appropriate.
10273 	 */
10274 	if (un->un_f_chk_wp_open) {
10275 		if ((flag & FWRITE) && (!nodelay)) {
10276 			mutex_exit(SD_MUTEX(un));
10277 			/*
10278 			 * Defer the write-permission check on a writable
10279 			 * DVD drive until sdstrategy; do not fail the open
10280 			 * even if FWRITE is set, as the device can be
10281 			 * writable depending upon the media, and the media
10282 			 * can change after the call to open().
10283 			 */
10284 			if (un->un_f_dvdram_writable_device == FALSE) {
10285 				if (ISCD(un) || sr_check_wp(dev)) {
10286 					rval = EROFS;
10287 					mutex_enter(SD_MUTEX(un));
10288 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10289 					    "write to cd or write protected media\n");
10290 					goto open_fail;
10291 				}
10292 			}
10293 			mutex_enter(SD_MUTEX(un));
10294 		}
10295 	}
10296 
10297 	/*
10298 	 * If opening in NDELAY/NONBLOCK mode, just return; the check
10299 	 * that the disk is ready and has a valid geometry is deferred.
10300 	 */
10301 	if (!nodelay) {
10302 		mutex_exit(SD_MUTEX(un));
10303 		rval = sd_ready_and_valid(un);
10304 		mutex_enter(SD_MUTEX(un));
10305 		/*
10306 		 * Fail if device is not ready or if the number of disk
10307 		 * blocks is zero or negative for non CD devices.
10308 		 */
10309 		if ((rval != SD_READY_VALID) ||
10310 		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10311 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10312 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10313 			    "device not ready or invalid disk block value\n");
10314 			goto open_fail;
10315 		}
10316 #if defined(__i386) || defined(__amd64)
10317 	} else {
10318 		uchar_t *cp;
10319 		/*
10320 		 * x86 requires special nodelay handling, so that p0 is
10321 		 * always defined and accessible.
10322 		 * Invalidate geometry only if device is not already open.
10323 		 */
10324 		cp = &un->un_ocmap.chkd[0];
10325 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10326 			if (*cp != (uchar_t)0) {
10327 				break;
10328 			}
10329 			cp++;
10330 		}
10331 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10332 			un->un_f_geometry_is_valid = FALSE;
10333 		}
10334 
10335 #endif
10336 	}
10337 
10338 	if (otyp == OTYP_LYR) {
10339 		un->un_ocmap.lyropen[part]++;
10340 	} else {
10341 		un->un_ocmap.regopen[otyp] |= partmask;
10342 	}
10343 
10344 	/* Set up open and exclusive open flags */
10345 	if (flag & FEXCL) {
10346 		un->un_exclopen |= (partmask);
10347 	}
10348 
10349 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10350 	    "open of part %d type %d\n", part, otyp);
10351 
10352 	mutex_exit(SD_MUTEX(un));
10353 	if (!nodelay) {
10354 		sd_pm_exit(un);
10355 	}
10356 
10357 	sema_v(&un->un_semoclose);
10358 
10359 	mutex_enter(&sd_detach_mutex);
10360 	un->un_opens_in_progress--;
10361 	mutex_exit(&sd_detach_mutex);
10362 
10363 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10364 	return (DDI_SUCCESS);
10365 
10366 excl_open_fail:
10367 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10368 	rval = EBUSY;
10369 
10370 open_fail:
10371 	mutex_exit(SD_MUTEX(un));
10372 
10373 	/*
10374 	 * On a failed open we must exit the pm management.
10375 	 */
10376 	if (!nodelay) {
10377 		sd_pm_exit(un);
10378 	}
10379 open_failed_with_pm:
10380 	sema_v(&un->un_semoclose);
10381 
10382 	mutex_enter(&sd_detach_mutex);
10383 	un->un_opens_in_progress--;
10384 	if (otyp == OTYP_LYR) {
10385 		un->un_layer_count--;
10386 	}
10387 	mutex_exit(&sd_detach_mutex);
10388 
10389 	return (rval);
10390 }
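
/*
 * Illustrative sketch (not part of the driver): the per-partition open
 * bookkeeping used by sdopen() above.  One bit per partition for each
 * regular open type, a counter per partition for layered opens, and a
 * bitmask of partitions held FEXCL.  Types and sizes are stand-ins for
 * the driver's ocmap.
 */
#include <stdint.h>

#define	EX_NPART	8	/* partitions per unit (illustrative) */
#define	EX_NTYPES	3	/* regular open types (illustrative) */

struct ex_ocmap {
	uint8_t		regopen[EX_NTYPES];	/* one bit per partition */
	uint8_t		lyropen[EX_NPART];	/* layered opens counted */
	uint8_t		exclopen;		/* partitions held FEXCL */
};

/* Would an FEXCL open of `part' conflict with any existing open? */
static int
ex_excl_conflict(const struct ex_ocmap *m, int part)
{
	uint8_t	partmask = (uint8_t)(1 << part);
	int	i;

	if ((m->exclopen & partmask) || m->lyropen[part] != 0)
		return (1);
	for (i = 0; i < EX_NTYPES; i++) {
		if (m->regopen[i] & partmask)
			return (1);
	}
	return (0);
}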
10391 
10392 
10393 /*
10394  *    Function: sdclose
10395  *
10396  * Description: Driver's close(9e) entry point function.
10397  *
10398  *   Arguments: dev    - device number
10399  *		flag   - file status flag, informational only
10400  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10401  *		cred_p - user credential pointer
10402  *
10403  * Return Code: ENXIO
10404  *
10405  *     Context: Kernel thread context
10406  */
10407 /* ARGSUSED */
10408 static int
10409 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10410 {
10411 	struct sd_lun	*un;
10412 	uchar_t		*cp;
10413 	int		part;
10414 	int		nodelay;
10415 	int		rval = 0;
10416 
10417 	/* Validate the open type */
10418 	if (otyp >= OTYPCNT) {
10419 		return (ENXIO);
10420 	}
10421 
10422 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10423 		return (ENXIO);
10424 	}
10425 
10426 	part = SDPART(dev);
10427 	nodelay = flag & (FNDELAY | FNONBLOCK);
10428 
10429 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10430 	    "sdclose: close of part %d type %d\n", part, otyp);
10431 
10432 	/*
10433 	 * We use a semaphore here in order to serialize
10434 	 * open and close requests on the device.
10435 	 */
10436 	sema_p(&un->un_semoclose);
10437 
10438 	mutex_enter(SD_MUTEX(un));
10439 
10440 	/* Don't proceed if power is being changed. */
10441 	while (un->un_state == SD_STATE_PM_CHANGING) {
10442 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10443 	}
10444 
10445 	if (un->un_exclopen & (1 << part)) {
10446 		un->un_exclopen &= ~(1 << part);
10447 	}
10448 
10449 	/* Update the open partition map */
10450 	if (otyp == OTYP_LYR) {
10451 		un->un_ocmap.lyropen[part] -= 1;
10452 	} else {
10453 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10454 	}
10455 
10456 	cp = &un->un_ocmap.chkd[0];
10457 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10458 		if (*cp != (uchar_t)0) {
10459 			break;
10460 		}
10461 		cp++;
10462 	}
10463 
10464 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10465 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10466 
10467 		/*
10468 		 * We avoid persistence upon the last close, and set
10469 		 * the throttle back to the maximum.
10470 		 */
10471 		un->un_throttle = un->un_saved_throttle;
10472 
10473 		if (un->un_state == SD_STATE_OFFLINE) {
10474 			if (un->un_f_is_fibre == FALSE) {
10475 				scsi_log(SD_DEVINFO(un), sd_label,
10476 				    CE_WARN, "offline\n");
10477 			}
10478 			un->un_f_geometry_is_valid = FALSE;
10479 
10480 		} else {
10481 			/*
10482 			 * Flush any outstanding writes in NVRAM cache.
10483 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10484 			 * cmd; it may not work for non-Pluto devices.
10485 			 * SYNCHRONIZE CACHE is not required for removables,
10486 			 * except DVD-RAM drives.
10487 			 *
10488 			 * Also note: because SYNCHRONIZE CACHE is currently
10489 			 * the only command issued here that requires the
10490 			 * drive be powered up, only do the power up before
10491 			 * sending the Sync Cache command. If additional
10492 			 * commands are added which require a powered up
10493 			 * drive, the following sequence may have to change.
10494 			 *
10495 			 * And finally, note that parallel SCSI on SPARC
10496 			 * only issues a Sync Cache to DVD-RAM, a newly
10497 			 * supported device.
10498 			 */
10499 #if defined(__i386) || defined(__amd64)
10500 			if (un->un_f_sync_cache_supported ||
10501 			    un->un_f_dvdram_writable_device == TRUE) {
10502 #else
10503 			if (un->un_f_dvdram_writable_device == TRUE) {
10504 #endif
10505 				mutex_exit(SD_MUTEX(un));
10506 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10507 					rval =
10508 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10509 					    NULL);
10510 					/* ignore error if not supported */
10511 					if (rval == ENOTSUP) {
10512 						rval = 0;
10513 					} else if (rval != 0) {
10514 						rval = EIO;
10515 					}
10516 					sd_pm_exit(un);
10517 				} else {
10518 					rval = EIO;
10519 				}
10520 				mutex_enter(SD_MUTEX(un));
10521 			}
10522 
10523 			/*
10524 			 * For devices which support DOOR_LOCK, send an ALLOW
10525 			 * MEDIA REMOVAL command, but don't get upset if it
10526 			 * fails. We need to raise the power of the drive before
10527 			 * we can call sd_send_scsi_DOORLOCK()
10528 			 */
10529 			if (un->un_f_doorlock_supported) {
10530 				mutex_exit(SD_MUTEX(un));
10531 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10532 					rval = sd_send_scsi_DOORLOCK(un,
10533 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10534 
10535 					sd_pm_exit(un);
10536 					if (ISCD(un) && (rval != 0) &&
10537 					    (nodelay != 0)) {
10538 						rval = ENXIO;
10539 					}
10540 				} else {
10541 					rval = EIO;
10542 				}
10543 				mutex_enter(SD_MUTEX(un));
10544 			}
10545 
10546 			/*
10547 			 * If a device has removable media, invalidate all
10548 			 * parameters related to media, such as geometry,
10549 			 * blocksize, and blockcount.
10550 			 */
10551 			if (un->un_f_has_removable_media) {
10552 				sr_ejected(un);
10553 			}
10554 
10555 			/*
10556 			 * Destroy the cache (if it exists) which was
10557 			 * allocated for the write maps since this is
10558 			 * the last close for this media.
10559 			 */
10560 			if (un->un_wm_cache) {
10561 				/*
10562 				 * Check if there are pending commands;
10563 				 * if there are, give a warning and do
10564 				 * not destroy the cache.
10565 				 */
10566 				if (un->un_ncmds_in_driver > 0) {
10567 					scsi_log(SD_DEVINFO(un),
10568 					    sd_label, CE_WARN,
10569 					    "Unable to clean up memory "
10570 					    "because of pending I/O\n");
10571 				} else {
10572 					kmem_cache_destroy(
10573 					    un->un_wm_cache);
10574 					un->un_wm_cache = NULL;
10575 				}
10576 			}
10577 		}
10578 	}
10579 
10580 	mutex_exit(SD_MUTEX(un));
10581 	sema_v(&un->un_semoclose);
10582 
10583 	if (otyp == OTYP_LYR) {
10584 		mutex_enter(&sd_detach_mutex);
10585 		/*
10586 		 * The detach routine may run when the layer count
10587 		 * drops to zero.
10588 		 */
10589 		un->un_layer_count--;
10590 		mutex_exit(&sd_detach_mutex);
10591 	}
10592 
10593 	return (rval);
10594 }
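
/*
 * Illustrative sketch (not part of the driver): the "last close" test in
 * sdclose() above scans the open map as raw bytes (the chkd[] view) and
 * declares a last close only when every byte is zero, i.e. no partition
 * remains open by any type.
 */
#include <stddef.h>

static int
ex_is_last_close(const unsigned char *chkd, size_t ocsize)
{
	size_t i;

	for (i = 0; i < ocsize; i++) {
		if (chkd[i] != 0)
			return (0);	/* some open remains */
	}
	return (1);			/* all zero: last close */
}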
10595 
10596 
10597 /*
10598  *    Function: sd_ready_and_valid
10599  *
10600  * Description: Test if device is ready and has a valid geometry.
10601  *
10602  *   Arguments: un  - driver soft state (unit) structure
10603  *
10604  * Return Code: SD_READY_VALID		ready and valid label
10605  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10606  *		SD_NOT_READY_VALID	not ready, no label
10607  *		ENOMEM			could not allocate the wmap cache
10608  *
10609  *     Context: Never called at interrupt context.
10610  */
10611 
10612 static int
10613 sd_ready_and_valid(struct sd_lun *un)
10614 {
10615 	struct sd_errstats	*stp;
10616 	uint64_t		capacity;
10617 	uint_t			lbasize;
10618 	int			rval = SD_READY_VALID;
10619 	char			name_str[48];
10620 
10621 	ASSERT(un != NULL);
10622 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10623 
10624 	mutex_enter(SD_MUTEX(un));
10625 	/*
10626 	 * If a device has removable media, we must check if media is
10627 	 * ready when checking if this device is ready and valid.
10628 	 */
10629 	if (un->un_f_has_removable_media) {
10630 		mutex_exit(SD_MUTEX(un));
10631 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
10632 			rval = SD_NOT_READY_VALID;
10633 			mutex_enter(SD_MUTEX(un));
10634 			goto done;
10635 		}
10636 
10637 		mutex_enter(SD_MUTEX(un));
10638 		if ((un->un_f_geometry_is_valid == FALSE) ||
10639 		    (un->un_f_blockcount_is_valid == FALSE) ||
10640 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10641 
10642 			/* capacity has to be read on every open. */
10643 			mutex_exit(SD_MUTEX(un));
10644 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
10645 			    &lbasize, SD_PATH_DIRECT) != 0) {
10646 				mutex_enter(SD_MUTEX(un));
10647 				un->un_f_geometry_is_valid = FALSE;
10648 				rval = SD_NOT_READY_VALID;
10649 				goto done;
10650 			} else {
10651 				mutex_enter(SD_MUTEX(un));
10652 				sd_update_block_info(un, lbasize, capacity);
10653 			}
10654 		}
10655 
10656 		/*
10657 		 * Check if the media in the device is writable or not.
10658 		 */
10659 		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
10660 			sd_check_for_writable_cd(un);
10661 		}
10662 
10663 	} else {
10664 		/*
10665 		 * Do a test unit ready to clear any unit attention from non-cd
10666 		 * devices.
10667 		 */
10668 		mutex_exit(SD_MUTEX(un));
10669 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10670 		mutex_enter(SD_MUTEX(un));
10671 	}
10672 
10673 
10674 	/*
10675 	 * If this is a non 512 block device, allocate space for
10676 	 * the wmap cache. This is being done here since every time
10677 	 * a media is changed this routine will be called and the
10678 	 * block size is a function of media rather than device.
10679 	 */
10680 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
10681 		if (!(un->un_wm_cache)) {
10682 			(void) snprintf(name_str, sizeof (name_str),
10683 			    "%s%d_cache",
10684 			    ddi_driver_name(SD_DEVINFO(un)),
10685 			    ddi_get_instance(SD_DEVINFO(un)));
10686 			un->un_wm_cache = kmem_cache_create(
10687 			    name_str, sizeof (struct sd_w_map),
10688 			    8, sd_wm_cache_constructor,
10689 			    sd_wm_cache_destructor, NULL,
10690 			    (void *)un, NULL, 0);
10691 			if (!(un->un_wm_cache)) {
10692 				rval = ENOMEM;
10693 				goto done;
10694 			}
10695 		}
10696 	}
10697 
10698 	if (un->un_state == SD_STATE_NORMAL) {
10699 		/*
10700 		 * If the target is not yet ready here (defined by a TUR
10701 		 * failure), invalidate the geometry and print an 'offline'
10702 		 * message. This is a legacy message, as the state of the
10703 		 * target is not actually changed to SD_STATE_OFFLINE.
10704 		 *
10705 		 * If the TUR fails for EACCES (Reservation Conflict), it
10706 		 * means there actually is nothing wrong with the target that
10707 		 * would require invalidating the geometry, so continue in
10708 		 * that case as if the TUR was successful.
10709 		 */
10710 		int err;
10711 
10712 		mutex_exit(SD_MUTEX(un));
10713 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
10714 		mutex_enter(SD_MUTEX(un));
10715 
10716 		if ((err != 0) && (err != EACCES)) {
10717 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10718 			    "offline\n");
10719 			un->un_f_geometry_is_valid = FALSE;
10720 			rval = SD_NOT_READY_VALID;
10721 			goto done;
10722 		}
10723 	}
10724 
10725 	if (un->un_f_format_in_progress == FALSE) {
10726 		/*
10727 		 * Note: sd_validate_geometry may return success (0), but that
10728 		 * does not necessarily mean un_f_geometry_is_valid == TRUE!
10729 		 */
10730 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
10731 		if (rval == ENOTSUP) {
10732 			if (un->un_f_geometry_is_valid == TRUE)
10733 				rval = 0;
10734 			else {
10735 				rval = SD_READY_NOT_VALID;
10736 				goto done;
10737 			}
10738 		}
10739 		if (rval != 0) {
10740 			/*
10741 			 * We don't check the validity of geometry for
10742 			 * CDROMs. Also we assume we have a good label
10743 			 * even if sd_validate_geometry returned ENOMEM.
10744 			 */
10745 			if (!ISCD(un) && rval != ENOMEM) {
10746 				rval = SD_NOT_READY_VALID;
10747 				goto done;
10748 			}
10749 		}
10750 	}
10751 
10752 	/*
10753 	 * If this device supports the DOOR_LOCK command, try to send
10754 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
10755 	 * if it fails. For a CD, however, it is an error
10756 	 */
10757 	if (un->un_f_doorlock_supported) {
10758 		mutex_exit(SD_MUTEX(un));
10759 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
10760 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
10761 			rval = SD_NOT_READY_VALID;
10762 			mutex_enter(SD_MUTEX(un));
10763 			goto done;
10764 		}
10765 		mutex_enter(SD_MUTEX(un));
10766 	}
10767 
10768 	/* The state has changed, inform the media watch routines */
10769 	un->un_mediastate = DKIO_INSERTED;
10770 	cv_broadcast(&un->un_state_cv);
10771 	rval = SD_READY_VALID;
10772 
10773 done:
10774 
10775 	/*
10776 	 * Initialize the capacity kstat value, if no media previously
10777 	 * (capacity kstat is 0) and a media has been inserted
10778 	 * (un_blockcount > 0).
10779 	 */
10780 	if (un->un_errstats != NULL) {
10781 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10782 		if ((stp->sd_capacity.value.ui64 == 0) &&
10783 		    (un->un_f_blockcount_is_valid == TRUE)) {
10784 			stp->sd_capacity.value.ui64 =
10785 			    (uint64_t)((uint64_t)un->un_blockcount *
10786 			    un->un_sys_blocksize);
10787 		}
10788 	}
10789 
10790 	mutex_exit(SD_MUTEX(un));
10791 	return (rval);
10792 }
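
/*
 * Illustrative sketch (not part of the driver): the capacity kstat update
 * above widens un_blockcount to 64 bits before multiplying, so the byte
 * count cannot wrap for large disks.
 */
#include <stdint.h>

static uint64_t
ex_capacity_bytes(uint32_t blockcount, uint32_t blocksize)
{
	/* Widen first: a 32-bit product would overflow past 4 GB. */
	return ((uint64_t)blockcount * blocksize);
}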
10793 
10794 
10795 /*
10796  *    Function: sdmin
10797  *
10798  * Description: Routine to limit the size of a data transfer. Used in
10799  *		conjunction with physio(9F).
10800  *
10801  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10802  *
10803  *     Context: Kernel thread context.
10804  */
10805 
10806 static void
10807 sdmin(struct buf *bp)
10808 {
10809 	struct sd_lun	*un;
10810 	int		instance;
10811 
10812 	instance = SDUNIT(bp->b_edev);
10813 
10814 	un = ddi_get_soft_state(sd_state, instance);
10815 	ASSERT(un != NULL);
10816 
10817 	if (bp->b_bcount > un->un_max_xfer_size) {
10818 		bp->b_bcount = un->un_max_xfer_size;
10819 	}
10820 }
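
/*
 * Illustrative sketch (not part of the driver): sdmin() is a minphys(9F)
 * style routine; physio(9F) calls it on each transfer so the request can
 * be clamped to the device limit and carried out in pieces.  buf(9S) is
 * reduced here to the one field that matters.
 */
#include <stddef.h>

struct ex_buf {
	size_t	b_bcount;	/* requested transfer size, in bytes */
};

static void
ex_min(struct ex_buf *bp, size_t max_xfer_size)
{
	if (bp->b_bcount > max_xfer_size)
		bp->b_bcount = max_xfer_size;	/* clamp to device limit */
}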
10821 
10822 
10823 /*
10824  *    Function: sdread
10825  *
10826  * Description: Driver's read(9e) entry point function.
10827  *
10828  *   Arguments: dev   - device number
10829  *		uio   - structure pointer describing where data is to be stored
10830  *			in user's space
10831  *		cred_p  - user credential pointer
10832  *
10833  * Return Code: ENXIO
10834  *		EIO
10835  *		EINVAL
10836  *		value returned by physio
10837  *
10838  *     Context: Kernel thread context.
10839  */
10840 /* ARGSUSED */
10841 static int
10842 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10843 {
10844 	struct sd_lun	*un = NULL;
10845 	int		secmask;
10846 	int		err;
10847 
10848 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10849 		return (ENXIO);
10850 	}
10851 
10852 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10853 
10854 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10855 		mutex_enter(SD_MUTEX(un));
10856 		/*
10857 		 * Because the call to sd_ready_and_valid will issue I/O we
10858 		 * must wait here if either the device is suspended or
10859 		 * if its power level is changing.
10860 		 */
10861 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10862 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10863 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10864 		}
10865 		un->un_ncmds_in_driver++;
10866 		mutex_exit(SD_MUTEX(un));
10867 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10868 			mutex_enter(SD_MUTEX(un));
10869 			un->un_ncmds_in_driver--;
10870 			ASSERT(un->un_ncmds_in_driver >= 0);
10871 			mutex_exit(SD_MUTEX(un));
10872 			return (EIO);
10873 		}
10874 		mutex_enter(SD_MUTEX(un));
10875 		un->un_ncmds_in_driver--;
10876 		ASSERT(un->un_ncmds_in_driver >= 0);
10877 		mutex_exit(SD_MUTEX(un));
10878 	}
10879 
10880 	/*
10881 	 * Read requests are restricted to multiples of the system block size.
10882 	 */
10883 	secmask = un->un_sys_blocksize - 1;
10884 
10885 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10886 		SD_ERROR(SD_LOG_READ_WRITE, un,
10887 		    "sdread: file offset not modulo %d\n",
10888 		    un->un_sys_blocksize);
10889 		err = EINVAL;
10890 	} else if (uio->uio_iov->iov_len & (secmask)) {
10891 		SD_ERROR(SD_LOG_READ_WRITE, un,
10892 		    "sdread: transfer length not modulo %d\n",
10893 		    un->un_sys_blocksize);
10894 		err = EINVAL;
10895 	} else {
10896 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10897 	}
10898 	return (err);
10899 }
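
/*
 * Illustrative sketch (not part of the driver): the secmask tests above
 * rely on the system block size being a power of two, so that
 * value & (blocksize - 1) is nonzero exactly when value is not a
 * multiple of blocksize.  For example, with a 512-byte block size,
 * offset 1024 passes the test and offset 1000 fails it.
 */
#include <stdint.h>

static int
ex_is_block_aligned(uint64_t value, uint32_t blocksize)
{
	uint32_t secmask = blocksize - 1;	/* blocksize must be 2^n */

	return ((value & secmask) == 0);
}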
10900 
10901 
10902 /*
10903  *    Function: sdwrite
10904  *
10905  * Description: Driver's write(9e) entry point function.
10906  *
10907  *   Arguments: dev   - device number
10908  *		uio   - structure pointer describing where data is stored in
10909  *			user's space
10910  *		cred_p  - user credential pointer
10911  *
10912  * Return Code: ENXIO
10913  *		EIO
10914  *		EINVAL
10915  *		value returned by physio
10916  *
10917  *     Context: Kernel thread context.
10918  */
10919 /* ARGSUSED */
10920 static int
10921 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10922 {
10923 	struct sd_lun	*un = NULL;
10924 	int		secmask;
10925 	int		err;
10926 
10927 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10928 		return (ENXIO);
10929 	}
10930 
10931 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10932 
10933 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10934 		mutex_enter(SD_MUTEX(un));
10935 		/*
10936 		 * Because the call to sd_ready_and_valid will issue I/O we
10937 		 * must wait here if either the device is suspended or
10938 		 * if its power level is changing.
10939 		 */
10940 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10941 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10942 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10943 		}
10944 		un->un_ncmds_in_driver++;
10945 		mutex_exit(SD_MUTEX(un));
10946 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10947 			mutex_enter(SD_MUTEX(un));
10948 			un->un_ncmds_in_driver--;
10949 			ASSERT(un->un_ncmds_in_driver >= 0);
10950 			mutex_exit(SD_MUTEX(un));
10951 			return (EIO);
10952 		}
10953 		mutex_enter(SD_MUTEX(un));
10954 		un->un_ncmds_in_driver--;
10955 		ASSERT(un->un_ncmds_in_driver >= 0);
10956 		mutex_exit(SD_MUTEX(un));
10957 	}
10958 
10959 	/*
10960 	 * Write requests are restricted to multiples of the system block size.
10961 	 */
10962 	secmask = un->un_sys_blocksize - 1;
10963 
10964 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10965 		SD_ERROR(SD_LOG_READ_WRITE, un,
10966 		    "sdwrite: file offset not modulo %d\n",
10967 		    un->un_sys_blocksize);
10968 		err = EINVAL;
10969 	} else if (uio->uio_iov->iov_len & (secmask)) {
10970 		SD_ERROR(SD_LOG_READ_WRITE, un,
10971 		    "sdwrite: transfer length not modulo %d\n",
10972 		    un->un_sys_blocksize);
10973 		err = EINVAL;
10974 	} else {
10975 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10976 	}
10977 	return (err);
10978 }
10979 
10980 
10981 /*
10982  *    Function: sdaread
10983  *
10984  * Description: Driver's aread(9e) entry point function.
10985  *
10986  *   Arguments: dev   - device number
10987  *		aio   - structure pointer describing where data is to be stored
10988  *		cred_p  - user credential pointer
10989  *
10990  * Return Code: ENXIO
10991  *		EIO
10992  *		EINVAL
10993  *		value returned by aphysio
10994  *
10995  *     Context: Kernel thread context.
10996  */
10997 /* ARGSUSED */
10998 static int
10999 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11000 {
11001 	struct sd_lun	*un = NULL;
11002 	struct uio	*uio = aio->aio_uio;
11003 	int		secmask;
11004 	int		err;
11005 
11006 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11007 		return (ENXIO);
11008 	}
11009 
11010 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11011 
11012 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11013 		mutex_enter(SD_MUTEX(un));
11014 		/*
11015 		 * Because the call to sd_ready_and_valid will issue I/O we
11016 		 * must wait here if either the device is suspended or
11017 		 * if its power level is changing.
11018 		 */
11019 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11020 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11021 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11022 		}
11023 		un->un_ncmds_in_driver++;
11024 		mutex_exit(SD_MUTEX(un));
11025 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11026 			mutex_enter(SD_MUTEX(un));
11027 			un->un_ncmds_in_driver--;
11028 			ASSERT(un->un_ncmds_in_driver >= 0);
11029 			mutex_exit(SD_MUTEX(un));
11030 			return (EIO);
11031 		}
11032 		mutex_enter(SD_MUTEX(un));
11033 		un->un_ncmds_in_driver--;
11034 		ASSERT(un->un_ncmds_in_driver >= 0);
11035 		mutex_exit(SD_MUTEX(un));
11036 	}
11037 
11038 	/*
11039 	 * Read requests are restricted to multiples of the system block size.
11040 	 */
11041 	secmask = un->un_sys_blocksize - 1;
11042 
11043 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11044 		SD_ERROR(SD_LOG_READ_WRITE, un,
11045 		    "sdaread: file offset not modulo %d\n",
11046 		    un->un_sys_blocksize);
11047 		err = EINVAL;
11048 	} else if (uio->uio_iov->iov_len & (secmask)) {
11049 		SD_ERROR(SD_LOG_READ_WRITE, un,
11050 		    "sdaread: transfer length not modulo %d\n",
11051 		    un->un_sys_blocksize);
11052 		err = EINVAL;
11053 	} else {
11054 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11055 	}
11056 	return (err);
11057 }
11058 
11059 
11060 /*
11061  *    Function: sdawrite
11062  *
11063  * Description: Driver's awrite(9e) entry point function.
11064  *
11065  *   Arguments: dev   - device number
11066  *		aio   - structure pointer describing where data is stored
11067  *		cred_p  - user credential pointer
11068  *
11069  * Return Code: ENXIO
11070  *		EIO
11071  *		EINVAL
11072  *		value returned by aphysio
11073  *
11074  *     Context: Kernel thread context.
11075  */
11076 /* ARGSUSED */
11077 static int
11078 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11079 {
11080 	struct sd_lun	*un = NULL;
11081 	struct uio	*uio = aio->aio_uio;
11082 	int		secmask;
11083 	int		err;
11084 
11085 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11086 		return (ENXIO);
11087 	}
11088 
11089 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11090 
11091 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11092 		mutex_enter(SD_MUTEX(un));
11093 		/*
11094 		 * Because the call to sd_ready_and_valid will issue I/O we
11095 		 * must wait here if either the device is suspended or
11096 		 * if its power level is changing.
11097 		 */
11098 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11099 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11100 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11101 		}
11102 		un->un_ncmds_in_driver++;
11103 		mutex_exit(SD_MUTEX(un));
11104 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11105 			mutex_enter(SD_MUTEX(un));
11106 			un->un_ncmds_in_driver--;
11107 			ASSERT(un->un_ncmds_in_driver >= 0);
11108 			mutex_exit(SD_MUTEX(un));
11109 			return (EIO);
11110 		}
11111 		mutex_enter(SD_MUTEX(un));
11112 		un->un_ncmds_in_driver--;
11113 		ASSERT(un->un_ncmds_in_driver >= 0);
11114 		mutex_exit(SD_MUTEX(un));
11115 	}
11116 
11117 	/*
11118 	 * Write requests are restricted to multiples of the system block size.
11119 	 */
11120 	secmask = un->un_sys_blocksize - 1;
11121 
11122 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11123 		SD_ERROR(SD_LOG_READ_WRITE, un,
11124 		    "sdawrite: file offset not modulo %d\n",
11125 		    un->un_sys_blocksize);
11126 		err = EINVAL;
11127 	} else if (uio->uio_iov->iov_len & (secmask)) {
11128 		SD_ERROR(SD_LOG_READ_WRITE, un,
11129 		    "sdawrite: transfer length not modulo %d\n",
11130 		    un->un_sys_blocksize);
11131 		err = EINVAL;
11132 	} else {
11133 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11134 	}
11135 	return (err);
11136 }
11137 
11138 
11139 
11140 
11141 
11142 /*
11143  * Driver IO processing follows this sequence:
11144  *
11145  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11146  *         |                |                     ^
11147  *         v                v                     |
11148  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11149  *         |                |                     |                   |
11150  *         v                |                     |                   |
11151  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11152  *         |                |                     ^                   ^
11153  *         v                v                     |                   |
11154  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11155  *         |                |                     |                   |
11156  *     +---+                |                     +------------+      +-------+
11157  *     |                    |                                  |              |
11158  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11159  *     |                    v                                  |              |
11160  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11161  *     |                    |                                  ^              |
11162  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11163  *     |                    v                                  |              |
11164  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11165  *     |                    |                                  ^              |
11166  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11167  *     |                    v                                  |              |
11168  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11169  *     |                    |                                  ^              |
11170  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11171  *     |                    v                                  |              |
11172  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11173  *     |                    |                                  ^              |
11174  *     |                    |                                  |              |
11175  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11176  *                          |                           ^
11177  *                          v                           |
11178  *                   sd_core_iostart()                  |
11179  *                          |                           |
11180  *                          |                           +------>(*destroypkt)()
11181  *                          +-> sd_start_cmds() <-+     |           |
11182  *                          |                     |     |           v
11183  *                          |                     |     |  scsi_destroy_pkt(9F)
11184  *                          |                     |     |
11185  *                          +->(*initpkt)()       +- sdintr()
11186  *                          |  |                        |  |
11187  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11188  *                          |  +-> scsi_setup_cdb(9F)   |
11189  *                          |                           |
11190  *                          +--> scsi_transport(9F)     |
11191  *                                     |                |
11192  *                                     +----> SCSA ---->+
11193  *
11194  *
11195  * This code is based upon the following presumptions:
11196  *
11197  *   - iostart and iodone functions operate on buf(9S) structures. These
11198  *     functions perform the necessary operations on the buf(9S) and pass
11199  *     them along to the next function in the chain by using the macros
11200  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11201  *     (for iodone side functions).
11202  *
11203  *   - The iostart side functions may sleep. The iodone side functions
11204  *     are called under interrupt context and may NOT sleep. Therefore
11205  *     iodone side functions also may not call iostart side functions.
11206  *     (NOTE: iostart side functions should NOT sleep for memory, as
11207  *     this could result in deadlock.)
11208  *
11209  *   - An iostart side function may call its corresponding iodone side
11210  *     function directly (if necessary).
11211  *
11212  *   - In the event of an error, an iostart side function can return a buf(9S)
11213  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11214  *     b_error in the usual way of course).
11215  *
11216  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11217  *     requests to the iostart side functions.  The iostart side functions in
11218  *     this case would be called under the context of a taskq thread, so it's
11219  *     OK for them to block/sleep/spin.
11220  *
11221  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11222  *     pass them along to the next function in the chain.  The corresponding
11223  *     iodone side functions must coalesce the "shadow" bufs and return
11224  *     the "original" buf to the next higher layer.
11225  *
11226  *   - The b_private field of the buf(9S) struct holds a pointer to
11227  *     an sd_xbuf struct, which contains information needed to
11228  *     construct the scsi_pkt for the command.
11229  *
11230  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11231  *     layer must acquire & release the SD_MUTEX(un) as needed.
11232  */
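
/*
 * Illustrative sketch (not part of the driver): a minimal model of the
 * layered iostart chain described above.  Each layer receives its own
 * index and passes the request to the next layer through the chain table,
 * which is the role SD_NEXT_IOSTART() plays; the real chains also carry
 * matching iodone entries and per-unit state.  All names here are
 * illustrative.
 */
struct ex_xbuf;
typedef void (*ex_iostart_fn)(int index, struct ex_xbuf *xp);

struct ex_xbuf {
	int	xb_chain_iostart;	/* starting index into the chain */
};

static void ex_mapblockaddr_iostart(int index, struct ex_xbuf *xp);
static void ex_core_iostart(int index, struct ex_xbuf *xp);

static ex_iostart_fn ex_iostart_chain[] = {
	ex_mapblockaddr_iostart,	/* index 0 */
	ex_core_iostart,		/* index 1: end of chain */
};

#define	EX_NEXT_IOSTART(i, xp)	(ex_iostart_chain[(i) + 1]((i) + 1, (xp)))

static void
ex_mapblockaddr_iostart(int index, struct ex_xbuf *xp)
{
	/* ... translate partition-relative blocks to absolute ... */
	EX_NEXT_IOSTART(index, xp);	/* hand the request down */
}

static void
ex_core_iostart(int index, struct ex_xbuf *xp)
{
	(void) index;
	(void) xp;
	/* ... build the scsi_pkt and call scsi_transport(9F) ... */
}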
11233 
11234 
11235 /*
11236  * Create taskq for all targets in the system. This is created at
11237  * _init(9E) and destroyed at _fini(9E).
11238  *
11239  * Note: here we set the minalloc to a reasonably high number to ensure that
11240  * we will have an adequate supply of task entries available at interrupt time.
11241  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11242  * sd_taskq_create().  Since we do not want to sleep for allocations at
11243  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11244  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11245  * requests at any one instant in time.
11246  */
11247 #define	SD_TASKQ_NUMTHREADS	8
11248 #define	SD_TASKQ_MINALLOC	256
11249 #define	SD_TASKQ_MAXALLOC	256
11250 
11251 static taskq_t	*sd_tq = NULL;
11252 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11253 
11254 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11255 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11256 
11257 /*
11258  * The following task queue is being created for the write part of
11259  * read-modify-write of non-512 block size devices.
11260  * Limit the number of threads to 1 for now. This number was chosen
11261  * because the queue currently applies only to DVD-RAM and MO drives,
11262  * for which performance is not the main criterion at this stage.
11263  * Note: it remains to be explored whether a single taskq can be used in future.
11264  */
11265 #define	SD_WMR_TASKQ_NUMTHREADS	1
11266 static taskq_t	*sd_wmr_tq = NULL;
11267 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11268 
11269 /*
11270  *    Function: sd_taskq_create
11271  *
11272  * Description: Create taskq thread(s) and preallocate task entries
11273  *
11274  * Return Code: Returns a pointer to the allocated taskq_t.
11275  *
11276  *     Context: Can sleep. Requires blockable context.
11277  *
11278  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11279  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11280  *		- taskq_create() will block for memory, also it will panic
11281  *		  if it cannot create the requested number of threads.
11282  *		- Currently taskq_create() creates threads that cannot be
11283  *		  swapped.
11284  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11285  *		  supply of taskq entries at interrupt time (ie, so that we
11286  *		  do not have to sleep for memory)
11287  */
11288 
11289 static void
11290 sd_taskq_create(void)
11291 {
11292 	char	taskq_name[TASKQ_NAMELEN];
11293 
11294 	ASSERT(sd_tq == NULL);
11295 	ASSERT(sd_wmr_tq == NULL);
11296 
11297 	(void) snprintf(taskq_name, sizeof (taskq_name),
11298 	    "%s_drv_taskq", sd_label);
11299 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11300 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11301 	    TASKQ_PREPOPULATE));
11302 
11303 	(void) snprintf(taskq_name, sizeof (taskq_name),
11304 	    "%s_rmw_taskq", sd_label);
11305 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11306 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11307 	    TASKQ_PREPOPULATE));
11308 }
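
/*
 * Illustrative sketch (not part of the driver): what minalloc == maxalloc
 * buys above.  The entry pool is fully preallocated, so a dispatch at
 * interrupt time never sleeps for memory; once all entries are in use it
 * simply fails and the caller errors the command (the real
 * taskq_dispatch(9F) takes a TQ_NOSLEEP flag to the same end).  A
 * single-threaded user-space model of that policy:
 */
#include <stdint.h>

#define	EX_MAXALLOC	256		/* mirrors SD_TASKQ_MAXALLOC */

static uint32_t ex_entries_inuse;

/* Returns 1 on success, 0 when the preallocated pool is exhausted. */
static int
ex_dispatch_nosleep(void)
{
	if (ex_entries_inuse >= EX_MAXALLOC)
		return (0);	/* would need an allocation: fail instead */
	ex_entries_inuse++;
	return (1);
}

static void
ex_dispatch_done(void)
{
	ex_entries_inuse--;	/* entry returns to the pool */
}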
11309 
11310 
11311 /*
11312  *    Function: sd_taskq_delete
11313  *
11314  * Description: Complementary cleanup routine for sd_taskq_create().
11315  *
11316  *     Context: Kernel thread context.
11317  */
11318 
11319 static void
11320 sd_taskq_delete(void)
11321 {
11322 	ASSERT(sd_tq != NULL);
11323 	ASSERT(sd_wmr_tq != NULL);
11324 	taskq_destroy(sd_tq);
11325 	taskq_destroy(sd_wmr_tq);
11326 	sd_tq = NULL;
11327 	sd_wmr_tq = NULL;
11328 }
11329 
11330 
11331 /*
11332  *    Function: sdstrategy
11333  *
11334  * Description: Driver's strategy (9E) entry point function.
11335  *
11336  *   Arguments: bp - pointer to buf(9S)
11337  *
11338  * Return Code: Always returns zero
11339  *
11340  *     Context: Kernel thread context.
11341  */
11342 
11343 static int
11344 sdstrategy(struct buf *bp)
11345 {
11346 	struct sd_lun *un;
11347 
11348 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11349 	if (un == NULL) {
11350 		bioerror(bp, EIO);
11351 		bp->b_resid = bp->b_bcount;
11352 		biodone(bp);
11353 		return (0);
11354 	}
11355 	/* As was done in the past, fail new cmds. if state is dumping. */
11356 	/* As was done in the past, fail new cmds if the state is dumping. */
11357 		bioerror(bp, ENXIO);
11358 		bp->b_resid = bp->b_bcount;
11359 		biodone(bp);
11360 		return (0);
11361 	}
11362 
11363 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11364 
11365 	/*
11366 	 * Commands may sneak in while we released the mutex in
11367 	 * DDI_SUSPEND; we should block new commands. However, old
11368 	 * commands that are still in the driver at this point should
11369 	 * still be allowed to drain.
11370 	 */
11371 	mutex_enter(SD_MUTEX(un));
11372 	/*
11373 	 * Must wait here if either the device is suspended or
11374 	 * if its power level is changing.
11375 	 */
11376 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11377 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11378 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11379 	}
11380 
11381 	un->un_ncmds_in_driver++;
11382 
11383 	/*
11384 	 * atapi: Since we are running the CD for now in PIO mode we need to
11385 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11386 	 * context from the HBA's init_pkt routine.
11387 	 */
11388 	if (un->un_f_cfg_is_atapi == TRUE) {
11389 		mutex_exit(SD_MUTEX(un));
11390 		bp_mapin(bp);
11391 		mutex_enter(SD_MUTEX(un));
11392 	}
11393 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11394 	    un->un_ncmds_in_driver);
11395 
11396 	mutex_exit(SD_MUTEX(un));
11397 
11398 	/*
11399 	 * This will (eventually) allocate the sd_xbuf area and
11400 	 * call sd_xbuf_strategy().  We just want to return the
11401 	 * result of ddi_xbuf_qstrategy so that we have an
11402 	 * optimized tail call which saves us a stack frame.
11403 	 */
11404 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11405 }
11406 
11407 
11408 /*
11409  *    Function: sd_xbuf_strategy
11410  *
11411  * Description: Function for initiating IO operations via the
11412  *		ddi_xbuf_qstrategy() mechanism.
11413  *
11414  *     Context: Kernel thread context.
11415  */
11416 
11417 static void
11418 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11419 {
11420 	struct sd_lun *un = arg;
11421 
11422 	ASSERT(bp != NULL);
11423 	ASSERT(xp != NULL);
11424 	ASSERT(un != NULL);
11425 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11426 
11427 	/*
11428 	 * Initialize the fields in the xbuf and save a pointer to the
11429 	 * xbuf in bp->b_private.
11430 	 */
11431 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11432 
11433 	/* Send the buf down the iostart chain */
11434 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11435 }
11436 
11437 
11438 /*
11439  *    Function: sd_xbuf_init
11440  *
11441  * Description: Prepare the given sd_xbuf struct for use.
11442  *
11443  *   Arguments: un - ptr to softstate
11444  *		bp - ptr to associated buf(9S)
11445  *		xp - ptr to associated sd_xbuf
11446  *		chain_type - IO chain type to use:
11447  *			SD_CHAIN_NULL
11448  *			SD_CHAIN_BUFIO
11449  *			SD_CHAIN_USCSI
11450  *			SD_CHAIN_DIRECT
11451  *			SD_CHAIN_DIRECT_PRIORITY
11452  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11453  *			initialization; may be NULL if none.
11454  *
11455  *     Context: Kernel thread context
11456  */
11457 
11458 static void
11459 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11460 	uchar_t chain_type, void *pktinfop)
11461 {
11462 	int index;
11463 
11464 	ASSERT(un != NULL);
11465 	ASSERT(bp != NULL);
11466 	ASSERT(xp != NULL);
11467 
11468 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11469 	    bp, chain_type);
11470 
11471 	xp->xb_un	= un;
11472 	xp->xb_pktp	= NULL;
11473 	xp->xb_pktinfo	= pktinfop;
11474 	xp->xb_private	= bp->b_private;
11475 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11476 
11477 	/*
11478 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11479 	 * upon the specified chain type to use.
11480 	 */
11481 	switch (chain_type) {
11482 	case SD_CHAIN_NULL:
11483 		/*
11484 		 * Fall through to just use the values for the buf type, even
11485 		 * though for the NULL chain these values will never be used.
11486 		 */
11487 		/* FALLTHRU */
11488 	case SD_CHAIN_BUFIO:
11489 		index = un->un_buf_chain_type;
11490 		break;
11491 	case SD_CHAIN_USCSI:
11492 		index = un->un_uscsi_chain_type;
11493 		break;
11494 	case SD_CHAIN_DIRECT:
11495 		index = un->un_direct_chain_type;
11496 		break;
11497 	case SD_CHAIN_DIRECT_PRIORITY:
11498 		index = un->un_priority_chain_type;
11499 		break;
11500 	default:
11501 		/* We're really broken if we ever get here... */
11502 		panic("sd_xbuf_init: illegal chain type!");
11503 		/*NOTREACHED*/
11504 	}
11505 
11506 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11507 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11508 
11509 	/*
11510 	 * It might be a bit easier to simply bzero the entire xbuf above,
11511 	 * but it turns out that since we init a fair number of members anyway,
11512 	 * we save a fair number of cycles by doing explicit assignment of zero.
11513 	 */
11514 	xp->xb_pkt_flags	= 0;
11515 	xp->xb_dma_resid	= 0;
11516 	xp->xb_retry_count	= 0;
11517 	xp->xb_victim_retry_count = 0;
11518 	xp->xb_ua_retry_count	= 0;
11519 	xp->xb_sense_bp		= NULL;
11520 	xp->xb_sense_status	= 0;
11521 	xp->xb_sense_state	= 0;
11522 	xp->xb_sense_resid	= 0;
11523 
11524 	bp->b_private	= xp;
11525 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11526 	bp->b_resid	= 0;
11527 	bp->av_forw	= NULL;
11528 	bp->av_back	= NULL;
11529 	bioerror(bp, 0);
11530 
11531 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11532 }
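
/*
 * Editorial sketch (not driver code): a typical caller-side use of
 * sd_xbuf_init(). For buf(9S) IO the xbuf comes from the ddi_xbuf pool
 * (see sd_xbuf_strategy() above); for USCSI commands the caller
 * allocates the xbuf itself, as sd_uscsi_strategy() does below:
 */
#if 0
	struct sd_xbuf *xp;

	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, bp, xp, SD_CHAIN_USCSI, NULL);
	/* xb_chain_iostart now indexes the head of the USCSI chain */
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
#endif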
11533 
11534 
11535 /*
11536  *    Function: sd_uscsi_strategy
11537  *
11538  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11539  *
11540  *   Arguments: bp - buf struct ptr
11541  *
11542  * Return Code: Always returns 0
11543  *
11544  *     Context: Kernel thread context
11545  */
11546 
11547 static int
11548 sd_uscsi_strategy(struct buf *bp)
11549 {
11550 	struct sd_lun		*un;
11551 	struct sd_uscsi_info	*uip;
11552 	struct sd_xbuf		*xp;
11553 	uchar_t			chain_type;
11554 
11555 	ASSERT(bp != NULL);
11556 
11557 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11558 	if (un == NULL) {
11559 		bioerror(bp, EIO);
11560 		bp->b_resid = bp->b_bcount;
11561 		biodone(bp);
11562 		return (0);
11563 	}
11564 
11565 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11566 
11567 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11568 
11569 	mutex_enter(SD_MUTEX(un));
11570 	/*
11571 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11572 	 * call bp_mapin here to avoid bp_mapin being called from interrupt
11573 	 * context under the HBA's init_pkt routine.
11574 	 */
11575 	if (un->un_f_cfg_is_atapi == TRUE) {
11576 		mutex_exit(SD_MUTEX(un));
11577 		bp_mapin(bp);
11578 		mutex_enter(SD_MUTEX(un));
11579 	}
11580 	un->un_ncmds_in_driver++;
11581 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11582 	    un->un_ncmds_in_driver);
11583 	mutex_exit(SD_MUTEX(un));
11584 
11585 	/*
11586 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11587 	 */
11588 	ASSERT(bp->b_private != NULL);
11589 	uip = (struct sd_uscsi_info *)bp->b_private;
11590 
11591 	switch (uip->ui_flags) {
11592 	case SD_PATH_DIRECT:
11593 		chain_type = SD_CHAIN_DIRECT;
11594 		break;
11595 	case SD_PATH_DIRECT_PRIORITY:
11596 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11597 		break;
11598 	default:
11599 		chain_type = SD_CHAIN_USCSI;
11600 		break;
11601 	}
11602 
11603 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
11604 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11605 
11606 	/* Use the index obtained within xbuf_init */
11607 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11608 
11609 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11610 
11611 	return (0);
11612 }
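
/*
 * Editorial sketch (not driver code): how a caller prepares a buf for
 * sd_uscsi_strategy(). This mirrors what sd_send_scsi_cmd() below
 * actually does; uscmd, bp, dev, and uio are assumed to have been set
 * up by the caller.
 */
#if 0
	struct sd_uscsi_info *uip;

	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
	uip->ui_flags = SD_PATH_DIRECT;		/* -> SD_CHAIN_DIRECT */
	uip->ui_cmdp  = uscmd;
	bp->b_private = uip;			/* consumed above */
	(void) physio(sd_uscsi_strategy, bp, dev, B_READ, sduscsimin, uio);
#endif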
11613 
11614 
11615 /*
11616  * These routines perform raw i/o operations.
11617  */
11618 /*ARGSUSED*/
11619 static void
11620 sduscsimin(struct buf *bp)
11621 {
11622 	/*
11623 	 * Do not break up the transfer, because the CDB count would then
11624 	 * be incorrect and data underruns would result (incomplete
11625 	 * read/writes, which would be retried and then failed; see
11626 	 * sdintr()).
11627 	 */
11628 }
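
/*
 * For contrast, a conventional minphys-style routine would clamp the
 * transfer length, roughly (sketch only):
 *
 *	if (bp->b_bcount > maxphys)
 *		bp->b_bcount = maxphys;
 *
 * sduscsimin() is deliberately empty so that physio(9F) never splits
 * a USCSI request, for the reasons given above.
 */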
11629 
11630 
11631 
11632 /*
11633  *    Function: sd_send_scsi_cmd
11634  *
11635  * Description: Runs a USCSI command for user (when called thru sdioctl),
11636  *		or for the driver
11637  *
11638  *   Arguments: dev - the dev_t for the device
11639  *		incmd - ptr to a valid uscsi_cmd struct
11640  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11641  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11642  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11643  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11644  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11645  *			to use the USCSI "direct" chain and bypass the normal
11646  *			command waitq.
11647  *
11648  * Return Code: 0 -  successful completion of the given command
11649  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11650  *		ENXIO  - soft state not found for specified dev
11651  *		EINVAL
11652  *		EFAULT - copyin/copyout error
11653  *		return code of biowait(9F) or physio(9F):
11654  *			EIO - IO error, caller may check incmd->uscsi_status
11655  *			ENXIO
11656  *			EACCES - reservation conflict
11657  *
11658  *     Context: Waits for command to complete. Can sleep.
11659  */
11660 
11661 static int
11662 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
11663 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
11664 	int path_flag)
11665 {
11666 	struct sd_uscsi_info	*uip;
11667 	struct uscsi_cmd	*uscmd;
11668 	struct sd_lun	*un;
11669 	struct buf	*bp;
11670 	int	rval;
11671 	int	flags;
11672 
11673 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11674 	if (un == NULL) {
11675 		return (ENXIO);
11676 	}
11677 
11678 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11679 
11680 #ifdef SDDEBUG
11681 	switch (dataspace) {
11682 	case UIO_USERSPACE:
11683 		SD_TRACE(SD_LOG_IO, un,
11684 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
11685 		break;
11686 	case UIO_SYSSPACE:
11687 		SD_TRACE(SD_LOG_IO, un,
11688 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
11689 		break;
11690 	default:
11691 		SD_TRACE(SD_LOG_IO, un,
11692 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
11693 		break;
11694 	}
11695 #endif
11696 
11697 	/*
11698 	 * Perform resets directly; no need to generate a command to do it.
11699 	 */
11700 	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
11701 		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
11702 		    RESET_ALL : RESET_TARGET;
11703 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
11704 		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
11705 			/* Reset attempt was unsuccessful */
11706 			SD_TRACE(SD_LOG_IO, un,
11707 			    "sd_send_scsi_cmd: reset: failure\n");
11708 			return (EIO);
11709 		}
11710 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
11711 		return (0);
11712 	}
11713 
11714 	/* Perfunctory sanity check... */
11715 	if (incmd->uscsi_cdblen <= 0) {
11716 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11717 		    "invalid uscsi_cdblen, returning EINVAL\n");
11718 		return (EINVAL);
11719 	} else if (incmd->uscsi_cdblen > un->un_max_hba_cdb) {
11720 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11721 		    "unsupported uscsi_cdblen, returning EINVAL\n");
11722 		return (EINVAL);
11723 	}
11724 
11725 	/*
11726 	 * In order to not worry about where the uscsi structure came from
11727 	 * (or where the cdb it points to came from) we're going to make
11728 	 * kmem_alloc'd copies of them here. This will also allow reference
11729 	 * to the data they contain long after this process has gone to
11730 	 * sleep and its kernel stack has been unmapped, etc.
11731 	 *
11732 	 * First get some memory for the uscsi_cmd struct and copy the
11733 	 * contents of the given uscsi_cmd struct into it.
11734 	 */
11735 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
11736 	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
11737 
11738 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
11739 	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
11740 
11741 	/*
11742 	 * Now get some space for the CDB, and copy the given CDB into
11743 	 * it. Use ddi_copyin() in case the data is in user space.
11744 	 */
11745 	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
11746 	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
11747 	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
11748 	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
11749 		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
11750 		kmem_free(uscmd, sizeof (struct uscsi_cmd));
11751 		return (EFAULT);
11752 	}
11753 
11754 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
11755 	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
11756 
11757 	bp = getrbuf(KM_SLEEP);
11758 
11759 	/*
11760 	 * Allocate an sd_uscsi_info struct and fill it with the info
11761 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11762 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11763 	 * since we allocate the buf here in this function, we do not
11764 	 * need to preserve the prior contents of b_private.
11765 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11766 	 */
11767 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11768 	uip->ui_flags = path_flag;
11769 	uip->ui_cmdp  = uscmd;
11770 	bp->b_private = uip;
11771 
11772 	/*
11773 	 * Initialize Request Sense buffering, if requested.
11774 	 */
11775 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11776 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11777 		/*
11778 		 * Here uscmd->uscsi_rqbuf currently points to the caller's
11779 		 * buffer, but we replace this with a kernel buffer that
11780 		 * we allocate to use with the sense data. The sense data
11781 		 * (if present) gets copied into this new buffer before the
11782 		 * command is completed.  Then we copy the sense data from
11783 		 * our allocated buf into the caller's buffer below. Note
11784 		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
11785 		 * below to perform the copy back to the caller's buf.
11786 		 */
11787 		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
11788 		if (rqbufspace == UIO_USERSPACE) {
11789 			uscmd->uscsi_rqlen   = SENSE_LENGTH;
11790 			uscmd->uscsi_rqresid = SENSE_LENGTH;
11791 		} else {
11792 			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
11793 			uscmd->uscsi_rqlen   = rlen;
11794 			uscmd->uscsi_rqresid = rlen;
11795 		}
11796 	} else {
11797 		uscmd->uscsi_rqbuf = NULL;
11798 		uscmd->uscsi_rqlen   = 0;
11799 		uscmd->uscsi_rqresid = 0;
11800 	}
11801 
11802 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
11803 	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
11804 
11805 	if (un->un_f_is_fibre == FALSE) {
11806 		/*
11807 		 * Force asynchronous mode, if necessary.  Doing this here
11808 		 * has the unfortunate effect of running other queued
11809 		 * commands async also, but since the main purpose of this
11810 		 * capability is downloading new drive firmware, we can
11811 		 * probably live with it.
11812 		 */
11813 		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
11814 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
11815 				== 1) {
11816 				if (scsi_ifsetcap(SD_ADDRESS(un),
11817 					    "synchronous", 0, 1) == 1) {
11818 					SD_TRACE(SD_LOG_IO, un,
11819 					"sd_send_scsi_cmd: forced async ok\n");
11820 				} else {
11821 					SD_TRACE(SD_LOG_IO, un,
11822 					    "sd_send_scsi_cmd: "
11823 					    "forced async failed\n");
11824 					rval = EINVAL;
11825 					goto done;
11826 				}
11827 			}
11828 		}
11829 
11830 		/*
11831 		 * Re-enable synchronous mode, if requested
11832 		 */
11833 		if (uscmd->uscsi_flags & USCSI_SYNC) {
11834 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
11835 				== 0) {
11836 				int i = scsi_ifsetcap(SD_ADDRESS(un),
11837 						"synchronous", 1, 1);
11838 				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11839 					"re-enabled sync %s\n",
11840 					(i == 1) ? "ok" : "failed");
11841 			}
11842 		}
11843 	}
11844 
11845 	/*
11846 	 * Commands sent with priority are intended for error recovery
11847 	 * situations, and do not have retries performed.
11848 	 */
11849 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
11850 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
11851 	}
11852 
11853 	/*
11854 	 * If we're going to do actual I/O, let physio do all the right things
11855 	 */
11856 	if (uscmd->uscsi_buflen != 0) {
11857 		struct iovec	aiov;
11858 		struct uio	auio;
11859 		struct uio	*uio = &auio;
11860 
11861 		bzero(&auio, sizeof (struct uio));
11862 		bzero(&aiov, sizeof (struct iovec));
11863 		aiov.iov_base = uscmd->uscsi_bufaddr;
11864 		aiov.iov_len  = uscmd->uscsi_buflen;
11865 		uio->uio_iov  = &aiov;
11866 
11867 		uio->uio_iovcnt  = 1;
11868 		uio->uio_resid   = uscmd->uscsi_buflen;
11869 		uio->uio_segflg  = dataspace;
11870 
11871 		/*
11872 		 * physio() will block here until the command completes....
11873 		 */
11874 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
11875 
11876 		rval = physio(sd_uscsi_strategy, bp, dev,
11877 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
11878 		    sduscsimin, uio);
11879 
11880 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11881 		    "returned from physio with 0x%x\n", rval);
11882 
11883 	} else {
11884 		/*
11885 		 * We have to mimic what physio would do here! Argh!
11886 		 */
11887 		bp->b_flags  = B_BUSY |
11888 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
11889 		bp->b_edev   = dev;
11890 		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
11891 		bp->b_bcount = 0;
11892 		bp->b_blkno  = 0;
11893 
11894 		SD_TRACE(SD_LOG_IO, un,
11895 		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
11896 
11897 		(void) sd_uscsi_strategy(bp);
11898 
11899 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
11900 
11901 		rval = biowait(bp);
11902 
11903 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11904 		    "returned from  biowait with 0x%x\n", rval);
11905 	}
11906 
11907 done:
11908 
11909 #ifdef SDDEBUG
11910 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11911 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
11912 	    uscmd->uscsi_status, uscmd->uscsi_resid);
11913 	if (uscmd->uscsi_bufaddr != NULL) {
11914 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11915 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
11916 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
11917 		if (dataspace == UIO_SYSSPACE) {
11918 			SD_DUMP_MEMORY(un, SD_LOG_IO,
11919 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
11920 			    uscmd->uscsi_buflen, SD_LOG_HEX);
11921 		}
11922 	}
11923 #endif
11924 
11925 	/*
11926 	 * Get the status and residual to return to the caller.
11927 	 */
11928 	incmd->uscsi_status = uscmd->uscsi_status;
11929 	incmd->uscsi_resid  = uscmd->uscsi_resid;
11930 
11931 	/*
11932 	 * If the caller wants sense data, copy back whatever sense data
11933 	 * we may have gotten, and update the relevant rqsense info.
11934 	 */
11935 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11936 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11937 
11938 		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
11939 		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
11940 
11941 		/* Update the Request Sense status and resid */
11942 		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
11943 		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
11944 
11945 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11946 		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
11947 		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
11948 
11949 		/* Copy out the sense data for user processes */
11950 		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
11951 			int flags =
11952 			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
11953 			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
11954 			    rqlen, flags) != 0) {
11955 				rval = EFAULT;
11956 			}
11957 			/*
11958 			 * Note: Can't touch incmd->uscsi_rqbuf so use
11959 			 * uscmd->uscsi_rqbuf instead. They're the same.
11960 			 */
11961 			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11962 			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
11963 			    incmd->uscsi_rqbuf, rqlen);
11964 			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
11965 			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
11966 		}
11967 	}
11968 
11969 	/*
11970 	 * Free allocated resources and return; mapout the buf in case it was
11971 	 * mapped in by a lower layer.
11972 	 */
11973 	bp_mapout(bp);
11974 	freerbuf(bp);
11975 	kmem_free(uip, sizeof (struct sd_uscsi_info));
11976 	if (uscmd->uscsi_rqbuf != NULL) {
11977 		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
11978 	}
11979 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
11980 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
11981 
11982 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
11983 
11984 	return (rval);
11985 }
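
/*
 * Editorial sketch (not driver code): an in-kernel caller issuing a
 * TEST UNIT READY through sd_send_scsi_cmd(). Both structs may live on
 * the caller's stack, since sd_send_scsi_cmd() makes kmem_alloc'd
 * copies of them before sleeping.
 */
#if 0
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd;
	int			status;

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd, sizeof (ucmd));
	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	ucmd.uscsi_cdb	  = (caddr_t)&cdb;
	ucmd.uscsi_cdblen = CDB_GROUP0;
	ucmd.uscsi_flags  = USCSI_SILENT;

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
#endif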
11986 
11987 
11988 /*
11989  *    Function: sd_buf_iodone
11990  *
11991  * Description: Frees the sd_xbuf & returns the buf to its originator.
11992  *
11993  *     Context: May be called from interrupt context.
11994  */
11995 /* ARGSUSED */
11996 static void
11997 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
11998 {
11999 	struct sd_xbuf *xp;
12000 
12001 	ASSERT(un != NULL);
12002 	ASSERT(bp != NULL);
12003 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12004 
12005 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12006 
12007 	xp = SD_GET_XBUF(bp);
12008 	ASSERT(xp != NULL);
12009 
12010 	mutex_enter(SD_MUTEX(un));
12011 
12012 	/*
12013 	 * Grab time when the cmd completed.
12014 	 * This is used to determine whether the device has been
12015 	 * idle long enough to be declared idle to the PM framework.
12016 	 * This lowers the overhead, and therefore improves
12017 	 * performance per I/O operation.
12018 	 */
12019 	un->un_pm_idle_time = ddi_get_time();
12020 
12021 	un->un_ncmds_in_driver--;
12022 	ASSERT(un->un_ncmds_in_driver >= 0);
12023 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12024 	    un->un_ncmds_in_driver);
12025 
12026 	mutex_exit(SD_MUTEX(un));
12027 
12028 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
12029 	biodone(bp);				/* bp is gone after this */
12030 
12031 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12032 }
12033 
12034 
12035 /*
12036  *    Function: sd_uscsi_iodone
12037  *
12038  * Description: Frees the sd_xbuf & returns the buf to its originator.
12039  *
12040  *     Context: May be called from interrupt context.
12041  */
12042 /* ARGSUSED */
12043 static void
12044 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12045 {
12046 	struct sd_xbuf *xp;
12047 
12048 	ASSERT(un != NULL);
12049 	ASSERT(bp != NULL);
12050 
12051 	xp = SD_GET_XBUF(bp);
12052 	ASSERT(xp != NULL);
12053 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12054 
12055 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12056 
12057 	bp->b_private = xp->xb_private;
12058 
12059 	mutex_enter(SD_MUTEX(un));
12060 
12061 	/*
12062 	 * Grab time when the cmd completed.
12063 	 * This is used to determine whether the device has been
12064 	 * idle long enough to be declared idle to the PM framework.
12065 	 * This lowers the overhead, and therefore improves
12066 	 * performance per I/O operation.
12067 	 */
12068 	un->un_pm_idle_time = ddi_get_time();
12069 
12070 	un->un_ncmds_in_driver--;
12071 	ASSERT(un->un_ncmds_in_driver >= 0);
12072 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12073 	    un->un_ncmds_in_driver);
12074 
12075 	mutex_exit(SD_MUTEX(un));
12076 
12077 	kmem_free(xp, sizeof (struct sd_xbuf));
12078 	biodone(bp);
12079 
12080 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12081 }
12082 
12083 
12084 /*
12085  *    Function: sd_mapblockaddr_iostart
12086  *
12087  * Description: Verify the request lies within the partition limits for
12088  *		the indicated minor device.  Issue "overrun" buf if
12089  *		request would exceed partition range.  Converts
12090  *		partition-relative block address to absolute.
12091  *
12092  *     Context: Can sleep
12093  *
12094  *      Issues: This follows what the old code did, in terms of accessing
12095  *		some of the partition info in the unit struct without holding
12096  *		the mutex.  This is a general issue, if the partition info
12097  *		can be altered while IO is in progress... as soon as we send
12098  *		a buf, its partitioning can be invalid before it gets to the
12099  *		device.  Probably the right fix is to move partitioning out
12100  *		of the driver entirely.
12101  */
12102 
12103 static void
12104 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12105 {
12106 	daddr_t	nblocks;	/* #blocks in the given partition */
12107 	daddr_t	blocknum;	/* Block number specified by the buf */
12108 	size_t	requested_nblocks;
12109 	size_t	available_nblocks;
12110 	int	partition;
12111 	diskaddr_t	partition_offset;
12112 	struct sd_xbuf *xp;
12113 
12114 
12115 	ASSERT(un != NULL);
12116 	ASSERT(bp != NULL);
12117 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12118 
12119 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12120 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12121 
12122 	xp = SD_GET_XBUF(bp);
12123 	ASSERT(xp != NULL);
12124 
12125 	/*
12126 	 * If the geometry is not indicated as valid, attempt to access
12127 	 * the unit & verify the geometry/label. This can be the case for
12128 	 * removable-media devices, or if the device was opened in
12129 	 * NDELAY/NONBLOCK mode.
12130 	 */
12131 	if ((un->un_f_geometry_is_valid != TRUE) &&
12132 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12133 		/*
12134 		 * For removable devices it is possible to start an I/O
12135 		 * without a media by opening the device in nodelay mode.
12136 		 * Also for writable CDs there can be many scenarios where
12137 		 * there is no geometry yet but volume manager is trying to
12138 		 * issue a read() just because it can see TOC on the CD. So
12139 		 * do not print a message for removables.
12140 		 */
12141 		if (!un->un_f_has_removable_media) {
12142 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12143 			    "i/o to invalid geometry\n");
12144 		}
12145 		bioerror(bp, EIO);
12146 		bp->b_resid = bp->b_bcount;
12147 		SD_BEGIN_IODONE(index, un, bp);
12148 		return;
12149 	}
12150 
12151 	partition = SDPART(bp->b_edev);
12152 
12153 	/* #blocks in partition */
12154 	nblocks = un->un_map[partition].dkl_nblk;
12155 
12156 	/* Use of a local variable potentially improves performance slightly */
12157 	partition_offset = un->un_offset[partition];
12158 
12159 	/*
12160 	 * blocknum is the starting block number of the request. At this
12161 	 * point it is still relative to the start of the minor device.
12162 	 */
12163 	blocknum = xp->xb_blkno;
12164 
12165 	/*
12166 	 * Legacy: If the starting block number is one past the last block
12167 	 * in the partition, do not set B_ERROR in the buf.
12168 	 */
12169 	if (blocknum == nblocks)  {
12170 		goto error_exit;
12171 	}
12172 
12173 	/*
12174 	 * Confirm that the first block of the request lies within the
12175 	 * partition limits. Also the requested number of bytes must be
12176 	 * a multiple of the system block size.
12177 	 */
12178 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12179 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12180 		bp->b_flags |= B_ERROR;
12181 		goto error_exit;
12182 	}
12183 
12184 	/*
12185 	 * If the requested # blocks exceeds the available # blocks, that
12186 	 * is an overrun of the partition.
12187 	 */
12188 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12189 	available_nblocks = (size_t)(nblocks - blocknum);
12190 	ASSERT(nblocks >= blocknum);
12191 
12192 	if (requested_nblocks > available_nblocks) {
12193 		/*
12194 		 * Allocate an "overrun" buf to allow the request to proceed
12195 		 * for the amount of space available in the partition. The
12196 		 * amount not transferred will be added into the b_resid
12197 		 * when the operation is complete. The overrun buf
12198 		 * replaces the original buf here, and the original buf
12199 		 * is saved inside the overrun buf, for later use.
12200 		 */
12201 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12202 		    (offset_t)(requested_nblocks - available_nblocks));
12203 		size_t count = bp->b_bcount - resid;
12204 		/*
12205 		 * Note: count is an unsigned entity, thus it will NEVER
12206 		 * be less than 0, so ASSERT that the original values are
12207 		 * correct.
12208 		 */
12209 		ASSERT(bp->b_bcount >= resid);
12210 
12211 		bp = sd_bioclone_alloc(bp, count, blocknum,
12212 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12213 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12214 		ASSERT(xp != NULL);
12215 	}
12216 
12217 	/* At this point there should be no residual for this buf. */
12218 	ASSERT(bp->b_resid == 0);
12219 
12220 	/* Convert the block number to an absolute address. */
12221 	xp->xb_blkno += partition_offset;
12222 
12223 	SD_NEXT_IOSTART(index, un, bp);
12224 
12225 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12226 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12227 
12228 	return;
12229 
12230 error_exit:
12231 	bp->b_resid = bp->b_bcount;
12232 	SD_BEGIN_IODONE(index, un, bp);
12233 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12234 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12235 }
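
/*
 * Worked example (editorial, assumed numbers): for a partition of
 * nblocks = 1000 and a request at blocknum = 998 with b_bcount = 2048
 * bytes (4 system blocks of 512 bytes):
 *
 *	requested_nblocks = 4
 *	available_nblocks = 1000 - 998 = 2
 *
 * Since 4 > 2, an overrun buf is cloned for count = 1024 bytes (the
 * two available blocks); the remaining resid = 1024 bytes is added
 * back into b_resid by sd_mapblockaddr_iodone() at completion.
 */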
12236 
12237 
12238 /*
12239  *    Function: sd_mapblockaddr_iodone
12240  *
12241  * Description: Completion-side processing for partition management.
12242  *
12243  *     Context: May be called under interrupt context
12244  */
12245 
12246 static void
12247 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12248 {
12249 	/* int	partition; */	/* Not used, see below. */
12250 	ASSERT(un != NULL);
12251 	ASSERT(bp != NULL);
12252 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12253 
12254 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12255 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12256 
12257 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12258 		/*
12259 		 * We have an "overrun" buf to deal with...
12260 		 */
12261 		struct sd_xbuf	*xp;
12262 		struct buf	*obp;	/* ptr to the original buf */
12263 
12264 		xp = SD_GET_XBUF(bp);
12265 		ASSERT(xp != NULL);
12266 
12267 		/* Retrieve the pointer to the original buf */
12268 		obp = (struct buf *)xp->xb_private;
12269 		ASSERT(obp != NULL);
12270 
12271 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12272 		bioerror(obp, bp->b_error);
12273 
12274 		sd_bioclone_free(bp);
12275 
12276 		/*
12277 		 * Get back the original buf.
12278 		 * Note that since the restoration of xb_blkno below
12279 		 * was removed, the sd_xbuf is not needed.
12280 		 */
12281 		bp = obp;
12282 		/*
12283 		 * xp = SD_GET_XBUF(bp);
12284 		 * ASSERT(xp != NULL);
12285 		 */
12286 	}
12287 
12288 	/*
12289 	 * Convert xp->xb_blkno back to a minor-device relative value.
12290 	 * Note: this has been commented out, as it is not needed in the
12291 	 * current implementation of the driver (ie, this function
12292 	 * is at the top of the layering chains, so the info will be
12293 	 * discarded) and it is in the "hot" IO path.
12294 	 *
12295 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12296 	 * xp->xb_blkno -= un->un_offset[partition];
12297 	 */
12298 
12299 	SD_NEXT_IODONE(index, un, bp);
12300 
12301 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12302 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12303 }
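
/*
 * Editorial note on the resid computation above: continuing the
 * example after sd_mapblockaddr_iostart(), if the original buf asked
 * for obp->b_bcount = 2048 bytes but the overrun buf carried only
 * bp->b_bcount = 1024 bytes, all of which transferred (bp->b_resid
 * == 0), then
 *
 *	obp->b_resid = 2048 - (1024 - 0) = 1024
 *
 * ie, the bytes beyond the partition are reported as untransferred.
 */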
12304 
12305 
12306 /*
12307  *    Function: sd_mapblocksize_iostart
12308  *
12309  * Description: Convert between system block size (un->un_sys_blocksize)
12310  *		and target block size (un->un_tgt_blocksize).
12311  *
12312  *     Context: Can sleep to allocate resources.
12313  *
12314  * Assumptions: A higher layer has already performed any partition validation,
12315  *		and converted the xp->xb_blkno to an absolute value relative
12316  *		to the start of the device.
12317  *
12318  *		It is also assumed that the higher layer has implemented
12319  *		an "overrun" mechanism for the case where the request would
12320  *		read/write beyond the end of a partition.  In this case we
12321  *		assume (and ASSERT) that bp->b_resid == 0.
12322  *
12323  *		Note: The implementation for this routine assumes the target
12324  *		block size remains constant between allocation and transport.
12325  */
12326 
12327 static void
12328 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12329 {
12330 	struct sd_mapblocksize_info	*bsp;
12331 	struct sd_xbuf			*xp;
12332 	offset_t first_byte;
12333 	daddr_t	start_block, end_block;
12334 	daddr_t	request_bytes;
12335 	ushort_t is_aligned = FALSE;
12336 
12337 	ASSERT(un != NULL);
12338 	ASSERT(bp != NULL);
12339 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12340 	ASSERT(bp->b_resid == 0);
12341 
12342 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12343 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12344 
12345 	/*
12346 	 * For a non-writable CD, a write request is an error
12347 	 */
12348 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12349 	    (un->un_f_mmc_writable_media == FALSE)) {
12350 		bioerror(bp, EIO);
12351 		bp->b_resid = bp->b_bcount;
12352 		SD_BEGIN_IODONE(index, un, bp);
12353 		return;
12354 	}
12355 
12356 	/*
12357 	 * We do not need a shadow buf if the device is using
12358 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12359 	 * In this case there is no layer-private data block allocated.
12360 	 */
12361 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12362 	    (bp->b_bcount == 0)) {
12363 		goto done;
12364 	}
12365 
12366 #if defined(__i386) || defined(__amd64)
12367 	/* We do not support non-block-aligned transfers for ROD devices */
12368 	ASSERT(!ISROD(un));
12369 #endif
12370 
12371 	xp = SD_GET_XBUF(bp);
12372 	ASSERT(xp != NULL);
12373 
12374 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12375 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12376 	    un->un_tgt_blocksize, un->un_sys_blocksize);
12377 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12378 	    "request start block:0x%x\n", xp->xb_blkno);
12379 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12380 	    "request len:0x%x\n", bp->b_bcount);
12381 
12382 	/*
12383 	 * Allocate the layer-private data area for the mapblocksize layer.
12384 	 * Layers are allowed to use the xp_private member of the sd_xbuf
12385 	 * struct to store the pointer to their layer-private data block, but
12386 	 * each layer also has the responsibility of restoring the prior
12387 	 * contents of xb_private before returning the buf/xbuf to the
12388 	 * higher layer that sent it.
12389 	 *
12390 	 * Here we save the prior contents of xp->xb_private into the
12391 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12392 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12393 	 * the layer-private area and returning the buf/xbuf to the layer
12394 	 * that sent it.
12395 	 *
12396 	 * Note that here we use kmem_zalloc for the allocation as there are
12397 	 * parts of the mapblocksize code that expect certain fields to be
12398 	 * zero unless explicitly set to a required value.
12399 	 */
12400 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12401 	bsp->mbs_oprivate = xp->xb_private;
12402 	xp->xb_private = bsp;
12403 
12404 	/*
12405 	 * This treats the data on the disk (target) as an array of bytes.
12406 	 * first_byte is the byte offset, from the beginning of the device,
12407 	 * to the location of the request. This is converted from a
12408 	 * un->un_sys_blocksize block address to a byte offset, and then back
12409 	 * to a block address based upon a un->un_tgt_blocksize block size.
12410 	 *
12411 	 * xp->xb_blkno should be absolute upon entry into this function,
12412 	 * but it is based upon partitions that use the "system"
12413 	 * block size. It must be adjusted to reflect the block size of
12414 	 * the target.
12415 	 *
12416 	 * Note that end_block is actually the block that follows the last
12417 	 * block of the request, but that's what is needed for the computation.
12418 	 */
12419 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12420 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12421 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12422 	    un->un_tgt_blocksize;
12423 
12424 	/* request_bytes is rounded up to a multiple of the target block size */
12425 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
12426 
12427 	/*
12428 	 * See if the starting address of the request and the request
12429 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12430 	 * then we do not need to allocate a shadow buf to handle the request.
12431 	 */
12432 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12433 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12434 		is_aligned = TRUE;
12435 	}
12436 
12437 	if ((bp->b_flags & B_READ) == 0) {
12438 		/*
12439 		 * Lock the range for a write operation. An aligned request is
12440 		 * considered a simple write; otherwise the request must be a
12441 		 * read-modify-write.
12442 		 */
12443 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12444 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12445 	}
12446 
12447 	/*
12448 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12449 	 * where the READ command is generated for a read-modify-write. (The
12450 	 * write phase is deferred until after the read completes.)
12451 	 */
12452 	if (is_aligned == FALSE) {
12453 
12454 		struct sd_mapblocksize_info	*shadow_bsp;
12455 		struct sd_xbuf	*shadow_xp;
12456 		struct buf	*shadow_bp;
12457 
12458 		/*
12459 		 * Allocate the shadow buf and its associated xbuf. Note that
12460 		 * after this call the xb_blkno value in both the original
12461 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12462 		 * same: absolute relative to the start of the device, and
12463 		 * adjusted for the target block size. The b_blkno in the
12464 		 * shadow buf will also be set to this value. We should never
12465 		 * change b_blkno in the original bp however.
12466 		 *
12467 		 * Note also that the shadow buf will always need to be a
12468 		 * READ command, regardless of whether the incoming command
12469 		 * is a READ or a WRITE.
12470 		 */
12471 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12472 		    xp->xb_blkno,
12473 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12474 
12475 		shadow_xp = SD_GET_XBUF(shadow_bp);
12476 
12477 		/*
12478 		 * Allocate the layer-private data for the shadow buf.
12479 		 * (No need to preserve xb_private in the shadow xbuf.)
12480 		 */
12481 		shadow_xp->xb_private = shadow_bsp =
12482 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12483 
12484 		/*
12485 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12486 		 * to figure out where the start of the user data is (based upon
12487 		 * the system block size) in the data returned by the READ
12488 		 * command (which will be based upon the target blocksize). Note
12489 		 * that this is only really used if the request is unaligned.
12490 		 */
12491 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12492 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12493 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12494 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12495 
12496 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12497 
12498 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12499 
12500 		/* Transfer the wmap (if any) to the shadow buf */
12501 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12502 		bsp->mbs_wmp = NULL;
12503 
12504 		/*
12505 		 * The shadow buf goes on from here in place of the
12506 		 * original buf.
12507 		 */
12508 		shadow_bsp->mbs_orig_bp = bp;
12509 		bp = shadow_bp;
12510 	}
12511 
12512 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12513 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12514 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12515 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12516 	    request_bytes);
12517 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12518 	    "sd_mapblocksize_iostart: shadow buf:0x%p\n", bp);
12519 
12520 done:
12521 	SD_NEXT_IOSTART(index, un, bp);
12522 
12523 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12524 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12525 }
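
/*
 * Worked example (editorial, assumed sizes): a CD-ROM with
 * un->un_tgt_blocksize = 2048 and un->un_sys_blocksize = 512, given a
 * request at xp->xb_blkno = 3 (system blocks) for b_bcount = 1024:
 *
 *	first_byte    = 3 * 512 = 1536
 *	start_block   = 1536 / 2048 = 0		(target blocks)
 *	end_block     = (1536 + 1024 + 2047) / 2048 = 2
 *	request_bytes = (2 - 0) * 2048 = 4096
 *
 * first_byte % 2048 != 0, so the request is unaligned: a 4096-byte
 * shadow READ is issued, and mbs_copy_offset = 1536 locates the
 * caller's data within the shadow buffer.
 */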
12526 
12527 
12528 /*
12529  *    Function: sd_mapblocksize_iodone
12530  *
12531  * Description: Completion side processing for block-size mapping.
12532  *
12533  *     Context: May be called under interrupt context
12534  */
12535 
12536 static void
12537 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12538 {
12539 	struct sd_mapblocksize_info	*bsp;
12540 	struct sd_xbuf	*xp;
12541 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12542 	struct buf	*orig_bp;	/* ptr to the original buf */
12543 	offset_t	shadow_end;
12544 	offset_t	request_end;
12545 	offset_t	shadow_start;
12546 	ssize_t		copy_offset;
12547 	size_t		copy_length;
12548 	size_t		shortfall;
12549 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12550 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
12551 
12552 	ASSERT(un != NULL);
12553 	ASSERT(bp != NULL);
12554 
12555 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12556 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12557 
12558 	/*
12559 	 * There is no shadow buf or layer-private data if the target is
12560 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12561 	 */
12562 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12563 	    (bp->b_bcount == 0)) {
12564 		goto exit;
12565 	}
12566 
12567 	xp = SD_GET_XBUF(bp);
12568 	ASSERT(xp != NULL);
12569 
12570 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12571 	bsp = xp->xb_private;
12572 
12573 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12574 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12575 
12576 	if (is_write) {
12577 		/*
12578 		 * For a WRITE request we must free up the block range that
12579 		 * we have locked up.  This holds regardless of whether this is
12580 		 * an aligned write request or a read-modify-write request.
12581 		 */
12582 		sd_range_unlock(un, bsp->mbs_wmp);
12583 		bsp->mbs_wmp = NULL;
12584 	}
12585 
12586 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
12587 		/*
12588 		 * An aligned read or write command will have no shadow buf;
12589 		 * there is not much else to do with it.
12590 		 */
12591 		goto done;
12592 	}
12593 
12594 	orig_bp = bsp->mbs_orig_bp;
12595 	ASSERT(orig_bp != NULL);
12596 	orig_xp = SD_GET_XBUF(orig_bp);
12597 	ASSERT(orig_xp != NULL);
12598 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12599 
12600 	if (!is_write && has_wmap) {
12601 		/*
12602 		 * A READ with a wmap means this is the READ phase of a
12603 		 * read-modify-write. If an error occurred on the READ then
12604 		 * we do not proceed with the WRITE phase or copy any data.
12605 		 * Just release the write maps and return with an error.
12606 		 */
12607 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12608 			orig_bp->b_resid = orig_bp->b_bcount;
12609 			bioerror(orig_bp, bp->b_error);
12610 			sd_range_unlock(un, bsp->mbs_wmp);
12611 			goto freebuf_done;
12612 		}
12613 	}
12614 
12615 	/*
12616 	 * Here is where we set up to copy the data from the shadow buf
12617 	 * into the space associated with the original buf.
12618 	 *
12619 	 * To deal with the conversion between block sizes, these
12620 	 * computations treat the data as an array of bytes, with the
12621 	 * first byte (byte 0) corresponding to the first byte in the
12622 	 * first block on the disk.
12623 	 */
12624 
12625 	/*
12626 	 * shadow_start and shadow_end delimit the byte range of the
12627 	 * data returned with the shadow IO request.
12628 	 */
12629 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12630 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12631 
12632 	/*
12633 	 * copy_offset gives the offset (in bytes) from the start of the first
12634 	 * block of the READ request to the beginning of the data.  We retrieve
12635 	 * this value from the layer-private data of the xbuf, as it was saved
12636 	 * there by sd_mapblocksize_iostart(). copy_length gives the amount of
12637 	 * data to be copied (in bytes).
12638 	 */
12639 	copy_offset  = bsp->mbs_copy_offset;
12640 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12641 	copy_length  = orig_bp->b_bcount;
12642 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
12643 
12644 	/*
12645 	 * Set up the resid and error fields of orig_bp as appropriate.
12646 	 */
12647 	if (shadow_end >= request_end) {
12648 		/* We got all the requested data; set resid to zero */
12649 		orig_bp->b_resid = 0;
12650 	} else {
12651 		/*
12652 		 * We failed to get enough data to fully satisfy the original
12653 		 * request. Just copy back whatever data we got and set
12654 		 * up the residual and error code as required.
12655 		 *
12656 		 * 'shortfall' is the amount by which the data received with the
12657 		 * shadow buf has "fallen short" of the requested amount.
12658 		 */
12659 		shortfall = (size_t)(request_end - shadow_end);
12660 
12661 		if (shortfall > orig_bp->b_bcount) {
12662 			/*
12663 			 * We did not get enough data to even partially
12664 			 * fulfill the original request.  The residual is
12665 			 * equal to the amount requested.
12666 			 */
12667 			orig_bp->b_resid = orig_bp->b_bcount;
12668 		} else {
12669 			/*
12670 			 * We did not get all the data that we requested
12671 			 * from the device, but we will try to return what
12672 			 * portion we did get.
12673 			 */
12674 			orig_bp->b_resid = shortfall;
12675 		}
12676 		ASSERT(copy_length >= orig_bp->b_resid);
12677 		copy_length  -= orig_bp->b_resid;
12678 	}
12679 
12680 	/* Propagate the error code from the shadow buf to the original buf */
12681 	bioerror(orig_bp, bp->b_error);
12682 
12683 	if (is_write) {
12684 		goto freebuf_done;	/* No data copying for a WRITE */
12685 	}
12686 
12687 	if (has_wmap) {
12688 		/*
12689 		 * This is a READ command from the READ phase of a
12690 		 * read-modify-write request. We have to copy the data given
12691 		 * by the user OVER the data returned by the READ command,
12692 		 * then convert the command from a READ to a WRITE and send
12693 		 * it back to the target.
12694 		 */
12695 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12696 		    copy_length);
12697 
12698 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12699 
12700 		/*
12701 		 * Dispatch the WRITE command to the taskq thread, which
12702 		 * will in turn send the command to the target. When the
12703 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12704 		 * will get called again as part of the iodone chain
12705 		 * processing for it. Note that we will still be dealing
12706 		 * with the shadow buf at that point.
12707 		 */
12708 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12709 		    KM_NOSLEEP) != 0) {
12710 			/*
12711 			 * Dispatch was successful so we are done. Return
12712 			 * without going any higher up the iodone chain. Do
12713 			 * not free up any layer-private data until after the
12714 			 * WRITE completes.
12715 			 */
12716 			return;
12717 		}
12718 
12719 		/*
12720 		 * Dispatch of the WRITE command failed; set up the error
12721 		 * condition and send this IO back up the iodone chain.
12722 		 */
12723 		bioerror(orig_bp, EIO);
12724 		orig_bp->b_resid = orig_bp->b_bcount;
12725 
12726 	} else {
12727 		/*
12728 		 * This is a regular READ request (ie, not a RMW). Copy the
12729 		 * data from the shadow buf into the original buf. The
12730 		 * copy_offset compensates for any "misalignment" between the
12731 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12732 		 * original buf (with its un->un_sys_blocksize blocks).
12733 		 */
12734 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12735 		    copy_length);
12736 	}
12737 
12738 freebuf_done:
12739 
12740 	/*
12741 	 * At this point we still have both the shadow buf AND the original
12742 	 * buf to deal with, as well as the layer-private data area in each.
12743 	 * Local variables are as follows:
12744 	 *
12745 	 * bp -- points to shadow buf
12746 	 * xp -- points to xbuf of shadow buf
12747 	 * bsp -- points to layer-private data area of shadow buf
12748 	 * orig_bp -- points to original buf
12749 	 *
12750 	 * First free the shadow buf and its associated xbuf, then free the
12751 	 * layer-private data area from the shadow buf. There is no need to
12752 	 * restore xb_private in the shadow xbuf.
12753 	 */
12754 	sd_shadow_buf_free(bp);
12755 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12756 
12757 	/*
12758 	 * Now update the local variables to point to the original buf, xbuf,
12759 	 * and layer-private area.
12760 	 */
12761 	bp = orig_bp;
12762 	xp = SD_GET_XBUF(bp);
12763 	ASSERT(xp != NULL);
12764 	ASSERT(xp == orig_xp);
12765 	bsp = xp->xb_private;
12766 	ASSERT(bsp != NULL);
12767 
12768 done:
12769 	/*
12770 	 * Restore xb_private to whatever it was set to by the next higher
12771 	 * layer in the chain, then free the layer-private data area.
12772 	 */
12773 	xp->xb_private = bsp->mbs_oprivate;
12774 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12775 
12776 exit:
12777 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
12778 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
12779 
12780 	SD_NEXT_IODONE(index, un, bp);
12781 }
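
/*
 * Worked example (editorial), continuing the sketch that follows
 * sd_mapblocksize_iostart(): with shadow_start = 0, a 4096-byte shadow
 * READ that came back with bp->b_resid = 2048, copy_offset = 1536, and
 * an original request of 1024 bytes:
 *
 *	shadow_end  = 0 + 4096 - 2048 = 2048
 *	request_end = 0 + 1536 + 1024 = 2560
 *	shortfall   = 2560 - 2048 = 512
 *
 * so orig_bp->b_resid = 512 and only copy_length = 1024 - 512 = 512
 * bytes are copied back from the shadow buf.
 */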
12782 
12783 
12784 /*
12785  *    Function: sd_checksum_iostart
12786  *
12787  * Description: A stub function for a layer that's currently not used.
12788  *		For now just a placeholder.
12789  *
12790  *     Context: Kernel thread context
12791  */
12792 
12793 static void
12794 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12795 {
12796 	ASSERT(un != NULL);
12797 	ASSERT(bp != NULL);
12798 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12799 	SD_NEXT_IOSTART(index, un, bp);
12800 }
12801 
12802 
12803 /*
12804  *    Function: sd_checksum_iodone
12805  *
12806  * Description: A stub function for a layer that's currently not used.
12807  *		For now just a placeholder.
12808  *
12809  *     Context: May be called under interrupt context
12810  */
12811 
12812 static void
12813 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12814 {
12815 	ASSERT(un != NULL);
12816 	ASSERT(bp != NULL);
12817 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12818 	SD_NEXT_IODONE(index, un, bp);
12819 }
12820 
12821 
12822 /*
12823  *    Function: sd_checksum_uscsi_iostart
12824  *
12825  * Description: A stub function for a layer that's currently not used.
12826  *		For now just a placeholder.
12827  *
12828  *     Context: Kernel thread context
12829  */
12830 
12831 static void
12832 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12833 {
12834 	ASSERT(un != NULL);
12835 	ASSERT(bp != NULL);
12836 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12837 	SD_NEXT_IOSTART(index, un, bp);
12838 }
12839 
12840 
12841 /*
12842  *    Function: sd_checksum_uscsi_iodone
12843  *
12844  * Description: A stub function for a layer that's currently not used.
12845  *		For now just a placeholder.
12846  *
12847  *     Context: May be called under interrupt context
12848  */
12849 
12850 static void
12851 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12852 {
12853 	ASSERT(un != NULL);
12854 	ASSERT(bp != NULL);
12855 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12856 	SD_NEXT_IODONE(index, un, bp);
12857 }
12858 
12859 
12860 /*
12861  *    Function: sd_pm_iostart
12862  *
12863  * Description: iostart-side routine for power management.
12864  *
12865  *     Context: Kernel thread context
12866  */
12867 
12868 static void
12869 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12870 {
12871 	ASSERT(un != NULL);
12872 	ASSERT(bp != NULL);
12873 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12874 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12875 
12876 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12877 
12878 	if (sd_pm_entry(un) != DDI_SUCCESS) {
12879 		/*
12880 		 * Set up to return the failed buf back up the 'iodone'
12881 		 * side of the calling chain.
12882 		 */
12883 		bioerror(bp, EIO);
12884 		bp->b_resid = bp->b_bcount;
12885 
12886 		SD_BEGIN_IODONE(index, un, bp);
12887 
12888 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12889 		return;
12890 	}
12891 
12892 	SD_NEXT_IOSTART(index, un, bp);
12893 
12894 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12895 }
12896 
12897 
12898 /*
12899  *    Function: sd_pm_iodone
12900  *
12901  * Description: iodone-side routine for power management.
12902  *
12903  *     Context: may be called from interrupt context
12904  */
12905 
12906 static void
12907 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
12908 {
12909 	ASSERT(un != NULL);
12910 	ASSERT(bp != NULL);
12911 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12912 
12913 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
12914 
12915 	/*
12916 	 * After attach the following flag is only read, so don't
12917 	 * take the penalty of acquiring a mutex for it.
12918 	 */
12919 	if (un->un_f_pm_is_enabled == TRUE) {
12920 		sd_pm_exit(un);
12921 	}
12922 
12923 	SD_NEXT_IODONE(index, un, bp);
12924 
12925 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
12926 }
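
/*
 * Editorial note: sd_pm_iostart() and sd_pm_iodone() bracket each buf
 * with a power-management busy/idle pair:
 *
 *	sd_pm_entry(un);	iostart side; may power the device up
 *	... transport the command ...
 *	sd_pm_exit(un);		iodone side; marks the device idle
 *
 * Every successful sd_pm_entry() must therefore be balanced by a
 * matching sd_pm_exit().
 */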
12927 
12928 
12929 /*
12930  *    Function: sd_core_iostart
12931  *
12932  * Description: Primary driver function for enqueuing buf(9S) structs from
12933  *		the system and initiating IO to the target device
12934  *
12935  *     Context: Kernel thread context. Can sleep.
12936  *
12937  * Assumptions:  - The given xp->xb_blkno is absolute
12938  *		   (ie, relative to the start of the device).
12939  *		 - The IO is to be done using the native blocksize of
12940  *		   the device, as specified in un->un_tgt_blocksize.
12941  */
12942 /* ARGSUSED */
12943 static void
12944 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
12945 {
12946 	struct sd_xbuf *xp;
12947 
12948 	ASSERT(un != NULL);
12949 	ASSERT(bp != NULL);
12950 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12951 	ASSERT(bp->b_resid == 0);
12952 
12953 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
12954 
12955 	xp = SD_GET_XBUF(bp);
12956 	ASSERT(xp != NULL);
12957 
12958 	mutex_enter(SD_MUTEX(un));
12959 
12960 	/*
12961 	 * If we are currently in the failfast state, fail any new IO
12962 	 * that has B_FAILFAST set, then return.
12963 	 */
12964 	if ((bp->b_flags & B_FAILFAST) &&
12965 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
12966 		mutex_exit(SD_MUTEX(un));
12967 		bioerror(bp, EIO);
12968 		bp->b_resid = bp->b_bcount;
12969 		SD_BEGIN_IODONE(index, un, bp);
12970 		return;
12971 	}
12972 
12973 	if (SD_IS_DIRECT_PRIORITY(xp)) {
12974 		/*
12975 		 * Priority command -- transport it immediately.
12976 		 *
12977 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
12978 		 * because all direct priority commands should be associated
12979 		 * with error recovery actions which we don't want to retry.
12980 		 */
12981 		sd_start_cmds(un, bp);
12982 	} else {
12983 		/*
12984 		 * Normal command -- add it to the wait queue, then start
12985 		 * transporting commands from the wait queue.
12986 		 */
12987 		sd_add_buf_to_waitq(un, bp);
12988 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
12989 		sd_start_cmds(un, NULL);
12990 	}
12991 
12992 	mutex_exit(SD_MUTEX(un));
12993 
12994 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
12995 }
12996 
12997 
12998 /*
12999  *    Function: sd_init_cdb_limits
13000  *
13001  * Description: This is to handle scsi_pkt initialization differences
13002  *		between the driver platforms.
13003  *
13004  *		Legacy behaviors:
13005  *
13006  *		If the block number or the sector count exceeds the
13007  *		capabilities of a Group 0 command, shift over to a
13008  *		Group 1 command. We don't blindly use Group 1
13009  *		commands because a) some drives (CDC Wren IVs) get a
13010  *		bit confused, and b) there is probably a fair amount
13011  *		of speed difference for a target to receive and decode
13012  *		a 10 byte command instead of a 6 byte command.
13013  *
13014  *		The xfer time difference of 6 vs 10 byte CDBs is
13015  *		still significant so this code is still worthwhile.
13016  *		10 byte CDBs are very inefficient with the fas HBA driver
13017  *		and older disks. Each CDB byte took 1 usec with some
13018  *		popular disks.
13019  *
13020  *     Context: Must be called at attach time
13021  */
13022 
13023 static void
13024 sd_init_cdb_limits(struct sd_lun *un)
13025 {
13026 	int hba_cdb_limit;
13027 
13028 	/*
13029 	 * Use CDB_GROUP1 commands for most devices except for
13030 	 * parallel SCSI fixed drives in which case we get better
13031 	 * performance using CDB_GROUP0 commands (where applicable).
13032 	 */
13033 	un->un_mincdb = SD_CDB_GROUP1;
13034 #if !defined(__fibre)
13035 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13036 	    !un->un_f_has_removable_media) {
13037 		un->un_mincdb = SD_CDB_GROUP0;
13038 	}
13039 #endif
13040 
13041 	/*
13042 	 * Try to read the max-cdb-length supported by HBA.
13043 	 */
13044 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13045 	if (0 >= un->un_max_hba_cdb) {
13046 		un->un_max_hba_cdb = CDB_GROUP4;
13047 		hba_cdb_limit = SD_CDB_GROUP4;
13048 	} else if (0 < un->un_max_hba_cdb &&
13049 	    un->un_max_hba_cdb < CDB_GROUP1) {
13050 		hba_cdb_limit = SD_CDB_GROUP0;
13051 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13052 	    un->un_max_hba_cdb < CDB_GROUP5) {
13053 		hba_cdb_limit = SD_CDB_GROUP1;
13054 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13055 	    un->un_max_hba_cdb < CDB_GROUP4) {
13056 		hba_cdb_limit = SD_CDB_GROUP5;
13057 	} else {
13058 		hba_cdb_limit = SD_CDB_GROUP4;
13059 	}
13060 
13061 	/*
13062 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13063 	 * commands for fixed disks unless we are building for a 32 bit
13064 	 * kernel.
13065 	 */
13066 #ifdef _LP64
13067 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13068 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13069 #else
13070 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13071 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13072 #endif
13073 
13074 	/*
13075 	 * x86 systems require the PKT_DMA_PARTIAL flag
13076 	 */
13077 #if defined(__x86)
13078 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13079 #else
13080 	un->un_pkt_flags = 0;
13081 #endif
13082 
13083 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13084 	    ? sizeof (struct scsi_arq_status) : 1);
13085 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13086 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13087 }
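
/*
 * Summary of the hba_cdb_limit selection above (CDB byte counts:
 * CDB_GROUP0 = 6, CDB_GROUP1 = 10, CDB_GROUP5 = 12, CDB_GROUP4 = 16):
 *
 *	"max-cdb-length" cap	hba_cdb_limit
 *	--------------------	-------------
 *	<= 0 (cap missing)	SD_CDB_GROUP4
 *	 1 ..  9		SD_CDB_GROUP0
 *	10 .. 11		SD_CDB_GROUP1
 *	12 .. 15		SD_CDB_GROUP5
 *	>= 16			SD_CDB_GROUP4
 */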
13088 
13089 
13090 /*
13091  *    Function: sd_initpkt_for_buf
13092  *
13093  * Description: Allocate and initialize for transport a scsi_pkt struct,
13094  *		based upon the info specified in the given buf struct.
13095  *
13096  *		Assumes the xb_blkno in the request is absolute (ie,
13097  *		relative to the start of the device (NOT partition!).
13098  *		Also assumes that the request is using the native block
13099  *		size of the device (as returned by the READ CAPACITY
13100  *		command).
13101  *
13102  * Return Code: SD_PKT_ALLOC_SUCCESS
13103  *		SD_PKT_ALLOC_FAILURE
13104  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13105  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13106  *
13107  *     Context: Kernel thread and may be called from software interrupt context
13108  *		as part of a sdrunout callback. This function may not block or
13109  *		call routines that block
13110  */
13111 
13112 static int
13113 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13114 {
13115 	struct sd_xbuf	*xp;
13116 	struct scsi_pkt *pktp = NULL;
13117 	struct sd_lun	*un;
13118 	size_t		blockcount;
13119 	daddr_t		startblock;
13120 	int		rval;
13121 	int		cmd_flags;
13122 
13123 	ASSERT(bp != NULL);
13124 	ASSERT(pktpp != NULL);
13125 	xp = SD_GET_XBUF(bp);
13126 	ASSERT(xp != NULL);
13127 	un = SD_GET_UN(bp);
13128 	ASSERT(un != NULL);
13129 	ASSERT(mutex_owned(SD_MUTEX(un)));
13130 	ASSERT(bp->b_resid == 0);
13131 
13132 	SD_TRACE(SD_LOG_IO_CORE, un,
13133 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13134 
13135 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13136 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13137 		/*
13138 		 * Already have a scsi_pkt -- just need DMA resources.
13139 		 * We must recompute the CDB in case the mapping returns
13140 		 * a nonzero pkt_resid.
13141 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13142  *		that is being retried, the unmap/remap of the DMA resources
13143 		 * will result in the entire transfer starting over again
13144 		 * from the very first block.
13145 		 */
13146 		ASSERT(xp->xb_pktp != NULL);
13147 		pktp = xp->xb_pktp;
13148 	} else {
13149 		pktp = NULL;
13150 	}
13151 #endif /* __i386 || __amd64 */
13152 
13153 	startblock = xp->xb_blkno;	/* Absolute block num. */
13154 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13155 
13156 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13157 
13158 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13159 
13160 #else
13161 
13162 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13163 
13164 #endif
13165 
13166 	/*
13167 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13168 	 * call scsi_init_pkt, and build the CDB.
13169 	 */
13170 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13171 	    cmd_flags, sdrunout, (caddr_t)un,
13172 	    startblock, blockcount);
13173 
13174 	if (rval == 0) {
13175 		/*
13176 		 * Success.
13177 		 *
13178 		 * If partial DMA is being used and required for this transfer,
13179 		 * set it up here.
13180 		 */
13181 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13182 		    (pktp->pkt_resid != 0)) {
13183 
13184 			/*
13185 			 * Save the pkt_resid so the next transfer can
13186 			 * pick up where this one leaves off
13187 			 */
13188 			xp->xb_dma_resid = pktp->pkt_resid;
13189 
13190 			/* rezero resid */
13191 			pktp->pkt_resid = 0;
13192 
13193 		} else {
13194 			xp->xb_dma_resid = 0;
13195 		}
13196 
13197 		pktp->pkt_flags = un->un_tagflags;
13198 		pktp->pkt_time  = un->un_cmd_timeout;
13199 		pktp->pkt_comp  = sdintr;
13200 
13201 		pktp->pkt_private = bp;
13202 		*pktpp = pktp;
13203 
13204 		SD_TRACE(SD_LOG_IO_CORE, un,
13205 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13206 
13207 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13208 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13209 #endif
13210 
13211 		return (SD_PKT_ALLOC_SUCCESS);
13212 
13213 	}
13214 
13215 	/*
13216 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13217 	 * from sd_setup_rw_pkt.
13218 	 */
13219 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13220 
13221 	if (rval == SD_PKT_ALLOC_FAILURE) {
13222 		*pktpp = NULL;
13223 		/*
13224 		 * Set the driver state to RWAIT to indicate the driver
13225 		 * is waiting on resource allocations. The driver will not
13226 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13227 		 */
13228 		New_state(un, SD_STATE_RWAIT);
13229 
13230 		SD_ERROR(SD_LOG_IO_CORE, un,
13231 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13232 
13233 		if ((bp->b_flags & B_ERROR) != 0) {
13234 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13235 		}
13236 		return (SD_PKT_ALLOC_FAILURE);
13237 	} else {
13238 		/*
13239 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13240 		 *
13241 		 * This should never happen.  Maybe someone messed with the
13242 		 * kernel's minphys?
13243 		 */
13244 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13245 		    "Request rejected: too large for CDB: "
13246 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13247 		SD_ERROR(SD_LOG_IO_CORE, un,
13248 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13249 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13250 
13251 	}
13252 }
13253 
13254 
13255 /*
13256  *    Function: sd_destroypkt_for_buf
13257  *
13258  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13259  *
13260  *     Context: Kernel thread or interrupt context
13261  */
13262 
13263 static void
13264 sd_destroypkt_for_buf(struct buf *bp)
13265 {
13266 	ASSERT(bp != NULL);
13267 	ASSERT(SD_GET_UN(bp) != NULL);
13268 
13269 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13270 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13271 
13272 	ASSERT(SD_GET_PKTP(bp) != NULL);
13273 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13274 
13275 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13276 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13277 }
13278 
13279 /*
13280  *    Function: sd_setup_rw_pkt
13281  *
13282  * Description: Determines appropriate CDB group for the requested LBA
13283  *		and transfer length, calls scsi_init_pkt, and builds
13284  *		the CDB.  Do not use for partial DMA transfers except
13285  *		for the initial transfer since the CDB size must
13286  *		remain constant.
13287  *
13288  *     Context: Kernel thread and may be called from software interrupt
13289  *		context as part of a sdrunout callback. This function may not
13290  *		block or call routines that block
13291  */
13292 
13293 
13294 int
13295 sd_setup_rw_pkt(struct sd_lun *un,
13296     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13297     int (*callback)(caddr_t), caddr_t callback_arg,
13298     diskaddr_t lba, uint32_t blockcount)
13299 {
13300 	struct scsi_pkt *return_pktp;
13301 	union scsi_cdb *cdbp;
13302 	struct sd_cdbinfo *cp = NULL;
13303 	int i;
13304 
13305 	/*
13306 	 * See which size CDB to use, based upon the request.
13307 	 */
13308 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13309 
13310 		/*
13311 		 * Check lba and block count against sd_cdbtab limits.
13312 		 * In the partial DMA case, we have to use the same size
13313 		 * CDB for all the transfers.  Check lba + blockcount
13314 		 * against the max LBA so we know that segment of the
13315 		 * transfer can use the CDB we select.
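		 *
		 * For example, a Group 0 (6-byte) CDB carries only a
		 * 21-bit LBA and an 8-bit count, so a request at or
		 * beyond LBA 0x200000 must use at least a Group 1
		 * (10-byte) CDB, which carries a 32-bit LBA and a
		 * 16-bit count.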
13316 		 */
13317 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13318 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13319 
13320 			/*
13321 			 * The command will fit into the CDB type
13322 			 * specified by sd_cdbtab[i].
13323 			 */
13324 			cp = sd_cdbtab + i;
13325 
13326 			/*
13327 			 * Call scsi_init_pkt so we can fill in the
13328 			 * CDB.
13329 			 */
13330 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13331 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13332 			    flags, callback, callback_arg);
13333 
13334 			if (return_pktp != NULL) {
13335 
13336 				/*
13337 				 * Return new value of pkt
13338 				 */
13339 				*pktpp = return_pktp;
13340 
13341 				/*
13342 				 * To be safe, zero the CDB, ensuring there is
13343 				 * no leftover data from a previous command.
13344 				 */
13345 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13346 
13347 				/*
13348 				 * Handle partial DMA mapping
13349 				 */
13350 				if (return_pktp->pkt_resid != 0) {
13351 
13352 					/*
13353 					 * Not going to xfer as many blocks as
13354 					 * originally expected
13355 					 */
13356 					blockcount -=
13357 					    SD_BYTES2TGTBLOCKS(un,
13358 						return_pktp->pkt_resid);
13359 				}
13360 
13361 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13362 
13363 				/*
13364 				 * Set command byte based on the CDB
13365 				 * type we matched.
13366 				 */
13367 				cdbp->scc_cmd = cp->sc_grpmask |
13368 				    ((bp->b_flags & B_READ) ?
13369 					SCMD_READ : SCMD_WRITE);
13370 
13371 				SD_FILL_SCSI1_LUN(un, return_pktp);
13372 
13373 				/*
13374 				 * Fill in LBA and length
13375 				 */
13376 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13377 				    (cp->sc_grpcode == CDB_GROUP4) ||
13378 				    (cp->sc_grpcode == CDB_GROUP0) ||
13379 				    (cp->sc_grpcode == CDB_GROUP5));
13380 
13381 				if (cp->sc_grpcode == CDB_GROUP1) {
13382 					FORMG1ADDR(cdbp, lba);
13383 					FORMG1COUNT(cdbp, blockcount);
13384 					return (0);
13385 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13386 					FORMG4LONGADDR(cdbp, lba);
13387 					FORMG4COUNT(cdbp, blockcount);
13388 					return (0);
13389 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13390 					FORMG0ADDR(cdbp, lba);
13391 					FORMG0COUNT(cdbp, blockcount);
13392 					return (0);
13393 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13394 					FORMG5ADDR(cdbp, lba);
13395 					FORMG5COUNT(cdbp, blockcount);
13396 					return (0);
13397 				}
13398 
13399 				/*
13400 				 * It should be impossible to not match one
13401 				 * of the CDB types above, so we should never
13402 				 * reach this point.  Set the CDB command byte
13403 				 * to test-unit-ready to avoid writing
13404 				 * to somewhere we don't intend.
13405 				 */
13406 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13407 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13408 			} else {
13409 				/*
13410 				 * Couldn't get scsi_pkt
13411 				 */
13412 				return (SD_PKT_ALLOC_FAILURE);
13413 			}
13414 		}
13415 	}
13416 
13417 	/*
13418 	 * None of the available CDB types were suitable.  This really
13419 	 * should never happen: on a 64-bit system we support
13420 	 * READ16/WRITE16, which can hold an entire 64-bit disk address,
13421 	 * and on a 32-bit system we refuse to bind to a device larger
13422 	 * than 2TB, so addresses will never be larger than 32 bits.
13423 	 */
13424 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13425 }
13426 
13427 #if defined(__i386) || defined(__amd64)
13428 /*
13429  *    Function: sd_setup_next_rw_pkt
13430  *
13431  * Description: Setup packet for partial DMA transfers, except for the
13432  * 		initial transfer.  sd_setup_rw_pkt should be used for
13433  *		the initial transfer.
13434  *
13435  *     Context: Kernel thread and may be called from interrupt context.
13436  */
13437 
13438 int
13439 sd_setup_next_rw_pkt(struct sd_lun *un,
13440     struct scsi_pkt *pktp, struct buf *bp,
13441     diskaddr_t lba, uint32_t blockcount)
13442 {
13443 	uchar_t com;
13444 	union scsi_cdb *cdbp;
13445 	uchar_t cdb_group_id;
13446 
13447 	ASSERT(pktp != NULL);
13448 	ASSERT(pktp->pkt_cdbp != NULL);
13449 
13450 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13451 	com = cdbp->scc_cmd;
13452 	cdb_group_id = CDB_GROUPID(com);
13453 
13454 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13455 	    (cdb_group_id == CDB_GROUPID_1) ||
13456 	    (cdb_group_id == CDB_GROUPID_4) ||
13457 	    (cdb_group_id == CDB_GROUPID_5));
13458 
13459 	/*
13460 	 * Move pkt to the next portion of the xfer.
13461 	 * func is NULL_FUNC so we do not have to release
13462 	 * the disk mutex here.
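	 * Passing the existing pktp back into scsi_init_pkt(9F) with
	 * zero CDB and status lengths simply moves the DMA window to
	 * the next portion of the transfer; the CDB is then rebuilt
	 * below for the new LBA and count.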
13463 	 */
13464 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13465 	    NULL_FUNC, NULL) == pktp) {
13466 		/* Success.  Handle partial DMA */
13467 		if (pktp->pkt_resid != 0) {
13468 			blockcount -=
13469 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13470 		}
13471 
13472 		cdbp->scc_cmd = com;
13473 		SD_FILL_SCSI1_LUN(un, pktp);
13474 		if (cdb_group_id == CDB_GROUPID_1) {
13475 			FORMG1ADDR(cdbp, lba);
13476 			FORMG1COUNT(cdbp, blockcount);
13477 			return (0);
13478 		} else if (cdb_group_id == CDB_GROUPID_4) {
13479 			FORMG4LONGADDR(cdbp, lba);
13480 			FORMG4COUNT(cdbp, blockcount);
13481 			return (0);
13482 		} else if (cdb_group_id == CDB_GROUPID_0) {
13483 			FORMG0ADDR(cdbp, lba);
13484 			FORMG0COUNT(cdbp, blockcount);
13485 			return (0);
13486 		} else if (cdb_group_id == CDB_GROUPID_5) {
13487 			FORMG5ADDR(cdbp, lba);
13488 			FORMG5COUNT(cdbp, blockcount);
13489 			return (0);
13490 		}
13491 
13492 		/* Unreachable */
13493 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13494 	}
13495 
13496 	/*
13497 	 * Error setting up next portion of cmd transfer.
13498 	 * Something is definitely very wrong and this
13499 	 * should not happen.
13500 	 */
13501 	return (SD_PKT_ALLOC_FAILURE);
13502 }
13503 #endif /* defined(__i386) || defined(__amd64) */
13504 
13505 /*
13506  *    Function: sd_initpkt_for_uscsi
13507  *
13508  * Description: Allocate and initialize for transport a scsi_pkt struct,
13509  *		based upon the info specified in the given uscsi_cmd struct.
13510  *
13511  * Return Code: SD_PKT_ALLOC_SUCCESS
13512  *		SD_PKT_ALLOC_FAILURE
13513  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13514  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13515  *
13516  *     Context: Kernel thread and may be called from software interrupt context
13517  *		as part of a sdrunout callback. This function may not block or
13518  *		call routines that block
13519  */
13520 
13521 static int
13522 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13523 {
13524 	struct uscsi_cmd *uscmd;
13525 	struct sd_xbuf	*xp;
13526 	struct scsi_pkt	*pktp;
13527 	struct sd_lun	*un;
13528 	uint32_t	flags = 0;
13529 
13530 	ASSERT(bp != NULL);
13531 	ASSERT(pktpp != NULL);
13532 	xp = SD_GET_XBUF(bp);
13533 	ASSERT(xp != NULL);
13534 	un = SD_GET_UN(bp);
13535 	ASSERT(un != NULL);
13536 	ASSERT(mutex_owned(SD_MUTEX(un)));
13537 
13538 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13539 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13540 	ASSERT(uscmd != NULL);
13541 
13542 	SD_TRACE(SD_LOG_IO_CORE, un,
13543 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13544 
13545 	/*
13546 	 * Allocate the scsi_pkt for the command.
13547 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13548 	 *	 during scsi_init_pkt time and will continue to use the
13549 	 *	 same path as long as the same scsi_pkt is used without
13550 	 *	 intervening scsi_dmafree(). Since a uscsi command does
13551 	 *	 not call scsi_dmafree() before retrying a failed command,
13552 	 *	 PKT_DMA_PARTIAL must NOT be set, so that scsi_vhci can
13553 	 *	 use another available path for the retry. Besides, a
13554 	 *	 uscsi command does not allow DMA breakup, so there is
13555 	 *	 no need for PKT_DMA_PARTIAL anyway.
13556 	 */
13557 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13558 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13559 	    sizeof (struct scsi_arq_status), 0,
13560 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13561 	    sdrunout, (caddr_t)un);
13562 
13563 	if (pktp == NULL) {
13564 		*pktpp = NULL;
13565 		/*
13566 		 * Set the driver state to RWAIT to indicate the driver
13567 		 * is waiting on resource allocations. The driver will not
13568 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13569 		 */
13570 		New_state(un, SD_STATE_RWAIT);
13571 
13572 		SD_ERROR(SD_LOG_IO_CORE, un,
13573 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13574 
13575 		if ((bp->b_flags & B_ERROR) != 0) {
13576 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13577 		}
13578 		return (SD_PKT_ALLOC_FAILURE);
13579 	}
13580 
13581 	/*
13582 	 * We do not do DMA breakup for USCSI commands, so return failure
13583 	 * here if all the needed DMA resources were not allocated.
13584 	 */
13585 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13586 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13587 		scsi_destroy_pkt(pktp);
13588 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13589 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13590 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13591 	}
13592 
13593 	/* Init the cdb from the given uscsi struct */
13594 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13595 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13596 
13597 	SD_FILL_SCSI1_LUN(un, pktp);
13598 
13599 	/*
13600 	 * Set up the optional USCSI flags. See the uscsi(7I) man page
13601 	 * for listing of the supported flags.
13602 	 */
13603 
13604 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13605 		flags |= FLAG_SILENT;
13606 	}
13607 
13608 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13609 		flags |= FLAG_DIAGNOSE;
13610 	}
13611 
13612 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13613 		flags |= FLAG_ISOLATE;
13614 	}
13615 
13616 	if (un->un_f_is_fibre == FALSE) {
13617 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13618 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13619 		}
13620 	}
13621 
13622 	/*
13623 	 * Set the pkt flags here so we save time later.
13624 	 * Note: These flags are NOT in the uscsi man page!!!
13625 	 */
13626 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13627 		flags |= FLAG_HEAD;
13628 	}
13629 
13630 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13631 		flags |= FLAG_NOINTR;
13632 	}
13633 
13634 	/*
13635 	 * For tagged queueing, things get a bit complicated.
13636 	 * Check first for head of queue and last for ordered queue.
13637 	 * If neither head nor ordered, use the default driver tag flags.
13638 	 */
13639 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13640 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13641 			flags |= FLAG_HTAG;
13642 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13643 			flags |= FLAG_OTAG;
13644 		} else {
13645 			flags |= un->un_tagflags & FLAG_TAGMASK;
13646 		}
13647 	}
13648 
13649 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
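	/*
	 * Tagged commands require disconnect privilege, so USCSI_NODISCON
	 * also clears any tag flags that were set above.
	 */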
13650 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13651 	}
13652 
13653 	pktp->pkt_flags = flags;
13654 
13655 	/* Copy the caller's CDB into the pkt... */
13656 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13657 
13658 	if (uscmd->uscsi_timeout == 0) {
13659 		pktp->pkt_time = un->un_uscsi_timeout;
13660 	} else {
13661 		pktp->pkt_time = uscmd->uscsi_timeout;
13662 	}
13663 
13664 	/* need it later to identify USCSI request in sdintr */
13665 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13666 
13667 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13668 
13669 	pktp->pkt_private = bp;
13670 	pktp->pkt_comp = sdintr;
13671 	*pktpp = pktp;
13672 
13673 	SD_TRACE(SD_LOG_IO_CORE, un,
13674 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13675 
13676 	return (SD_PKT_ALLOC_SUCCESS);
13677 }
13678 
13679 
13680 /*
13681  *    Function: sd_destroypkt_for_uscsi
13682  *
13683  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13684  *		IOs.  Also saves relevant info into the associated uscsi_cmd
13685  *		struct.
13686  *
13687  *     Context: May be called under interrupt context
13688  */
13689 
13690 static void
13691 sd_destroypkt_for_uscsi(struct buf *bp)
13692 {
13693 	struct uscsi_cmd *uscmd;
13694 	struct sd_xbuf	*xp;
13695 	struct scsi_pkt	*pktp;
13696 	struct sd_lun	*un;
13697 
13698 	ASSERT(bp != NULL);
13699 	xp = SD_GET_XBUF(bp);
13700 	ASSERT(xp != NULL);
13701 	un = SD_GET_UN(bp);
13702 	ASSERT(un != NULL);
13703 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13704 	pktp = SD_GET_PKTP(bp);
13705 	ASSERT(pktp != NULL);
13706 
13707 	SD_TRACE(SD_LOG_IO_CORE, un,
13708 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13709 
13710 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13711 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13712 	ASSERT(uscmd != NULL);
13713 
13714 	/* Save the status and the residual into the uscsi_cmd struct */
13715 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13716 	uscmd->uscsi_resid  = bp->b_resid;
13717 
13718 	/*
13719 	 * If enabled, copy any saved sense data into the area specified
13720 	 * by the uscsi command.
13721 	 */
13722 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13723 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13724 		/*
13725 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13726 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13727 		 */
13728 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13729 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13730 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13731 	}
13732 
13733 	/* We are done with the scsi_pkt; free it now */
13734 	ASSERT(SD_GET_PKTP(bp) != NULL);
13735 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13736 
13737 	SD_TRACE(SD_LOG_IO_CORE, un,
13738 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13739 }
13740 
13741 
13742 /*
13743  *    Function: sd_bioclone_alloc
13744  *
13745  * Description: Allocate a buf(9S) and init it as per the given buf
13746  *		and the various arguments.  The associated sd_xbuf
13747  *		struct is (nearly) duplicated.  The struct buf *bp
13748  *		argument is saved in new_xp->xb_private.
13749  *
13750  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13751  *		datalen - size of data area for the shadow bp
13752  *		blkno - starting LBA
13753  *		func - function pointer for b_iodone in the shadow buf. (May
13754  *			be NULL if none.)
13755  *
13756  * Return Code: Pointer to the allocated buf(9S) struct
13757  *
13758  *     Context: Can sleep.
13759  */
13760 
13761 static struct buf *
13762 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13763 	daddr_t blkno, int (*func)(struct buf *))
13764 {
13765 	struct	sd_lun	*un;
13766 	struct	sd_xbuf	*xp;
13767 	struct	sd_xbuf	*new_xp;
13768 	struct	buf	*new_bp;
13769 
13770 	ASSERT(bp != NULL);
13771 	xp = SD_GET_XBUF(bp);
13772 	ASSERT(xp != NULL);
13773 	un = SD_GET_UN(bp);
13774 	ASSERT(un != NULL);
13775 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13776 
13777 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13778 	    NULL, KM_SLEEP);
13779 
13780 	new_bp->b_lblkno	= blkno;
13781 
13782 	/*
13783 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13784 	 * original xbuf into it.
13785 	 */
13786 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13787 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13788 
13789 	/*
13790 	 * The given bp is automatically saved in the xb_private member
13791 	 * of the new xbuf.  Callers are allowed to depend on this.
13792 	 */
13793 	new_xp->xb_private = bp;
13794 
13795 	new_bp->b_private  = new_xp;
13796 
13797 	return (new_bp);
13798 }
13799 
13800 /*
13801  *    Function: sd_shadow_buf_alloc
13802  *
13803  * Description: Allocate a buf(9S) and init it as per the given buf
13804  *		and the various arguments.  The associated sd_xbuf
13805  *		struct is (nearly) duplicated.  The struct buf *bp
13806  *		argument is saved in new_xp->xb_private.
13807  *
13808  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13809  *		datalen - size of data area for the shadow bp
13810  *		bflags - B_READ or B_WRITE (pseudo flag)
13811  *		blkno - starting LBA
13812  *		func - function pointer for b_iodone in the shadow buf. (May
13813  *			be NULL if none.)
13814  *
13815  * Return Code: Pointer to the allocated buf(9S) struct
13816  *
13817  *     Context: Can sleep.
13818  */
13819 
13820 static struct buf *
13821 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
13822 	daddr_t blkno, int (*func)(struct buf *))
13823 {
13824 	struct	sd_lun	*un;
13825 	struct	sd_xbuf	*xp;
13826 	struct	sd_xbuf	*new_xp;
13827 	struct	buf	*new_bp;
13828 
13829 	ASSERT(bp != NULL);
13830 	xp = SD_GET_XBUF(bp);
13831 	ASSERT(xp != NULL);
13832 	un = SD_GET_UN(bp);
13833 	ASSERT(un != NULL);
13834 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13835 
13836 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
13837 		bp_mapin(bp);
13838 	}
13839 
13840 	bflags &= (B_READ | B_WRITE);
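	/*
	 * On x86 a plain kernel buffer suffices, since DMA resources are
	 * bound later (x86 uses PKT_DMA_PARTIAL); other platforms
	 * allocate DMA-consistent memory via scsi_alloc_consistent_buf(9F).
	 */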
13841 #if defined(__i386) || defined(__amd64)
13842 	new_bp = getrbuf(KM_SLEEP);
13843 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
13844 	new_bp->b_bcount = datalen;
13845 	new_bp->b_flags = bflags |
13846 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
13847 #else
13848 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
13849 	    datalen, bflags, SLEEP_FUNC, NULL);
13850 #endif
13851 	new_bp->av_forw	= NULL;
13852 	new_bp->av_back	= NULL;
13853 	new_bp->b_dev	= bp->b_dev;
13854 	new_bp->b_blkno	= blkno;
13855 	new_bp->b_iodone = func;
13856 	new_bp->b_edev	= bp->b_edev;
13857 	new_bp->b_resid	= 0;
13858 
13859 	/* We need to preserve the B_FAILFAST flag */
13860 	if (bp->b_flags & B_FAILFAST) {
13861 		new_bp->b_flags |= B_FAILFAST;
13862 	}
13863 
13864 	/*
13865 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13866 	 * original xbuf into it.
13867 	 */
13868 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13869 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13870 
13871 	/* Needed later to copy data between the shadow buf & original buf! */
13872 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
13873 
13874 	/*
13875 	 * The given bp is automatically saved in the xb_private member
13876 	 * of the new xbuf.  Callers are allowed to depend on this.
13877 	 */
13878 	new_xp->xb_private = bp;
13879 
13880 	new_bp->b_private  = new_xp;
13881 
13882 	return (new_bp);
13883 }
13884 
13885 /*
13886  *    Function: sd_bioclone_free
13887  *
13888  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
13889  *		in the larger-than-partition case.
13890  *
13891  *     Context: May be called under interrupt context
13892  */
13893 
13894 static void
13895 sd_bioclone_free(struct buf *bp)
13896 {
13897 	struct sd_xbuf	*xp;
13898 
13899 	ASSERT(bp != NULL);
13900 	xp = SD_GET_XBUF(bp);
13901 	ASSERT(xp != NULL);
13902 
13903 	/*
13904 	 * Call bp_mapout() before freeing the buf, in case a lower
13905 	 * layer or HBA had done a bp_mapin().  We must do this here
13906 	 * as we are the "originator" of the shadow buf.
13907 	 */
13908 	bp_mapout(bp);
13909 
13910 	/*
13911 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13912 	 * never gets confused by a stale value in this field. (Just a little
13913 	 * extra defensiveness here.)
13914 	 */
13915 	bp->b_iodone = NULL;
13916 
13917 	freerbuf(bp);
13918 
13919 	kmem_free(xp, sizeof (struct sd_xbuf));
13920 }
13921 
13922 /*
13923  *    Function: sd_shadow_buf_free
13924  *
13925  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
13926  *
13927  *     Context: May be called under interrupt context
13928  */
13929 
13930 static void
13931 sd_shadow_buf_free(struct buf *bp)
13932 {
13933 	struct sd_xbuf	*xp;
13934 
13935 	ASSERT(bp != NULL);
13936 	xp = SD_GET_XBUF(bp);
13937 	ASSERT(xp != NULL);
13938 
13939 #if defined(__sparc)
13940 	/*
13941 	 * Call bp_mapout() before freeing the buf, in case a lower
13942 	 * layer or HBA had done a bp_mapin().  We must do this here
13943 	 * as we are the "originator" of the shadow buf.
13944 	 */
13945 	bp_mapout(bp);
13946 #endif
13947 
13948 	/*
13949 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13950 	 * never gets confused by a stale value in this field. (Just a little
13951 	 * extra defensiveness here.)
13952 	 */
13953 	bp->b_iodone = NULL;
13954 
13955 #if defined(__i386) || defined(__amd64)
13956 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
13957 	freerbuf(bp);
13958 #else
13959 	scsi_free_consistent_buf(bp);
13960 #endif
13961 
13962 	kmem_free(xp, sizeof (struct sd_xbuf));
13963 }
13964 
13965 
13966 /*
13967  *    Function: sd_print_transport_rejected_message
13968  *
13969  * Description: This implements the ludicrously complex rules for printing
13970  *		a "transport rejected" message.  This is to address the
13971  *		specific problem of having a flood of this error message
13972  *		produced when a failover occurs.
13973  *
13974  *     Context: Any.
13975  */
13976 
13977 static void
13978 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13979 	int code)
13980 {
13981 	ASSERT(un != NULL);
13982 	ASSERT(mutex_owned(SD_MUTEX(un)));
13983 	ASSERT(xp != NULL);
13984 
13985 	/*
13986 	 * Print the "transport rejected" message under the following
13987 	 * conditions:
13988 	 *
13989 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13990 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13991 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13992 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13993 	 *   scsi_transport(9F) (which indicates that the target might have
13994 	 *   gone off-line).  This uses the un->un_tran_fatal_count
13995 	 *   gone off-line).  This uses the un->un_tran_fatal_count
13996 	 *   counter, which is incremented whenever a TRAN_FATAL_ERROR is
13997 	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
13998 	 *
13999 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14000 	 * the preceding cases in order for the message to be printed.
14001 	 */
14002 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
14003 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14004 		    (code != TRAN_FATAL_ERROR) ||
14005 		    (un->un_tran_fatal_count == 1)) {
14006 			switch (code) {
14007 			case TRAN_BADPKT:
14008 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14009 				    "transport rejected bad packet\n");
14010 				break;
14011 			case TRAN_FATAL_ERROR:
14012 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14013 				    "transport rejected fatal error\n");
14014 				break;
14015 			default:
14016 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14017 				    "transport rejected (%d)\n", code);
14018 				break;
14019 			}
14020 		}
14021 	}
14022 }
14023 
14024 
14025 /*
14026  *    Function: sd_add_buf_to_waitq
14027  *
14028  * Description: Add the given buf(9S) struct to the wait queue for the
14029  *		instance.  If sorting is enabled, then the buf is added
14030  *		to the queue via an elevator sort algorithm (a la
14031  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14032  *		If sorting is not enabled, then the buf is just added
14033  *		to the end of the wait queue.
14034  *
14035  * Return Code: void
14036  *
14037  *     Context: Does not sleep/block, therefore technically can be called
14038  *		from any context.  However if sorting is enabled then the
14039  *		execution time is indeterminate, and may take a long time if
14040  *		the wait queue grows large.
14041  */
14042 
14043 static void
14044 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14045 {
14046 	struct buf *ap;
14047 
14048 	ASSERT(bp != NULL);
14049 	ASSERT(un != NULL);
14050 	ASSERT(mutex_owned(SD_MUTEX(un)));
14051 
14052 	/* If the queue is empty, add the buf as the only entry & return. */
14053 	if (un->un_waitq_headp == NULL) {
14054 		ASSERT(un->un_waitq_tailp == NULL);
14055 		un->un_waitq_headp = un->un_waitq_tailp = bp;
14056 		bp->av_forw = NULL;
14057 		return;
14058 	}
14059 
14060 	ASSERT(un->un_waitq_tailp != NULL);
14061 
14062 	/*
14063 	 * If sorting is disabled, just add the buf to the tail end of
14064 	 * the wait queue and return.
14065 	 */
14066 	if (un->un_f_disksort_disabled) {
14067 		un->un_waitq_tailp->av_forw = bp;
14068 		un->un_waitq_tailp = bp;
14069 		bp->av_forw = NULL;
14070 		return;
14071 	}
14072 
14073 	/*
14074 	 * Sort through the list of requests currently on the wait queue
14075 	 * and add the new buf request at the appropriate position.
14076 	 *
14077 	 * The un->un_waitq_headp is an activity chain pointer on which
14078 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14079 	 * first queue holds those requests which are positioned after
14080 	 * the current SD_GET_BLKNO() (in the first request); the second holds
14081 	 * requests which came in after their SD_GET_BLKNO() number was passed.
14082 	 * Thus we implement a one way scan, retracting after reaching
14083 	 * the end of the drive to the first request on the second
14084 	 * queue, at which time it becomes the first queue.
14085 	 * A one-way scan is natural because of the way UNIX read-ahead
14086 	 * blocks are allocated.
14087 	 *
14088 	 * If we lie after the first request, then we must locate the
14089 	 * second request list and add ourselves to it.
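	 *
	 * For example, given a wait queue of block numbers 30, 50, 10,
	 * 20 (the inversion at 50 -> 10 starts the second list), a new
	 * request for block 40 is inserted between 30 and 50, while a
	 * new request for block 15 is inserted between 10 and 20.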
14090 	 */
14091 	ap = un->un_waitq_headp;
14092 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14093 		while (ap->av_forw != NULL) {
14094 			/*
14095 			 * Look for an "inversion" in the (normally
14096 			 * ascending) block numbers. This indicates
14097 			 * the start of the second request list.
14098 			 */
14099 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14100 				/*
14101 				 * Search the second request list for the
14102 				 * first request at a larger block number.
14103 				 * We go before that; however if there is
14104 				 * no such request, we go at the end.
14105 				 */
14106 				do {
14107 					if (SD_GET_BLKNO(bp) <
14108 					    SD_GET_BLKNO(ap->av_forw)) {
14109 						goto insert;
14110 					}
14111 					ap = ap->av_forw;
14112 				} while (ap->av_forw != NULL);
14113 				goto insert;		/* after last */
14114 			}
14115 			ap = ap->av_forw;
14116 		}
14117 
14118 		/*
14119 		 * No inversions... we will go after the last, and
14120 		 * be the first request in the second request list.
14121 		 */
14122 		goto insert;
14123 	}
14124 
14125 	/*
14126 	 * Request is at/after the current request...
14127 	 * sort in the first request list.
14128 	 */
14129 	while (ap->av_forw != NULL) {
14130 		/*
14131 		 * We want to go after the current request (1) if
14132 		 * there is an inversion after it (i.e. it is the end
14133 		 * of the first request list), or (2) if the next
14134 		 * request is a larger block no. than our request.
14135 		 */
14136 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14137 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14138 			goto insert;
14139 		}
14140 		ap = ap->av_forw;
14141 	}
14142 
14143 	/*
14144 	 * Neither a second list nor a larger request, therefore
14145 	 * we go at the end of the first list (which is the same
14146 	 * as the end of the whole shebang).
14147 	 */
14148 insert:
14149 	bp->av_forw = ap->av_forw;
14150 	ap->av_forw = bp;
14151 
14152 	/*
14153 	 * If we inserted onto the tail end of the waitq, make sure the
14154 	 * tail pointer is updated.
14155 	 */
14156 	if (ap == un->un_waitq_tailp) {
14157 		un->un_waitq_tailp = bp;
14158 	}
14159 }
14160 
14161 
14162 /*
14163  *    Function: sd_start_cmds
14164  *
14165  * Description: Remove and transport cmds from the driver queues.
14166  *
14167  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14168  *
14169  *		immed_bp - ptr to a buf to be transported immediately. Only
14170  *		the immed_bp is transported; bufs on the waitq are not
14171  *		processed and the un_retry_bp is not checked.  If immed_bp is
14172  *		NULL, then normal queue processing is performed.
14173  *
14174  *     Context: May be called from kernel thread context, interrupt context,
14175  *		or runout callback context. This function may not block or
14176  *		call routines that block.
14177  */
14178 
14179 static void
14180 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14181 {
14182 	struct	sd_xbuf	*xp;
14183 	struct	buf	*bp;
14184 	void	(*statp)(kstat_io_t *);
14185 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14186 	void	(*saved_statp)(kstat_io_t *);
14187 #endif
14188 	int	rval;
14189 
14190 	ASSERT(un != NULL);
14191 	ASSERT(mutex_owned(SD_MUTEX(un)));
14192 	ASSERT(un->un_ncmds_in_transport >= 0);
14193 	ASSERT(un->un_throttle >= 0);
14194 
14195 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14196 
14197 	do {
14198 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14199 		saved_statp = NULL;
14200 #endif
14201 
14202 		/*
14203 		 * If we are syncing or dumping, fail the command to
14204 		 * avoid recursively calling back into scsi_transport().
14205 		 * The dump I/O itself uses a separate code path so this
14206 		 * only prevents non-dump I/O from being sent while dumping.
14207 		 * File system sync takes place before dumping begins.
14208 		 * During panic, filesystem I/O is allowed provided
14209 		 * un_in_callback is <= 1.  This is to prevent recursion
14210 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14211 		 * sd_start_cmds and so on.  See panic.c for more information
14212 		 * about the states the system can be in during panic.
14213 		 */
14214 		if ((un->un_state == SD_STATE_DUMPING) ||
14215 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14216 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14217 			    "sd_start_cmds: panicking\n");
14218 			goto exit;
14219 		}
14220 
14221 		if ((bp = immed_bp) != NULL) {
14222 			/*
14223 			 * We have a bp that must be transported immediately.
14224 			 * It's OK to transport the immed_bp here without doing
14225 			 * the throttle limit check because the immed_bp is
14226 			 * always used in a retry/recovery case. This means
14227 			 * that we know we are not at the throttle limit by
14228 			 * virtue of the fact that to get here we must have
14229 			 * already gotten a command back via sdintr(). This also
14230 			 * relies on (1) the command on un_retry_bp preventing
14231 			 * further commands from the waitq from being issued;
14232 			 * and (2) the code in sd_retry_command checking the
14233 			 * throttle limit before issuing a delayed or immediate
14234 			 * retry. This holds even if the throttle limit is
14235 			 * currently ratcheted down from its maximum value.
14236 			 */
14237 			statp = kstat_runq_enter;
14238 			if (bp == un->un_retry_bp) {
14239 				ASSERT((un->un_retry_statp == NULL) ||
14240 				    (un->un_retry_statp == kstat_waitq_enter) ||
14241 				    (un->un_retry_statp ==
14242 				    kstat_runq_back_to_waitq));
14243 				/*
14244 				 * If the waitq kstat was incremented when
14245 				 * sd_set_retry_bp() queued this bp for a retry,
14246 				 * then we must set up statp so that the waitq
14247 				 * count will get decremented correctly below.
14248 				 * Also we must clear un->un_retry_statp to
14249 				 * ensure that we do not act on a stale value
14250 				 * in this field.
14251 				 */
14252 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14253 				    (un->un_retry_statp ==
14254 				    kstat_runq_back_to_waitq)) {
14255 					statp = kstat_waitq_to_runq;
14256 				}
14257 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14258 				saved_statp = un->un_retry_statp;
14259 #endif
14260 				un->un_retry_statp = NULL;
14261 
14262 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14263 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14264 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14265 				    un, un->un_retry_bp, un->un_throttle,
14266 				    un->un_ncmds_in_transport);
14267 			} else {
14268 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14269 				    "processing priority bp:0x%p\n", bp);
14270 			}
14271 
14272 		} else if ((bp = un->un_waitq_headp) != NULL) {
14273 			/*
14274 			 * A command on the waitq is ready to go, but do not
14275 			 * send it if:
14276 			 *
14277 			 * (1) the throttle limit has been reached, or
14278 			 * (2) a retry is pending, or
14279 			 * (3) a START_STOP_UNIT callback pending, or
14280 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14281 			 *	command is pending.
14282 			 *
14283 			 * For all of these conditions, IO processing will
14284 			 * restart after the condition is cleared.
14285 			 */
14286 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14287 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14288 				    "sd_start_cmds: exiting, "
14289 				    "throttle limit reached!\n");
14290 				goto exit;
14291 			}
14292 			if (un->un_retry_bp != NULL) {
14293 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14294 				    "sd_start_cmds: exiting, retry pending!\n");
14295 				goto exit;
14296 			}
14297 			if (un->un_startstop_timeid != NULL) {
14298 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14299 				    "sd_start_cmds: exiting, "
14300 				    "START_STOP pending!\n");
14301 				goto exit;
14302 			}
14303 			if (un->un_direct_priority_timeid != NULL) {
14304 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14305 				    "sd_start_cmds: exiting, "
14306 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14307 				goto exit;
14308 			}
14309 
14310 			/* Dequeue the command */
14311 			un->un_waitq_headp = bp->av_forw;
14312 			if (un->un_waitq_headp == NULL) {
14313 				un->un_waitq_tailp = NULL;
14314 			}
14315 			bp->av_forw = NULL;
14316 			statp = kstat_waitq_to_runq;
14317 			SD_TRACE(SD_LOG_IO_CORE, un,
14318 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14319 
14320 		} else {
14321 			/* No work to do so bail out now */
14322 			SD_TRACE(SD_LOG_IO_CORE, un,
14323 			    "sd_start_cmds: no more work, exiting!\n");
14324 			goto exit;
14325 		}
14326 
14327 		/*
14328 		 * Reset the state to normal. This is the mechanism by which
14329 		 * the state transitions from either SD_STATE_RWAIT or
14330 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14331 		 * If state is SD_STATE_PM_CHANGING then this command is
14332 		 * part of the device power control and the state must
14333 		 * not be put back to normal.  Doing so would allow new
14334 		 * commands to proceed when they shouldn't; the device may
14335 		 * be going off.
14336 		 */
14337 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14338 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14339 			New_state(un, SD_STATE_NORMAL);
14340 		}
14341 
14342 		xp = SD_GET_XBUF(bp);
14343 		ASSERT(xp != NULL);
14344 
14345 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14346 		/*
14347 		 * Allocate the scsi_pkt if we need one, or attach DMA
14348 		 * resources if we have a scsi_pkt that needs them. The
14349 		 * latter should only occur for commands that are being
14350 		 * retried.
14351 		 */
14352 		if ((xp->xb_pktp == NULL) ||
14353 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14354 #else
14355 		if (xp->xb_pktp == NULL) {
14356 #endif
14357 			/*
14358 			 * There is no scsi_pkt allocated for this buf. Call
14359 			 * the initpkt function to allocate & init one.
14360 			 *
14361 			 * The scsi_init_pkt runout callback functionality is
14362 			 * implemented as follows:
14363 			 *
14364 			 * 1) The initpkt function always calls
14365 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14366 			 *    callback routine.
14367 			 * 2) A successful packet allocation is initialized and
14368 			 *    the I/O is transported.
14369 			 * 3) The I/O associated with an allocation resource
14370 			 *    failure is left on its queue to be retried via
14371 			 *    runout or the next I/O.
14372 			 * 4) The I/O associated with a DMA error is removed
14373 			 *    from the queue and failed with EIO. Processing of
14374 			 *    the transport queues is also halted to be
14375 			 *    restarted via runout or the next I/O.
14376 			 * 5) The I/O associated with a CDB size or packet
14377 			 *    size error is removed from the queue and failed
14378 			 *    with EIO. Processing of the transport queues is
14379 			 *    continued.
14380 			 *
14381 			 * Note: there is no interface for canceling a runout
14382 			 * callback. To prevent the driver from detaching or
14383 			 * suspending while a runout is pending the driver
14384 			 * state is set to SD_STATE_RWAIT
14385 			 *
14386 			 * Note: using the scsi_init_pkt callback facility can
14387 			 * result in an I/O request persisting at the head of
14388 			 * the list which cannot be satisfied even after
14389 			 * multiple retries. In the future the driver may
14390 			 * implement some kind of maximum runout count before
14391 			 * failing an I/O.
14392 			 *
14393 			 * Note: the use of funcp below may seem superfluous,
14394 			 * but it helps warlock figure out the correct
14395 			 * initpkt function calls (see [s]sd.wlcmd).
14396 			 */
14397 			struct scsi_pkt	*pktp;
14398 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14399 
14400 			ASSERT(bp != un->un_rqs_bp);
14401 
14402 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14403 			switch ((*funcp)(bp, &pktp)) {
14404 			case  SD_PKT_ALLOC_SUCCESS:
14405 				xp->xb_pktp = pktp;
14406 				SD_TRACE(SD_LOG_IO_CORE, un,
14407 				    "sd_start_cmds: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14408 				    pktp);
14409 				goto got_pkt;
14410 
14411 			case SD_PKT_ALLOC_FAILURE:
14412 				/*
14413 				 * Temporary (hopefully) resource depletion.
14414 				 * Since retries and RQS commands always have a
14415 				 * scsi_pkt allocated, these cases should never
14416 				 * get here. So the only cases this needs to
14417 				 * handle is a bp from the waitq (which we put
14418 				 * back onto the waitq for sdrunout), or a bp
14419 				 * sent as an immed_bp (which we just fail).
14420 				 */
14421 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14422 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14423 
14424 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14425 
14426 				if (bp == immed_bp) {
14427 					/*
14428 					 * If SD_XB_DMA_FREED is clear, then
14429 					 * this is a failure to allocate a
14430 					 * scsi_pkt, and we must fail the
14431 					 * command.
14432 					 */
14433 					if ((xp->xb_pkt_flags &
14434 					    SD_XB_DMA_FREED) == 0) {
14435 						break;
14436 					}
14437 
14438 					/*
14439 					 * If this immediate command is NOT our
14440 					 * un_retry_bp, then we must fail it.
14441 					 */
14442 					if (bp != un->un_retry_bp) {
14443 						break;
14444 					}
14445 
14446 					/*
14447 					 * We get here if this cmd is our
14448 					 * un_retry_bp that was DMAFREED, but
14449 					 * scsi_init_pkt() failed to reallocate
14450 					 * DMA resources when we attempted to
14451 					 * retry it. This can happen when an
14452 					 * mpxio failover is in progress, but
14453 					 * we don't want to just fail the
14454 					 * command in this case.
14455 					 *
14456 					 * Use timeout(9F) to restart it after
14457 					 * a 100ms delay.  We don't want to
14458 					 * let sdrunout() restart it, because
14459 					 * sdrunout() is just supposed to start
14460 					 * commands that are sitting on the
14461 					 * wait queue.  The un_retry_bp stays
14462 					 * set until the command completes, but
14463 					 * sdrunout can be called many times
14464 					 * before that happens.  Since sdrunout
14465 					 * cannot tell if the un_retry_bp is
14466 					 * already in the transport, it could
14467 					 * end up calling scsi_transport() for
14468 					 * the un_retry_bp multiple times.
14469 					 *
14470 					 * Also: don't schedule the callback
14471 					 * if some other callback is already
14472 					 * pending.
14473 					 */
14474 					if (un->un_retry_statp == NULL) {
14475 						/*
14476 						 * Restore the kstat pointer to
14477 						 * keep kstat counts coherent
14478 						 * when we do retry the command.
14479 						 */
14480 						un->un_retry_statp =
14481 						    saved_statp;
14482 					}
14483 
14484 					if ((un->un_startstop_timeid == NULL) &&
14485 					    (un->un_retry_timeid == NULL) &&
14486 					    (un->un_direct_priority_timeid ==
14487 					    NULL)) {
14488 
14489 						un->un_retry_timeid =
14490 						    timeout(
14491 						    sd_start_retry_command,
14492 						    un, SD_RESTART_TIMEOUT);
14493 					}
14494 					goto exit;
14495 				}
14496 
14497 #else
14498 				if (bp == immed_bp) {
14499 					break;	/* Just fail the command */
14500 				}
14501 #endif
14502 
14503 				/* Add the buf back to the head of the waitq */
14504 				bp->av_forw = un->un_waitq_headp;
14505 				un->un_waitq_headp = bp;
14506 				if (un->un_waitq_tailp == NULL) {
14507 					un->un_waitq_tailp = bp;
14508 				}
14509 				goto exit;
14510 
14511 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14512 				/*
14513 				 * HBA DMA resource failure. Fail the command
14514 				 * and continue processing of the queues.
14515 				 */
14516 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14517 				    "sd_start_cmds: "
14518 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14519 				break;
14520 
14521 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14522 				/*
14523 				 * Note:x86: Partial DMA mapping not supported
14524 				 * for USCSI commands, and all the needed DMA
14525 				 * resources were not allocated.
14526 				 */
14527 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14528 				    "sd_start_cmds: "
14529 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14530 				break;
14531 
14532 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14533 				/*
14534 				 * Note:x86: Request cannot fit into CDB based
14535 				 * on lba and len.
14536 				 */
14537 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14538 				    "sd_start_cmds: "
14539 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14540 				break;
14541 
14542 			default:
14543 				/* Should NEVER get here! */
14544 				panic("scsi_initpkt error");
14545 				/*NOTREACHED*/
14546 			}
14547 
14548 			/*
14549 			 * Fatal error in allocating a scsi_pkt for this buf.
14550 			 * Update kstats & return the buf with an error code.
14551 			 * We must use sd_return_failed_command_no_restart() to
14552 			 * avoid a recursive call back into sd_start_cmds().
14553 			 * However this also means that we must keep processing
14554 			 * the waitq here in order to avoid stalling.
14555 			 */
14556 			if (statp == kstat_waitq_to_runq) {
14557 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14558 			}
14559 			sd_return_failed_command_no_restart(un, bp, EIO);
14560 			if (bp == immed_bp) {
14561 				/* immed_bp is gone by now, so clear this */
14562 				immed_bp = NULL;
14563 			}
14564 			continue;
14565 		}
14566 got_pkt:
14567 		if (bp == immed_bp) {
14568 			/* goto the head of the class.... */
14569 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14570 		}
14571 
14572 		un->un_ncmds_in_transport++;
14573 		SD_UPDATE_KSTATS(un, statp, bp);
14574 
14575 		/*
14576 		 * Call scsi_transport() to send the command to the target.
14577 		 * According to SCSA architecture, we must drop the mutex here
14578 		 * before calling scsi_transport() in order to avoid deadlock.
14579 		 * Note that the scsi_pkt's completion routine can be executed
14580 		 * (from interrupt context) even before the call to
14581 		 * scsi_transport() returns.
14582 		 */
14583 		SD_TRACE(SD_LOG_IO_CORE, un,
14584 		    "sd_start_cmds: calling scsi_transport()\n");
14585 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14586 
14587 		mutex_exit(SD_MUTEX(un));
14588 		rval = scsi_transport(xp->xb_pktp);
14589 		mutex_enter(SD_MUTEX(un));
14590 
14591 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14592 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14593 
14594 		switch (rval) {
14595 		case TRAN_ACCEPT:
14596 			/* Clear this with every pkt accepted by the HBA */
14597 			un->un_tran_fatal_count = 0;
14598 			break;	/* Success; try the next cmd (if any) */
14599 
14600 		case TRAN_BUSY:
14601 			un->un_ncmds_in_transport--;
14602 			ASSERT(un->un_ncmds_in_transport >= 0);
14603 
14604 			/*
14605 			 * Don't retry request sense, the sense data
14606 			 * is lost when another request is sent.
14607 			 * Free up the rqs buf and retry
14608 			 * the original failed cmd.  Update kstat.
14609 			 */
14610 			if (bp == un->un_rqs_bp) {
14611 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14612 				bp = sd_mark_rqs_idle(un, xp);
14613 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14614 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
14615 					kstat_waitq_enter);
14616 				goto exit;
14617 			}
14618 
14619 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14620 			/*
14621 			 * Free the DMA resources for the  scsi_pkt. This will
14622 			 * allow mpxio to select another path the next time
14623 			 * we call scsi_transport() with this scsi_pkt.
14624 			 * See sdintr() for the rationale behind this.
14625 			 */
14626 			if ((un->un_f_is_fibre == TRUE) &&
14627 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14628 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14629 				scsi_dmafree(xp->xb_pktp);
14630 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14631 			}
14632 #endif
14633 
14634 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14635 				/*
14636 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14637 				 * are for error recovery situations. These do
14638 				 * not use the normal command waitq, so if they
14639 				 * get a TRAN_BUSY we cannot put them back onto
14640 				 * the waitq for later retry. One possible
14641 				 * problem is that there could already be some
14642 				 * other command on un_retry_bp that is waiting
14643 				 * for this one to complete, so we would be
14644 				 * deadlocked if we put this command back onto
14645 				 * the waitq for later retry (since un_retry_bp
14646 				 * must complete before the driver gets back to
14647 				 * commands on the waitq).
14648 				 *
14649 				 * To avoid deadlock we must schedule a callback
14650 				 * that will restart this command after a set
14651 				 * interval.  This should keep retrying for as
14652 				 * long as the underlying transport keeps
14653 				 * returning TRAN_BUSY (just like for other
14654 				 * commands).  Use the same timeout interval as
14655 				 * for the ordinary TRAN_BUSY retry.
14656 				 */
14657 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14658 				    "sd_start_cmds: scsi_transport() returned "
14659 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14660 
14661 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14662 				un->un_direct_priority_timeid =
14663 				    timeout(sd_start_direct_priority_command,
14664 				    bp, SD_BSY_TIMEOUT / 500);
14665 
14666 				goto exit;
14667 			}
14668 
14669 			/*
14670 			 * For TRAN_BUSY, we want to reduce the throttle value,
14671 			 * unless we are retrying a command.
14672 			 */
14673 			if (bp != un->un_retry_bp) {
14674 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14675 			}
14676 
14677 			/*
14678 			 * Set up the bp to be tried again 10 ms later.
14679 			 * Note:x86: Is there a timeout value in the sd_lun
14680 			 * for this condition?
14681 			 */
14682 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
14683 				kstat_runq_back_to_waitq);
14684 			goto exit;
14685 
14686 		case TRAN_FATAL_ERROR:
14687 			un->un_tran_fatal_count++;
14688 			/* FALLTHRU */
14689 
14690 		case TRAN_BADPKT:
14691 		default:
14692 			un->un_ncmds_in_transport--;
14693 			ASSERT(un->un_ncmds_in_transport >= 0);
14694 
14695 			/*
14696 			 * If this is our REQUEST SENSE command with a
14697 			 * transport error, we must get back the pointers
14698 			 * to the original buf, and mark the REQUEST
14699 			 * SENSE command as "available".
14700 			 */
14701 			if (bp == un->un_rqs_bp) {
14702 				bp = sd_mark_rqs_idle(un, xp);
14703 				xp = SD_GET_XBUF(bp);
14704 			} else {
14705 				/*
14706 				 * Legacy behavior: do not update transport
14707 				 * error count for request sense commands.
14708 				 */
14709 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14710 			}
14711 
14712 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14713 			sd_print_transport_rejected_message(un, xp, rval);
14714 
14715 			/*
14716 			 * We must use sd_return_failed_command_no_restart() to
14717 			 * avoid a recursive call back into sd_start_cmds().
14718 			 * However this also means that we must keep processing
14719 			 * the waitq here in order to avoid stalling.
14720 			 */
14721 			sd_return_failed_command_no_restart(un, bp, EIO);
14722 
14723 			/*
14724 			 * Notify any threads waiting in sd_ddi_suspend() that
14725 			 * a command completion has occurred.
14726 			 */
14727 			if (un->un_state == SD_STATE_SUSPENDED) {
14728 				cv_broadcast(&un->un_disk_busy_cv);
14729 			}
14730 
14731 			if (bp == immed_bp) {
14732 				/* immed_bp is gone by now, so clear this */
14733 				immed_bp = NULL;
14734 			}
14735 			break;
14736 		}
14737 
14738 	} while (immed_bp == NULL);
14739 
14740 exit:
14741 	ASSERT(mutex_owned(SD_MUTEX(un)));
14742 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14743 }
14744 
14745 
14746 /*
14747  *    Function: sd_return_command
14748  *
14749  * Description: Returns a command to its originator (with or without an
14750  *		error).  Also starts commands waiting to be transported
14751  *		to the target.
14752  *
14753  *     Context: May be called from interrupt, kernel, or timeout context
14754  */
14755 
14756 static void
14757 sd_return_command(struct sd_lun *un, struct buf *bp)
14758 {
14759 	struct sd_xbuf *xp;
14760 #if defined(__i386) || defined(__amd64)
14761 	struct scsi_pkt *pktp;
14762 #endif
14763 
14764 	ASSERT(bp != NULL);
14765 	ASSERT(un != NULL);
14766 	ASSERT(mutex_owned(SD_MUTEX(un)));
14767 	ASSERT(bp != un->un_rqs_bp);
14768 	xp = SD_GET_XBUF(bp);
14769 	ASSERT(xp != NULL);
14770 
14771 #if defined(__i386) || defined(__amd64)
14772 	pktp = SD_GET_PKTP(bp);
14773 #endif
14774 
14775 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14776 
14777 #if defined(__i386) || defined(__amd64)
14778 	/*
14779 	 * Note:x86: check for the "sdrestart failed" case.
14780 	 */
14781 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
14782 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
14783 		(xp->xb_pktp->pkt_resid == 0)) {
14784 
14785 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
14786 			/*
14787 			 * Successfully set up next portion of cmd
14788 			 * transfer, try sending it
14789 			 */
14790 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14791 			    NULL, NULL, 0, (clock_t)0, NULL);
14792 			sd_start_cmds(un, NULL);
14793 			return;	/* Note:x86: need a return here? */
14794 		}
14795 	}
14796 #endif
14797 
14798 	/*
14799 	 * If this is the failfast bp, clear it from un_failfast_bp. This
14800 	 * can happen if upon being re-tried the failfast bp either
14801 	 * succeeded or encountered another error (possibly even a different
14802 	 * error than the one that precipitated the failfast state, but in
14803 	 * that case it would have had to exhaust retries as well). Regardless,
14804 	 * this should not occur whenever the instance is in the active
14805 	 * this should never occur while the instance is in the active
14806 	 */
14807 	if (bp == un->un_failfast_bp) {
14808 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14809 		un->un_failfast_bp = NULL;
14810 	}
14811 
14812 	/*
14813 	 * Clear the failfast state upon successful completion of ANY cmd.
14814 	 */
14815 	if (bp->b_error == 0) {
14816 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
14817 	}
14818 
14819 	/*
14820 	 * This is used if the command was retried one or more times. Show that
14821 	 * we are done with it, and allow processing of the waitq to resume.
14822 	 */
14823 	if (bp == un->un_retry_bp) {
14824 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14825 		    "sd_return_command: un:0x%p: "
14826 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14827 		un->un_retry_bp = NULL;
14828 		un->un_retry_statp = NULL;
14829 	}
14830 
14831 	SD_UPDATE_RDWR_STATS(un, bp);
14832 	SD_UPDATE_PARTITION_STATS(un, bp);
14833 
14834 	switch (un->un_state) {
14835 	case SD_STATE_SUSPENDED:
14836 		/*
14837 		 * Notify any threads waiting in sd_ddi_suspend() that
14838 		 * a command completion has occurred.
14839 		 */
14840 		cv_broadcast(&un->un_disk_busy_cv);
14841 		break;
14842 	default:
14843 		sd_start_cmds(un, NULL);
14844 		break;
14845 	}
14846 
14847 	/* Return this command up the iodone chain to its originator. */
14848 	mutex_exit(SD_MUTEX(un));
14849 
14850 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14851 	xp->xb_pktp = NULL;
14852 
14853 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14854 
14855 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14856 	mutex_enter(SD_MUTEX(un));
14857 
14858 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
14859 }
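
/*
 * Editor's sketch (illustrative only, not part of this driver): the
 * destroypkt/iodone dispatch in sd_return_command() indexes parallel
 * function-pointer tables, one slot per layer of the I/O chain.  The
 * miniature below (hypothetical names) shows the shape of that pattern.
 */
#if 0	/* sketch only; never compiled */
typedef void (*iodone_fn)(struct buf *);

static void layer_a_done(struct buf *bp) { /* e.g. release mappings */ }
static void layer_b_done(struct buf *bp) { /* e.g. finalize stats */ }

static iodone_fn iodone_map[] = {
	layer_a_done,		/* index 0: chosen at setup time */
	layer_b_done,		/* index 1 */
};

/* At completion, the index recorded in the xbuf selects the handler: */
/*	(*(iodone_map[xp->xb_chain_iodone]))(bp);			*/
#endif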
14860 
14861 
14862 /*
14863  *    Function: sd_return_failed_command
14864  *
14865  * Description: Command completion when an error occurred.
14866  *
14867  *     Context: May be called from interrupt context
14868  */
14869 
14870 static void
14871 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
14872 {
14873 	ASSERT(bp != NULL);
14874 	ASSERT(un != NULL);
14875 	ASSERT(mutex_owned(SD_MUTEX(un)));
14876 
14877 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14878 	    "sd_return_failed_command: entry\n");
14879 
14880 	/*
14881 	 * b_resid could already be nonzero due to a partial data
14882 	 * transfer, so do not change it here.
14883 	 */
14884 	SD_BIOERROR(bp, errcode);
14885 
14886 	sd_return_command(un, bp);
14887 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14888 	    "sd_return_failed_command: exit\n");
14889 }
14890 
14891 
14892 /*
14893  *    Function: sd_return_failed_command_no_restart
14894  *
14895  * Description: Same as sd_return_failed_command, but ensures that no
14896  *		call back into sd_start_cmds will be issued.
14897  *
14898  *     Context: May be called from interrupt context
14899  */
14900 
14901 static void
14902 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
14903 	int errcode)
14904 {
14905 	struct sd_xbuf *xp;
14906 
14907 	ASSERT(bp != NULL);
14908 	ASSERT(un != NULL);
14909 	ASSERT(mutex_owned(SD_MUTEX(un)));
14910 	xp = SD_GET_XBUF(bp);
14911 	ASSERT(xp != NULL);
14912 	ASSERT(errcode != 0);
14913 
14914 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14915 	    "sd_return_failed_command_no_restart: entry\n");
14916 
14917 	/*
14918 	 * b_resid could already be nonzero due to a partial data
14919 	 * transfer, so do not change it here.
14920 	 */
14921 	SD_BIOERROR(bp, errcode);
14922 
14923 	/*
14924 	 * If this is the failfast bp, clear it. This can happen if the
14925 	 * failfast bp encountered a fatal error when we attempted to
14926 	 * re-try it (such as a scsi_transport(9F) failure).  However
14927 	 * we should NOT be in an active failfast state if the failfast
14928 	 * bp is not NULL.
14929 	 */
14930 	if (bp == un->un_failfast_bp) {
14931 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14932 		un->un_failfast_bp = NULL;
14933 	}
14934 
14935 	if (bp == un->un_retry_bp) {
14936 		/*
14937 		 * This command was retried one or more times. Show that we are
14938 		 * done with it, and allow processing of the waitq to resume.
14939 		 */
14940 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14941 		    "sd_return_failed_command_no_restart: "
14942 		    "un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14943 		un->un_retry_bp = NULL;
14944 		un->un_retry_statp = NULL;
14945 	}
14946 
14947 	SD_UPDATE_RDWR_STATS(un, bp);
14948 	SD_UPDATE_PARTITION_STATS(un, bp);
14949 
14950 	mutex_exit(SD_MUTEX(un));
14951 
14952 	if (xp->xb_pktp != NULL) {
14953 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14954 		xp->xb_pktp = NULL;
14955 	}
14956 
14957 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14958 
14959 	mutex_enter(SD_MUTEX(un));
14960 
14961 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14962 	    "sd_return_failed_command_no_restart: exit\n");
14963 }
14964 
14965 
14966 /*
14967  *    Function: sd_retry_command
14968  *
14969  * Description: queue up a command for retry, or (optionally) fail it
14970  *		if retry counts are exhausted.
14971  *
14972  *   Arguments: un - Pointer to the sd_lun struct for the target.
14973  *
14974  *		bp - Pointer to the buf for the command to be retried.
14975  *
14976  *		retry_check_flag - Flag to see which (if any) of the retry
14977  *		   counts should be decremented/checked. If the indicated
14978  *		   retry count is exhausted, then the command will not be
14979  *		   retried; it will be failed instead. This should use a
14980  *		   value equal to one of the following:
14981  *
14982  *			SD_RETRIES_NOCHECK
14983  *			SD_RESD_RETRIES_STANDARD
14984  *			SD_RETRIES_STANDARD
14985  *
14986  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
14987  *		   if the check should be made to see of FLAG_ISOLATE is set
14988  *		   if the check should be made to see if FLAG_ISOLATE is set
14989  *		   not retried, it is simply failed.
14990  *
14991  *		user_funcp - Ptr to function to call before dispatching the
14992  *		   command. May be NULL if no action needs to be performed.
14993  *		   (Primarily intended for printing messages.)
14994  *
14995  *		user_arg - Optional argument to be passed along to
14996  *		   the user_funcp call.
14997  *
14998  *		failure_code - errno return code to set in the bp if the
14999  *		   command is going to be failed.
15000  *
15001  *		retry_delay - Retry delay interval in (clock_t) units. May
15002  *		   be zero, which indicates that the command should be retried
15003  *		   immediately (ie, without an intervening delay).
15004  *
15005  *		statp - Ptr to kstat function to be updated if the command
15006  *		   is queued for a delayed retry. May be NULL if no kstat
15007  *		   update is desired.
15008  *
15009  *     Context: May be called from interrupt context.
15010  */
15011 
15012 static void
15013 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15014 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15015 	code), void *user_arg, int failure_code,  clock_t retry_delay,
15016 	void (*statp)(kstat_io_t *))
15017 {
15018 	struct sd_xbuf	*xp;
15019 	struct scsi_pkt	*pktp;
15020 
15021 	ASSERT(un != NULL);
15022 	ASSERT(mutex_owned(SD_MUTEX(un)));
15023 	ASSERT(bp != NULL);
15024 	xp = SD_GET_XBUF(bp);
15025 	ASSERT(xp != NULL);
15026 	pktp = SD_GET_PKTP(bp);
15027 	ASSERT(pktp != NULL);
15028 
15029 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15030 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15031 
15032 	/*
15033 	 * If we are syncing or dumping, fail the command to avoid
15034 	 * recursively calling back into scsi_transport().
15035 	 */
15036 	if (ddi_in_panic()) {
15037 		goto fail_command_no_log;
15038 	}
15039 
15040 	/*
15041 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15042 	 * log an error and fail the command.
15043 	 */
15044 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15045 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15046 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15047 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15048 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15049 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15050 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15051 		goto fail_command;
15052 	}
15053 
15054 	/*
15055 	 * If we are suspended, then put the command onto the head of the
15056 	 * wait queue since we don't want to start more commands.
15057 	 */
15058 	switch (un->un_state) {
15059 	case SD_STATE_SUSPENDED:
15060 	case SD_STATE_DUMPING:
15061 		bp->av_forw = un->un_waitq_headp;
15062 		un->un_waitq_headp = bp;
15063 		if (un->un_waitq_tailp == NULL) {
15064 			un->un_waitq_tailp = bp;
15065 		}
15066 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15067 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15068 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15069 		return;
15070 	default:
15071 		break;
15072 	}
15073 
15074 	/*
15075 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15076 	 * is set; if it is then we do not want to retry the command.
15077 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15078 	 */
15079 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15080 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15081 			goto fail_command;
15082 		}
15083 	}
15084 
15085 
15086 	/*
15087 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15088 	 * command timeout or a selection timeout has occurred. This means
15089 	 * that we were unable to establish any kind of communication with
15090 	 * the target, and subsequent retries and/or commands are likely
15091 	 * to encounter similar results and take a long time to complete.
15092 	 *
15093 	 * If this is a failfast error condition, we need to update the
15094 	 * failfast state, even if this bp does not have B_FAILFAST set.
15095 	 */
15096 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15097 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15098 			ASSERT(un->un_failfast_bp == NULL);
15099 			/*
15100 			 * If we are already in the active failfast state, and
15101 			 * another failfast error condition has been detected,
15102 			 * then fail this command if it has B_FAILFAST set.
15103 			 * If B_FAILFAST is clear, then maintain the legacy
15104 			 * behavior of retrying heroically, even though this will
15105 			 * take a lot more time to fail the command.
15106 			 */
15107 			if (bp->b_flags & B_FAILFAST) {
15108 				goto fail_command;
15109 			}
15110 		} else {
15111 			/*
15112 			 * We're not in the active failfast state, but we
15113 			 * have a failfast error condition, so we must begin
15114 			 * transition to the next state. We do this regardless
15115 			 * of whether or not this bp has B_FAILFAST set.
15116 			 */
15117 			if (un->un_failfast_bp == NULL) {
15118 				/*
15119 				 * This is the first bp to meet a failfast
15120 				 * condition so save it on un_failfast_bp &
15121 				 * do normal retry processing. Do not enter
15122 				 * active failfast state yet. This marks
15123 				 * entry into the "failfast pending" state.
15124 				 */
15125 				un->un_failfast_bp = bp;
15126 
15127 			} else if (un->un_failfast_bp == bp) {
15128 				/*
15129 				 * This is the second time *this* bp has
15130 				 * encountered a failfast error condition,
15131 				 * so enter active failfast state & flush
15132 				 * queues as appropriate.
15133 				 */
15134 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15135 				un->un_failfast_bp = NULL;
15136 				sd_failfast_flushq(un);
15137 
15138 				/*
15139 				 * Fail this bp now if B_FAILFAST set;
15140 				 * otherwise continue with retries. (It would
15141 				 * be pretty ironic if this bp succeeded on a
15142 				 * subsequent retry after we just flushed all
15143 				 * the queues).
15144 				 */
15145 				if (bp->b_flags & B_FAILFAST) {
15146 					goto fail_command;
15147 				}
15148 
15149 #if !defined(lint) && !defined(__lint)
15150 			} else {
15151 				/*
15152 				 * If neither of the preceding conditionals
15153 				 * was true, it means that there is some
15154 				 * *other* bp that has met an initial failfast
15155 				 * condition and is currently either being
15156 				 * retried or is waiting to be retried. In
15157 				 * that case we should perform normal retry
15158 				 * processing on *this* bp, since there is a
15159 				 * chance that the current failfast condition
15160 				 * is transient and recoverable. If that does
15161 				 * not turn out to be the case, then retries
15162 				 * will be cleared when the wait queue is
15163 				 * flushed anyway.
15164 				 */
15165 #endif
15166 			}
15167 		}
15168 	} else {
15169 		/*
15170 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15171 		 * likely were able to at least establish some level of
15172 		 * communication with the target and subsequent commands
15173 		 * and/or retries are likely to get through to the target.
15174 		 * In this case we want to be aggressive about clearing
15175 		 * the failfast state. Note that this does not affect
15176 		 * the "failfast pending" condition.
15177 		 */
15178 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15179 	}
15180 
15181 
15182 	/*
15183 	 * Check the specified retry count to see if we can still do
15184 	 * any retries with this pkt before we should fail it.
15185 	 */
15186 	switch (retry_check_flag & SD_RETRIES_MASK) {
15187 	case SD_RETRIES_VICTIM:
15188 		/*
15189 		 * Check the victim retry count. If exhausted, then fall
15190 		 * thru & check against the standard retry count.
15191 		 */
15192 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15193 			/* Increment count & proceed with the retry */
15194 			xp->xb_victim_retry_count++;
15195 			break;
15196 		}
15197 		/* Victim retries exhausted, fall back to std. retries... */
15198 		/* FALLTHRU */
15199 
15200 	case SD_RETRIES_STANDARD:
15201 		if (xp->xb_retry_count >= un->un_retry_count) {
15202 			/* Retries exhausted, fail the command */
15203 			SD_TRACE(SD_LOG_IO_CORE, un,
15204 			    "sd_retry_command: retries exhausted!\n");
15205 			/*
15206 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15207 			 * commands with nonzero pkt_resid.
15208 			 */
15209 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15210 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15211 			    (pktp->pkt_resid != 0)) {
15212 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15213 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15214 					SD_UPDATE_B_RESID(bp, pktp);
15215 				}
15216 			}
15217 			goto fail_command;
15218 		}
15219 		xp->xb_retry_count++;
15220 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15221 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15222 		break;
15223 
15224 	case SD_RETRIES_UA:
15225 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15226 			/* Retries exhausted, fail the command */
15227 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15228 			    "Unit Attention retries exhausted. "
15229 			    "Check the target.\n");
15230 			goto fail_command;
15231 		}
15232 		xp->xb_ua_retry_count++;
15233 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15234 		    "sd_retry_command: retry count:%d\n",
15235 		    xp->xb_ua_retry_count);
15236 		break;
15237 
15238 	case SD_RETRIES_BUSY:
15239 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15240 			/* Retries exhausted, fail the command */
15241 			SD_TRACE(SD_LOG_IO_CORE, un,
15242 			    "sd_retry_command: retries exhausted!\n");
15243 			goto fail_command;
15244 		}
15245 		xp->xb_retry_count++;
15246 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15247 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15248 		break;
15249 
15250 	case SD_RETRIES_NOCHECK:
15251 	default:
15252 		/* No retry count to check. Just proceed with the retry */
15253 		break;
15254 	}
15255 
15256 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15257 
15258 	/*
15259 	 * If we were given a zero timeout, we must attempt to retry the
15260 	 * command immediately (ie, without a delay).
15261 	 */
15262 	if (retry_delay == 0) {
15263 		/*
15264 		 * Check some limiting conditions to see if we can actually
15265 		 * do the immediate retry.  If we cannot, then we must
15266 		 * fall back to queueing up a delayed retry.
15267 		 */
15268 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15269 			/*
15270 			 * We are at the throttle limit for the target,
15271 			 * fall back to delayed retry.
15272 			 */
15273 			retry_delay = SD_BSY_TIMEOUT;
15274 			statp = kstat_waitq_enter;
15275 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15276 			    "sd_retry_command: immed. retry hit "
15277 			    "throttle!\n");
15278 		} else {
15279 			/*
15280 			 * We're clear to proceed with the immediate retry.
15281 			 * First call the user-provided function (if any)
15282 			 */
15283 			if (user_funcp != NULL) {
15284 				(*user_funcp)(un, bp, user_arg,
15285 				    SD_IMMEDIATE_RETRY_ISSUED);
15286 #ifdef __lock_lint
15287 				sd_print_incomplete_msg(un, bp, user_arg,
15288 				    SD_IMMEDIATE_RETRY_ISSUED);
15289 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15290 				    SD_IMMEDIATE_RETRY_ISSUED);
15291 				sd_print_sense_failed_msg(un, bp, user_arg,
15292 				    SD_IMMEDIATE_RETRY_ISSUED);
15293 #endif
15294 			}
15295 
15296 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15297 			    "sd_retry_command: issuing immediate retry\n");
15298 
15299 			/*
15300 			 * Call sd_start_cmds() to transport the command to
15301 			 * the target.
15302 			 */
15303 			sd_start_cmds(un, bp);
15304 
15305 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15306 			    "sd_retry_command exit\n");
15307 			return;
15308 		}
15309 	}
15310 
15311 	/*
15312 	 * Set up to retry the command after a delay.
15313 	 * First call the user-provided function (if any)
15314 	 */
15315 	if (user_funcp != NULL) {
15316 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15317 	}
15318 
15319 	sd_set_retry_bp(un, bp, retry_delay, statp);
15320 
15321 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15322 	return;
15323 
15324 fail_command:
15325 
15326 	if (user_funcp != NULL) {
15327 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15328 	}
15329 
15330 fail_command_no_log:
15331 
15332 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15333 	    "sd_retry_command: returning failed command\n");
15334 
15335 	sd_return_failed_command(un, bp, failure_code);
15336 
15337 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15338 }
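
/*
 * Editor's sketch (illustrative only, not part of this driver): the
 * retry accounting in sd_retry_command() is a cascade of per-category
 * budgets, where an exhausted "victim" budget falls through to the
 * standard budget.  The struct and helper below are hypothetical
 * simplifications of that logic.
 */
#if 0	/* sketch only; never compiled */
struct retry_budget {
	int victim_used, victim_max;
	int std_used, std_max;
};

/* Returns 1 if another retry is permitted, 0 if the command must fail. */
static int
may_retry(struct retry_budget *rb, int check_victim)
{
	if (check_victim && (rb->victim_used < rb->victim_max)) {
		rb->victim_used++;	/* consume a victim retry */
		return (1);
	}
	/* Victim budget exhausted (or not requested): fall through. */
	if (rb->std_used < rb->std_max) {
		rb->std_used++;		/* consume a standard retry */
		return (1);
	}
	return (0);			/* all budgets exhausted */
}
#endif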
15339 
15340 
15341 /*
15342  *    Function: sd_set_retry_bp
15343  *
15344  * Description: Set up the given bp for retry.
15345  *
15346  *   Arguments: un - ptr to associated softstate
15347  *		bp - ptr to buf(9S) for the command
15348  *		retry_delay - time interval before issuing retry (may be 0)
15349  *		statp - optional pointer to kstat function
15350  *
15351  *     Context: May be called under interrupt context
15352  */
15353 
15354 static void
15355 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15356 	void (*statp)(kstat_io_t *))
15357 {
15358 	ASSERT(un != NULL);
15359 	ASSERT(mutex_owned(SD_MUTEX(un)));
15360 	ASSERT(bp != NULL);
15361 
15362 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15363 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15364 
15365 	/*
15366 	 * Indicate that the command is being retried. This will not allow any
15367 	 * other commands on the wait queue to be transported to the target
15368 	 * until this command has been completed (success or failure). The
15369 	 * "retry command" is not transported to the target until the given
15370 	 * time delay expires, unless the user specified a 0 retry_delay.
15371 	 *
15372 	 * Note: the timeout(9F) callback routine is what actually calls
15373 	 * sd_start_cmds() to transport the command, with the exception of a
15374 	 * zero retry_delay. The only current implementor of a zero retry delay
15375 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15376 	 */
15377 	if (un->un_retry_bp == NULL) {
15378 		ASSERT(un->un_retry_statp == NULL);
15379 		un->un_retry_bp = bp;
15380 
15381 		/*
15382 		 * If the user has not specified a delay the command should
15383 		 * be queued and no timeout should be scheduled.
15384 		 */
15385 		if (retry_delay == 0) {
15386 			/*
15387 			 * Save the kstat pointer that will be used in the
15388 			 * call to SD_UPDATE_KSTATS() below, so that
15389 			 * sd_start_cmds() can correctly decrement the waitq
15390 			 * count when it is time to transport this command.
15391 			 */
15392 			un->un_retry_statp = statp;
15393 			goto done;
15394 		}
15395 	}
15396 
15397 	if (un->un_retry_bp == bp) {
15398 		/*
15399 		 * Save the kstat pointer that will be used in the call to
15400 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15401 		 * correctly decrement the waitq count when it is time to
15402 		 * transport this command.
15403 		 */
15404 		un->un_retry_statp = statp;
15405 
15406 		/*
15407 		 * Schedule a timeout if:
15408 		 *   1) The user has specified a delay.
15409 		 *   2) There is not a START_STOP_UNIT callback pending.
15410 		 *
15411 		 * If no delay has been specified, then it is up to the caller
15412 		 * to ensure that IO processing continues without stalling.
15413 		 * Effectively, this means that the caller will issue the
15414 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15415 		 * callback does this after the START STOP UNIT command has
15416 		 * completed. In either of these cases we should not schedule
15417 		 * a timeout callback here.  Also don't schedule the timeout if
15418 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15419 		 */
15420 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15421 		    (un->un_direct_priority_timeid == NULL)) {
15422 			un->un_retry_timeid =
15423 			    timeout(sd_start_retry_command, un, retry_delay);
15424 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15425 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15426 			    " bp:0x%p un_retry_timeid:0x%p\n",
15427 			    un, bp, un->un_retry_timeid);
15428 		}
15429 	} else {
15430 		/*
15431 		 * We only get in here if there is already another command
15432 		 * waiting to be retried.  In this case, we just put the
15433 		 * given command onto the wait queue, so it can be transported
15434 		 * after the current retry command has completed.
15435 		 *
15436 		 * Also we have to make sure that if the command at the head
15437 		 * of the wait queue is the un_failfast_bp, that we do not
15438 		 * put ahead of it any other commands that are to be retried.
15439 		 */
15440 		if ((un->un_failfast_bp != NULL) &&
15441 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15442 			/*
15443 			 * Enqueue this command AFTER the first command on
15444 			 * the wait queue (which is also un_failfast_bp).
15445 			 */
15446 			bp->av_forw = un->un_waitq_headp->av_forw;
15447 			un->un_waitq_headp->av_forw = bp;
15448 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15449 				un->un_waitq_tailp = bp;
15450 			}
15451 		} else {
15452 			/* Enqueue this command at the head of the waitq. */
15453 			bp->av_forw = un->un_waitq_headp;
15454 			un->un_waitq_headp = bp;
15455 			if (un->un_waitq_tailp == NULL) {
15456 				un->un_waitq_tailp = bp;
15457 			}
15458 		}
15459 
15460 		if (statp == NULL) {
15461 			statp = kstat_waitq_enter;
15462 		}
15463 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15464 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15465 	}
15466 
15467 done:
15468 	if (statp != NULL) {
15469 		SD_UPDATE_KSTATS(un, statp, bp);
15470 	}
15471 
15472 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15473 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15474 }
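
/*
 * Editor's sketch (illustrative only, not part of this driver): the waitq
 * manipulation in sd_set_retry_bp() is head insertion on a singly linked
 * list threaded through buf.av_forw, with one twist: if the current head
 * is the failfast bp it must stay first, so the new bp goes in second.
 * Hypothetical helper capturing just that logic:
 */
#if 0	/* sketch only; never compiled */
static void
waitq_insert_near_head(struct buf **headp, struct buf **tailp,
    struct buf *failfast_bp, struct buf *bp)
{
	if ((failfast_bp != NULL) && (failfast_bp == *headp)) {
		/* Keep the failfast bp first; insert bp right after it. */
		bp->av_forw = (*headp)->av_forw;
		(*headp)->av_forw = bp;
		if (*headp == *tailp) {
			*tailp = bp;
		}
	} else {
		/* Plain head insertion. */
		bp->av_forw = *headp;
		*headp = bp;
		if (*tailp == NULL) {
			*tailp = bp;
		}
	}
}
#endif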
15475 
15476 
15477 /*
15478  *    Function: sd_start_retry_command
15479  *
15480  * Description: Start the command that has been waiting on the target's
15481  *		retry queue.  Called from timeout(9F) context after the
15482  *		retry delay interval has expired.
15483  *
15484  *   Arguments: arg - pointer to associated softstate for the device.
15485  *
15486  *     Context: timeout(9F) thread context.  May not sleep.
15487  */
15488 
15489 static void
15490 sd_start_retry_command(void *arg)
15491 {
15492 	struct sd_lun *un = arg;
15493 
15494 	ASSERT(un != NULL);
15495 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15496 
15497 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15498 	    "sd_start_retry_command: entry\n");
15499 
15500 	mutex_enter(SD_MUTEX(un));
15501 
15502 	un->un_retry_timeid = NULL;
15503 
15504 	if (un->un_retry_bp != NULL) {
15505 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15506 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15507 		    un, un->un_retry_bp);
15508 		sd_start_cmds(un, un->un_retry_bp);
15509 	}
15510 
15511 	mutex_exit(SD_MUTEX(un));
15512 
15513 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15514 	    "sd_start_retry_command: exit\n");
15515 }
15516 
15517 
15518 /*
15519  *    Function: sd_start_direct_priority_command
15520  *
15521  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15522  *		received TRAN_BUSY when we called scsi_transport() to send it
15523  *		to the underlying HBA. This function is called from timeout(9F)
15524  *		context after the delay interval has expired.
15525  *
15526  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15527  *
15528  *     Context: timeout(9F) thread context.  May not sleep.
15529  */
15530 
15531 static void
15532 sd_start_direct_priority_command(void *arg)
15533 {
15534 	struct buf	*priority_bp = arg;
15535 	struct sd_lun	*un;
15536 
15537 	ASSERT(priority_bp != NULL);
15538 	un = SD_GET_UN(priority_bp);
15539 	ASSERT(un != NULL);
15540 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15541 
15542 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15543 	    "sd_start_direct_priority_command: entry\n");
15544 
15545 	mutex_enter(SD_MUTEX(un));
15546 	un->un_direct_priority_timeid = NULL;
15547 	sd_start_cmds(un, priority_bp);
15548 	mutex_exit(SD_MUTEX(un));
15549 
15550 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15551 	    "sd_start_direct_priority_command: exit\n");
15552 }
15553 
15554 
15555 /*
15556  *    Function: sd_send_request_sense_command
15557  *
15558  * Description: Sends a REQUEST SENSE command to the target
15559  *
15560  *     Context: May be called from interrupt context.
15561  */
15562 
15563 static void
15564 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15565 	struct scsi_pkt *pktp)
15566 {
15567 	ASSERT(bp != NULL);
15568 	ASSERT(un != NULL);
15569 	ASSERT(mutex_owned(SD_MUTEX(un)));
15570 
15571 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15572 	    "entry: buf:0x%p\n", bp);
15573 
15574 	/*
15575 	 * If we are syncing or dumping, then fail the command to avoid a
15576 	 * recursive callback into scsi_transport(). Also fail the command
15577 	 * if we are suspended (legacy behavior).
15578 	 */
15579 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15580 	    (un->un_state == SD_STATE_DUMPING)) {
15581 		sd_return_failed_command(un, bp, EIO);
15582 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15583 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15584 		return;
15585 	}
15586 
15587 	/*
15588 	 * Retry the failed command and don't issue the request sense if:
15589 	 *    1) the sense buf is busy
15590 	 *    2) we have 1 or more outstanding commands on the target
15591 	 *    (the sense data will be cleared or invalidated anyway)
15592 	 *
15593 	 * Note: There could be an issue with not checking a retry limit here;
15594 	 * the problem is determining which retry limit to check.
15595 	 */
15596 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15597 		/* Don't retry if the command is flagged as non-retryable */
15598 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15599 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15600 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15601 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15602 			    "sd_send_request_sense_command: "
15603 			    "at full throttle, retrying exit\n");
15604 		} else {
15605 			sd_return_failed_command(un, bp, EIO);
15606 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15607 			    "sd_send_request_sense_command: "
15608 			    "at full throttle, non-retryable exit\n");
15609 		}
15610 		return;
15611 	}
15612 
15613 	sd_mark_rqs_busy(un, bp);
15614 	sd_start_cmds(un, un->un_rqs_bp);
15615 
15616 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15617 	    "sd_send_request_sense_command: exit\n");
15618 }
15619 
15620 
15621 /*
15622  *    Function: sd_mark_rqs_busy
15623  *
15624  * Description: Indicate that the request sense bp for this instance is
15625  *		in use.
15626  *
15627  *     Context: May be called under interrupt context
15628  */
15629 
15630 static void
15631 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15632 {
15633 	struct sd_xbuf	*sense_xp;
15634 
15635 	ASSERT(un != NULL);
15636 	ASSERT(bp != NULL);
15637 	ASSERT(mutex_owned(SD_MUTEX(un)));
15638 	ASSERT(un->un_sense_isbusy == 0);
15639 
15640 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15641 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15642 
15643 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15644 	ASSERT(sense_xp != NULL);
15645 
15646 	SD_INFO(SD_LOG_IO, un,
15647 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15648 
15649 	ASSERT(sense_xp->xb_pktp != NULL);
15650 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15651 	    == (FLAG_SENSING | FLAG_HEAD));
15652 
15653 	un->un_sense_isbusy = 1;
15654 	un->un_rqs_bp->b_resid = 0;
15655 	sense_xp->xb_pktp->pkt_resid  = 0;
15656 	sense_xp->xb_pktp->pkt_reason = 0;
15657 
15658 	/* So we can get back the bp at interrupt time! */
15659 	sense_xp->xb_sense_bp = bp;
15660 
15661 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15662 
15663 	/*
15664 	 * Mark this buf as awaiting sense data. (This is already set in
15665 	 * the pkt_flags for the RQS packet.)
15666 	 */
15667 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15668 
15669 	sense_xp->xb_retry_count	= 0;
15670 	sense_xp->xb_victim_retry_count = 0;
15671 	sense_xp->xb_ua_retry_count	= 0;
15672 	sense_xp->xb_dma_resid  = 0;
15673 
15674 	/* Clean up the fields for auto-request sense */
15675 	sense_xp->xb_sense_status = 0;
15676 	sense_xp->xb_sense_state  = 0;
15677 	sense_xp->xb_sense_resid  = 0;
15678 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15679 
15680 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15681 }
15682 
15683 
15684 /*
15685  *    Function: sd_mark_rqs_idle
15686  *
15687  * Description: SD_MUTEX must be held continuously through this routine
15688  *		to prevent reuse of the rqs struct before the caller can
15689  *		complete its processing.
15690  *
15691  * Return Code: Pointer to the RQS buf
15692  *
15693  *     Context: May be called under interrupt context
15694  */
15695 
15696 static struct buf *
15697 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15698 {
15699 	struct buf *bp;
15700 	ASSERT(un != NULL);
15701 	ASSERT(sense_xp != NULL);
15702 	ASSERT(mutex_owned(SD_MUTEX(un)));
15703 	ASSERT(un->un_sense_isbusy != 0);
15704 
15705 	un->un_sense_isbusy = 0;
15706 	bp = sense_xp->xb_sense_bp;
15707 	sense_xp->xb_sense_bp = NULL;
15708 
15709 	/* This pkt is no longer interested in getting sense data */
15710 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15711 
15712 	return (bp);
15713 }
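
/*
 * Editor's sketch (illustrative only, not part of this driver):
 * sd_mark_rqs_busy()/sd_mark_rqs_idle() form a simple ownership
 * handshake around the single per-instance REQUEST SENSE packet: a
 * busy flag guarded by SD_MUTEX plus a back-pointer to the command
 * awaiting sense data.  Condensed from the code above:
 */
#if 0	/* sketch only; never compiled */
	/* Acquire (caller holds the softstate mutex): */
	ASSERT(un->un_sense_isbusy == 0);	/* one owner at a time */
	un->un_sense_isbusy = 1;
	sense_xp->xb_sense_bp = bp;		/* remember original cmd */

	/* Release (same mutex, possibly much later, at interrupt time): */
	ASSERT(un->un_sense_isbusy != 0);
	un->un_sense_isbusy = 0;
	bp = sense_xp->xb_sense_bp;		/* hand original cmd back */
	sense_xp->xb_sense_bp = NULL;
#endif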
15714 
15715 
15716 
15717 /*
15718  *    Function: sd_alloc_rqs
15719  *
15720  * Description: Set up the unit to receive auto request sense data
15721  *
15722  * Return Code: DDI_SUCCESS or DDI_FAILURE
15723  *
15724  *     Context: Called under attach(9E) context
15725  */
15726 
15727 static int
15728 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15729 {
15730 	struct sd_xbuf *xp;
15731 
15732 	ASSERT(un != NULL);
15733 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15734 	ASSERT(un->un_rqs_bp == NULL);
15735 	ASSERT(un->un_rqs_pktp == NULL);
15736 
15737 	/*
15738 	 * First allocate the required buf and scsi_pkt structs, then set up
15739 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15740 	 */
15741 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15742 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15743 	if (un->un_rqs_bp == NULL) {
15744 		return (DDI_FAILURE);
15745 	}
15746 
15747 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15748 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15749 
15750 	if (un->un_rqs_pktp == NULL) {
15751 		sd_free_rqs(un);
15752 		return (DDI_FAILURE);
15753 	}
15754 
15755 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15756 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15757 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
15758 
15759 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15760 
15761 	/* Set up the other needed members in the ARQ scsi_pkt. */
15762 	un->un_rqs_pktp->pkt_comp   = sdintr;
15763 	un->un_rqs_pktp->pkt_time   = sd_io_time;
15764 	un->un_rqs_pktp->pkt_flags |=
15765 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15766 
15767 	/*
15768 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
15769 	 * provide any initpkt/destroypkt routines as we take care of
15770 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15771 	 */
15772 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15773 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15774 	xp->xb_pktp = un->un_rqs_pktp;
15775 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15776 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15777 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15778 
15779 	/*
15780 	 * Save the pointer to the request sense private bp so it can
15781 	 * be retrieved in sdintr.
15782 	 */
15783 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
15784 	ASSERT(un->un_rqs_bp->b_private == xp);
15785 
15786 	/*
15787 	 * See if the HBA supports auto-request sense for the specified
15788 	 * target/lun. If it does, then try to enable it (if not already
15789 	 * enabled).
15790 	 *
15791 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
15792 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
15793 	 * return success.  However, in both of these cases ARQ is always
15794 	 * enabled and scsi_ifgetcap will always return true. The best approach
15795 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
15796 	 *
15797 	 * The 3rd case is an HBA (adp) that always returns enabled on
15798 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
15799 	 * here is to issue a scsi_ifsetcap then a scsi_ifgetcap.
15800 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
15801 	 */
15802 
15803 	if (un->un_f_is_fibre == TRUE) {
15804 		un->un_f_arq_enabled = TRUE;
15805 	} else {
15806 #if defined(__i386) || defined(__amd64)
15807 		/*
15808 		 * Circumvent the Adaptec bug, remove this code when
15809 		 * the bug is fixed
15810 		 */
15811 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
15812 #endif
15813 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
15814 		case 0:
15815 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15816 				"sd_alloc_rqs: HBA supports ARQ\n");
15817 			/*
15818 			 * ARQ is supported by this HBA but currently is not
15819 			 * enabled. Attempt to enable it and if successful then
15820 			 * mark this instance as ARQ enabled.
15821 			 */
15822 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
15823 				== 1) {
15824 				/* Successfully enabled ARQ in the HBA */
15825 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15826 					"sd_alloc_rqs: ARQ enabled\n");
15827 				un->un_f_arq_enabled = TRUE;
15828 			} else {
15829 				/* Could not enable ARQ in the HBA */
15830 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15831 				"sd_alloc_rqs: failed ARQ enable\n");
15832 				un->un_f_arq_enabled = FALSE;
15833 			}
15834 			break;
15835 		case 1:
15836 			/*
15837 			 * ARQ is supported by this HBA and is already enabled.
15838 			 * Just mark ARQ as enabled for this instance.
15839 			 */
15840 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15841 				"sd_alloc_rqs: ARQ already enabled\n");
15842 			un->un_f_arq_enabled = TRUE;
15843 			break;
15844 		default:
15845 			/*
15846 			 * ARQ is not supported by this HBA; disable it for this
15847 			 * instance.
15848 			 */
15849 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15850 				"sd_alloc_rqs: HBA does not support ARQ\n");
15851 			un->un_f_arq_enabled = FALSE;
15852 			break;
15853 		}
15854 	}
15855 
15856 	return (DDI_SUCCESS);
15857 }
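
/*
 * Editor's sketch (illustrative only, not part of this driver): the ARQ
 * negotiation above is the standard scsi_ifgetcap(9F)/scsi_ifsetcap(9F)
 * sequence.  Condensed here under the assumption of a well-behaved HBA
 * (i.e. ignoring the ifp/sf/adp workarounds described above):
 */
#if 0	/* sketch only; never compiled */
	switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
	case 1:		/* supported and already enabled */
		un->un_f_arq_enabled = TRUE;
		break;
	case 0:		/* supported but disabled: attempt to enable */
		un->un_f_arq_enabled =
		    (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1) == 1)
		    ? TRUE : FALSE;
		break;
	default:	/* capability not supported by this HBA */
		un->un_f_arq_enabled = FALSE;
		break;
	}
#endif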
15858 
15859 
15860 /*
15861  *    Function: sd_free_rqs
15862  *
15863  * Description: Cleanup for the per-instance RQS command.
15864  *
15865  *     Context: Kernel thread context
15866  */
15867 
15868 static void
15869 sd_free_rqs(struct sd_lun *un)
15870 {
15871 	ASSERT(un != NULL);
15872 
15873 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15874 
15875 	/*
15876 	 * If consistent memory is bound to a scsi_pkt, the pkt
15877 	 * has to be destroyed *before* freeing the consistent memory.
15878 	 * Don't change the sequence of these operations:
15879 	 * scsi_destroy_pkt() might otherwise access memory that was
15880 	 * already freed by scsi_free_consistent_buf().
15881 	 */
15882 	if (un->un_rqs_pktp != NULL) {
15883 		scsi_destroy_pkt(un->un_rqs_pktp);
15884 		un->un_rqs_pktp = NULL;
15885 	}
15886 
15887 	if (un->un_rqs_bp != NULL) {
15888 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
15889 		scsi_free_consistent_buf(un->un_rqs_bp);
15890 		un->un_rqs_bp = NULL;
15891 	}
15892 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15893 }
15894 
15895 
15896 
15897 /*
15898  *    Function: sd_reduce_throttle
15899  *
15900  * Description: Reduces the maximum # of outstanding commands on a
15901  *		target to the current number of outstanding commands.
15902  *		Queues a timeout(9F) callback to restore the limit
15903  *		after a specified interval has elapsed.
15904  *		Typically used when we get a TRAN_BUSY return code
15905  *		back from scsi_transport().
15906  *
15907  *   Arguments: un - ptr to the sd_lun softstate struct
15908  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15909  *
15910  *     Context: May be called from interrupt context
15911  */
15912 
15913 static void
15914 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15915 {
15916 	ASSERT(un != NULL);
15917 	ASSERT(mutex_owned(SD_MUTEX(un)));
15918 	ASSERT(un->un_ncmds_in_transport >= 0);
15919 
15920 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15921 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15922 	    un, un->un_throttle, un->un_ncmds_in_transport);
15923 
15924 	if (un->un_throttle > 1) {
15925 		if (un->un_f_use_adaptive_throttle == TRUE) {
15926 			switch (throttle_type) {
15927 			case SD_THROTTLE_TRAN_BUSY:
15928 				if (un->un_busy_throttle == 0) {
15929 					un->un_busy_throttle = un->un_throttle;
15930 				}
15931 				break;
15932 			case SD_THROTTLE_QFULL:
15933 				un->un_busy_throttle = 0;
15934 				break;
15935 			default:
15936 				ASSERT(FALSE);
15937 			}
15938 
15939 			if (un->un_ncmds_in_transport > 0) {
15940 				un->un_throttle = un->un_ncmds_in_transport;
15941 			}
15942 
15943 		} else {
15944 			if (un->un_ncmds_in_transport == 0) {
15945 				un->un_throttle = 1;
15946 			} else {
15947 				un->un_throttle = un->un_ncmds_in_transport;
15948 			}
15949 		}
15950 	}
15951 
15952 	/* Reschedule the timeout if none is currently active */
15953 	if (un->un_reset_throttle_timeid == NULL) {
15954 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15955 		    un, SD_THROTTLE_RESET_INTERVAL);
15956 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15957 		    "sd_reduce_throttle: timeout scheduled!\n");
15958 	}
15959 
15960 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15961 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15962 }
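
/*
 * Editor's sketch (illustrative only, not part of this driver): with
 * adaptive throttling, the reduction above clamps the throttle to the
 * number of commands currently in flight, and for TRAN_BUSY it also
 * records the pre-busy value so sd_restore_throttle() can snap straight
 * back to it.  Condensed:
 */
#if 0	/* sketch only; never compiled */
	if ((type == SD_THROTTLE_TRAN_BUSY) && (un->un_busy_throttle == 0)) {
		un->un_busy_throttle = un->un_throttle;	/* save old cap */
	} else if (type == SD_THROTTLE_QFULL) {
		un->un_busy_throttle = 0;	/* force incremental restore */
	}
	if (un->un_ncmds_in_transport > 0) {
		un->un_throttle = un->un_ncmds_in_transport;
	}
#endif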
15963 
15964 
15965 
15966 /*
15967  *    Function: sd_restore_throttle
15968  *
15969  * Description: Callback function for timeout(9F).  Resets the current
15970  *		value of un->un_throttle to its default.
15971  *
15972  *   Arguments: arg - pointer to associated softstate for the device.
15973  *
15974  *     Context: May be called from interrupt context
15975  */
15976 
15977 static void
15978 sd_restore_throttle(void *arg)
15979 {
15980 	struct sd_lun	*un = arg;
15981 
15982 	ASSERT(un != NULL);
15983 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15984 
15985 	mutex_enter(SD_MUTEX(un));
15986 
15987 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15988 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15989 
15990 	un->un_reset_throttle_timeid = NULL;
15991 
15992 	if (un->un_f_use_adaptive_throttle == TRUE) {
15993 		/*
15994 		 * If un_busy_throttle is nonzero, then it contains the
15995 		 * value that un_throttle was when we got a TRAN_BUSY back
15996 		 * from scsi_transport(). We want to revert back to this
15997 		 * value.
15998 		 *
15999 		 * In the QFULL case, the throttle limit will incrementally
16000 		 * increase until it reaches max throttle.
16001 		 */
16002 		if (un->un_busy_throttle > 0) {
16003 			un->un_throttle = un->un_busy_throttle;
16004 			un->un_busy_throttle = 0;
16005 		} else {
16006 			/*
16007 			 * Increase the throttle by 10% to open the gate
16008 			 * slowly; schedule another restore if the saved
16009 			 * throttle has not yet been reached.
16010 			 */
16011 			short throttle;
16012 			if (sd_qfull_throttle_enable) {
16013 				throttle = un->un_throttle +
16014 				    max((un->un_throttle / 10), 1);
16015 				un->un_throttle =
16016 				    (throttle < un->un_saved_throttle) ?
16017 				    throttle : un->un_saved_throttle;
16018 				if (un->un_throttle < un->un_saved_throttle) {
16019 				un->un_reset_throttle_timeid =
16020 				    timeout(sd_restore_throttle, un,
16021 				    SD_QFULL_THROTTLE_RESET_INTERVAL);
16022 				}
16023 			}
16024 		}
16025 
16026 		/*
16027 		 * If un_throttle has fallen below the low-water mark, we
16028 		 * restore the maximum value here (and allow it to ratchet
16029 		 * down again if necessary).
16030 		 */
16031 		if (un->un_throttle < un->un_min_throttle) {
16032 			un->un_throttle = un->un_saved_throttle;
16033 		}
16034 	} else {
16035 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16036 		    "restoring limit from 0x%x to 0x%x\n",
16037 		    un->un_throttle, un->un_saved_throttle);
16038 		un->un_throttle = un->un_saved_throttle;
16039 	}
16040 
16041 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16042 	    "sd_restore_throttle: calling sd_start_cmds!\n");
16043 
16044 	sd_start_cmds(un, NULL);
16045 
16046 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16047 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16048 	    un, un->un_throttle);
16049 
16050 	mutex_exit(SD_MUTEX(un));
16051 
16052 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16053 }
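
/*
 * Editor's sketch (illustrative only, not part of this driver): in the
 * QFULL case the restore path above opens the throttle by max(10%, 1)
 * per interval until the saved value is reached.  For example, with a
 * current throttle of 30 and a saved throttle of 64, successive restores
 * yield 33, 36, 39, 42, 46, 50, 55, 60, and finally 64 (capped).  The
 * core step, condensed:
 */
#if 0	/* sketch only; never compiled */
	short throttle = un->un_throttle + max((un->un_throttle / 10), 1);
	un->un_throttle = (throttle < un->un_saved_throttle) ?
	    throttle : un->un_saved_throttle;
#endif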
16054 
16055 /*
16056  *    Function: sdrunout
16057  *
16058  * Description: Callback routine for scsi_init_pkt when a resource allocation
16059  *		fails.
16060  *
16061  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16062  *		soft state instance.
16063  *
16064  * Return Code: The scsi_init_pkt routine allows for the callback function to
16065  *		return a 0 indicating the callback should be rescheduled or a 1
16066  *		indicating not to reschedule. This routine always returns 1
16067  *		because the driver always provides a callback function to
16068  *		scsi_init_pkt. This results in a callback always being scheduled
16069  *		(via the scsi_init_pkt callback implementation) if a resource
16070  *		failure occurs.
16071  *
16072  *     Context: This callback function may not block or call routines that block
16073  *
16074  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16075  *		request persisting at the head of the list which cannot be
16076  *		satisfied even after multiple retries. In the future the driver
16077  *		may implement some time of maximum runout count before failing
16078  *		may implement some type of maximum runout count before failing
16079  */
16080 
16081 static int
16082 sdrunout(caddr_t arg)
16083 {
16084 	struct sd_lun	*un = (struct sd_lun *)arg;
16085 
16086 	ASSERT(un != NULL);
16087 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16088 
16089 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16090 
16091 	mutex_enter(SD_MUTEX(un));
16092 	sd_start_cmds(un, NULL);
16093 	mutex_exit(SD_MUTEX(un));
16094 	/*
16095 	 * This callback routine always returns 1 (i.e. do not reschedule)
16096 	 * because we always specify sdrunout as the callback handler for
16097 	 * scsi_init_pkt inside the call to sd_start_cmds.
16098 	 */
16099 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16100 	return (1);
16101 }
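
/*
 * Editor's sketch (illustrative only, not part of this driver): sdrunout()
 * is the resource callback passed to scsi_init_pkt(9F), which is why
 * returning 1 (do not reschedule) is safe here.  A typical registration
 * looks like the following (the cmdlen/statuslen/flags values shown are
 * placeholders, not the driver's actual parameters):
 */
#if 0	/* sketch only; never compiled */
	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, bp,
	    CDB_GROUP1, 1, 0, 0, sdrunout, (caddr_t)un);
	if (pktp == NULL) {
		/*
		 * Allocation failed; scsi_init_pkt has queued sdrunout()
		 * to fire when resources free up, and sdrunout() will
		 * then restart the waitq via sd_start_cmds().
		 */
	}
#endif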
16102 
16103 
16104 /*
16105  *    Function: sdintr
16106  *
16107  * Description: Completion callback routine for scsi_pkt(9S) structs
16108  *		sent to the HBA driver via scsi_transport(9F).
16109  *
16110  *     Context: Interrupt context
16111  */
16112 
16113 static void
16114 sdintr(struct scsi_pkt *pktp)
16115 {
16116 	struct buf	*bp;
16117 	struct sd_xbuf	*xp;
16118 	struct sd_lun	*un;
16119 
16120 	ASSERT(pktp != NULL);
16121 	bp = (struct buf *)pktp->pkt_private;
16122 	ASSERT(bp != NULL);
16123 	xp = SD_GET_XBUF(bp);
16124 	ASSERT(xp != NULL);
16125 	ASSERT(xp->xb_pktp != NULL);
16126 	un = SD_GET_UN(bp);
16127 	ASSERT(un != NULL);
16128 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16129 
16130 #ifdef SD_FAULT_INJECTION
16131 
16132 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16133 	/* SD FaultInjection */
16134 	sd_faultinjection(pktp);
16135 
16136 #endif /* SD_FAULT_INJECTION */
16137 
16138 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16139 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16140 
16141 	mutex_enter(SD_MUTEX(un));
16142 
16143 	/* Reduce the count of the #commands currently in transport */
16144 	un->un_ncmds_in_transport--;
16145 	ASSERT(un->un_ncmds_in_transport >= 0);
16146 
16147 	/* Increment counter to indicate that the callback routine is active */
16148 	un->un_in_callback++;
16149 
16150 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16151 
16152 #ifdef	SDDEBUG
16153 	if (bp == un->un_retry_bp) {
16154 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16155 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16156 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16157 	}
16158 #endif
16159 
16160 	/*
16161 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16162 	 */
16163 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16164 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16165 			    "Device is gone\n");
16166 		sd_return_failed_command(un, bp, EIO);
16167 		goto exit;
16168 	}
16169 
16170 	/*
16171 	 * First see if the pkt has auto-request sense data with it....
16172 	 * Look at the packet state first so we don't take a performance
16173 	 * hit looking at the arq enabled flag unless absolutely necessary.
16174 	 */
16175 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16176 	    (un->un_f_arq_enabled == TRUE)) {
16177 		/*
16178 		 * The HBA did an auto request sense for this command so check
16179 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16180 		 * driver command that should not be retried.
16181 		 */
16182 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16183 			/*
16184 			 * Save the relevant sense info into the xp for the
16185 			 * original cmd.
16186 			 */
16187 			struct scsi_arq_status *asp;
16188 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16189 			xp->xb_sense_status =
16190 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16191 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16192 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16193 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16194 			    min(sizeof (struct scsi_extended_sense),
16195 			    SENSE_LENGTH));
16196 
16197 			/* fail the command */
16198 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16199 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16200 			sd_return_failed_command(un, bp, EIO);
16201 			goto exit;
16202 		}
16203 
16204 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16205 		/*
16206 		 * We want to either retry or fail this command, so free
16207 		 * the DMA resources here.  If we retry the command then
16208 		 * the DMA resources will be reallocated in sd_start_cmds().
16209 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16210 		 * causes the *entire* transfer to start over again from the
16211 		 * beginning of the request, even for PARTIAL chunks that
16212 		 * have already transferred successfully.
16213 		 */
16214 		if ((un->un_f_is_fibre == TRUE) &&
16215 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16216 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16217 			scsi_dmafree(pktp);
16218 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16219 		}
16220 #endif
16221 
16222 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16223 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16224 
16225 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16226 		goto exit;
16227 	}
16228 
16229 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16230 	if (pktp->pkt_flags & FLAG_SENSING)  {
16231 		/* This pktp is from the unit's REQUEST_SENSE command */
16232 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16233 		    "sdintr: sd_handle_request_sense\n");
16234 		sd_handle_request_sense(un, bp, xp, pktp);
16235 		goto exit;
16236 	}
16237 
16238 	/*
16239 	 * Check to see if the command successfully completed as requested;
16240 	 * this is the most common case (and also the hot performance path).
16241 	 *
16242 	 * Requirements for successful completion are:
16243 	 * pkt_reason is CMD_CMPLT and packet status is status good.
16244 	 * In addition:
16245 	 * - A residual of zero indicates successful completion no matter what
16246 	 *   the command is.
16247 	 * - If the residual is not zero and the command is not a read or
16248 	 *   write, then it's still defined as successful completion. In other
16249 	 *   words, if the command is a read or write the residual must be
16250 	 *   zero for successful completion.
16251 	 * - If the residual is not zero and the command is a read or
16252 	 *   write, and it's a USCSICMD, then it's still defined as
16253 	 *   successful completion.
16254 	 */
16255 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16256 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16257 
16258 		/*
16259 		 * Since this command is returned with a good status, we
16260 		 * can reset the count for Sonoma failover.
16261 		 */
16262 		un->un_sonoma_failure_count = 0;
16263 
16264 		/*
16265 		 * Return all USCSI commands on good status
16266 		 */
16267 		if (pktp->pkt_resid == 0) {
16268 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16269 			    "sdintr: returning command for resid == 0\n");
16270 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16271 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16272 			SD_UPDATE_B_RESID(bp, pktp);
16273 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16274 			    "sdintr: returning command for resid != 0\n");
16275 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16276 			SD_UPDATE_B_RESID(bp, pktp);
16277 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16278 				"sdintr: returning uscsi command\n");
16279 		} else {
16280 			goto not_successful;
16281 		}
16282 		sd_return_command(un, bp);
16283 
16284 		/*
16285 		 * Decrement counter to indicate that the callback routine
16286 		 * is done.
16287 		 */
16288 		un->un_in_callback--;
16289 		ASSERT(un->un_in_callback >= 0);
16290 		mutex_exit(SD_MUTEX(un));
16291 
16292 		return;
16293 	}
16294 
16295 not_successful:
16296 
16297 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16298 	/*
16299 	 * The following is based upon knowledge of the underlying transport
16300 	 * and its use of DMA resources.  This code should be removed when
16301 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16302 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16303 	 * and sd_start_cmds().
16304 	 *
16305 	 * Free any DMA resources associated with this command if there
16306 	 * is a chance it could be retried or enqueued for later retry.
16307 	 * If we keep the DMA binding then mpxio cannot reissue the
16308 	 * command on another path whenever a path failure occurs.
16309 	 *
16310 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16311 	 * causes the *entire* transfer to start over again from the
16312 	 * beginning of the request, even for PARTIAL chunks that
16313 	 * have already transferred successfully.
16314 	 *
16315 	 * This is only done for non-uscsi commands (and also skipped for the
16316 	 * driver's internal RQS command). Also just do this for Fibre Channel
16317 	 * devices as these are the only ones that support mpxio.
16318 	 */
16319 	if ((un->un_f_is_fibre == TRUE) &&
16320 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16321 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16322 		scsi_dmafree(pktp);
16323 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16324 	}
16325 #endif
16326 
16327 	/*
16328 	 * The command did not successfully complete as requested so check
16329 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16330 	 * driver command that should not be retried so just return. If
16331 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16332 	 */
16333 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16334 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16335 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16336 		/*
16337 		 * Issue a request sense if a check condition caused the error
16338 		 * (we handle the auto request sense case above), otherwise
16339 		 * just fail the command.
16340 		 */
16341 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16342 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16343 			sd_send_request_sense_command(un, bp, pktp);
16344 		} else {
16345 			sd_return_failed_command(un, bp, EIO);
16346 		}
16347 		goto exit;
16348 	}
16349 
16350 	/*
16351 	 * The command did not successfully complete as requested so process
16352 	 * the error, retry, and/or attempt recovery.
16353 	 */
16354 	switch (pktp->pkt_reason) {
16355 	case CMD_CMPLT:
16356 		switch (SD_GET_PKT_STATUS(pktp)) {
16357 		case STATUS_GOOD:
16358 			/*
16359 			 * The command completed successfully with a non-zero
16360 			 * residual
16361 			 */
16362 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16363 			    "sdintr: STATUS_GOOD \n");
16364 			sd_pkt_status_good(un, bp, xp, pktp);
16365 			break;
16366 
16367 		case STATUS_CHECK:
16368 		case STATUS_TERMINATED:
16369 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16370 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16371 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16372 			break;
16373 
16374 		case STATUS_BUSY:
16375 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16376 			    "sdintr: STATUS_BUSY\n");
16377 			sd_pkt_status_busy(un, bp, xp, pktp);
16378 			break;
16379 
16380 		case STATUS_RESERVATION_CONFLICT:
16381 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16382 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16383 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16384 			break;
16385 
16386 		case STATUS_QFULL:
16387 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16388 			    "sdintr: STATUS_QFULL\n");
16389 			sd_pkt_status_qfull(un, bp, xp, pktp);
16390 			break;
16391 
16392 		case STATUS_MET:
16393 		case STATUS_INTERMEDIATE:
16394 		case STATUS_SCSI2:
16395 		case STATUS_INTERMEDIATE_MET:
16396 		case STATUS_ACA_ACTIVE:
16397 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16398 			    "Unexpected SCSI status received: 0x%x\n",
16399 			    SD_GET_PKT_STATUS(pktp));
16400 			sd_return_failed_command(un, bp, EIO);
16401 			break;
16402 
16403 		default:
16404 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16405 			    "Invalid SCSI status received: 0x%x\n",
16406 			    SD_GET_PKT_STATUS(pktp));
16407 			sd_return_failed_command(un, bp, EIO);
16408 			break;
16409 
16410 		}
16411 		break;
16412 
16413 	case CMD_INCOMPLETE:
16414 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16415 		    "sdintr: CMD_INCOMPLETE\n");
16416 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16417 		break;
16418 	case CMD_TRAN_ERR:
16419 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16420 		    "sdintr: CMD_TRAN_ERR\n");
16421 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16422 		break;
16423 	case CMD_RESET:
16424 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16425 		    "sdintr: CMD_RESET \n");
16426 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16427 		break;
16428 	case CMD_ABORTED:
16429 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16430 		    "sdintr: CMD_ABORTED \n");
16431 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16432 		break;
16433 	case CMD_TIMEOUT:
16434 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16435 		    "sdintr: CMD_TIMEOUT\n");
16436 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16437 		break;
16438 	case CMD_UNX_BUS_FREE:
16439 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16440 		    "sdintr: CMD_UNX_BUS_FREE \n");
16441 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16442 		break;
16443 	case CMD_TAG_REJECT:
16444 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16445 		    "sdintr: CMD_TAG_REJECT\n");
16446 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16447 		break;
16448 	default:
16449 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16450 		    "sdintr: default\n");
16451 		sd_pkt_reason_default(un, bp, xp, pktp);
16452 		break;
16453 	}
16454 
16455 exit:
16456 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16457 
16458 	/* Decrement counter to indicate that the callback routine is done. */
16459 	un->un_in_callback--;
16460 	ASSERT(un->un_in_callback >= 0);
16461 
16462 	/*
16463 	 * At this point, the pkt has been dispatched, i.e., it is either
16464 	 * being re-tried or has been returned to its caller and should
16465 	 * not be referenced.
16466 	 */
16467 
16468 	mutex_exit(SD_MUTEX(un));
16469 }
16470 
16471 
16472 /*
16473  *    Function: sd_print_incomplete_msg
16474  *
16475  * Description: Prints the error message for a CMD_INCOMPLETE error.
16476  *
16477  *   Arguments: un - ptr to associated softstate for the device.
16478  *		bp - ptr to the buf(9S) for the command.
16479  *		arg - message string ptr
16480  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16481  *			or SD_NO_RETRY_ISSUED.
16482  *
16483  *     Context: May be called under interrupt context
16484  */
16485 
16486 static void
16487 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16488 {
16489 	struct scsi_pkt	*pktp;
16490 	char	*msgp;
16491 	char	*cmdp = arg;
16492 
16493 	ASSERT(un != NULL);
16494 	ASSERT(mutex_owned(SD_MUTEX(un)));
16495 	ASSERT(bp != NULL);
16496 	ASSERT(arg != NULL);
16497 	pktp = SD_GET_PKTP(bp);
16498 	ASSERT(pktp != NULL);
16499 
16500 	switch (code) {
16501 	case SD_DELAYED_RETRY_ISSUED:
16502 	case SD_IMMEDIATE_RETRY_ISSUED:
16503 		msgp = "retrying";
16504 		break;
16505 	case SD_NO_RETRY_ISSUED:
16506 	default:
16507 		msgp = "giving up";
16508 		break;
16509 	}
16510 
16511 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16512 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16513 		    "incomplete %s- %s\n", cmdp, msgp);
16514 	}
16515 }
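
/*
 * Illustrative sketch (assumed shape, not a verbatim copy of the retry
 * machinery): message callbacks such as sd_print_incomplete_msg() are
 * invoked by sd_retry_command() with a code describing the disposition
 * that was chosen, roughly as follows:
 *
 *	void (*msgfunc)(struct sd_lun *, struct buf *, void *, int);
 *
 *	if (msgfunc != NULL) {
 *		(*msgfunc)(un, bp, user_arg, retry_issued ?
 *		    SD_IMMEDIATE_RETRY_ISSUED : SD_NO_RETRY_ISSUED);
 *	}
 *
 * Here retry_issued is a hypothetical name for whatever state the
 * caller uses to record that a retry was actually scheduled.
 */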
16516 
16517 
16518 
16519 /*
16520  *    Function: sd_pkt_status_good
16521  *
16522  * Description: Processing for a STATUS_GOOD code in pkt_status.
16523  *
16524  *     Context: May be called under interrupt context
16525  */
16526 
16527 static void
16528 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16529 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16530 {
16531 	char	*cmdp;
16532 
16533 	ASSERT(un != NULL);
16534 	ASSERT(mutex_owned(SD_MUTEX(un)));
16535 	ASSERT(bp != NULL);
16536 	ASSERT(xp != NULL);
16537 	ASSERT(pktp != NULL);
16538 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16539 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16540 	ASSERT(pktp->pkt_resid != 0);
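
	/*
	 * Note: this routine is only entered from sdintr() for commands
	 * that completed with STATUS_GOOD but a non-zero residual (see
	 * the asserts above); a success with zero resid is returned to
	 * the caller directly without coming through here.
	 */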
16541 
16542 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16543 
16544 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16545 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16546 	case SCMD_READ:
16547 		cmdp = "read";
16548 		break;
16549 	case SCMD_WRITE:
16550 		cmdp = "write";
16551 		break;
16552 	default:
16553 		SD_UPDATE_B_RESID(bp, pktp);
16554 		sd_return_command(un, bp);
16555 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16556 		return;
16557 	}
16558 
16559 	/*
16560 	 * See if we can retry the read/write, preferably immediately.
16561 	 * If retries are exhausted, then sd_retry_command() will update
16562 	 * the b_resid count.
16563 	 */
16564 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16565 	    cmdp, EIO, (clock_t)0, NULL);
16566 
16567 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16568 }
16569 
16570 
16571 
16572 
16573 
16574 /*
16575  *    Function: sd_handle_request_sense
16576  *
16577  * Description: Processing for non-auto Request Sense command.
16578  *
16579  *   Arguments: un - ptr to associated softstate
16580  *		sense_bp - ptr to buf(9S) for the RQS command
16581  *		sense_xp - ptr to the sd_xbuf for the RQS command
16582  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16583  *
16584  *     Context: May be called under interrupt context
16585  */
16586 
16587 static void
16588 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16589 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16590 {
16591 	struct buf	*cmd_bp;	/* buf for the original command */
16592 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16593 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16594 
16595 	ASSERT(un != NULL);
16596 	ASSERT(mutex_owned(SD_MUTEX(un)));
16597 	ASSERT(sense_bp != NULL);
16598 	ASSERT(sense_xp != NULL);
16599 	ASSERT(sense_pktp != NULL);
16600 
16601 	/*
16602 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16603 	 * RQS command and not the original command.
16604 	 */
16605 	ASSERT(sense_pktp == un->un_rqs_pktp);
16606 	ASSERT(sense_bp   == un->un_rqs_bp);
16607 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16608 	    (FLAG_SENSING | FLAG_HEAD));
16609 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16610 	    FLAG_SENSING) == FLAG_SENSING);
16611 
16612 	/* These are the bp, xp, and pktp for the original command */
16613 	cmd_bp = sense_xp->xb_sense_bp;
16614 	cmd_xp = SD_GET_XBUF(cmd_bp);
16615 	cmd_pktp = SD_GET_PKTP(cmd_bp);
16616 
16617 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16618 		/*
16619 		 * The REQUEST SENSE command failed.  Release the REQUEST
16620 		 * SENSE command for re-use, get back the bp for the original
16621 		 * command, and attempt to re-try the original command if
16622 		 * FLAG_DIAGNOSE is not set in the original packet.
16623 		 */
16624 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16625 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16626 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16627 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16628 			    NULL, NULL, EIO, (clock_t)0, NULL);
16629 			return;
16630 		}
16631 	}
16632 
16633 	/*
16634 	 * Save the relevant sense info into the xp for the original cmd.
16635 	 *
16636 	 * Note: if the request sense failed the state info will be zero
16637 	 * as set in sd_mark_rqs_busy()
16638 	 */
16639 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16640 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
16641 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
16642 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
16643 
16644 	/*
16645 	 *  Free up the RQS command....
16646 	 *  NOTE:
16647 	 *	Must do this BEFORE calling sd_validate_sense_data!
16648 	 *	sd_validate_sense_data may return the original command in
16649 	 *	which case the pkt will be freed and the flags can no
16650 	 *	longer be touched.
16651 	 *	SD_MUTEX is held through this process until the command
16652 	 *	is dispatched based upon the sense data, so there are
16653 	 *	no race conditions.
16654 	 */
16655 	(void) sd_mark_rqs_idle(un, sense_xp);
16656 
16657 	/*
16658 	 * For a retryable command, see if we have valid sense data; if so,
16659 	 * turn it over to sd_decode_sense() to figure out the right course
16660 	 * of action.  Just fail a non-retryable command.
16661 	 */
16662 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16663 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
16664 		    SD_SENSE_DATA_IS_VALID) {
16665 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16666 		}
16667 	} else {
16668 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16669 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16670 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16671 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16672 		sd_return_failed_command(un, cmd_bp, EIO);
16673 	}
16674 }
16675 
16676 
16677 
16678 
16679 /*
16680  *    Function: sd_handle_auto_request_sense
16681  *
16682  * Description: Processing for auto-request sense information.
16683  *
16684  *   Arguments: un - ptr to associated softstate
16685  *		bp - ptr to buf(9S) for the command
16686  *		xp - ptr to the sd_xbuf for the command
16687  *		pktp - ptr to the scsi_pkt(9S) for the command
16688  *
16689  *     Context: May be called under interrupt context
16690  */
16691 
16692 static void
16693 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16694 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16695 {
16696 	struct scsi_arq_status *asp;
16697 
16698 	ASSERT(un != NULL);
16699 	ASSERT(mutex_owned(SD_MUTEX(un)));
16700 	ASSERT(bp != NULL);
16701 	ASSERT(xp != NULL);
16702 	ASSERT(pktp != NULL);
16703 	ASSERT(pktp != un->un_rqs_pktp);
16704 	ASSERT(bp   != un->un_rqs_bp);
16705 
16706 	/*
16707 	 * For auto-request sense, we get a scsi_arq_status back from
16708 	 * the HBA, with the sense data in the sts_sensedata member.
16709 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16710 	 */
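	/*
	 * For reference, a rough sketch of that layout (field names as in
	 * scsi_arq_status(9S); see the system headers for the
	 * authoritative definition):
	 *
	 *	struct scsi_arq_status {
	 *		struct scsi_status		sts_status;
	 *		struct scsi_status		sts_rqpkt_status;
	 *		uchar_t				sts_rqpkt_reason;
	 *		uchar_t				sts_rqpkt_resid;
	 *		uint_t				sts_rqpkt_state;
	 *		uint_t				sts_rqpkt_statistics;
	 *		struct scsi_extended_sense	sts_sensedata;
	 *	};
	 */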
16711 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16712 
16713 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16714 		/*
16715 		 * The auto REQUEST SENSE failed; see if we can re-try
16716 		 * the original command.
16717 		 */
16718 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16719 		    "auto request sense failed (reason=%s)\n",
16720 		    scsi_rname(asp->sts_rqpkt_reason));
16721 
16722 		sd_reset_target(un, pktp);
16723 
16724 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16725 		    NULL, NULL, EIO, (clock_t)0, NULL);
16726 		return;
16727 	}
16728 
16729 	/* Save the relevant sense info into the xp for the original cmd. */
16730 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16731 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16732 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16733 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16734 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16735 
16736 	/*
16737 	 * See if we have valid sense data; if so, turn it over to
16738 	 * sd_decode_sense() to figure out the right course of action.
16739 	 */
16740 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16741 		sd_decode_sense(un, bp, xp, pktp);
16742 	}
16743 }
16744 
16745 
16746 /*
16747  *    Function: sd_print_sense_failed_msg
16748  *
16749  * Description: Print log message when RQS has failed.
16750  *
16751  *   Arguments: un - ptr to associated softstate
16752  *		bp - ptr to buf(9S) for the command
16753  *		arg - generic message string ptr
16754  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16755  *			or SD_NO_RETRY_ISSUED
16756  *
16757  *     Context: May be called from interrupt context
16758  */
16759 
16760 static void
16761 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16762 	int code)
16763 {
16764 	char	*msgp = arg;
16765 
16766 	ASSERT(un != NULL);
16767 	ASSERT(mutex_owned(SD_MUTEX(un)));
16768 	ASSERT(bp != NULL);
16769 
16770 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16771 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16772 	}
16773 }
16774 
16775 
16776 /*
16777  *    Function: sd_validate_sense_data
16778  *
16779  * Description: Check the given sense data for validity.
16780  *		If the sense data is not valid, the command will
16781  *		be either failed or retried!
16782  *
16783  * Return Code: SD_SENSE_DATA_IS_INVALID
16784  *		SD_SENSE_DATA_IS_VALID
16785  *
16786  *     Context: May be called from interrupt context
16787  */
16788 
16789 static int
16790 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
16791 {
16792 	struct scsi_extended_sense *esp;
16793 	struct	scsi_pkt *pktp;
16794 	size_t	actual_len;
16795 	char	*msgp = NULL;
16796 
16797 	ASSERT(un != NULL);
16798 	ASSERT(mutex_owned(SD_MUTEX(un)));
16799 	ASSERT(bp != NULL);
16800 	ASSERT(bp != un->un_rqs_bp);
16801 	ASSERT(xp != NULL);
16802 
16803 	pktp = SD_GET_PKTP(bp);
16804 	ASSERT(pktp != NULL);
16805 
16806 	/*
16807 	 * Check the status of the RQS command (auto or manual).
16808 	 */
16809 	switch (xp->xb_sense_status & STATUS_MASK) {
16810 	case STATUS_GOOD:
16811 		break;
16812 
16813 	case STATUS_RESERVATION_CONFLICT:
16814 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16815 		return (SD_SENSE_DATA_IS_INVALID);
16816 
16817 	case STATUS_BUSY:
16818 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16819 		    "Busy Status on REQUEST SENSE\n");
16820 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
16821 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
16822 		return (SD_SENSE_DATA_IS_INVALID);
16823 
16824 	case STATUS_QFULL:
16825 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16826 		    "QFULL Status on REQUEST SENSE\n");
16827 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
16828 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
16829 		return (SD_SENSE_DATA_IS_INVALID);
16830 
16831 	case STATUS_CHECK:
16832 	case STATUS_TERMINATED:
16833 		msgp = "Check Condition on REQUEST SENSE\n";
16834 		goto sense_failed;
16835 
16836 	default:
16837 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
16838 		goto sense_failed;
16839 	}
16840 
16841 	/*
16842 	 * See if we got the minimum required amount of sense data.
16843 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
16844 	 * or less.
16845 	 */
16846 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
16847 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
16848 	    (actual_len == 0)) {
16849 		msgp = "Request Sense couldn't get sense data\n";
16850 		goto sense_failed;
16851 	}
16852 
16853 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
16854 		msgp = "Not enough sense information\n";
16855 		goto sense_failed;
16856 	}
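
	/*
	 * Worked example (hypothetical values): with SENSE_LENGTH of 20
	 * and an xb_sense_resid of 18, actual_len works out to 2, which
	 * fails the SUN_MIN_SENSE_LENGTH check above and sends the
	 * command down the sense_failed path below.
	 */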
16857 
16858 	/*
16859 	 * We require the extended sense data
16860 	 */
16861 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16862 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
16863 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16864 			static char tmp[8];
16865 			static char buf[148];
16866 			char *p = (char *)(xp->xb_sense_data);
16867 			int i;
16868 
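			/*
			 * tmp[] and buf[] above are static scratch
			 * buffers; sd_sense_mutex serializes their use
			 * across concurrent callers.
			 */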
16869 			mutex_enter(&sd_sense_mutex);
16870 			(void) strcpy(buf, "undecodable sense information:");
16871 			for (i = 0; i < actual_len; i++) {
16872 				(void) sprintf(tmp, " 0x%x", *(p++) & 0xff);
16873 				(void) strcpy(&buf[strlen(buf)], tmp);
16874 			}
16875 			i = strlen(buf);
16876 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
16877 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
16878 			mutex_exit(&sd_sense_mutex);
16879 		}
16880 		/* Note: Legacy behavior, fail the command with no retry */
16881 		sd_return_failed_command(un, bp, EIO);
16882 		return (SD_SENSE_DATA_IS_INVALID);
16883 	}
16884 
16885 	/*
16886 	 * Check that es_code is valid (es_class concatenated with es_code
16887 	 * makes up the "response code" field).  es_class will always be 7,
16888 	 * so make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates
16889 	 * the sense data format.
16890 	 */
16891 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
16892 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
16893 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
16894 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
16895 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
16896 		goto sense_failed;
16897 	}
16898 
16899 	return (SD_SENSE_DATA_IS_VALID);
16900 
16901 sense_failed:
16902 	/*
16903 	 * If the request sense failed (for whatever reason), attempt
16904 	 * to retry the original command.
16905 	 */
16906 #if defined(__i386) || defined(__amd64)
16907 	/*
16908 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
16909 	 * sddef.h for the SPARC platform, while x86 uses a single
16910 	 * binary for both SCSI and FC.
16911 	 * The SD_RETRY_DELAY value used here needs to be adjusted
16912 	 * whenever SD_RETRY_DELAY changes in sddef.h.
16913 	 */
16914 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16915 	    sd_print_sense_failed_msg, msgp, EIO,
16916 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
16917 #else
16918 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16919 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
16920 #endif
16921 
16922 	return (SD_SENSE_DATA_IS_INVALID);
16923 }
16924 
16925 
16926 
16927 /*
16928  *    Function: sd_decode_sense
16929  *
16930  * Description: Take recovery action(s) when SCSI Sense Data is received.
16931  *
16932  *     Context: Interrupt context.
16933  */
16934 
16935 static void
16936 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16937 	struct scsi_pkt *pktp)
16938 {
16939 	struct scsi_extended_sense *esp;
16940 	struct scsi_descr_sense_hdr *sdsp;
16941 	uint8_t asc, ascq, sense_key;
16942 
16943 	ASSERT(un != NULL);
16944 	ASSERT(mutex_owned(SD_MUTEX(un)));
16945 	ASSERT(bp != NULL);
16946 	ASSERT(bp != un->un_rqs_bp);
16947 	ASSERT(xp != NULL);
16948 	ASSERT(pktp != NULL);
16949 
16950 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16951 
16952 	switch (esp->es_code) {
16953 	case CODE_FMT_DESCR_CURRENT:
16954 	case CODE_FMT_DESCR_DEFERRED:
16955 		sdsp = (struct scsi_descr_sense_hdr *)xp->xb_sense_data;
16956 		sense_key = sdsp->ds_key;
16957 		asc = sdsp->ds_add_code;
16958 		ascq = sdsp->ds_qual_code;
16959 		break;
16960 	case CODE_FMT_VENDOR_SPECIFIC:
16961 	case CODE_FMT_FIXED_CURRENT:
16962 	case CODE_FMT_FIXED_DEFERRED:
16963 	default:
16964 		sense_key = esp->es_key;
16965 		asc = esp->es_add_code;
16966 		ascq = esp->es_qual_code;
16967 		break;
16968 	}
16969 
16970 	switch (sense_key) {
16971 	case KEY_NO_SENSE:
16972 		sd_sense_key_no_sense(un, bp, xp, pktp);
16973 		break;
16974 	case KEY_RECOVERABLE_ERROR:
16975 		sd_sense_key_recoverable_error(un, asc, bp, xp, pktp);
16976 		break;
16977 	case KEY_NOT_READY:
16978 		sd_sense_key_not_ready(un, asc, ascq, bp, xp, pktp);
16979 		break;
16980 	case KEY_MEDIUM_ERROR:
16981 	case KEY_HARDWARE_ERROR:
16982 		sd_sense_key_medium_or_hardware_error(un,
16983 		    sense_key, asc, bp, xp, pktp);
16984 		break;
16985 	case KEY_ILLEGAL_REQUEST:
16986 		sd_sense_key_illegal_request(un, bp, xp, pktp);
16987 		break;
16988 	case KEY_UNIT_ATTENTION:
16989 		sd_sense_key_unit_attention(un, asc, bp, xp, pktp);
16990 		break;
16991 	case KEY_WRITE_PROTECT:
16992 	case KEY_VOLUME_OVERFLOW:
16993 	case KEY_MISCOMPARE:
16994 		sd_sense_key_fail_command(un, bp, xp, pktp);
16995 		break;
16996 	case KEY_BLANK_CHECK:
16997 		sd_sense_key_blank_check(un, bp, xp, pktp);
16998 		break;
16999 	case KEY_ABORTED_COMMAND:
17000 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17001 		break;
17002 	case KEY_VENDOR_UNIQUE:
17003 	case KEY_COPY_ABORTED:
17004 	case KEY_EQUAL:
17005 	case KEY_RESERVED:
17006 	default:
17007 		sd_sense_key_default(un, sense_key, bp, xp, pktp);
17008 		break;
17009 	}
17010 }
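
/*
 * For reference (per the SCSI spec): sense response codes 0x70/0x71
 * select the fixed current/deferred formats and 0x72/0x73 the
 * descriptor current/deferred formats; es_code above holds the low
 * nibble of that response code, which is what the CODE_FMT_* values
 * name.
 */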
17011 
17012 
17013 /*
17014  *    Function: sd_dump_memory
17015  *
17016  * Description: Debug logging routine to print the contents of a user provided
17017  *		buffer. The output of the buffer is broken up into 256 byte
17018  *		segments due to a size constraint of the scsi_log()
17019  *		implementation.
17020  *
17021  *   Arguments: un - ptr to softstate
17022  *		comp - component mask
17023  *		title - "title" string to precede the data when printed
17024  *		data - ptr to data block to be printed
17025  *		len - size of data block to be printed
17026  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17027  *
17028  *     Context: May be called from interrupt context
17029  */
17030 
17031 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17032 
17033 static char *sd_dump_format_string[] = {
17034 		" 0x%02x",
17035 		" %c"
17036 };
17037 
17038 static void
17039 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17040     int len, int fmt)
17041 {
17042 	int	i, j;
17043 	int	avail_count;
17044 	int	start_offset;
17045 	int	end_offset;
17046 	size_t	entry_len;
17047 	char	*bufp;
17048 	char	*local_buf;
17049 	char	*format_string;
17050 
17051 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17052 
17053 	/*
17054 	 * In the debug version of the driver, this function is called from a
17055 	 * number of places which are NOPs in the release driver.
17056 	 * The debug driver therefore has additional methods of filtering
17057 	 * debug output.
17058 	 */
17059 #ifdef SDDEBUG
17060 	/*
17061 	 * In the debug version of the driver we can reduce the amount of debug
17062 	 * messages by setting sd_error_level to something other than
17063 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17064 	 * sd_component_mask.
17065 	 */
17066 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17067 	    (sd_error_level != SCSI_ERR_ALL)) {
17068 		return;
17069 	}
17070 	if (((sd_component_mask & comp) == 0) ||
17071 	    (sd_error_level != SCSI_ERR_ALL)) {
17072 		return;
17073 	}
17074 #else
17075 	if (sd_error_level != SCSI_ERR_ALL) {
17076 		return;
17077 	}
17078 #endif
17079 
17080 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17081 	bufp = local_buf;
17082 	/*
17083 	 * Available length is the length of local_buf[], minus the
17084 	 * length of the title string, minus one for the ":", minus
17085 	 * one for the newline, minus one for the NULL terminator.
17086 	 * This gives the #bytes available for holding the printed
17087 	 * values from the given data buffer.
17088 	 */
17089 	if (fmt == SD_LOG_HEX) {
17090 		format_string = sd_dump_format_string[0];
17091 	} else /* SD_LOG_CHAR */ {
17092 		format_string = sd_dump_format_string[1];
17093 	}
17094 	/*
17095 	 * Available count is the number of elements from the given
17096 	 * data buffer that we can fit into the available length.
17097 	 * This is based upon the size of the format string used.
17098 	 * Make one entry and find it's size.
17099 	 * Make one entry and find its size.
17100 	(void) sprintf(bufp, format_string, data[0]);
17101 	entry_len = strlen(bufp);
17102 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
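	/*
	 * Worked example (hypothetical values): with a 10-character title
	 * and SD_LOG_HEX format, " 0x%02x" renders each byte in 5
	 * characters, so avail_count is (256 - 10 - 3) / 5 = 48 data
	 * bytes per line of output.
	 */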
17103 
17104 	j = 0;
17105 	while (j < len) {
17106 		bufp = local_buf;
17107 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17108 		start_offset = j;
17109 
17110 		end_offset = start_offset + avail_count;
17111 
17112 		(void) sprintf(bufp, "%s:", title);
17113 		bufp += strlen(bufp);
17114 		for (i = start_offset; ((i < end_offset) && (j < len));
17115 		    i++, j++) {
17116 			(void) sprintf(bufp, format_string, data[i]);
17117 			bufp += entry_len;
17118 		}
17119 		(void) sprintf(bufp, "\n");
17120 
17121 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17122 	}
17123 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17124 }
17125 
17126 /*
17127  *    Function: sd_print_sense_msg
17128  *
17129  * Description: Log a message based upon the given sense data.
17130  *
17131  *   Arguments: un - ptr to associated softstate
17132  *		bp - ptr to buf(9S) for the command
17133  *		arg - ptr to associate sd_sense_info struct
17134  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17135  *			or SD_NO_RETRY_ISSUED
17136  *
17137  *     Context: May be called from interrupt context
17138  */
17139 
17140 static void
17141 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17142 {
17143 	struct sd_xbuf	*xp;
17144 	struct scsi_pkt	*pktp;
17145 	struct scsi_extended_sense *sensep;
17146 	daddr_t request_blkno;
17147 	diskaddr_t err_blkno;
17148 	int severity;
17149 	int pfa_flag;
17150 	int fixed_format = TRUE;
17151 	extern struct scsi_key_strings scsi_cmds[];
17152 
17153 	ASSERT(un != NULL);
17154 	ASSERT(mutex_owned(SD_MUTEX(un)));
17155 	ASSERT(bp != NULL);
17156 	xp = SD_GET_XBUF(bp);
17157 	ASSERT(xp != NULL);
17158 	pktp = SD_GET_PKTP(bp);
17159 	ASSERT(pktp != NULL);
17160 	ASSERT(arg != NULL);
17161 
17162 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17163 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17164 
17165 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17166 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17167 		severity = SCSI_ERR_RETRYABLE;
17168 	}
17169 
17170 	/* Use absolute block number for the request block number */
17171 	request_blkno = xp->xb_blkno;
17172 
17173 	/*
17174 	 * Now try to get the error block number from the sense data
17175 	 */
17176 	sensep = (struct scsi_extended_sense *)xp->xb_sense_data;
17177 	switch (sensep->es_code) {
17178 	case CODE_FMT_DESCR_CURRENT:
17179 	case CODE_FMT_DESCR_DEFERRED:
17180 		err_blkno =
17181 		    sd_extract_sense_info_descr(
17182 			(struct scsi_descr_sense_hdr *)sensep);
17183 		fixed_format = FALSE;
17184 		break;
17185 	case CODE_FMT_FIXED_CURRENT:
17186 	case CODE_FMT_FIXED_DEFERRED:
17187 	case CODE_FMT_VENDOR_SPECIFIC:
17188 	default:
17189 		/*
17190 		 * With the es_valid bit set, we assume that the error
17191 		 * blkno is in the sense data.  Also, if xp->xb_blkno is
17192 		 * greater than 0xffffffff then the target *should* have used
17193 		 * a descriptor sense format (or it shouldn't have set
17194 		 * the es_valid bit), and we may as well ignore the
17195 		 * 32-bit value.
17196 		 */
17197 		if ((sensep->es_valid != 0) && (xp->xb_blkno <= 0xffffffff)) {
17198 			err_blkno = (diskaddr_t)
17199 			    ((sensep->es_info_1 << 24) |
17200 			    (sensep->es_info_2 << 16) |
17201 			    (sensep->es_info_3 << 8)  |
17202 			    (sensep->es_info_4));
17203 		} else {
17204 			err_blkno = (diskaddr_t)-1;
17205 		}
17206 		break;
17207 	}
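
	/*
	 * Example (hypothetical values): with the es_valid bit set and
	 * es_info_1 through es_info_4 of 0x00, 0x12, 0x34 and 0x56, the
	 * fixed-format decode above yields an err_blkno of 0x123456.
	 */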
17208 
17209 	if (err_blkno == (diskaddr_t)-1) {
17210 		/*
17211 		 * Without the es_valid bit set (for fixed format) or an
17212 		 * information descriptor (for descriptor format) we cannot
17213 		 * be certain of the error blkno, so just use the
17214 		 * request_blkno.
17215 		 */
17216 		err_blkno = (diskaddr_t)request_blkno;
17217 	} else {
17218 		/*
17219 		 * We retrieved the error block number from the information
17220 		 * portion of the sense data.
17221 		 *
17222 		 * For USCSI commands we are better off using the error
17223 		 * block no. as the requested block no. (This is the best
17224 		 * we can estimate.)
17225 		 */
17226 		if ((SD_IS_BUFIO(xp) == FALSE) &&
17227 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17228 			request_blkno = err_blkno;
17229 		}
17230 	}
17231 
17232 	/*
17233 	 * The following will log the buffer contents for the release driver
17234 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17235 	 * level is set to verbose.
17236 	 */
17237 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17238 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17239 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17240 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17241 
17242 	if (pfa_flag == FALSE) {
17243 		/* This is normally only set for USCSI */
17244 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17245 			return;
17246 		}
17247 
17248 		if ((SD_IS_BUFIO(xp) == TRUE) &&
17249 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17250 		    (severity < sd_error_level))) {
17251 			return;
17252 		}
17253 	}
17254 
17255 	/*
17256 	 * If the data is in fixed format then check for Sonoma failover
17257 	 * and keep a count of how many I/Os have failed.  We should not
17258 	 * have to worry about Sonoma returning descriptor-format sense
17259 	 * data, where asc/ascq would be in a different location anyway.
17260 	 */
17261 	if (fixed_format &&
17262 	    (SD_IS_LSI(un)) && (sensep->es_key == KEY_ILLEGAL_REQUEST) &&
17263 	    (sensep->es_add_code == 0x94) && (sensep->es_qual_code == 0x01)) {
17264 		un->un_sonoma_failure_count++;
17265 		if (un->un_sonoma_failure_count > 1) {
17266 			return;
17267 		}
17268 	}
17269 
17270 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17271 	    request_blkno, err_blkno, scsi_cmds, sensep,
17272 	    un->un_additional_codes, NULL);
17273 }
17274 
17275 /*
17276  *    Function: sd_extract_sense_info_descr
17277  *
17278  * Description: Retrieve "information" field from descriptor format
17279  *              sense data.  Iterates through each sense descriptor
17280  *              looking for the information descriptor and returns
17281  *              the information field from that descriptor.
17282  *
17283  *     Context: May be called from interrupt context
17284  */
17285 
17286 static diskaddr_t
17287 sd_extract_sense_info_descr(struct scsi_descr_sense_hdr *sdsp)
17288 {
17289 	diskaddr_t result;
17290 	uint8_t *descr_offset;
17291 	int valid_sense_length;
17292 	struct scsi_information_sense_descr *isd;
17293 
17294 	/*
17295 	 * Initialize result to -1 indicating there is no information
17296 	 * descriptor
17297 	 */
17298 	result = (diskaddr_t)-1;
17299 
17300 	/*
17301 	 * The first descriptor will immediately follow the header
17302 	 */
17303 	descr_offset = (uint8_t *)(sdsp+1); /* Pointer arithmetic */
17304 
17305 	/*
17306 	 * Calculate the amount of valid sense data
17307 	 */
17308 	valid_sense_length =
17309 	    min((sizeof (struct scsi_descr_sense_hdr) +
17310 	    sdsp->ds_addl_sense_length),
17311 	    SENSE_LENGTH);
17312 
17313 	/*
17314 	 * Iterate through the list of descriptors, stopping when we
17315 	 * run out of sense data
17316 	 */
17317 	while ((descr_offset + sizeof (struct scsi_information_sense_descr)) <=
17318 	    (uint8_t *)sdsp + valid_sense_length) {
17319 		/*
17320 		 * Check if this is an information descriptor.  We can
17321 		 * use the scsi_information_sense_descr structure as a
17322 		 * template since the first two fields are always the
17323 		 * same.
17324 		 */
17325 		isd = (struct scsi_information_sense_descr *)descr_offset;
17326 		if (isd->isd_descr_type == DESCR_INFORMATION) {
17327 			/*
17328 			 * Found an information descriptor.  Copy the
17329 			 * information field.  There will only be one
17330 			 * information descriptor so we can stop looking.
17331 			 */
17332 			result =
17333 			    (((diskaddr_t)isd->isd_information[0] << 56) |
17334 				((diskaddr_t)isd->isd_information[1] << 48) |
17335 				((diskaddr_t)isd->isd_information[2] << 40) |
17336 				((diskaddr_t)isd->isd_information[3] << 32) |
17337 				((diskaddr_t)isd->isd_information[4] << 24) |
17338 				((diskaddr_t)isd->isd_information[5] << 16) |
17339 				((diskaddr_t)isd->isd_information[6] << 8)  |
17340 				((diskaddr_t)isd->isd_information[7]));
17341 			break;
17342 		}
17343 
17344 		/*
17345 		 * Get pointer to the next descriptor.  The "additional
17346 		 * length" field holds the length of the descriptor except
17347 		 * for the "type" and "additional length" fields, so
17348 		 * we need to add 2 to get the total length.
17349 		 */
17350 		descr_offset += (isd->isd_addl_length + 2);
17351 	}
17352 
17353 	return (result);
17354 }
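
/*
 * Worked example (hypothetical sense bytes): given the descriptor-format
 * sense data
 *
 *	72 03 11 00 00 00 00 0c  00 0a 80 00 00 00 00 00  00 12 34 56
 *
 * the 8-byte header (additional sense length 0x0c) is followed by a
 * single INFORMATION descriptor (type 0x00, additional length 0x0a),
 * and the routine above would return 0x123456 as the information field.
 */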
17355 
17356 /*
17357  *    Function: sd_sense_key_no_sense
17358  *
17359  * Description: Recovery action when sense data was not received.
17360  *
17361  *     Context: May be called from interrupt context
17362  */
17363 
17364 static void
17365 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17366 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17367 {
17368 	struct sd_sense_info	si;
17369 
17370 	ASSERT(un != NULL);
17371 	ASSERT(mutex_owned(SD_MUTEX(un)));
17372 	ASSERT(bp != NULL);
17373 	ASSERT(xp != NULL);
17374 	ASSERT(pktp != NULL);
17375 
17376 	si.ssi_severity = SCSI_ERR_FATAL;
17377 	si.ssi_pfa_flag = FALSE;
17378 
17379 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17380 
17381 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17382 		&si, EIO, (clock_t)0, NULL);
17383 }
17384 
17385 
17386 /*
17387  *    Function: sd_sense_key_recoverable_error
17388  *
17389  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17390  *
17391  *     Context: May be called from interrupt context
17392  */
17393 
17394 static void
17395 sd_sense_key_recoverable_error(struct sd_lun *un,
17396 	uint8_t asc,
17397 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17398 {
17399 	struct sd_sense_info	si;
17400 
17401 	ASSERT(un != NULL);
17402 	ASSERT(mutex_owned(SD_MUTEX(un)));
17403 	ASSERT(bp != NULL);
17404 	ASSERT(xp != NULL);
17405 	ASSERT(pktp != NULL);
17406 
17407 	/*
17408 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17409 	 */
17410 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17411 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17412 		si.ssi_severity = SCSI_ERR_INFO;
17413 		si.ssi_pfa_flag = TRUE;
17414 	} else {
17415 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17416 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17417 		si.ssi_severity = SCSI_ERR_RECOVERED;
17418 		si.ssi_pfa_flag = FALSE;
17419 	}
17420 
17421 	if (pktp->pkt_resid == 0) {
17422 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17423 		sd_return_command(un, bp);
17424 		return;
17425 	}
17426 
17427 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17428 	    &si, EIO, (clock_t)0, NULL);
17429 }
17430 
17431 
17432 
17433 
17434 /*
17435  *    Function: sd_sense_key_not_ready
17436  *
17437  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17438  *
17439  *     Context: May be called from interrupt context
17440  */
17441 
17442 static void
17443 sd_sense_key_not_ready(struct sd_lun *un,
17444 	uint8_t asc, uint8_t ascq,
17445 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17446 {
17447 	struct sd_sense_info	si;
17448 
17449 	ASSERT(un != NULL);
17450 	ASSERT(mutex_owned(SD_MUTEX(un)));
17451 	ASSERT(bp != NULL);
17452 	ASSERT(xp != NULL);
17453 	ASSERT(pktp != NULL);
17454 
17455 	si.ssi_severity = SCSI_ERR_FATAL;
17456 	si.ssi_pfa_flag = FALSE;
17457 
17458 	/*
17459 	 * Update error stats after first NOT READY error. Disks may have
17460 	 * been powered down and may need to be restarted.  For CDROMs,
17461 	 * report NOT READY errors only if media is present.
17462 	 */
17463 	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17464 	    (xp->xb_retry_count > 0)) {
17465 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17466 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17467 	}
17468 
17469 	/*
17470 	 * Just fail if the "not ready" retry limit has been reached.
17471 	 */
17472 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17473 		/* Special check for error message printing for removables. */
17474 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17475 		    (ascq >= 0x04)) {
17476 			si.ssi_severity = SCSI_ERR_ALL;
17477 		}
17478 		goto fail_command;
17479 	}
17480 
17481 	/*
17482 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17483 	 * what to do.
17484 	 */
17485 	switch (asc) {
17486 	case 0x04:	/* LOGICAL UNIT NOT READY */
17487 		/*
17488 		 * Disk drives that don't spin up result in a very long delay
17489 		 * in format(1M) without warning messages. We will log a
17490 		 * message if the error level is set to verbose.
17491 		 */
17492 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17493 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17494 			    "logical unit not ready, resetting disk\n");
17495 		}
17496 
17497 		/*
17498 		 * There are different requirements for CDROMs and disks for
17499 		 * the number of retries.  If a CD-ROM is giving this, it is
17500 		 * probably reading TOC and is in the process of getting
17501 		 * ready, so we should keep on trying for a long time to make
17502 		 * sure that all types of media are taken into account (for
17503 		 * some media the drive takes a long time to read TOC).  For
17504 		 * disks we do not want to retry this too many times as this
17505 		 * can cause a long hang in format when the drive refuses to
17506 		 * spin up (a very common failure).
17507 		 */
17508 		switch (ascq) {
17509 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17510 			/*
17511 			 * Disk drives frequently refuse to spin up which
17512 			 * results in a very long hang in format without
17513 			 * warning messages.
17514 			 *
17515 			 * Note: This code preserves the legacy behavior of
17516 			 * comparing xb_retry_count against zero for fibre
17517 			 * channel targets instead of comparing against the
17518 			 * un_reset_retry_count value.  The reason for this
17519 			 * discrepancy has been so utterly lost beneath the
17520 			 * Sands of Time that even Indiana Jones could not
17521 			 * find it.
17522 			 */
17523 			if (un->un_f_is_fibre == TRUE) {
17524 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17525 					(xp->xb_retry_count > 0)) &&
17526 					(un->un_startstop_timeid == NULL)) {
17527 					scsi_log(SD_DEVINFO(un), sd_label,
17528 					CE_WARN, "logical unit not ready, "
17529 					"resetting disk\n");
17530 					sd_reset_target(un, pktp);
17531 				}
17532 			} else {
17533 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17534 					(xp->xb_retry_count >
17535 					un->un_reset_retry_count)) &&
17536 					(un->un_startstop_timeid == NULL)) {
17537 					scsi_log(SD_DEVINFO(un), sd_label,
17538 					CE_WARN, "logical unit not ready, "
17539 					"resetting disk\n");
17540 					sd_reset_target(un, pktp);
17541 				}
17542 			}
17543 			break;
17544 
17545 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17546 			/*
17547 			 * If the target is in the process of becoming
17548 			 * ready, just proceed with the retry. This can
17549 			 * happen with CD-ROMs that take a long time to
17550 			 * read TOC after a power cycle or reset.
17551 			 */
17552 			goto do_retry;
17553 
17554 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17555 			break;
17556 
17557 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17558 			/*
17559 			 * Retries cannot help here so just fail right away.
17560 			 */
17561 			goto fail_command;
17562 
17563 		case 0x88:
17564 			/*
17565 			 * Vendor-unique code for T3/T4: it indicates a
17566 			 * path problem in a multipathed config, but as far as
17567 			 * the target driver is concerned it equates to a fatal
17568 			 * error, so we should just fail the command right away
17569 			 * (without printing anything to the console). If this
17570 			 * is not a T3/T4, fall thru to the default recovery
17571 			 * action.
17572 			 * T3/T4 is FC only, don't need to check is_fibre
17573 			 * Since T3/T4 is FC-only, there is no need to check is_fibre.
17574 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17575 				sd_return_failed_command(un, bp, EIO);
17576 				return;
17577 			}
17578 			/* FALLTHRU */
17579 
17580 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17581 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17582 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17583 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17584 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17585 		default:    /* Possible future codes in SCSI spec? */
17586 			/*
17587 			 * For removable-media devices, do not retry if
17588 			 * ASCQ > 2 as these result mostly from USCSI commands
17589 			 * on MMC devices issued to check status of an
17590 			 * operation initiated in immediate mode.  Also for
17591 			 * ASCQ >= 4 do not print console messages as these
17592 			 * mainly represent a user-initiated operation
17593 			 * instead of a system failure.
17594 			 */
17595 			if (un->un_f_has_removable_media) {
17596 				si.ssi_severity = SCSI_ERR_ALL;
17597 				goto fail_command;
17598 			}
17599 			break;
17600 		}
17601 
17602 		/*
17603 		 * As part of our recovery attempt for the NOT READY
17604 		 * condition, we issue a START STOP UNIT command. However
17605 		 * we want to wait for a short delay before attempting this
17606 		 * as there may still be more commands coming back from the
17607 		 * target with the check condition. To do this we use
17608 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17609 		 * the delay interval expires. (sd_start_stop_unit_callback()
17610 		 * dispatches sd_start_stop_unit_task(), which will issue
17611 		 * the actual START STOP UNIT command.)  The delay interval
17612 		 * is one-half of the delay that we will use to retry the
17613 		 * command that generated the NOT READY condition.
17614 		 *
17615 		 * Note that we could just dispatch sd_start_stop_unit_task()
17616 		 * from here and allow it to sleep for the delay interval,
17617 		 * but then we would be tying up the taskq thread
17618 		 * unnecessarily for the duration of the delay.
17619 		 *
17620 		 * Do not issue the START STOP UNIT if the current command
17621 		 * is already a START STOP UNIT.
17622 		 */
17623 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17624 			break;
17625 		}
17626 
17627 		/*
17628 		 * Do not schedule the timeout if one is already pending.
17629 		 */
17630 		if (un->un_startstop_timeid != NULL) {
17631 			SD_INFO(SD_LOG_ERROR, un,
17632 			    "sd_sense_key_not_ready: restart already issued to"
17633 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17634 			    ddi_get_instance(SD_DEVINFO(un)));
17635 			break;
17636 		}
17637 
17638 		/*
17639 		 * Schedule the START STOP UNIT command, then queue the command
17640 		 * for a retry.
17641 		 *
17642 		 * Note: A timeout is not scheduled for this retry because we
17643 		 * want the retry to be serial with the START_STOP_UNIT. The
17644 		 * retry will be started when the START_STOP_UNIT is completed
17645 		 * in sd_start_stop_unit_task.
17646 		 */
17647 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17648 		    un, SD_BSY_TIMEOUT / 2);
17649 		xp->xb_retry_count++;
17650 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17651 		return;
17652 
17653 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17654 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17655 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17656 			    "unit does not respond to selection\n");
17657 		}
17658 		break;
17659 
17660 	case 0x3A:	/* MEDIUM NOT PRESENT */
17661 		if (sd_error_level >= SCSI_ERR_FATAL) {
17662 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17663 			    "Caddy not inserted in drive\n");
17664 		}
17665 
17666 		sr_ejected(un);
17667 		un->un_mediastate = DKIO_EJECTED;
17668 		/* The state has changed, inform the media watch routines */
17669 		cv_broadcast(&un->un_state_cv);
17670 		/* Just fail if no media is present in the drive. */
17671 		goto fail_command;
17672 
17673 	default:
17674 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17675 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17676 			    "Unit not Ready. Additional sense code 0x%x\n",
17677 			    asc);
17678 		}
17679 		break;
17680 	}
17681 
17682 do_retry:
17683 
17684 	/*
17685 	 * Retry the command, as some targets may report NOT READY for
17686 	 * several seconds after being reset.
17687 	 */
17688 	xp->xb_retry_count++;
17689 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17690 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17691 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
17692 
17693 	return;
17694 
17695 fail_command:
17696 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17697 	sd_return_failed_command(un, bp, EIO);
17698 }
17699 
17700 
17701 
17702 /*
17703  *    Function: sd_sense_key_medium_or_hardware_error
17704  *
17705  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17706  *		sense key.
17707  *
17708  *     Context: May be called from interrupt context
17709  */
17710 
17711 static void
17712 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17713 	int sense_key, uint8_t asc,
17714 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17715 {
17716 	struct sd_sense_info	si;
17717 
17718 	ASSERT(un != NULL);
17719 	ASSERT(mutex_owned(SD_MUTEX(un)));
17720 	ASSERT(bp != NULL);
17721 	ASSERT(xp != NULL);
17722 	ASSERT(pktp != NULL);
17723 
17724 	si.ssi_severity = SCSI_ERR_FATAL;
17725 	si.ssi_pfa_flag = FALSE;
17726 
17727 	if (sense_key == KEY_MEDIUM_ERROR) {
17728 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17729 	}
17730 
17731 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17732 
17733 	if ((un->un_reset_retry_count != 0) &&
17734 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17735 		mutex_exit(SD_MUTEX(un));
17736 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17737 		if (un->un_f_allow_bus_device_reset == TRUE) {
17738 
17739 			boolean_t try_resetting_target = B_TRUE;
17740 
17741 			/*
17742 			 * We need to be able to handle specific ASC when we are
17743 			 * handling a KEY_HARDWARE_ERROR. In particular
17744 			 * taking the default action of resetting the target may
17745 			 * not be the appropriate way to attempt recovery.
17746 			 * Resetting a target because of a single LUN failure
17747 			 * victimizes all LUNs on that target.
17748 			 *
17749 			 * This is true for the LSI arrays, if an LSI
17750 			 * array controller returns an ASC of 0x84 (LUN Dead) we
17751 			 * should trust it.
17752 			 */
17753 
17754 			if (sense_key == KEY_HARDWARE_ERROR) {
17755 				switch (asc) {
17756 				case 0x84:
17757 					if (SD_IS_LSI(un)) {
17758 						try_resetting_target = B_FALSE;
17759 					}
17760 					break;
17761 				default:
17762 					break;
17763 				}
17764 			}
17765 
17766 			if (try_resetting_target == B_TRUE) {
17767 				int reset_retval = 0;
17768 				if (un->un_f_lun_reset_enabled == TRUE) {
17769 					SD_TRACE(SD_LOG_IO_CORE, un,
17770 					    "sd_sense_key_medium_or_hardware_"
17771 					    "error: issuing RESET_LUN\n");
17772 					reset_retval =
17773 					    scsi_reset(SD_ADDRESS(un),
17774 					    RESET_LUN);
17775 				}
17776 				if (reset_retval == 0) {
17777 					SD_TRACE(SD_LOG_IO_CORE, un,
17778 					    "sd_sense_key_medium_or_hardware_"
17779 					    "error: issuing RESET_TARGET\n");
17780 					(void) scsi_reset(SD_ADDRESS(un),
17781 					    RESET_TARGET);
17782 				}
17783 			}
17784 		}
17785 		mutex_enter(SD_MUTEX(un));
17786 	}
17787 
17788 	/*
17789 	 * This really ought to be a fatal error, but we will retry anyway
17790 	 * as some drives report this as a spurious error.
17791 	 */
17792 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17793 	    &si, EIO, (clock_t)0, NULL);
17794 }
17795 
17796 
17797 
17798 /*
17799  *    Function: sd_sense_key_illegal_request
17800  *
17801  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17802  *
17803  *     Context: May be called from interrupt context
17804  */
17805 
17806 static void
17807 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17808 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17809 {
17810 	struct sd_sense_info	si;
17811 
17812 	ASSERT(un != NULL);
17813 	ASSERT(mutex_owned(SD_MUTEX(un)));
17814 	ASSERT(bp != NULL);
17815 	ASSERT(xp != NULL);
17816 	ASSERT(pktp != NULL);
17817 
17818 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17819 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17820 
17821 	si.ssi_severity = SCSI_ERR_INFO;
17822 	si.ssi_pfa_flag = FALSE;
17823 
17824 	/* Pointless to retry if the target thinks it's an illegal request */
17825 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17826 	sd_return_failed_command(un, bp, EIO);
17827 }
17828 
17829 
17830 
17831 
17832 /*
17833  *    Function: sd_sense_key_unit_attention
17834  *
17835  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17836  *
17837  *     Context: May be called from interrupt context
17838  */
17839 
17840 static void
17841 sd_sense_key_unit_attention(struct sd_lun *un,
17842 	uint8_t asc,
17843 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17844 {
17845 	/*
17846 	 * For UNIT ATTENTION we allow retries for one minute. Devices
17847 	 * like Sonoma can keep returning UNIT ATTENTION for close to a
17848 	 * minute under certain conditions.
17849 	 */
17850 	int	retry_check_flag = SD_RETRIES_UA;
17851 	boolean_t	kstat_updated = B_FALSE;
17852 	struct	sd_sense_info		si;
17853 
17854 	ASSERT(un != NULL);
17855 	ASSERT(mutex_owned(SD_MUTEX(un)));
17856 	ASSERT(bp != NULL);
17857 	ASSERT(xp != NULL);
17858 	ASSERT(pktp != NULL);
17859 
17860 	si.ssi_severity = SCSI_ERR_INFO;
17861 	si.ssi_pfa_flag = FALSE;
17862 
17863 
17864 	switch (asc) {
17865 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17866 		if (sd_report_pfa != 0) {
17867 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17868 			si.ssi_pfa_flag = TRUE;
17869 			retry_check_flag = SD_RETRIES_STANDARD;
17870 			goto do_retry;
17871 		}
17872 		break;
17873 
17874 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17875 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17876 			un->un_resvd_status |=
17877 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17878 		}
17879 		/* FALLTHRU */
17880 
17881 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
17882 		if (!un->un_f_has_removable_media) {
17883 			break;
17884 		}
17885 
17886 		/*
17887 		 * When we get a unit attention from a removable-media device,
17888 		 * it may be in a state that will take a long time to recover
17889 		 * (e.g., from a reset).  Since we are executing in interrupt
17890 		 * context here, we cannot wait around for the device to come
17891 		 * back. So hand this command off to sd_media_change_task()
17892 		 * for deferred processing under taskq thread context. (Note
17893 		 * that the command still may be failed if a problem is
17894 		 * encountered at a later time.)
17895 		 */
17896 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
17897 		    KM_NOSLEEP) == 0) {
17898 			/*
17899 			 * Cannot dispatch the request so fail the command.
17900 			 */
17901 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
17902 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17903 			si.ssi_severity = SCSI_ERR_FATAL;
17904 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17905 			sd_return_failed_command(un, bp, EIO);
17906 		}
17907 
17908 		/*
17909 		 * If the dispatch of sd_media_change_task() failed, the kstat
17910 		 * was already updated above.  If the dispatch succeeded, the
17911 		 * kstat will be updated later if an error is encountered.
17912 		 * Either way, flag the kstat as handled here.
17913 		 */
17914 		kstat_updated = B_TRUE;
17915 
17916 		/*
17917 		 * Either the command has been successfully dispatched to a
17918 		 * task Q for retrying, or the dispatch failed. In either case
17919 		 * do NOT retry again by calling sd_retry_command. This sets up
17920 		 * two retries of the same command and when one completes and
17921 		 * frees the resources the other will access freed memory,
17922 		 * a bad thing.
17923 		 */
17924 		return;
17925 
17926 	default:
17927 		break;
17928 	}
17929 
17930 	/*
17931 	 * Update kstat if we haven't done that.
17932 	 */
17933 	if (!kstat_updated) {
17934 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17935 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17936 	}
17937 
17938 do_retry:
17939 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
17940 	    EIO, SD_UA_RETRY_DELAY, NULL);
17941 }
17942 
17943 
17944 
17945 /*
17946  *    Function: sd_sense_key_fail_command
17947  *
17948  * Description: Use to fail a command when we don't like the sense key that
17949  *		was returned.
17950  *
17951  *     Context: May be called from interrupt context
17952  */
17953 
17954 static void
17955 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
17956 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17957 {
17958 	struct sd_sense_info	si;
17959 
17960 	ASSERT(un != NULL);
17961 	ASSERT(mutex_owned(SD_MUTEX(un)));
17962 	ASSERT(bp != NULL);
17963 	ASSERT(xp != NULL);
17964 	ASSERT(pktp != NULL);
17965 
17966 	si.ssi_severity = SCSI_ERR_FATAL;
17967 	si.ssi_pfa_flag = FALSE;
17968 
17969 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17970 	sd_return_failed_command(un, bp, EIO);
17971 }
17972 
17973 
17974 
17975 /*
17976  *    Function: sd_sense_key_blank_check
17977  *
17978  * Description: Recovery actions for a SCSI "Blank Check" sense key.
17979  *		Has no monetary connotation.
17980  *
17981  *     Context: May be called from interrupt context
17982  */
17983 
17984 static void
17985 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
17986 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17987 {
17988 	struct sd_sense_info	si;
17989 
17990 	ASSERT(un != NULL);
17991 	ASSERT(mutex_owned(SD_MUTEX(un)));
17992 	ASSERT(bp != NULL);
17993 	ASSERT(xp != NULL);
17994 	ASSERT(pktp != NULL);
17995 
17996 	/*
17997 	 * Blank check is not fatal for removable devices, therefore
17998 	 * it does not require a console message.
17999 	 */
18000 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18001 	    SCSI_ERR_FATAL;
18002 	si.ssi_pfa_flag = FALSE;
18003 
18004 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18005 	sd_return_failed_command(un, bp, EIO);
18006 }
18007 
18008 
18009 
18010 
18011 /*
18012  *    Function: sd_sense_key_aborted_command
18013  *
18014  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18015  *
18016  *     Context: May be called from interrupt context
18017  */
18018 
18019 static void
18020 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18021 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18022 {
18023 	struct sd_sense_info	si;
18024 
18025 	ASSERT(un != NULL);
18026 	ASSERT(mutex_owned(SD_MUTEX(un)));
18027 	ASSERT(bp != NULL);
18028 	ASSERT(xp != NULL);
18029 	ASSERT(pktp != NULL);
18030 
18031 	si.ssi_severity = SCSI_ERR_FATAL;
18032 	si.ssi_pfa_flag = FALSE;
18033 
18034 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18035 
18036 	/*
18037 	 * This really ought to be a fatal error, but we will retry anyway
18038 	 * as some drives report this as a spurious error.
18039 	 */
18040 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18041 	    &si, EIO, (clock_t)0, NULL);
18042 }
18043 
18044 
18045 
18046 /*
18047  *    Function: sd_sense_key_default
18048  *
18049  * Description: Default recovery action for several SCSI sense keys (basically
18050  *		attempts a retry).
18051  *
18052  *     Context: May be called from interrupt context
18053  */
18054 
18055 static void
18056 sd_sense_key_default(struct sd_lun *un,
18057 	int sense_key,
18058 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18059 {
18060 	struct sd_sense_info	si;
18061 
18062 	ASSERT(un != NULL);
18063 	ASSERT(mutex_owned(SD_MUTEX(un)));
18064 	ASSERT(bp != NULL);
18065 	ASSERT(xp != NULL);
18066 	ASSERT(pktp != NULL);
18067 
18068 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18069 
18070 	/*
18071 	 * Undecoded sense key.  Attempt retries in the hope that they will
18072 	 * fix the problem.  Otherwise, we're dead.
18073 	 */
18074 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18075 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18076 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18077 	}
18078 
18079 	si.ssi_severity = SCSI_ERR_FATAL;
18080 	si.ssi_pfa_flag = FALSE;
18081 
18082 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18083 	    &si, EIO, (clock_t)0, NULL);
18084 }
18085 
18086 
18087 
18088 /*
18089  *    Function: sd_print_retry_msg
18090  *
18091  * Description: Print a message indicating the retry action being taken.
18092  *
18093  *   Arguments: un - ptr to associated softstate
18094  *		bp - ptr to buf(9S) for the command
18095  *		arg - not used.
18096  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18097  *			or SD_NO_RETRY_ISSUED
18098  *
18099  *     Context: May be called from interrupt context
18100  */
18101 /* ARGSUSED */
18102 static void
18103 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18104 {
18105 	struct sd_xbuf	*xp;
18106 	struct scsi_pkt *pktp;
18107 	char *reasonp;
18108 	char *msgp;
18109 
18110 	ASSERT(un != NULL);
18111 	ASSERT(mutex_owned(SD_MUTEX(un)));
18112 	ASSERT(bp != NULL);
18113 	pktp = SD_GET_PKTP(bp);
18114 	ASSERT(pktp != NULL);
18115 	xp = SD_GET_XBUF(bp);
18116 	ASSERT(xp != NULL);
18117 
18118 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18119 	mutex_enter(&un->un_pm_mutex);
18120 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18121 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18122 	    (pktp->pkt_flags & FLAG_SILENT)) {
18123 		mutex_exit(&un->un_pm_mutex);
18124 		goto update_pkt_reason;
18125 	}
18126 	mutex_exit(&un->un_pm_mutex);
18127 
18128 	/*
18129 	 * Suppress messages if they are all the same pkt_reason; with
18130 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18131 	 * If we are in panic, then suppress the retry messages.
18132 	 */
18133 	switch (flag) {
18134 	case SD_NO_RETRY_ISSUED:
18135 		msgp = "giving up";
18136 		break;
18137 	case SD_IMMEDIATE_RETRY_ISSUED:
18138 	case SD_DELAYED_RETRY_ISSUED:
18139 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18140 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18141 		    (sd_error_level != SCSI_ERR_ALL))) {
18142 			return;
18143 		}
18144 		msgp = "retrying command";
18145 		break;
18146 	default:
18147 		goto update_pkt_reason;
18148 	}
18149 
18150 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18151 	    scsi_rname(pktp->pkt_reason));
18152 
18153 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18154 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18155 
18156 update_pkt_reason:
18157 	/*
18158 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18159 	 * This is to prevent multiple console messages for the same failure
18160 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18161 	 * when the command is retried successfully because there still may be
18162 	 * more commands coming back with the same value of pktp->pkt_reason.
18163 	 */
18164 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18165 		un->un_last_pkt_reason = pktp->pkt_reason;
18166 	}
18167 }
18168 
18169 
18170 /*
18171  *    Function: sd_print_cmd_incomplete_msg
18172  *
18173  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18174  *
18175  *   Arguments: un - ptr to associated softstate
18176  *		bp - ptr to buf(9S) for the command
18177  *		arg - passed to sd_print_retry_msg()
18178  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18179  *			or SD_NO_RETRY_ISSUED
18180  *
18181  *     Context: May be called from interrupt context
18182  */
18183 
18184 static void
18185 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18186 	int code)
18187 {
18188 	dev_info_t	*dip;
18189 
18190 	ASSERT(un != NULL);
18191 	ASSERT(mutex_owned(SD_MUTEX(un)));
18192 	ASSERT(bp != NULL);
18193 
18194 	switch (code) {
18195 	case SD_NO_RETRY_ISSUED:
18196 		/* Command was failed. Someone turned off this target? */
18197 		if (un->un_state != SD_STATE_OFFLINE) {
18198 			/*
18199 			 * Suppress message if we are detaching and
18200 			 * device has been disconnected
18201 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18202 			 * private interface and not part of the DDI
18203 			 */
18204 			dip = un->un_sd->sd_dev;
18205 			if (!(DEVI_IS_DETACHING(dip) &&
18206 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18207 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18208 				"disk not responding to selection\n");
18209 			}
18210 			New_state(un, SD_STATE_OFFLINE);
18211 		}
18212 		break;
18213 
18214 	case SD_DELAYED_RETRY_ISSUED:
18215 	case SD_IMMEDIATE_RETRY_ISSUED:
18216 	default:
18217 		/* Command was successfully queued for retry */
18218 		sd_print_retry_msg(un, bp, arg, code);
18219 		break;
18220 	}
18221 }
18222 
18223 
18224 /*
18225  *    Function: sd_pkt_reason_cmd_incomplete
18226  *
18227  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18228  *
18229  *     Context: May be called from interrupt context
18230  */
18231 
18232 static void
18233 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18234 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18235 {
18236 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18237 
18238 	ASSERT(un != NULL);
18239 	ASSERT(mutex_owned(SD_MUTEX(un)));
18240 	ASSERT(bp != NULL);
18241 	ASSERT(xp != NULL);
18242 	ASSERT(pktp != NULL);
18243 
18244 	/* Do not do a reset if selection did not complete */
18245 	/* Note: Should this not just check the bit? */
18246 	if (pktp->pkt_state != STATE_GOT_BUS) {
18247 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18248 		sd_reset_target(un, pktp);
18249 	}
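	/*
	 * Added note: the equality test above skips the reset only when
	 * pkt_state is exactly STATE_GOT_BUS, i.e. the bus was obtained but
	 * selection never completed.  The bit test that the question above
	 * alludes to would be:
	 *
	 *	if ((pktp->pkt_state & STATE_GOT_BUS) == 0)
	 *
	 * which behaves subtly differently: it would skip the reset in any
	 * state that includes STATE_GOT_BUS, not just the bare-bus state.
	 */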
18250 
18251 	/*
18252 	 * If the target was not successfully selected, then set
18253 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18254 	 * with the target, and further retries and/or commands are
18255 	 * likely to take a long time.
18256 	 */
18257 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18258 		flag |= SD_RETRIES_FAILFAST;
18259 	}
18260 
18261 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18262 
18263 	sd_retry_command(un, bp, flag,
18264 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18265 }
18266 
18267 
18268 
18269 /*
18270  *    Function: sd_pkt_reason_cmd_tran_err
18271  *
18272  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18273  *
18274  *     Context: May be called from interrupt context
18275  */
18276 
18277 static void
18278 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18279 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18280 {
18281 	ASSERT(un != NULL);
18282 	ASSERT(mutex_owned(SD_MUTEX(un)));
18283 	ASSERT(bp != NULL);
18284 	ASSERT(xp != NULL);
18285 	ASSERT(pktp != NULL);
18286 
18287 	/*
18288 	 * Do not reset if we got a parity error, or if
18289 	 * selection did not complete.
18290 	 */
18291 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18292 	/* Note: Should this not just check the bit for pkt_state? */
18293 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18294 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18295 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18296 		sd_reset_target(un, pktp);
18297 	}
18298 
18299 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18300 
18301 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18302 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18303 }
18304 
18305 
18306 
18307 /*
18308  *    Function: sd_pkt_reason_cmd_reset
18309  *
18310  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18311  *
18312  *     Context: May be called from interrupt context
18313  */
18314 
18315 static void
18316 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18317 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18318 {
18319 	ASSERT(un != NULL);
18320 	ASSERT(mutex_owned(SD_MUTEX(un)));
18321 	ASSERT(bp != NULL);
18322 	ASSERT(xp != NULL);
18323 	ASSERT(pktp != NULL);
18324 
18325 	/* The target may still be running the command, so try to reset. */
18326 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18327 	sd_reset_target(un, pktp);
18328 
18329 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18330 
18331 	/*
18332 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18333 	 * reset because another target on this bus caused it. The target
18334 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18335 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18336 	 */
18337 
18338 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18339 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18340 }
18341 
18342 
18343 
18344 
18345 /*
18346  *    Function: sd_pkt_reason_cmd_aborted
18347  *
18348  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18349  *
18350  *     Context: May be called from interrupt context
18351  */
18352 
18353 static void
18354 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18355 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18356 {
18357 	ASSERT(un != NULL);
18358 	ASSERT(mutex_owned(SD_MUTEX(un)));
18359 	ASSERT(bp != NULL);
18360 	ASSERT(xp != NULL);
18361 	ASSERT(pktp != NULL);
18362 
18363 	/* The target may still be running the command, so try to reset. */
18364 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18365 	sd_reset_target(un, pktp);
18366 
18367 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18368 
18369 	/*
18370 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18371 	 * aborted because another target on this bus caused it. The target
18372 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18373 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18374 	 */
18375 
18376 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18377 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18378 }
18379 
18380 
18381 
18382 /*
18383  *    Function: sd_pkt_reason_cmd_timeout
18384  *
18385  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18386  *
18387  *     Context: May be called from interrupt context
18388  */
18389 
18390 static void
18391 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18392 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18393 {
18394 	ASSERT(un != NULL);
18395 	ASSERT(mutex_owned(SD_MUTEX(un)));
18396 	ASSERT(bp != NULL);
18397 	ASSERT(xp != NULL);
18398 	ASSERT(pktp != NULL);
18399 
18400 
18401 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18402 	sd_reset_target(un, pktp);
18403 
18404 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18405 
18406 	/*
18407 	 * A command timeout indicates that we could not establish
18408 	 * communication with the target, so set SD_RETRIES_FAILFAST
18409 	 * as further retries/commands are likely to take a long time.
18410 	 */
18411 	sd_retry_command(un, bp,
18412 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18413 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18414 }
18415 
18416 
18417 
18418 /*
18419  *    Function: sd_pkt_reason_cmd_unx_bus_free
18420  *
18421  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18422  *
18423  *     Context: May be called from interrupt context
18424  */
18425 
18426 static void
18427 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18428 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18429 {
18430 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18431 
18432 	ASSERT(un != NULL);
18433 	ASSERT(mutex_owned(SD_MUTEX(un)));
18434 	ASSERT(bp != NULL);
18435 	ASSERT(xp != NULL);
18436 	ASSERT(pktp != NULL);
18437 
18438 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18439 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18440 
18441 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18442 	    sd_print_retry_msg : NULL;
18443 
18444 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18445 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18446 }
18447 
18448 
18449 /*
18450  *    Function: sd_pkt_reason_cmd_tag_reject
18451  *
18452  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18453  *
18454  *     Context: May be called from interrupt context
18455  */
18456 
18457 static void
18458 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18459 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18460 {
18461 	ASSERT(un != NULL);
18462 	ASSERT(mutex_owned(SD_MUTEX(un)));
18463 	ASSERT(bp != NULL);
18464 	ASSERT(xp != NULL);
18465 	ASSERT(pktp != NULL);
18466 
18467 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18468 	pktp->pkt_flags = 0;
18469 	un->un_tagflags = 0;
18470 	if (un->un_f_opt_queueing == TRUE) {
18471 		un->un_throttle = min(un->un_throttle, 3);
18472 	} else {
18473 		un->un_throttle = 1;
18474 	}
18475 	mutex_exit(SD_MUTEX(un));
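	/*
	 * scsi_ifsetcap(9F): set the "tagged-qing" capability to 0
	 * (disabled); the final argument of 1 applies the change to this
	 * particular target/LUN rather than to all targets on the bus.
	 */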
18476 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18477 	mutex_enter(SD_MUTEX(un));
18478 
18479 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18480 
18481 	/* Legacy behavior: do not check retry counts here. */
18482 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18483 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18484 }
18485 
18486 
18487 /*
18488  *    Function: sd_pkt_reason_default
18489  *
18490  * Description: Default recovery actions for SCSA pkt_reason values that
18491  *		do not have more explicit recovery actions.
18492  *
18493  *     Context: May be called from interrupt context
18494  */
18495 
18496 static void
18497 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18498 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18499 {
18500 	ASSERT(un != NULL);
18501 	ASSERT(mutex_owned(SD_MUTEX(un)));
18502 	ASSERT(bp != NULL);
18503 	ASSERT(xp != NULL);
18504 	ASSERT(pktp != NULL);
18505 
18506 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18507 	sd_reset_target(un, pktp);
18508 
18509 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18510 
18511 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18512 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18513 }
18514 
18515 
18516 
18517 /*
18518  *    Function: sd_pkt_status_check_condition
18519  *
18520  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18521  *
18522  *     Context: May be called from interrupt context
18523  */
18524 
18525 static void
18526 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18527 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18528 {
18529 	ASSERT(un != NULL);
18530 	ASSERT(mutex_owned(SD_MUTEX(un)));
18531 	ASSERT(bp != NULL);
18532 	ASSERT(xp != NULL);
18533 	ASSERT(pktp != NULL);
18534 
18535 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18536 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18537 
18538 	/*
18539 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18540 	 * command will be retried after the request sense). Otherwise, retry
18541 	 * the command. Note: we are issuing the request sense even though the
18542 	 * retry limit may have been reached for the failed command.
18543 	 */
18544 	if (un->un_f_arq_enabled == FALSE) {
18545 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18546 		    "no ARQ, sending request sense command\n");
18547 		sd_send_request_sense_command(un, bp, pktp);
18548 	} else {
18549 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18550 		    "ARQ,retrying request sense command\n");
18551 #if defined(__i386) || defined(__amd64)
18552 		/*
18553 		 * The delay value used here must be kept in sync with the
18554 		 * SD_RETRY_DELAY value in sddef.h, should that value change.
18555 		 */
18556 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18557 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
18558 		    NULL);
18559 #else
18560 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18561 		    EIO, SD_RETRY_DELAY, NULL);
18562 #endif
18563 	}
18564 
18565 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18566 }
18567 
18568 
18569 /*
18570  *    Function: sd_pkt_status_busy
18571  *
18572  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18573  *
18574  *     Context: May be called from interrupt context
18575  */
18576 
18577 static void
18578 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18579 	struct scsi_pkt *pktp)
18580 {
18581 	ASSERT(un != NULL);
18582 	ASSERT(mutex_owned(SD_MUTEX(un)));
18583 	ASSERT(bp != NULL);
18584 	ASSERT(xp != NULL);
18585 	ASSERT(pktp != NULL);
18586 
18587 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18588 	    "sd_pkt_status_busy: entry\n");
18589 
18590 	/* If retries are exhausted, just fail the command. */
18591 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18592 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18593 		    "device busy too long\n");
18594 		sd_return_failed_command(un, bp, EIO);
18595 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18596 		    "sd_pkt_status_busy: exit\n");
18597 		return;
18598 	}
18599 	xp->xb_retry_count++;
18600 
18601 	/*
18602 	 * Try to reset the target. However, we do not want to perform
18603 	 * more than one reset if the device continues to fail. The reset
18604 	 * will be performed when the retry count reaches the reset
18605 	 * threshold.  This threshold should be set such that at least
18606 	 * one retry is issued before the reset is performed.
18607 	 */
18608 	if (xp->xb_retry_count ==
18609 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18610 		int rval = 0;
18611 		mutex_exit(SD_MUTEX(un));
18612 		if (un->un_f_allow_bus_device_reset == TRUE) {
18613 			/*
18614 			 * First try to reset the LUN; if we cannot then
18615 			 * try to reset the target.
18616 			 */
18617 			if (un->un_f_lun_reset_enabled == TRUE) {
18618 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18619 				    "sd_pkt_status_busy: RESET_LUN\n");
18620 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18621 			}
18622 			if (rval == 0) {
18623 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18624 				    "sd_pkt_status_busy: RESET_TARGET\n");
18625 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18626 			}
18627 		}
18628 		if (rval == 0) {
18629 			/*
18630 			 * If the RESET_LUN and/or RESET_TARGET failed,
18631 			 * try RESET_ALL
18632 			 */
18633 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18634 			    "sd_pkt_status_busy: RESET_ALL\n");
18635 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18636 		}
18637 		mutex_enter(SD_MUTEX(un));
18638 		if (rval == 0) {
18639 			/*
18640 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18641 			 * At this point we give up & fail the command.
18642 			 */
18643 			sd_return_failed_command(un, bp, EIO);
18644 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18645 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18646 			return;
18647 		}
18648 	}
18649 
18650 	/*
18651 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18652 	 * we have already checked the retry counts above.
18653 	 */
18654 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18655 	    EIO, SD_BSY_TIMEOUT, NULL);
18656 
18657 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18658 	    "sd_pkt_status_busy: exit\n");
18659 }
18660 
18661 
18662 /*
18663  *    Function: sd_pkt_status_reservation_conflict
18664  *
18665  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18666  *		command status.
18667  *
18668  *     Context: May be called from interrupt context
18669  */
18670 
18671 static void
18672 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18673 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18674 {
18675 	ASSERT(un != NULL);
18676 	ASSERT(mutex_owned(SD_MUTEX(un)));
18677 	ASSERT(bp != NULL);
18678 	ASSERT(xp != NULL);
18679 	ASSERT(pktp != NULL);
18680 
18681 	/*
18682 	 * If the command was PERSISTENT_RESERVE_[IN|OUT], the reservation
18683 	 * conflict could be due to incorrect keys, an unregistered initiator,
18684 	 * an unreserved device, and so on.  So, we return EACCES to the caller.
18685 	 */
18686 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18687 		int cmd = SD_GET_PKT_OPCODE(pktp);
18688 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18689 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18690 			sd_return_failed_command(un, bp, EACCES);
18691 			return;
18692 		}
18693 	}
18694 
18695 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18696 
18697 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18698 		if (sd_failfast_enable != 0) {
18699 			/* By definition, we must panic here.... */
18700 			sd_panic_for_res_conflict(un);
18701 			/*NOTREACHED*/
18702 		}
18703 		SD_ERROR(SD_LOG_IO, un,
18704 		    "sd_handle_resv_conflict: Disk Reserved\n");
18705 		sd_return_failed_command(un, bp, EACCES);
18706 		return;
18707 	}
18708 
18709 	/*
18710 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18711 	 * property is set (default is 1). Retries will not succeed
18712 	 * on a disk reserved by another initiator. HA systems
18713 	 * may reset this via sd.conf to avoid these retries.
18714 	 *
18715 	 * Note: The legacy return code for this failure is EIO; however,
18716 	 * EACCES seems more appropriate for a reservation conflict.
18717 	 */
18718 	if (sd_retry_on_reservation_conflict == 0) {
18719 		SD_ERROR(SD_LOG_IO, un,
18720 		    "sd_handle_resv_conflict: Device Reserved\n");
18721 		sd_return_failed_command(un, bp, EIO);
18722 		return;
18723 	}
18724 
18725 	/*
18726 	 * Retry the command if we can.
18727 	 *
18728 	 * Note: The legacy return code for this failure is EIO; however,
18729 	 * EACCES seems more appropriate for a reservation conflict.
18730 	 */
18731 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18732 	    (clock_t)2, NULL);
18733 }
18734 
18735 
18736 
18737 /*
18738  *    Function: sd_pkt_status_qfull
18739  *
18740  * Description: Handle a QUEUE FULL condition from the target.  This can
18741  *		occur if the HBA does not handle the queue full condition.
18742  *		(Basically this means third-party HBAs, as Sun HBAs will
18743  *		handle the queue full condition.)  Note that if there are
18744  *		some commands already in the transport, then the queue full
18745  *		has occurred because the queue for this nexus is actually
18746  *		full. If there are no commands in the transport, then the
18747  *		queue full is resulting from some other initiator or lun
18748  *		consuming all the resources at the target.
18749  *
18750  *     Context: May be called from interrupt context
18751  */
18752 
18753 static void
18754 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18755 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18756 {
18757 	ASSERT(un != NULL);
18758 	ASSERT(mutex_owned(SD_MUTEX(un)));
18759 	ASSERT(bp != NULL);
18760 	ASSERT(xp != NULL);
18761 	ASSERT(pktp != NULL);
18762 
18763 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18764 	    "sd_pkt_status_qfull: entry\n");
18765 
18766 	/*
18767 	 * Just lower the QFULL throttle and retry the command.  Note that
18768 	 * we do not limit the number of retries here.
18769 	 */
18770 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18771 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18772 	    SD_RESTART_TIMEOUT, NULL);
18773 
18774 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18775 	    "sd_pkt_status_qfull: exit\n");
18776 }
18777 
18778 
18779 /*
18780  *    Function: sd_reset_target
18781  *
18782  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18783  *		RESET_TARGET, or RESET_ALL.
18784  *
18785  *     Context: May be called under interrupt context.
18786  */
18787 
18788 static void
18789 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18790 {
18791 	int rval = 0;
18792 
18793 	ASSERT(un != NULL);
18794 	ASSERT(mutex_owned(SD_MUTEX(un)));
18795 	ASSERT(pktp != NULL);
18796 
18797 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18798 
18799 	/*
18800 	 * No need to reset if the transport layer has already done so.
18801 	 */
18802 	if ((pktp->pkt_statistics &
18803 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18804 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18805 		    "sd_reset_target: no reset\n");
18806 		return;
18807 	}
18808 
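	/*
	 * Added note: the softstate mutex is dropped across scsi_reset(9F)
	 * since the reset may cause outstanding commands to complete, and
	 * those completion callbacks can re-enter sd and take the mutex.
	 */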
18809 	mutex_exit(SD_MUTEX(un));
18810 
18811 	if (un->un_f_allow_bus_device_reset == TRUE) {
18812 		if (un->un_f_lun_reset_enabled == TRUE) {
18813 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18814 			    "sd_reset_target: RESET_LUN\n");
18815 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18816 		}
18817 		if (rval == 0) {
18818 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18819 			    "sd_reset_target: RESET_TARGET\n");
18820 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18821 		}
18822 	}
18823 
18824 	if (rval == 0) {
18825 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18826 		    "sd_reset_target: RESET_ALL\n");
18827 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18828 	}
18829 
18830 	mutex_enter(SD_MUTEX(un));
18831 
18832 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18833 }
18834 
18835 
18836 /*
18837  *    Function: sd_media_change_task
18838  *
18839  * Description: Recovery action for a CDROM to become available again.
18840  *
18841  *     Context: Executes in a taskq() thread context
18842  */
18843 
18844 static void
18845 sd_media_change_task(void *arg)
18846 {
18847 	struct	scsi_pkt	*pktp = arg;
18848 	struct	sd_lun		*un;
18849 	struct	buf		*bp;
18850 	struct	sd_xbuf		*xp;
18851 	int	err		= 0;
18852 	int	retry_count	= 0;
18853 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18854 	struct	sd_sense_info	si;
18855 
18856 	ASSERT(pktp != NULL);
18857 	bp = (struct buf *)pktp->pkt_private;
18858 	ASSERT(bp != NULL);
18859 	xp = SD_GET_XBUF(bp);
18860 	ASSERT(xp != NULL);
18861 	un = SD_GET_UN(bp);
18862 	ASSERT(un != NULL);
18863 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18864 	ASSERT(un->un_f_monitor_media_state);
18865 
18866 	si.ssi_severity = SCSI_ERR_INFO;
18867 	si.ssi_pfa_flag = FALSE;
18868 
18869 	/*
18870 	 * When a reset is issued on a CDROM, it takes a long time to
18871 	 * recover. The first few attempts to read the capacity and other
18872 	 * things related to handling the unit attention fail (with an ASC
18873 	 * of 0x4 and an ASCQ of 0x1). In that case we want to do enough
18874 	 * retries, while limiting the retries in other cases of genuine
18875 	 * failure, such as no media in the drive.
18876 	 */
18877 	while (retry_count++ < retry_limit) {
18878 		if ((err = sd_handle_mchange(un)) == 0) {
18879 			break;
18880 		}
18881 		if (err == EAGAIN) {
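			/*
			 * EAGAIN indicates the device is still becoming
			 * ready, so allow the full unit-attention retry
			 * budget instead of the reduced default above.
			 */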
18882 			retry_limit = SD_UNIT_ATTENTION_RETRY;
18883 		}
18884 		/* Sleep for 0.5 sec. & try again */
18885 		delay(drv_usectohz(500000));
18886 	}
18887 
18888 	/*
18889 	 * Dispatch (retry or fail) the original command here,
18890 	 * along with appropriate console messages....
18891 	 *
18892 	 * Must grab the mutex before calling sd_retry_command,
18893 	 * sd_print_sense_msg and sd_return_failed_command.
18894 	 */
18895 	mutex_enter(SD_MUTEX(un));
18896 	if (err != SD_CMD_SUCCESS) {
18897 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18898 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18899 		si.ssi_severity = SCSI_ERR_FATAL;
18900 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18901 		sd_return_failed_command(un, bp, EIO);
18902 	} else {
18903 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
18904 		    &si, EIO, (clock_t)0, NULL);
18905 	}
18906 	mutex_exit(SD_MUTEX(un));
18907 }
18908 
18909 
18910 
18911 /*
18912  *    Function: sd_handle_mchange
18913  *
18914  * Description: Perform geometry validation & other recovery when the
18915  *		medium in a CDROM drive has been changed or removed.
18916  *
18917  * Return Code: 0 for success
18918  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
18919  *		sd_send_scsi_READ_CAPACITY()
18920  *
18921  *     Context: Executes in a taskq() thread context
18922  */
18923 
18924 static int
18925 sd_handle_mchange(struct sd_lun *un)
18926 {
18927 	uint64_t	capacity;
18928 	uint32_t	lbasize;
18929 	int		rval;
18930 
18931 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18932 	ASSERT(un->un_f_monitor_media_state);
18933 
18934 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
18935 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
18936 		return (rval);
18937 	}
18938 
18939 	mutex_enter(SD_MUTEX(un));
18940 	sd_update_block_info(un, lbasize, capacity);
18941 
18942 	if (un->un_errstats != NULL) {
18943 		struct	sd_errstats *stp =
18944 		    (struct sd_errstats *)un->un_errstats->ks_data;
18945 		stp->sd_capacity.value.ui64 = (uint64_t)
18946 		    ((uint64_t)un->un_blockcount *
18947 		    (uint64_t)un->un_tgt_blocksize);
18948 	}
18949 
18950 	/*
18951 	 * Note: Maybe let the strategy/partitioning chain worry about getting
18952 	 * valid geometry.
18953 	 */
18954 	un->un_f_geometry_is_valid = FALSE;
18955 	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
18956 	if (un->un_f_geometry_is_valid == FALSE) {
18957 		mutex_exit(SD_MUTEX(un));
18958 		return (EIO);
18959 	}
18960 
18961 	mutex_exit(SD_MUTEX(un));
18962 
18963 	/*
18964 	 * Try to lock the door
18965 	 */
18966 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
18967 	    SD_PATH_DIRECT_PRIORITY));
18968 }
18969 
18970 
18971 /*
18972  *    Function: sd_send_scsi_DOORLOCK
18973  *
18974  * Description: Issue the scsi DOOR LOCK command
18975  *
18976  *   Arguments: un    - pointer to driver soft state (unit) structure for
18977  *			this target.
18978  *		flag  - SD_REMOVAL_ALLOW
18979  *			SD_REMOVAL_PREVENT
18980  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18981  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18982  *			to use the USCSI "direct" chain and bypass the normal
18983  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
18984  *			command is issued as part of an error recovery action.
18985  *
18986  * Return Code: 0   - Success
18987  *		errno return code from sd_send_scsi_cmd()
18988  *
18989  *     Context: Can sleep.
18990  */
18991 
18992 static int
18993 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
18994 {
18995 	union scsi_cdb		cdb;
18996 	struct uscsi_cmd	ucmd_buf;
18997 	struct scsi_extended_sense	sense_buf;
18998 	int			status;
18999 
19000 	ASSERT(un != NULL);
19001 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19002 
19003 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19004 
19005 	/* already determined doorlock is not supported, fake success */
19006 	if (un->un_f_doorlock_supported == FALSE) {
19007 		return (0);
19008 	}
19009 
19010 	bzero(&cdb, sizeof (cdb));
19011 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19012 
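	/*
	 * SCMD_DOORLOCK is the PREVENT ALLOW MEDIUM REMOVAL opcode; byte 4
	 * of the CDB carries the Prevent field (SD_REMOVAL_PREVENT or
	 * SD_REMOVAL_ALLOW).
	 */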
19013 	cdb.scc_cmd = SCMD_DOORLOCK;
19014 	cdb.cdb_opaque[4] = (uchar_t)flag;
19015 
19016 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19017 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19018 	ucmd_buf.uscsi_bufaddr	= NULL;
19019 	ucmd_buf.uscsi_buflen	= 0;
19020 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19021 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19022 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19023 	ucmd_buf.uscsi_timeout	= 15;
19024 
19025 	SD_TRACE(SD_LOG_IO, un,
19026 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
19027 
19028 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19029 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19030 
19031 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19032 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19033 	    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19034 		/* fake success and skip subsequent doorlock commands */
19035 		un->un_f_doorlock_supported = FALSE;
19036 		return (0);
19037 	}
19038 
19039 	return (status);
19040 }
19041 
19042 /*
19043  *    Function: sd_send_scsi_READ_CAPACITY
19044  *
19045  * Description: This routine uses the scsi READ CAPACITY command to determine
19046  *		the device capacity in number of blocks and the device native
19047  *		block size. If this function returns a failure, then the
19048  *		values in *capp and *lbap are undefined.  If the capacity
19049  *		returned is 0xffffffff then the lun is too large for a
19050  *		normal READ CAPACITY command and the results of a
19051  *		READ CAPACITY 16 will be used instead.
19052  *
19053  *   Arguments: un   - ptr to soft state struct for the target
19054  *		capp - ptr to unsigned 64-bit variable to receive the
19055  *			capacity value from the command.
19056  *		lbap - ptr to unsigned 32-bit variable to receive the
19057  *			block size value from the command
19058  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19059  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19060  *			to use the USCSI "direct" chain and bypass the normal
19061  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19062  *			command is issued as part of an error recovery action.
19063  *
19064  * Return Code: 0   - Success
19065  *		EIO - IO error
19066  *		EACCES - Reservation conflict detected
19067  *		EAGAIN - Device is becoming ready
19068  *		errno return code from sd_send_scsi_cmd()
19069  *
19070  *     Context: Can sleep.  Blocks until command completes.
19071  */
19072 
19073 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19074 
19075 static int
19076 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
19077 	int path_flag)
19078 {
19079 	struct	scsi_extended_sense	sense_buf;
19080 	struct	uscsi_cmd	ucmd_buf;
19081 	union	scsi_cdb	cdb;
19082 	uint32_t		*capacity_buf;
19083 	uint64_t		capacity;
19084 	uint32_t		lbasize;
19085 	int			status;
19086 
19087 	ASSERT(un != NULL);
19088 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19089 	ASSERT(capp != NULL);
19090 	ASSERT(lbap != NULL);
19091 
19092 	SD_TRACE(SD_LOG_IO, un,
19093 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19094 
19095 	/*
19096 	 * First send a READ_CAPACITY command to the target.
19097 	 * (This command is mandatory under SCSI-2.)
19098 	 *
19099 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19100 	 * Medium Indicator bit is cleared.  The address field must be
19101 	 * zero if the PMI bit is zero.
19102 	 */
19103 	bzero(&cdb, sizeof (cdb));
19104 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19105 
19106 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19107 
19108 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19109 
19110 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19111 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19112 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19113 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19114 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19115 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19116 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19117 	ucmd_buf.uscsi_timeout	= 60;
19118 
19119 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19120 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19121 
19122 	switch (status) {
19123 	case 0:
19124 		/* Return failure if we did not get valid capacity data. */
19125 		if (ucmd_buf.uscsi_resid != 0) {
19126 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19127 			return (EIO);
19128 		}
19129 
19130 		/*
19131 		 * Read capacity and block size from the READ CAPACITY 10 data.
19132 		 * This data may be adjusted later due to device specific
19133 		 * issues.
19134 		 *
19135 		 * According to the SCSI spec, the READ CAPACITY 10
19136 		 * command returns the following:
19137 		 *
19138 		 *  bytes 0-3: Maximum logical block address available.
19139 		 *		(MSB in byte:0 & LSB in byte:3)
19140 		 *
19141 		 *  bytes 4-7: Block length in bytes
19142 		 *		(MSB in byte:4 & LSB in byte:7)
19143 		 *
19144 		 */
19145 		capacity = BE_32(capacity_buf[0]);
19146 		lbasize = BE_32(capacity_buf[1]);
19147 
19148 		/*
19149 		 * Done with capacity_buf
19150 		 */
19151 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19152 
19153 		/*
19154 		 * If the reported capacity is set to all 0xf's, then
19155 		 * this disk is too large and requires SBC-2 commands.
19156 		 * Reissue the request using READ CAPACITY 16.
19157 		 */
19158 		if (capacity == 0xffffffff) {
19159 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19160 			    &lbasize, path_flag);
19161 			if (status != 0) {
19162 				return (status);
19163 			}
19164 		}
19165 		break;	/* Success! */
19166 	case EIO:
19167 		switch (ucmd_buf.uscsi_status) {
19168 		case STATUS_RESERVATION_CONFLICT:
19169 			status = EACCES;
19170 			break;
19171 		case STATUS_CHECK:
19172 			/*
19173 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19174 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19175 			 */
19176 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19177 			    (sense_buf.es_add_code  == 0x04) &&
19178 			    (sense_buf.es_qual_code == 0x01)) {
19179 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19180 				return (EAGAIN);
19181 			}
19182 			break;
19183 		default:
19184 			break;
19185 		}
19186 		/* FALLTHRU */
19187 	default:
19188 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19189 		return (status);
19190 	}
19191 
19192 	/*
19193 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19194 	 * (2352 and 0 are common) so for these devices always force the value
19195 	 * to 2048 as required by the ATAPI specs.
19196 	 */
19197 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19198 		lbasize = 2048;
19199 	}
19200 
19201 	/*
19202 	 * Get the maximum LBA value from the READ CAPACITY data.
19203 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19204 	 * was cleared when issuing the command. This means that the LBA
19205 	 * returned from the device is the LBA of the last logical block
19206 	 * on the logical unit.  The actual logical block count will be
19207 	 * this value plus one.
19208 	 *
19209 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19210 	 * so scale the capacity value to reflect this.
19211 	 */
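	/*
	 * Worked example (added): a device reporting a maximum LBA of
	 * 0x3FFFFF with 2048-byte blocks yields, assuming the usual
	 * 512-byte un_sys_blocksize,
	 *
	 *	(0x3FFFFF + 1) * (2048 / 512) = 0x1000000 system blocks.
	 */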
19212 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
19213 
19214 #if defined(__i386) || defined(__amd64)
19215 	/*
19216 	 * On x86, compensate for off-by-1 error (number of sectors on
19217 	 * media)  (1175930)
19218 	 */
19219 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
19220 	    (lbasize == un->un_sys_blocksize)) {
19221 		capacity -= 1;
19222 	}
19223 #endif
19224 
19225 	/*
19226 	 * Copy the values from the READ CAPACITY command into the space
19227 	 * provided by the caller.
19228 	 */
19229 	*capp = capacity;
19230 	*lbap = lbasize;
19231 
19232 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19233 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19234 
19235 	/*
19236 	 * Both the lbasize and capacity from the device must be nonzero,
19237 	 * otherwise we assume that the values are not valid and return
19238 	 * failure to the caller. (4203735)
19239 	 */
19240 	if ((capacity == 0) || (lbasize == 0)) {
19241 		return (EIO);
19242 	}
19243 
19244 	return (0);
19245 }
19246 
19247 /*
19248  *    Function: sd_send_scsi_READ_CAPACITY_16
19249  *
19250  * Description: This routine uses the scsi READ CAPACITY 16 command to
19251  *		determine the device capacity in number of blocks and the
19252  *		device native block size.  If this function returns a failure,
19253  *		then the values in *capp and *lbap are undefined.
19254  *		This routine should always be called by
19255  *		sd_send_scsi_READ_CAPACITY, which will apply any device-
19256  *		specific adjustments to capacity and lbasize.
19257  *
19258  *   Arguments: un   - ptr to soft state struct for the target
19259  *		capp - ptr to unsigned 64-bit variable to receive the
19260  *			capacity value from the command.
19261  *		lbap - ptr to unsigned 32-bit variable to receive the
19262  *			block size value from the command
19263  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19264  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19265  *			to use the USCSI "direct" chain and bypass the normal
19266  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19267  *			this command is issued as part of an error recovery
19268  *			action.
19269  *
19270  * Return Code: 0   - Success
19271  *		EIO - IO error
19272  *		EACCES - Reservation conflict detected
19273  *		EAGAIN - Device is becoming ready
19274  *		errno return code from sd_send_scsi_cmd()
19275  *
19276  *     Context: Can sleep.  Blocks until command completes.
19277  */
19278 
19279 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19280 
19281 static int
19282 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19283 	uint32_t *lbap, int path_flag)
19284 {
19285 	struct	scsi_extended_sense	sense_buf;
19286 	struct	uscsi_cmd	ucmd_buf;
19287 	union	scsi_cdb	cdb;
19288 	uint64_t		*capacity16_buf;
19289 	uint64_t		capacity;
19290 	uint32_t		lbasize;
19291 	int			status;
19292 
19293 	ASSERT(un != NULL);
19294 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19295 	ASSERT(capp != NULL);
19296 	ASSERT(lbap != NULL);
19297 
19298 	SD_TRACE(SD_LOG_IO, un,
19299 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19300 
19301 	/*
19302 	 * First send a READ_CAPACITY_16 command to the target.
19303 	 *
19304 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19305 	 * Medium Indicator bit is cleared.  The address field must be
19306 	 * zero if the PMI bit is zero.
19307 	 */
19308 	bzero(&cdb, sizeof (cdb));
19309 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19310 
19311 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19312 
19313 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19314 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19315 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19316 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19317 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19318 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19319 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19320 	ucmd_buf.uscsi_timeout	= 60;
19321 
19322 	/*
19323 	 * Read Capacity (16) is a Service Action In command.  One
19324 	 * command byte (0x9E) is overloaded for multiple operations,
19325 	 * with the second CDB byte specifying the desired operation
19326 	 */
19327 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19328 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19329 
19330 	/*
19331 	 * Fill in allocation length field
19332 	 */
19333 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19334 
19335 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19336 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19337 
19338 	switch (status) {
19339 	case 0:
19340 		/* Return failure if we did not get valid capacity data. */
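		/*
		 * Added note: the reply is SD_CAPACITY_16_SIZE (32) bytes,
		 * but only the first 12 -- 8 bytes of capacity plus 4 of
		 * block length -- are required, so a residual greater than
		 * 20 means the data we need was truncated.
		 */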
19341 		if (ucmd_buf.uscsi_resid > 20) {
19342 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19343 			return (EIO);
19344 		}
19345 
19346 		/*
19347 		 * Read capacity and block size from the READ CAPACITY 16 data.
19348 		 * This data may be adjusted later due to device specific
19349 		 * issues.
19350 		 *
19351 		 * According to the SCSI spec, the READ CAPACITY 16
19352 		 * command returns the following:
19353 		 *
19354 		 *  bytes 0-7: Maximum logical block address available.
19355 		 *		(MSB in byte:0 & LSB in byte:7)
19356 		 *
19357 		 *  bytes 8-11: Block length in bytes
19358 		 *		(MSB in byte:8 & LSB in byte:11)
19359 		 *
19360 		 */
19361 		capacity = BE_64(capacity16_buf[0]);
19362 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19363 
19364 		/*
19365 		 * Done with capacity16_buf
19366 		 */
19367 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19368 
19369 		/*
19370 		 * If the reported capacity is set to all 0xf's, then
19371 		 * this disk is too large.  This could only happen with
19372 		 * a device that supports LBAs larger than 64 bits which
19373 		 * are not defined by any current T10 standards.
19374 		 */
19375 		if (capacity == 0xffffffffffffffff) {
19376 			return (EIO);
19377 		}
19378 		break;	/* Success! */
19379 	case EIO:
19380 		switch (ucmd_buf.uscsi_status) {
19381 		case STATUS_RESERVATION_CONFLICT:
19382 			status = EACCES;
19383 			break;
19384 		case STATUS_CHECK:
19385 			/*
19386 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19387 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19388 			 */
19389 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19390 			    (sense_buf.es_add_code  == 0x04) &&
19391 			    (sense_buf.es_qual_code == 0x01)) {
19392 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19393 				return (EAGAIN);
19394 			}
19395 			break;
19396 		default:
19397 			break;
19398 		}
19399 		/* FALLTHRU */
19400 	default:
19401 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19402 		return (status);
19403 	}
19404 
19405 	*capp = capacity;
19406 	*lbap = lbasize;
19407 
19408 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19409 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19410 
19411 	return (0);
19412 }
19413 
19414 
19415 /*
19416  *    Function: sd_send_scsi_START_STOP_UNIT
19417  *
19418  * Description: Issue a scsi START STOP UNIT command to the target.
19419  *
19420  *   Arguments: un    - pointer to driver soft state (unit) structure for
19421  *			this target.
19422  *		flag  - SD_TARGET_START
19423  *			SD_TARGET_STOP
19424  *			SD_TARGET_EJECT
19425  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19426  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19427  *			to use the USCSI "direct" chain and bypass the normal
19428  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19429  *			command is issued as part of an error recovery action.
19430  *
19431  * Return Code: 0   - Success
19432  *		EIO - IO error
19433  *		EACCES - Reservation conflict detected
19434  *		ENXIO  - Not Ready, medium not present
19435  *		errno return code from sd_send_scsi_cmd()
19436  *
19437  *     Context: Can sleep.
19438  */
19439 
19440 static int
19441 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19442 {
19443 	struct	scsi_extended_sense	sense_buf;
19444 	union scsi_cdb		cdb;
19445 	struct uscsi_cmd	ucmd_buf;
19446 	int			status;
19447 
19448 	ASSERT(un != NULL);
19449 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19450 
19451 	SD_TRACE(SD_LOG_IO, un,
19452 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19453 
19454 	if (un->un_f_check_start_stop &&
19455 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19456 	    (un->un_f_start_stop_supported != TRUE)) {
19457 		return (0);
19458 	}
19459 
19460 	bzero(&cdb, sizeof (cdb));
19461 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19462 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19463 
19464 	cdb.scc_cmd = SCMD_START_STOP;
19465 	cdb.cdb_opaque[4] = (uchar_t)flag;
19466 
19467 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19468 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19469 	ucmd_buf.uscsi_bufaddr	= NULL;
19470 	ucmd_buf.uscsi_buflen	= 0;
19471 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19472 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19473 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19474 	ucmd_buf.uscsi_timeout	= 200;
19475 
19476 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19477 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19478 
19479 	switch (status) {
19480 	case 0:
19481 		break;	/* Success! */
19482 	case EIO:
19483 		switch (ucmd_buf.uscsi_status) {
19484 		case STATUS_RESERVATION_CONFLICT:
19485 			status = EACCES;
19486 			break;
19487 		case STATUS_CHECK:
19488 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19489 				switch (sense_buf.es_key) {
19490 				case KEY_ILLEGAL_REQUEST:
19491 					status = ENOTSUP;
19492 					break;
19493 				case KEY_NOT_READY:
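					/* ASC 0x3A: medium not present */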
19494 					if (sense_buf.es_add_code == 0x3A) {
19495 						status = ENXIO;
19496 					}
19497 					break;
19498 				default:
19499 					break;
19500 				}
19501 			}
19502 			break;
19503 		default:
19504 			break;
19505 		}
19506 		break;
19507 	default:
19508 		break;
19509 	}
19510 
19511 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19512 
19513 	return (status);
19514 }
19515 
19516 
19517 /*
19518  *    Function: sd_start_stop_unit_callback
19519  *
19520  * Description: timeout(9F) callback to begin recovery process for a
19521  *		device that has spun down.
19522  *
19523  *   Arguments: arg - pointer to associated softstate struct.
19524  *
19525  *     Context: Executes in a timeout(9F) thread context
19526  */
19527 
19528 static void
19529 sd_start_stop_unit_callback(void *arg)
19530 {
19531 	struct sd_lun	*un = arg;
19532 	ASSERT(un != NULL);
19533 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19534 
19535 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19536 
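	/*
	 * Hand the recovery off to a taskq thread: sd_start_stop_unit_task
	 * issues a blocking START STOP UNIT command, which cannot be done
	 * from timeout(9F) context.  KM_NOSLEEP is used since this callback
	 * must not block.
	 */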
19537 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19538 }
19539 
19540 
19541 /*
19542  *    Function: sd_start_stop_unit_task
19543  *
19544  * Description: Recovery procedure when a drive is spun down.
19545  *
19546  *   Arguments: arg - pointer to associated softstate struct.
19547  *
19548  *     Context: Executes in a taskq() thread context
19549  */
19550 
19551 static void
19552 sd_start_stop_unit_task(void *arg)
19553 {
19554 	struct sd_lun	*un = arg;
19555 
19556 	ASSERT(un != NULL);
19557 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19558 
19559 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19560 
19561 	/*
19562 	 * Some unformatted drives report a Not Ready error; there is no
19563 	 * need to restart if a format has been initiated.
19564 	 */
19565 	mutex_enter(SD_MUTEX(un));
19566 	if (un->un_f_format_in_progress == TRUE) {
19567 		mutex_exit(SD_MUTEX(un));
19568 		return;
19569 	}
19570 	mutex_exit(SD_MUTEX(un));
19571 
19572 	/*
19573 	 * When a START STOP command is issued from here, it is part of a
19574 	 * failure recovery operation and must be issued before any other
19575 	 * commands, including any pending retries. Thus it must be sent
19576 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter whether the
19577 	 * spin-up succeeds or not; we will start I/O after the attempt.
19578 	 */
19579 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19580 	    SD_PATH_DIRECT_PRIORITY);
19581 
19582 	/*
19583 	 * The above call blocks until the START_STOP_UNIT command completes.
19584 	 * Now that it has completed, we must re-try the original IO that
19585 	 * received the NOT READY condition in the first place. There are
19586 	 * three possible conditions here:
19587 	 *
19588 	 *  (1) The original IO is on un_retry_bp.
19589 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19590 	 *	is NULL.
19591 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19592 	 *	points to some other, unrelated bp.
19593 	 *
19594 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19595 	 * as the argument. If un_retry_bp is NULL, this will initiate
19596 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19597 	 * then this will process the bp on un_retry_bp. That may or may not
19598 	 * be the original IO, but that does not matter: the important thing
19599 	 * is to keep the IO processing going at this point.
19600 	 *
19601 	 * Note: This is a very specific error recovery sequence associated
19602 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19603 	 * serialize the I/O with completion of the spin-up.
19604 	 */
19605 	mutex_enter(SD_MUTEX(un));
19606 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19607 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19608 	    un, un->un_retry_bp);
19609 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19610 	sd_start_cmds(un, un->un_retry_bp);
19611 	mutex_exit(SD_MUTEX(un));
19612 
19613 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19614 }
19615 
19616 
19617 /*
19618  *    Function: sd_send_scsi_INQUIRY
19619  *
19620  * Description: Issue the scsi INQUIRY command.
19621  *
19622  *   Arguments: un - ptr to soft state struct for the target
19623  *		bufaddr - buffer to receive the INQUIRY data
19624  *		buflen - size of bufaddr, in bytes
19625  *		evpd - value for the EVPD bit (byte 1) of the CDB
19626  *		page_code - VPD page code (byte 2) of the CDB
19627  *		residp - optional ptr to receive the uscsi residual count
19628  *
19629  * Return Code: 0   - Success
19630  *		errno return code from sd_send_scsi_cmd()
19631  *
19632  *     Context: Can sleep. Does not return until command is completed.
19633  */
19634 
19635 static int
19636 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19637 	uchar_t evpd, uchar_t page_code, size_t *residp)
19638 {
19639 	union scsi_cdb		cdb;
19640 	struct uscsi_cmd	ucmd_buf;
19641 	int			status;
19642 
19643 	ASSERT(un != NULL);
19644 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19645 	ASSERT(bufaddr != NULL);
19646 
19647 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19648 
19649 	bzero(&cdb, sizeof (cdb));
19650 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19651 	bzero(bufaddr, buflen);
19652 
19653 	cdb.scc_cmd = SCMD_INQUIRY;
19654 	cdb.cdb_opaque[1] = evpd;
19655 	cdb.cdb_opaque[2] = page_code;
19656 	FORMG0COUNT(&cdb, buflen);
19657 
19658 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19659 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19660 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19661 	ucmd_buf.uscsi_buflen	= buflen;
19662 	ucmd_buf.uscsi_rqbuf	= NULL;
19663 	ucmd_buf.uscsi_rqlen	= 0;
19664 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19665 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19666 
19667 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19668 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19669 
19670 	if ((status == 0) && (residp != NULL)) {
19671 		*residp = ucmd_buf.uscsi_resid;
19672 	}
19673 
19674 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19675 
19676 	return (status);
19677 }
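
#if 0	/* Illustrative usage sketch (added; not part of the driver) */
/*
 * Fetch the unit serial number VPD page with sd_send_scsi_INQUIRY.  The
 * EVPD bit is set (0x01) and page code 0x80 selects the serial number
 * page; the buffer size and the surrounding function are example values.
 */
static int
sd_example_get_serial_vpd(struct sd_lun *un)
{
	uchar_t	inq80[0xff];
	size_t	resid;
	int	rval;

	/* evpd = 0x01, page_code = 0x80 (unit serial number) */
	rval = sd_send_scsi_INQUIRY(un, inq80, sizeof (inq80),
	    0x01, 0x80, &resid);
	if (rval == 0) {
		/* (sizeof (inq80) - resid) bytes of inq80 are now valid */
	}
	return (rval);
}
#endif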
19678 
19679 
19680 /*
19681  *    Function: sd_send_scsi_TEST_UNIT_READY
19682  *
19683  * Description: Issue the scsi TEST UNIT READY command.
19684  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19685  *		prevent retrying failed commands. Use this when the intent
19686  *		is either to check for device readiness, to clear a Unit
19687  *		Attention, or to clear any outstanding sense data.
19688  *		However under specific conditions the expected behavior
19689  *		is for retries to bring a device ready, so use the flag
19690  *		with caution.
19691  *
19692  *   Arguments: un
19693  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19694  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19695  *			0: don't check for media present; do retries on cmd.
19696  *
19697  * Return Code: 0   - Success
19698  *		EIO - IO error
19699  *		EACCES - Reservation conflict detected
19700  *		ENXIO  - Not Ready, medium not present
19701  *		errno return code from sd_send_scsi_cmd()
19702  *
19703  *     Context: Can sleep. Does not return until command is completed.
19704  */
19705 
19706 static int
19707 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19708 {
19709 	struct	scsi_extended_sense	sense_buf;
19710 	union scsi_cdb		cdb;
19711 	struct uscsi_cmd	ucmd_buf;
19712 	int			status;
19713 
19714 	ASSERT(un != NULL);
19715 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19716 
19717 	SD_TRACE(SD_LOG_IO, un,
19718 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19719 
19720 	/*
19721 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19722 	 * timeouts when they receive a TUR and the queue is not empty. Check
19723 	 * the configuration flag set during attach (indicating the drive has
19724 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19725 	 * TUR. If there are pending commands, return success; this is a bit
19726 	 * arbitrary, but it is OK for non-removables (i.e. the elite1 disks)
19727 	 * and non-clustering configurations.
19729 	 */
19730 	if (un->un_f_cfg_tur_check == TRUE) {
19731 		mutex_enter(SD_MUTEX(un));
19732 		if (un->un_ncmds_in_transport != 0) {
19733 			mutex_exit(SD_MUTEX(un));
19734 			return (0);
19735 		}
19736 		mutex_exit(SD_MUTEX(un));
19737 	}
19738 
19739 	bzero(&cdb, sizeof (cdb));
19740 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19741 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19742 
19743 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19744 
19745 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19746 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19747 	ucmd_buf.uscsi_bufaddr	= NULL;
19748 	ucmd_buf.uscsi_buflen	= 0;
19749 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19750 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19751 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19752 
19753 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19754 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19755 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19756 	}
19757 	ucmd_buf.uscsi_timeout	= 60;
19758 
19759 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19760 	    UIO_SYSSPACE, UIO_SYSSPACE,
19761 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19762 
19763 	switch (status) {
19764 	case 0:
19765 		break;	/* Success! */
19766 	case EIO:
19767 		switch (ucmd_buf.uscsi_status) {
19768 		case STATUS_RESERVATION_CONFLICT:
19769 			status = EACCES;
19770 			break;
19771 		case STATUS_CHECK:
19772 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19773 				break;
19774 			}
19775 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19776 			    (sense_buf.es_key == KEY_NOT_READY) &&
19777 			    (sense_buf.es_add_code == 0x3A)) {
19778 				status = ENXIO;
19779 			}
19780 			break;
19781 		default:
19782 			break;
19783 		}
19784 		break;
19785 	default:
19786 		break;
19787 	}
19788 
19789 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19790 
19791 	return (status);
19792 }
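
/*
 * Illustrative usage sketch (not part of the driver): a caller polling
 * for media presence might combine the flags documented above as
 * follows; a valid un pointer is assumed.
 *
 *	int rv = sd_send_scsi_TEST_UNIT_READY(un,
 *	    SD_CHECK_FOR_MEDIA | SD_DONT_RETRY_TUR);
 *	if (rv == ENXIO) {
 *		... medium not present: defer the operation ...
 *	} else if (rv == EACCES) {
 *		... another host holds a reservation ...
 *	}
 */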
19793 
19794 
19795 /*
19796  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19797  *
19798  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19799  *
19800  *   Arguments: un; usr_cmd - SD_READ_KEYS or SD_READ_RESV; data_len and
19801  *		data_bufp - optional result buffer (NULL = allocated here)
19802  * Return Code: 0   - Success
19803  *		EACCES
19804  *		ENOTSUP
19805  *		errno return code from sd_send_scsi_cmd()
19806  *
19807  *     Context: Can sleep. Does not return until command is completed.
19808  */
19809 
19810 static int
19811 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19812 	uint16_t data_len, uchar_t *data_bufp)
19813 {
19814 	struct scsi_extended_sense	sense_buf;
19815 	union scsi_cdb		cdb;
19816 	struct uscsi_cmd	ucmd_buf;
19817 	int			status;
19818 	int			no_caller_buf = FALSE;
19819 
19820 	ASSERT(un != NULL);
19821 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19822 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19823 
19824 	SD_TRACE(SD_LOG_IO, un,
19825 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19826 
19827 	bzero(&cdb, sizeof (cdb));
19828 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19829 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19830 	if (data_bufp == NULL) {
19831 		/* Allocate a default buf if the caller did not give one */
19832 		ASSERT(data_len == 0);
19833 		data_len  = MHIOC_RESV_KEY_SIZE;
19834 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19835 		no_caller_buf = TRUE;
19836 	}
19837 
19838 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19839 	cdb.cdb_opaque[1] = usr_cmd;
19840 	FORMG1COUNT(&cdb, data_len);
19841 
19842 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19843 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19844 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19845 	ucmd_buf.uscsi_buflen	= data_len;
19846 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19847 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19848 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19849 	ucmd_buf.uscsi_timeout	= 60;
19850 
19851 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19852 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19853 
19854 	switch (status) {
19855 	case 0:
19856 		break;	/* Success! */
19857 	case EIO:
19858 		switch (ucmd_buf.uscsi_status) {
19859 		case STATUS_RESERVATION_CONFLICT:
19860 			status = EACCES;
19861 			break;
19862 		case STATUS_CHECK:
19863 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19864 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19865 				status = ENOTSUP;
19866 			}
19867 			break;
19868 		default:
19869 			break;
19870 		}
19871 		break;
19872 	default:
19873 		break;
19874 	}
19875 
19876 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
19877 
19878 	if (no_caller_buf == TRUE) {
19879 		kmem_free(data_bufp, data_len);
19880 	}
19881 
19882 	return (status);
19883 }
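
/*
 * Illustrative usage sketch (not part of the driver): reading the
 * registered keys into a caller-supplied buffer. The buffer size here
 * is arbitrary and chosen only for the example.
 *
 *	uchar_t keybuf[MHIOC_RESV_KEY_SIZE * 8];
 *
 *	if (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
 *	    sizeof (keybuf), keybuf) == ENOTSUP) {
 *		... target lacks SCSI-3 persistent reservation support ...
 *	}
 */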
19884 
19885 
19886 /*
19887  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
19888  *
19889  * Description: This routine is the driver entry point for handling
19890  *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
19891  *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT, etc.) by sending
19892  *		the SCSI-3 PROUT commands to the device.
19893  *
19894  *   Arguments: un  -   Pointer to soft state struct for the target.
19895  *		usr_cmd SCSI-3 reservation facility command (one of
19896  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
19897  *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_REGISTERANDIGNOREKEY)
19898  *		usr_bufp - user-provided pointer to a register, reserve
19899  *			descriptor, or preempt-and-abort structure (mhioc_register_t,
19900  *			mhioc_resv_desc_t, mhioc_preemptandabort_t)
19901  *
19902  * Return Code: 0   - Success
19903  *		EACCES
19904  *		ENOTSUP
19905  *		errno return code from sd_send_scsi_cmd()
19906  *
19907  *     Context: Can sleep. Does not return until command is completed.
19908  */
19909 
19910 static int
19911 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
19912 	uchar_t	*usr_bufp)
19913 {
19914 	struct scsi_extended_sense	sense_buf;
19915 	union scsi_cdb		cdb;
19916 	struct uscsi_cmd	ucmd_buf;
19917 	int			status;
19918 	uchar_t			data_len = sizeof (sd_prout_t);
19919 	sd_prout_t		*prp;
19920 
19921 	ASSERT(un != NULL);
19922 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19923 	ASSERT(data_len == 24);	/* required by scsi spec */
19924 
19925 	SD_TRACE(SD_LOG_IO, un,
19926 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
19927 
19928 	if (usr_bufp == NULL) {
19929 		return (EINVAL);
19930 	}
19931 
19932 	bzero(&cdb, sizeof (cdb));
19933 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19934 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19935 	prp = kmem_zalloc(data_len, KM_SLEEP);
19936 
19937 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
19938 	cdb.cdb_opaque[1] = usr_cmd;
19939 	FORMG1COUNT(&cdb, data_len);
19940 
19941 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19942 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19943 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
19944 	ucmd_buf.uscsi_buflen	= data_len;
19945 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19946 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19947 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
19948 	ucmd_buf.uscsi_timeout	= 60;
19949 
19950 	switch (usr_cmd) {
19951 	case SD_SCSI3_REGISTER: {
19952 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
19953 
19954 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19955 		bcopy(ptr->newkey.key, prp->service_key,
19956 		    MHIOC_RESV_KEY_SIZE);
19957 		prp->aptpl = ptr->aptpl;
19958 		break;
19959 	}
19960 	case SD_SCSI3_RESERVE:
19961 	case SD_SCSI3_RELEASE: {
19962 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
19963 
19964 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19965 		prp->scope_address = BE_32(ptr->scope_specific_addr);
19966 		cdb.cdb_opaque[2] = ptr->type;
19967 		break;
19968 	}
19969 	case SD_SCSI3_PREEMPTANDABORT: {
19970 		mhioc_preemptandabort_t *ptr =
19971 		    (mhioc_preemptandabort_t *)usr_bufp;
19972 
19973 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19974 		bcopy(ptr->victim_key.key, prp->service_key,
19975 		    MHIOC_RESV_KEY_SIZE);
19976 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
19977 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
19978 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
19979 		break;
19980 	}
19981 	case SD_SCSI3_REGISTERANDIGNOREKEY:
19982 	{
19983 		mhioc_registerandignorekey_t *ptr;
19984 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
19985 		bcopy(ptr->newkey.key,
19986 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
19987 		prp->aptpl = ptr->aptpl;
19988 		break;
19989 	}
19990 	default:
19991 		ASSERT(FALSE);
19992 		break;
19993 	}
19994 
19995 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19996 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19997 
19998 	switch (status) {
19999 	case 0:
20000 		break;	/* Success! */
20001 	case EIO:
20002 		switch (ucmd_buf.uscsi_status) {
20003 		case STATUS_RESERVATION_CONFLICT:
20004 			status = EACCES;
20005 			break;
20006 		case STATUS_CHECK:
20007 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20008 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
20009 				status = ENOTSUP;
20010 			}
20011 			break;
20012 		default:
20013 			break;
20014 		}
20015 		break;
20016 	default:
20017 		break;
20018 	}
20019 
20020 	kmem_free(prp, data_len);
20021 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20022 	return (status);
20023 }
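
/*
 * Illustrative usage sketch (not part of the driver): registering a
 * reservation key. The key value shown is a placeholder.
 *
 *	mhioc_register_t reg;
 *
 *	bzero(&reg, sizeof (reg));	(old key of zero = not registered)
 *	bcopy("examplek", reg.newkey.key, MHIOC_RESV_KEY_SIZE);
 *	(void) sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
 *	    (uchar_t *)&reg);
 */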
20024 
20025 
20026 /*
20027  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20028  *
20029  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20030  *
20031  *   Arguments: un - pointer to the target's soft state struct
20032  *		dkc - optional callback; NULL means wait synchronously
20033  * Return Code: 0 - success
20034  *		errno-type error code
20035  *
20036  *     Context: kernel thread context only.
20037  */
20038 
20039 static int
20040 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20041 {
20042 	struct sd_uscsi_info	*uip;
20043 	struct uscsi_cmd	*uscmd;
20044 	union scsi_cdb		*cdb;
20045 	struct buf		*bp;
20046 	int			rval = 0;
20047 
20048 	SD_TRACE(SD_LOG_IO, un,
20049 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20050 
20051 	ASSERT(un != NULL);
20052 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20053 
20054 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20055 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20056 
20057 	/*
20058 	 * First get some memory for the uscsi_cmd struct and cdb
20059 	 * and initialize for SYNCHRONIZE_CACHE cmd.
20060 	 */
20061 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20062 	uscmd->uscsi_cdblen = CDB_GROUP1;
20063 	uscmd->uscsi_cdb = (caddr_t)cdb;
20064 	uscmd->uscsi_bufaddr = NULL;
20065 	uscmd->uscsi_buflen = 0;
20066 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20067 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20068 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20069 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20070 	uscmd->uscsi_timeout = sd_io_time;
20071 
20072 	/*
20073 	 * Allocate an sd_uscsi_info struct and fill it with the info
20074 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20075 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20076 	 * since we allocate the buf here in this function, we do not
20077 	 * need to preserve the prior contents of b_private.
20078 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20079 	 */
20080 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20081 	uip->ui_flags = SD_PATH_DIRECT;
20082 	uip->ui_cmdp  = uscmd;
20083 
20084 	bp = getrbuf(KM_SLEEP);
20085 	bp->b_private = uip;
20086 
20087 	/*
20088 	 * Setup buffer to carry uscsi request.
20089 	 */
20090 	bp->b_flags  = B_BUSY;
20091 	bp->b_bcount = 0;
20092 	bp->b_blkno  = 0;
20093 
20094 	if (dkc != NULL) {
20095 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20096 		uip->ui_dkc = *dkc;
20097 	}
20098 
20099 	bp->b_edev = SD_GET_DEV(un);
20100 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20101 
20102 	(void) sd_uscsi_strategy(bp);
20103 
20104 	/*
20105 	 * If this is a synchronous request, wait for completion.
20106 	 * If it is async, just return and let the b_iodone callback
20107 	 * clean up.
20108 	 * NOTE: On return, un_ncmds_in_driver will be decremented,
20109 	 * but it was also incremented in sd_uscsi_strategy(), so
20110 	 * we should be ok.
20111 	 */
20112 	if (dkc == NULL) {
20113 		(void) biowait(bp);
20114 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20115 	}
20116 
20117 	return (rval);
20118 }
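
/*
 * Illustrative usage sketch (not part of the driver): the two ways this
 * routine can be driven. The callback function named here is
 * hypothetical.
 *
 * Synchronous (blocks until the flush completes):
 *
 *	int rv = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
 *
 * Asynchronous (returns immediately; my_flush_done() is invoked from
 * the biodone path with the final status):
 *
 *	struct dk_callback dkc;
 *
 *	dkc.dkc_callback = my_flush_done;
 *	dkc.dkc_cookie   = my_cookie;
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
 */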
20119 
20120 
20121 static int
20122 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20123 {
20124 	struct sd_uscsi_info *uip;
20125 	struct uscsi_cmd *uscmd;
20126 	struct scsi_extended_sense *sense_buf;
20127 	struct sd_lun *un;
20128 	int status;
20129 
20130 	uip = (struct sd_uscsi_info *)(bp->b_private);
20131 	ASSERT(uip != NULL);
20132 
20133 	uscmd = uip->ui_cmdp;
20134 	ASSERT(uscmd != NULL);
20135 
20136 	sense_buf = (struct scsi_extended_sense *)uscmd->uscsi_rqbuf;
20137 	ASSERT(sense_buf != NULL);
20138 
20139 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20140 	ASSERT(un != NULL);
20141 
20142 	status = geterror(bp);
20143 	switch (status) {
20144 	case 0:
20145 		break;	/* Success! */
20146 	case EIO:
20147 		switch (uscmd->uscsi_status) {
20148 		case STATUS_RESERVATION_CONFLICT:
20149 			/* Ignore reservation conflict */
20150 			status = 0;
20151 			goto done;
20152 
20153 		case STATUS_CHECK:
20154 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20155 			    (sense_buf->es_key == KEY_ILLEGAL_REQUEST)) {
20156 				/* Ignore Illegal Request error */
20157 				mutex_enter(SD_MUTEX(un));
20158 				un->un_f_sync_cache_supported = FALSE;
20159 				mutex_exit(SD_MUTEX(un));
20160 				status = ENOTSUP;
20161 				goto done;
20162 			}
20163 			break;
20164 		default:
20165 			break;
20166 		}
20167 		/* FALLTHRU */
20168 	default:
20169 		/* Ignore error if the media is not present */
20170 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
20171 			status = 0;
20172 			goto done;
20173 		}
20174 		/* If we reach this, we had an error */
20175 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20176 		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20177 		break;
20178 	}
20179 
20180 done:
20181 	if (uip->ui_dkc.dkc_callback != NULL) {
20182 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20183 	}
20184 
20185 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20186 	freerbuf(bp);
20187 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20188 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20189 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20190 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20191 
20192 	return (status);
20193 }
20194 
20195 
20196 /*
20197  *    Function: sd_send_scsi_GET_CONFIGURATION
20198  *
20199  * Description: Issues the get configuration command to the device.
20200  *		Called from sd_check_for_writable_cd & sd_get_media_info;
20201  *		the caller must ensure that buflen = SD_PROFILE_HEADER_LEN.
20202  *   Arguments: un
20203  *		ucmdbuf
20204  *		rqbuf
20205  *		rqbuflen
20206  *		bufaddr
20207  *		buflen
20208  *
20209  * Return Code: 0   - Success
20210  *		errno return code from sd_send_scsi_cmd()
20211  *
20212  *     Context: Can sleep. Does not return until command is completed.
20213  *
20214  */
20215 
20216 static int
20217 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20218 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20219 {
20220 	char	cdb[CDB_GROUP1];
20221 	int	status;
20222 
20223 	ASSERT(un != NULL);
20224 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20225 	ASSERT(bufaddr != NULL);
20226 	ASSERT(ucmdbuf != NULL);
20227 	ASSERT(rqbuf != NULL);
20228 
20229 	SD_TRACE(SD_LOG_IO, un,
20230 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20231 
20232 	bzero(cdb, sizeof (cdb));
20233 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20234 	bzero(rqbuf, rqbuflen);
20235 	bzero(bufaddr, buflen);
20236 
20237 	/*
20238 	 * Set up cdb field for the get configuration command.
20239 	 */
20240 	cdb[0] = SCMD_GET_CONFIGURATION;
20241 	cdb[1] = 0x02;  /* Requested Type */
20242 	cdb[8] = SD_PROFILE_HEADER_LEN;
20243 	ucmdbuf->uscsi_cdb = cdb;
20244 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20245 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20246 	ucmdbuf->uscsi_buflen = buflen;
20247 	ucmdbuf->uscsi_timeout = sd_io_time;
20248 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20249 	ucmdbuf->uscsi_rqlen = rqbuflen;
20250 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20251 
20252 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20253 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20254 
20255 	switch (status) {
20256 	case 0:
20257 		break;  /* Success! */
20258 	case EIO:
20259 		switch (ucmdbuf->uscsi_status) {
20260 		case STATUS_RESERVATION_CONFLICT:
20261 			status = EACCES;
20262 			break;
20263 		default:
20264 			break;
20265 		}
20266 		break;
20267 	default:
20268 		break;
20269 	}
20270 
20271 	if (status == 0) {
20272 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20273 		    "sd_send_scsi_GET_CONFIGURATION: data",
20274 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20275 	}
20276 
20277 	SD_TRACE(SD_LOG_IO, un,
20278 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20279 
20280 	return (status);
20281 }
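
/*
 * Illustrative usage sketch (not part of the driver): fetching the
 * profile header. Buffer sizes follow the constraint noted above
 * (buflen must equal SD_PROFILE_HEADER_LEN).
 *
 *	struct uscsi_cmd com;
 *	uchar_t rqbuf[SENSE_LENGTH];
 *	uchar_t hdr[SD_PROFILE_HEADER_LEN];
 *
 *	if (sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
 *	    hdr, SD_PROFILE_HEADER_LEN) == 0) {
 *		... current profile is in hdr ...
 *	}
 */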
20282 
20283 /*
20284  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20285  *
20286  * Description: Issues the get configuration command to the device to
20287  *              retrieve a specific feature. Called from
20288  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20289  *   Arguments: un
20290  *              ucmdbuf
20291  *              rqbuf
20292  *              rqbuflen
20293  *              bufaddr
20294  *              buflen
20295  *		feature
20296  *
20297  * Return Code: 0   - Success
20298  *              errno return code from sd_send_scsi_cmd()
20299  *
20300  *     Context: Can sleep. Does not return until command is completed.
20301  *
20302  */
20303 static int
20304 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20305 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20306 	uchar_t *bufaddr, uint_t buflen, char feature)
20307 {
20308 	char    cdb[CDB_GROUP1];
20309 	int	status;
20310 
20311 	ASSERT(un != NULL);
20312 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20313 	ASSERT(bufaddr != NULL);
20314 	ASSERT(ucmdbuf != NULL);
20315 	ASSERT(rqbuf != NULL);
20316 
20317 	SD_TRACE(SD_LOG_IO, un,
20318 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20319 
20320 	bzero(cdb, sizeof (cdb));
20321 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20322 	bzero(rqbuf, rqbuflen);
20323 	bzero(bufaddr, buflen);
20324 
20325 	/*
20326 	 * Set up cdb field for the get configuration command.
20327 	 */
20328 	cdb[0] = SCMD_GET_CONFIGURATION;
20329 	cdb[1] = 0x02;  /* Requested Type */
20330 	cdb[3] = feature;
20331 	cdb[8] = buflen;
20332 	ucmdbuf->uscsi_cdb = cdb;
20333 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20334 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20335 	ucmdbuf->uscsi_buflen = buflen;
20336 	ucmdbuf->uscsi_timeout = sd_io_time;
20337 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20338 	ucmdbuf->uscsi_rqlen = rqbuflen;
20339 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20340 
20341 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20342 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20343 
20344 	switch (status) {
20345 	case 0:
20346 		break;  /* Success! */
20347 	case EIO:
20348 		switch (ucmdbuf->uscsi_status) {
20349 		case STATUS_RESERVATION_CONFLICT:
20350 			status = EACCES;
20351 			break;
20352 		default:
20353 			break;
20354 		}
20355 		break;
20356 	default:
20357 		break;
20358 	}
20359 
20360 	if (status == 0) {
20361 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20362 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20363 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20364 	}
20365 
20366 	SD_TRACE(SD_LOG_IO, un,
20367 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20368 
20369 	return (status);
20370 }
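
/*
 * Illustrative usage sketch (not part of the driver): querying a single
 * feature descriptor. The feature code 0x1F used here is a placeholder
 * value for the example, not a constant defined by this driver.
 *
 *	struct uscsi_cmd com;
 *	uchar_t rqbuf[SENSE_LENGTH];
 *	uchar_t out[SD_PROFILE_HEADER_LEN];
 *
 *	if (sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf,
 *	    SENSE_LENGTH, out, SD_PROFILE_HEADER_LEN, 0x1F) == 0) {
 *		... feature descriptor (if any) is in out ...
 *	}
 */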
20371 
20372 
20373 /*
20374  *    Function: sd_send_scsi_MODE_SENSE
20375  *
20376  * Description: Utility function for issuing a scsi MODE SENSE command.
20377  *		Note: This routine uses a consistent implementation for Group0,
20378  *		Group1, and Group2 commands across all platforms. ATAPI devices
20379  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20380  *
20381  *   Arguments: un - pointer to the softstate struct for the target.
20382  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20383  *			  CDB_GROUP[1|2] (10 byte).
20384  *		bufaddr - buffer for page data retrieved from the target.
20385  *		buflen - size of page to be retrieved.
20386  *		page_code - page code of data to be retrieved from the target.
20387  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20388  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20389  *			to use the USCSI "direct" chain and bypass the normal
20390  *			command waitq.
20391  *
20392  * Return Code: 0   - Success
20393  *		errno return code from sd_send_scsi_cmd()
20394  *
20395  *     Context: Can sleep. Does not return until command is completed.
20396  */
20397 
20398 static int
20399 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20400 	size_t buflen,  uchar_t page_code, int path_flag)
20401 {
20402 	struct	scsi_extended_sense	sense_buf;
20403 	union scsi_cdb		cdb;
20404 	struct uscsi_cmd	ucmd_buf;
20405 	int			status;
20406 	int			headlen;
20407 
20408 	ASSERT(un != NULL);
20409 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20410 	ASSERT(bufaddr != NULL);
20411 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20412 	    (cdbsize == CDB_GROUP2));
20413 
20414 	SD_TRACE(SD_LOG_IO, un,
20415 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20416 
20417 	bzero(&cdb, sizeof (cdb));
20418 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20419 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20420 	bzero(bufaddr, buflen);
20421 
20422 	if (cdbsize == CDB_GROUP0) {
20423 		cdb.scc_cmd = SCMD_MODE_SENSE;
20424 		cdb.cdb_opaque[2] = page_code;
20425 		FORMG0COUNT(&cdb, buflen);
20426 		headlen = MODE_HEADER_LENGTH;
20427 	} else {
20428 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20429 		cdb.cdb_opaque[2] = page_code;
20430 		FORMG1COUNT(&cdb, buflen);
20431 		headlen = MODE_HEADER_LENGTH_GRP2;
20432 	}
20433 
20434 	ASSERT(headlen <= buflen);
20435 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20436 
20437 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20438 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20439 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20440 	ucmd_buf.uscsi_buflen	= buflen;
20441 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20442 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20443 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20444 	ucmd_buf.uscsi_timeout	= 60;
20445 
20446 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20447 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20448 
20449 	switch (status) {
20450 	case 0:
20451 		/*
20452 		 * sr_check_wp() uses the 0x3f page code and checks the header
20453 		 * of the mode page to determine if the target device is
20454 		 * write-protected. But some USB devices return 0 bytes for
20455 		 * the 0x3f page code. In this case, make sure that at least
20456 		 * the mode page header is returned.
20457 		 */
20458 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
20459 			status = EIO;
20460 		break;	/* Success! */
20461 	case EIO:
20462 		switch (ucmd_buf.uscsi_status) {
20463 		case STATUS_RESERVATION_CONFLICT:
20464 			status = EACCES;
20465 			break;
20466 		default:
20467 			break;
20468 		}
20469 		break;
20470 	default:
20471 		break;
20472 	}
20473 
20474 	if (status == 0) {
20475 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20476 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20477 	}
20478 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20479 
20480 	return (status);
20481 }
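
/*
 * Illustrative usage sketch (not part of the driver): reading just the
 * mode page header with the 0x3f ("return all pages") page code, as
 * sr_check_wp() does per the note above.
 *
 *	uchar_t mhdr[MODE_HEADER_LENGTH];
 *
 *	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, mhdr, sizeof (mhdr),
 *	    0x3f, SD_PATH_DIRECT) == 0) {
 *		... device-specific byte in the header carries the WP bit ...
 *	}
 */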
20482 
20483 
20484 /*
20485  *    Function: sd_send_scsi_MODE_SELECT
20486  *
20487  * Description: Utility function for issuing a scsi MODE SELECT command.
20488  *		Note: This routine uses a consistent implementation for Group0,
20489  *		Group1, and Group2 commands across all platforms. ATAPI devices
20490  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20491  *
20492  *   Arguments: un - pointer to the softstate struct for the target.
20493  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20494  *			  CDB_GROUP[1|2] (10 byte).
20495  *		bufaddr - buffer for page data retrieved from the target.
20496  *		buflen - size of page to be retrieved.
20497  *		save_page - boolean to determin if SP bit should be set.
20498  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20499  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20500  *			to use the USCSI "direct" chain and bypass the normal
20501  *			command waitq.
20502  *
20503  * Return Code: 0   - Success
20504  *		errno return code from sd_send_scsi_cmd()
20505  *
20506  *     Context: Can sleep. Does not return until command is completed.
20507  */
20508 
20509 static int
20510 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20511 	size_t buflen,  uchar_t save_page, int path_flag)
20512 {
20513 	struct	scsi_extended_sense	sense_buf;
20514 	union scsi_cdb		cdb;
20515 	struct uscsi_cmd	ucmd_buf;
20516 	int			status;
20517 
20518 	ASSERT(un != NULL);
20519 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20520 	ASSERT(bufaddr != NULL);
20521 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20522 	    (cdbsize == CDB_GROUP2));
20523 
20524 	SD_TRACE(SD_LOG_IO, un,
20525 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20526 
20527 	bzero(&cdb, sizeof (cdb));
20528 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20529 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20530 
20531 	/* Set the PF bit for many third party drives */
20532 	cdb.cdb_opaque[1] = 0x10;
20533 
20534 	/* Set the savepage(SP) bit if given */
20535 	if (save_page == SD_SAVE_PAGE) {
20536 		cdb.cdb_opaque[1] |= 0x01;
20537 	}
20538 
20539 	if (cdbsize == CDB_GROUP0) {
20540 		cdb.scc_cmd = SCMD_MODE_SELECT;
20541 		FORMG0COUNT(&cdb, buflen);
20542 	} else {
20543 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20544 		FORMG1COUNT(&cdb, buflen);
20545 	}
20546 
20547 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20548 
20549 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20550 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20551 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20552 	ucmd_buf.uscsi_buflen	= buflen;
20553 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20554 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20555 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20556 	ucmd_buf.uscsi_timeout	= 60;
20557 
20558 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20559 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20560 
20561 	switch (status) {
20562 	case 0:
20563 		break;	/* Success! */
20564 	case EIO:
20565 		switch (ucmd_buf.uscsi_status) {
20566 		case STATUS_RESERVATION_CONFLICT:
20567 			status = EACCES;
20568 			break;
20569 		default:
20570 			break;
20571 		}
20572 		break;
20573 	default:
20574 		break;
20575 	}
20576 
20577 	if (status == 0) {
20578 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20579 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20580 	}
20581 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20582 
20583 	return (status);
20584 }
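
/*
 * Illustrative usage sketch (not part of the driver): writing back a
 * page previously fetched with sd_send_scsi_MODE_SENSE(), persisting it
 * across resets via the SP bit. pagebuf and pagelen are assumed to
 * hold a complete mode parameter list.
 *
 *	int rv = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, pagebuf,
 *	    pagelen, SD_SAVE_PAGE, SD_PATH_DIRECT);
 */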
20585 
20586 
20587 /*
20588  *    Function: sd_send_scsi_RDWR
20589  *
20590  * Description: Issue a scsi READ or WRITE command with the given parameters.
20591  *
20592  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20593  *		cmd:	 SCMD_READ or SCMD_WRITE
20594  *		bufaddr: Address of caller's buffer to receive the RDWR data
20595  *		buflen:  Length of caller's buffer to receive the RDWR data.
20596  *		start_block: Block number for the start of the RDWR operation.
20597  *			 (Assumes target-native block size.)
20600  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20601  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20602  *			to use the USCSI "direct" chain and bypass the normal
20603  *			command waitq.
20604  *
20605  * Return Code: 0   - Success
20606  *		errno return code from sd_send_scsi_cmd()
20607  *
20608  *     Context: Can sleep. Does not return until command is completed.
20609  */
20610 
20611 static int
20612 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
20613 	size_t buflen, daddr_t start_block, int path_flag)
20614 {
20615 	struct	scsi_extended_sense	sense_buf;
20616 	union scsi_cdb		cdb;
20617 	struct uscsi_cmd	ucmd_buf;
20618 	uint32_t		block_count;
20619 	int			status;
20620 	int			cdbsize;
20621 	uchar_t			flag;
20622 
20623 	ASSERT(un != NULL);
20624 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20625 	ASSERT(bufaddr != NULL);
20626 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
20627 
20628 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
20629 
20630 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
20631 		return (EINVAL);
20632 	}
20633 
20634 	mutex_enter(SD_MUTEX(un));
20635 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
20636 	mutex_exit(SD_MUTEX(un));
20637 
20638 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
20639 
20640 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
20641 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
20642 	    bufaddr, buflen, start_block, block_count);
20643 
20644 	bzero(&cdb, sizeof (cdb));
20645 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20646 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20647 
20648 	/* Compute CDB size to use */
20649 	if (start_block > 0xffffffff)
20650 		cdbsize = CDB_GROUP4;
20651 	else if ((start_block & 0xFFE00000) ||
20652 	    (un->un_f_cfg_is_atapi == TRUE))
20653 		cdbsize = CDB_GROUP1;
20654 	else
20655 		cdbsize = CDB_GROUP0;
20656 
20657 	switch (cdbsize) {
20658 	case CDB_GROUP0:	/* 6-byte CDBs */
20659 		cdb.scc_cmd = cmd;
20660 		FORMG0ADDR(&cdb, start_block);
20661 		FORMG0COUNT(&cdb, block_count);
20662 		break;
20663 	case CDB_GROUP1:	/* 10-byte CDBs */
20664 		cdb.scc_cmd = cmd | SCMD_GROUP1;
20665 		FORMG1ADDR(&cdb, start_block);
20666 		FORMG1COUNT(&cdb, block_count);
20667 		break;
20668 	case CDB_GROUP4:	/* 16-byte CDBs */
20669 		cdb.scc_cmd = cmd | SCMD_GROUP4;
20670 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
20671 		FORMG4COUNT(&cdb, block_count);
20672 		break;
20673 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
20674 	default:
20675 		/* All others reserved */
20676 		return (EINVAL);
20677 	}
20678 
20679 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
20680 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20681 
20682 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20683 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20684 	ucmd_buf.uscsi_bufaddr	= bufaddr;
20685 	ucmd_buf.uscsi_buflen	= buflen;
20686 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20687 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20688 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
20689 	ucmd_buf.uscsi_timeout	= 60;
20690 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20691 				UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20692 	switch (status) {
20693 	case 0:
20694 		break;	/* Success! */
20695 	case EIO:
20696 		switch (ucmd_buf.uscsi_status) {
20697 		case STATUS_RESERVATION_CONFLICT:
20698 			status = EACCES;
20699 			break;
20700 		default:
20701 			break;
20702 		}
20703 		break;
20704 	default:
20705 		break;
20706 	}
20707 
20708 	if (status == 0) {
20709 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
20710 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20711 	}
20712 
20713 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
20714 
20715 	return (status);
20716 }
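
/*
 * Illustrative usage sketch (not part of the driver): reading one
 * target block into a local buffer. tgt_blocksize is assumed to hold
 * the target-native block size noted above, and the target blocksize
 * must already be known to be valid.
 *
 *	char *blk = kmem_zalloc(tgt_blocksize, KM_SLEEP);
 *
 *	if (sd_send_scsi_RDWR(un, SCMD_READ, blk, tgt_blocksize,
 *	    (daddr_t)0, SD_PATH_STANDARD) == 0) {
 *		... block 0 is in blk ...
 *	}
 *	kmem_free(blk, tgt_blocksize);
 */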
20717 
20718 
20719 /*
20720  *    Function: sd_send_scsi_LOG_SENSE
20721  *
20722  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20723  *
20724  *   Arguments: un:      Pointer to the sd_lun struct for the target;
20725  *		also bufaddr, buflen, page_code, page_control, param_ptr, path_flag.
20726  * Return Code: 0   - Success
20727  *		errno return code from sd_send_scsi_cmd()
20728  *
20729  *     Context: Can sleep. Does not return until command is completed.
20730  */
20731 
20732 static int
20733 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
20734 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
20735 	int path_flag)
20736 
20737 {
20738 	struct	scsi_extended_sense	sense_buf;
20739 	union scsi_cdb		cdb;
20740 	struct uscsi_cmd	ucmd_buf;
20741 	int			status;
20742 
20743 	ASSERT(un != NULL);
20744 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20745 
20746 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
20747 
20748 	bzero(&cdb, sizeof (cdb));
20749 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20750 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20751 
20752 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
20753 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
20754 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
20755 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
20756 	FORMG1COUNT(&cdb, buflen);
20757 
20758 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20759 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20760 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20761 	ucmd_buf.uscsi_buflen	= buflen;
20762 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20763 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20764 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20765 	ucmd_buf.uscsi_timeout	= 60;
20766 
20767 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20768 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20769 
20770 	switch (status) {
20771 	case 0:
20772 		break;
20773 	case EIO:
20774 		switch (ucmd_buf.uscsi_status) {
20775 		case STATUS_RESERVATION_CONFLICT:
20776 			status = EACCES;
20777 			break;
20778 		case STATUS_CHECK:
20779 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20780 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST) &&
20781 			    (sense_buf.es_add_code == 0x24)) {
20782 				/*
20783 				 * ASC 0x24: INVALID FIELD IN CDB
20784 				 */
20785 				switch (page_code) {
20786 				case START_STOP_CYCLE_PAGE:
20787 					/*
20788 					 * The start stop cycle counter is
20789 					 * implemented as page 0x31 in earlier
20790 					 * generation disks. In new generation
20791 					 * disks the start stop cycle counter is
20792 					 * implemented as page 0xE. To properly
20793 					 * handle this case, if an attempt for
20794 					 * log page 0xE is made and fails, we
20795 					 * will try again using page 0x31.
20796 					 *
20797 					 * Network storage BU committed to
20798 					 * maintain the page 0x31 for this
20799 					 * purpose and will not have any other
20800 					 * page implemented with page code 0x31
20801 					 * until all disks transition to the
20802 					 * standard page.
20803 					 */
20804 					mutex_enter(SD_MUTEX(un));
20805 					un->un_start_stop_cycle_page =
20806 					    START_STOP_CYCLE_VU_PAGE;
20807 					cdb.cdb_opaque[2] =
20808 					    (char)(page_control << 6) |
20809 					    un->un_start_stop_cycle_page;
20810 					mutex_exit(SD_MUTEX(un));
20811 					status = sd_send_scsi_cmd(
20812 					    SD_GET_DEV(un), &ucmd_buf,
20813 					    UIO_SYSSPACE, UIO_SYSSPACE,
20814 					    UIO_SYSSPACE, path_flag);
20815 
20816 					break;
20817 				case TEMPERATURE_PAGE:
20818 					status = ENOTTY;
20819 					break;
20820 				default:
20821 					break;
20822 				}
20823 			}
20824 			break;
20825 		default:
20826 			break;
20827 		}
20828 		break;
20829 	default:
20830 		break;
20831 	}
20832 
20833 	if (status == 0) {
20834 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
20835 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20836 	}
20837 
20838 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
20839 
20840 	return (status);
20841 }
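
/*
 * Illustrative usage sketch (not part of the driver): fetching the
 * temperature log page. The page-control value 0x01 and the buffer
 * size are assumptions made only for the example.
 *
 *	uchar_t lbuf[256];
 *
 *	if (sd_send_scsi_LOG_SENSE(un, lbuf, sizeof (lbuf),
 *	    TEMPERATURE_PAGE, 0x01, 0, SD_PATH_DIRECT) == 0) {
 *		... log parameters are in lbuf ...
 *	}
 */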
20842 
20843 
20844 /*
20845  *    Function: sdioctl
20846  *
20847  * Description: Driver's ioctl(9e) entry point function.
20848  *
20849  *   Arguments: dev     - device number
20850  *		cmd     - ioctl operation to be performed
20851  *		arg     - user argument, contains data to be set or reference
20852  *			  parameter for get
20853  *		flag    - bit flag, indicating open settings, 32/64 bit type
20854  *		cred_p  - user credential pointer
20855  *		rval_p  - calling process return value (OPT)
20856  *
20857  * Return Code: EINVAL
20858  *		ENOTTY
20859  *		ENXIO
20860  *		EIO
20861  *		EFAULT
20862  *		ENOTSUP
20863  *		EPERM
20864  *
20865  *     Context: Called from the device switch at normal priority.
20866  */
20867 
20868 static int
20869 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
20870 {
20871 	struct sd_lun	*un = NULL;
20872 	int		geom_validated = FALSE;
20873 	int		err = 0;
20874 	int		i = 0;
20875 	cred_t		*cr;
20876 
20877 	/*
20878 	 * All device accesses go through sdstrategy, where we check the
20879 	 * suspend status.
20880 	 */
20881 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20882 		return (ENXIO);
20883 	}
20884 
20885 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20886 
20887 	/*
20888 	 * Moved this wait from sd_uscsi_strategy to here for
20889 	 * reasons of deadlock prevention. Internal driver commands,
20890 	 * specifically those to change a device's power level, result
20891 	 * in a call to sd_uscsi_strategy.
20892 	 */
20893 	mutex_enter(SD_MUTEX(un));
20894 	while ((un->un_state == SD_STATE_SUSPENDED) ||
20895 	    (un->un_state == SD_STATE_PM_CHANGING)) {
20896 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
20897 	}
20898 	/*
20899 	 * Twiddling the counter here protects commands from now
20900 	 * through to the top of sd_uscsi_strategy. Without the
20901 	 * counter increment, a power down, for example, could get in
20902 	 * after the above check for state is made and before
20903 	 * execution gets to the top of sd_uscsi_strategy.
20904 	 * That would cause problems.
20905 	 */
20906 	un->un_ncmds_in_driver++;
20907 
20908 	if ((un->un_f_geometry_is_valid == FALSE) &&
20909 	    (flag & (FNDELAY | FNONBLOCK))) {
20910 		switch (cmd) {
20911 		case CDROMPAUSE:
20912 		case CDROMRESUME:
20913 		case CDROMPLAYMSF:
20914 		case CDROMPLAYTRKIND:
20915 		case CDROMREADTOCHDR:
20916 		case CDROMREADTOCENTRY:
20917 		case CDROMSTOP:
20918 		case CDROMSTART:
20919 		case CDROMVOLCTRL:
20920 		case CDROMSUBCHNL:
20921 		case CDROMREADMODE2:
20922 		case CDROMREADMODE1:
20923 		case CDROMREADOFFSET:
20924 		case CDROMSBLKMODE:
20925 		case CDROMGBLKMODE:
20926 		case CDROMGDRVSPEED:
20927 		case CDROMSDRVSPEED:
20928 		case CDROMCDDA:
20929 		case CDROMCDXA:
20930 		case CDROMSUBCODE:
20931 			if (!ISCD(un)) {
20932 				un->un_ncmds_in_driver--;
20933 				ASSERT(un->un_ncmds_in_driver >= 0);
20934 				mutex_exit(SD_MUTEX(un));
20935 				return (ENOTTY);
20936 			}
20937 			break;
20938 		case FDEJECT:
20939 		case DKIOCEJECT:
20940 		case CDROMEJECT:
20941 			if (!un->un_f_eject_media_supported) {
20942 				un->un_ncmds_in_driver--;
20943 				ASSERT(un->un_ncmds_in_driver >= 0);
20944 				mutex_exit(SD_MUTEX(un));
20945 				return (ENOTTY);
20946 			}
20947 			break;
20948 		case DKIOCSVTOC:
20949 		case DKIOCSETEFI:
20950 		case DKIOCSMBOOT:
20951 		case DKIOCFLUSHWRITECACHE:
20952 			mutex_exit(SD_MUTEX(un));
20953 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
20954 			if (err != 0) {
20955 				mutex_enter(SD_MUTEX(un));
20956 				un->un_ncmds_in_driver--;
20957 				ASSERT(un->un_ncmds_in_driver >= 0);
20958 				mutex_exit(SD_MUTEX(un));
20959 				return (EIO);
20960 			}
20961 			mutex_enter(SD_MUTEX(un));
20962 			/* FALLTHROUGH */
20963 		case DKIOCREMOVABLE:
20964 		case DKIOCHOTPLUGGABLE:
20965 		case DKIOCINFO:
20966 		case DKIOCGMEDIAINFO:
20967 		case MHIOCENFAILFAST:
20968 		case MHIOCSTATUS:
20969 		case MHIOCTKOWN:
20970 		case MHIOCRELEASE:
20971 		case MHIOCGRP_INKEYS:
20972 		case MHIOCGRP_INRESV:
20973 		case MHIOCGRP_REGISTER:
20974 		case MHIOCGRP_RESERVE:
20975 		case MHIOCGRP_PREEMPTANDABORT:
20976 		case MHIOCGRP_REGISTERANDIGNOREKEY:
20977 		case CDROMCLOSETRAY:
20978 		case USCSICMD:
20979 			goto skip_ready_valid;
20980 		default:
20981 			break;
20982 		}
20983 
20984 		mutex_exit(SD_MUTEX(un));
20985 		err = sd_ready_and_valid(un);
20986 		mutex_enter(SD_MUTEX(un));
20987 		if (err == SD_READY_NOT_VALID) {
20988 			switch (cmd) {
20989 			case DKIOCGAPART:
20990 			case DKIOCGGEOM:
20991 			case DKIOCSGEOM:
20992 			case DKIOCGVTOC:
20993 			case DKIOCSVTOC:
20994 			case DKIOCSAPART:
20995 			case DKIOCG_PHYGEOM:
20996 			case DKIOCG_VIRTGEOM:
20997 				err = ENOTSUP;
20998 				un->un_ncmds_in_driver--;
20999 				ASSERT(un->un_ncmds_in_driver >= 0);
21000 				mutex_exit(SD_MUTEX(un));
21001 				return (err);
21002 			}
21003 		}
21004 		if (err != SD_READY_VALID) {
21005 			switch (cmd) {
21006 			case DKIOCSTATE:
21007 			case CDROMGDRVSPEED:
21008 			case CDROMSDRVSPEED:
21009 			case FDEJECT:	/* for eject command */
21010 			case DKIOCEJECT:
21011 			case CDROMEJECT:
21012 			case DKIOCGETEFI:
21013 			case DKIOCSGEOM:
21014 			case DKIOCREMOVABLE:
21015 			case DKIOCHOTPLUGGABLE:
21016 			case DKIOCSAPART:
21017 			case DKIOCSETEFI:
21018 				break;
21019 			default:
21020 				if (un->un_f_has_removable_media) {
21021 					err = ENXIO;
21022 				} else {
21023 					/* Do not map EACCES to EIO */
21024 					if (err != EACCES)
21025 						err = EIO;
21026 				}
21027 				un->un_ncmds_in_driver--;
21028 				ASSERT(un->un_ncmds_in_driver >= 0);
21029 				mutex_exit(SD_MUTEX(un));
21030 				return (err);
21031 			}
21032 		}
21033 		geom_validated = TRUE;
21034 	}
21035 	if ((un->un_f_geometry_is_valid == TRUE) &&
21036 	    (un->un_solaris_size > 0)) {
21037 		/*
21038 		 * the "geometry_is_valid" flag could be true if we
21039 		 * have an fdisk table but no Solaris partition
21040 		 */
21041 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
21042 			/* it is EFI, so return ENOTSUP for these */
21043 			switch (cmd) {
21044 			case DKIOCGAPART:
21045 			case DKIOCGGEOM:
21046 			case DKIOCGVTOC:
21047 			case DKIOCSVTOC:
21048 			case DKIOCSAPART:
21049 				err = ENOTSUP;
21050 				un->un_ncmds_in_driver--;
21051 				ASSERT(un->un_ncmds_in_driver >= 0);
21052 				mutex_exit(SD_MUTEX(un));
21053 				return (err);
21054 			}
21055 		}
21056 	}
21057 
21058 skip_ready_valid:
21059 	mutex_exit(SD_MUTEX(un));
21060 
21061 	switch (cmd) {
21062 	case DKIOCINFO:
21063 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21064 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21065 		break;
21066 
21067 	case DKIOCGMEDIAINFO:
21068 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21069 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21070 		break;
21071 
21072 	case DKIOCGGEOM:
21073 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21074 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21075 		    geom_validated);
21076 		break;
21077 
21078 	case DKIOCSGEOM:
21079 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21080 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21081 		break;
21082 
21083 	case DKIOCGAPART:
21084 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21085 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21086 		    geom_validated);
21087 		break;
21088 
21089 	case DKIOCSAPART:
21090 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21091 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21092 		break;
21093 
21094 	case DKIOCGVTOC:
21095 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21096 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21097 		    geom_validated);
21098 		break;
21099 
21100 	case DKIOCGETEFI:
21101 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21102 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21103 		break;
21104 
21105 	case DKIOCPARTITION:
21106 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21107 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21108 		break;
21109 
21110 	case DKIOCSVTOC:
21111 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21112 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21113 		break;
21114 
21115 	case DKIOCSETEFI:
21116 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21117 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21118 		break;
21119 
21120 	case DKIOCGMBOOT:
21121 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21122 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21123 		break;
21124 
21125 	case DKIOCSMBOOT:
21126 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21127 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21128 		break;
21129 
21130 	case DKIOCLOCK:
21131 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21132 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21133 		    SD_PATH_STANDARD);
21134 		break;
21135 
21136 	case DKIOCUNLOCK:
21137 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21138 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21139 		    SD_PATH_STANDARD);
21140 		break;
21141 
21142 	case DKIOCSTATE: {
21143 		enum dkio_state		state;
21144 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21145 
21146 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21147 			err = EFAULT;
21148 		} else {
21149 			err = sd_check_media(dev, state);
21150 			if (err == 0) {
21151 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21152 				    sizeof (int), flag) != 0)
21153 					err = EFAULT;
21154 			}
21155 		}
21156 		break;
21157 	}
21158 
21159 	case DKIOCREMOVABLE:
21160 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21161 		/*
21162 		 * At present, vold only does automount for removable-media
21163 		 * devices. In order not to break current applications, we
21164 		 * still let hotpluggable devices pretend to be removable-media
21165 		 * devices for vold. In the near future, once vold is EOL'ed,
21166 		 * we should remove this workaround.
21167 		 */
21168 		if (un->un_f_has_removable_media || un->un_f_is_hotpluggable) {
21169 			i = 1;
21170 		} else {
21171 			i = 0;
21172 		}
21173 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21174 			err = EFAULT;
21175 		} else {
21176 			err = 0;
21177 		}
21178 		break;
21179 
21180 	case DKIOCHOTPLUGGABLE:
21181 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21182 		if (un->un_f_is_hotpluggable) {
21183 			i = 1;
21184 		} else {
21185 			i = 0;
21186 		}
21187 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21188 			err = EFAULT;
21189 		} else {
21190 			err = 0;
21191 		}
21192 		break;
21193 
21194 	case DKIOCGTEMPERATURE:
21195 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21196 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21197 		break;
21198 
21199 	case MHIOCENFAILFAST:
21200 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21201 		if ((err = drv_priv(cred_p)) == 0) {
21202 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21203 		}
21204 		break;
21205 
21206 	case MHIOCTKOWN:
21207 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21208 		if ((err = drv_priv(cred_p)) == 0) {
21209 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21210 		}
21211 		break;
21212 
21213 	case MHIOCRELEASE:
21214 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21215 		if ((err = drv_priv(cred_p)) == 0) {
21216 			err = sd_mhdioc_release(dev);
21217 		}
21218 		break;
21219 
21220 	case MHIOCSTATUS:
21221 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21222 		if ((err = drv_priv(cred_p)) == 0) {
21223 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21224 			case 0:
21225 				err = 0;
21226 				break;
21227 			case EACCES:
21228 				*rval_p = 1;
21229 				err = 0;
21230 				break;
21231 			default:
21232 				err = EIO;
21233 				break;
21234 			}
21235 		}
21236 		break;
21237 
21238 	case MHIOCQRESERVE:
21239 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21240 		if ((err = drv_priv(cred_p)) == 0) {
21241 			err = sd_reserve_release(dev, SD_RESERVE);
21242 		}
21243 		break;
21244 
21245 	case MHIOCREREGISTERDEVID:
21246 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21247 		if (drv_priv(cred_p) == EPERM) {
21248 			err = EPERM;
21249 		} else if (!un->un_f_devid_supported) {
21250 			err = ENOTTY;
21251 		} else {
21252 			err = sd_mhdioc_register_devid(dev);
21253 		}
21254 		break;
21255 
21256 	case MHIOCGRP_INKEYS:
21257 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21258 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21259 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21260 				err = ENOTSUP;
21261 			} else {
21262 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21263 				    flag);
21264 			}
21265 		}
21266 		break;
21267 
21268 	case MHIOCGRP_INRESV:
21269 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21270 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21271 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21272 				err = ENOTSUP;
21273 			} else {
21274 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21275 			}
21276 		}
21277 		break;
21278 
21279 	case MHIOCGRP_REGISTER:
21280 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21281 		if ((err = drv_priv(cred_p)) != EPERM) {
21282 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21283 				err = ENOTSUP;
21284 			} else if (arg != NULL) {
21285 				mhioc_register_t reg;
21286 				if (ddi_copyin((void *)arg, &reg,
21287 				    sizeof (mhioc_register_t), flag) != 0) {
21288 					err = EFAULT;
21289 				} else {
21290 					err =
21291 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21292 					    un, SD_SCSI3_REGISTER,
21293 					    (uchar_t *)&reg);
21294 				}
21295 			}
21296 		}
21297 		break;
21298 
21299 	case MHIOCGRP_RESERVE:
21300 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21301 		if ((err = drv_priv(cred_p)) != EPERM) {
21302 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21303 				err = ENOTSUP;
21304 			} else if (arg != NULL) {
21305 				mhioc_resv_desc_t resv_desc;
21306 				if (ddi_copyin((void *)arg, &resv_desc,
21307 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21308 					err = EFAULT;
21309 				} else {
21310 					err =
21311 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21312 					    un, SD_SCSI3_RESERVE,
21313 					    (uchar_t *)&resv_desc);
21314 				}
21315 			}
21316 		}
21317 		break;
21318 
21319 	case MHIOCGRP_PREEMPTANDABORT:
21320 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21321 		if ((err = drv_priv(cred_p)) != EPERM) {
21322 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21323 				err = ENOTSUP;
21324 			} else if (arg != NULL) {
21325 				mhioc_preemptandabort_t preempt_abort;
21326 				if (ddi_copyin((void *)arg, &preempt_abort,
21327 				    sizeof (mhioc_preemptandabort_t),
21328 				    flag) != 0) {
21329 					err = EFAULT;
21330 				} else {
21331 					err =
21332 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21333 					    un, SD_SCSI3_PREEMPTANDABORT,
21334 					    (uchar_t *)&preempt_abort);
21335 				}
21336 			}
21337 		}
21338 		break;
21339 
21340 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21341 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21342 		if ((err = drv_priv(cred_p)) != EPERM) {
21343 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21344 				err = ENOTSUP;
21345 			} else if (arg != NULL) {
21346 				mhioc_registerandignorekey_t r_and_i;
21347 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21348 				    sizeof (mhioc_registerandignorekey_t),
21349 				    flag) != 0) {
21350 					err = EFAULT;
21351 				} else {
21352 					err =
21353 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21354 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21355 					    (uchar_t *)&r_and_i);
21356 				}
21357 			}
21358 		}
21359 		break;
21360 
21361 	case USCSICMD:
21362 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21363 		cr = ddi_get_cred();
21364 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21365 			err = EPERM;
21366 		} else {
21367 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21368 		}
21369 		break;
21370 
21371 	case CDROMPAUSE:
21372 	case CDROMRESUME:
21373 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21374 		if (!ISCD(un)) {
21375 			err = ENOTTY;
21376 		} else {
21377 			err = sr_pause_resume(dev, cmd);
21378 		}
21379 		break;
21380 
21381 	case CDROMPLAYMSF:
21382 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21383 		if (!ISCD(un)) {
21384 			err = ENOTTY;
21385 		} else {
21386 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21387 		}
21388 		break;
21389 
21390 	case CDROMPLAYTRKIND:
21391 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21392 #if defined(__i386) || defined(__amd64)
21393 		/*
21394 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21395 		 */
21396 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21397 #else
21398 		if (!ISCD(un)) {
21399 #endif
21400 			err = ENOTTY;
21401 		} else {
21402 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21403 		}
21404 		break;
21405 
21406 	case CDROMREADTOCHDR:
21407 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21408 		if (!ISCD(un)) {
21409 			err = ENOTTY;
21410 		} else {
21411 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21412 		}
21413 		break;
21414 
21415 	case CDROMREADTOCENTRY:
21416 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21417 		if (!ISCD(un)) {
21418 			err = ENOTTY;
21419 		} else {
21420 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21421 		}
21422 		break;
21423 
21424 	case CDROMSTOP:
21425 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21426 		if (!ISCD(un)) {
21427 			err = ENOTTY;
21428 		} else {
21429 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21430 			    SD_PATH_STANDARD);
21431 		}
21432 		break;
21433 
21434 	case CDROMSTART:
21435 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21436 		if (!ISCD(un)) {
21437 			err = ENOTTY;
21438 		} else {
21439 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21440 			    SD_PATH_STANDARD);
21441 		}
21442 		break;
21443 
21444 	case CDROMCLOSETRAY:
21445 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21446 		if (!ISCD(un)) {
21447 			err = ENOTTY;
21448 		} else {
21449 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21450 			    SD_PATH_STANDARD);
21451 		}
21452 		break;
21453 
21454 	case FDEJECT:	/* for eject command */
21455 	case DKIOCEJECT:
21456 	case CDROMEJECT:
21457 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21458 		if (!un->un_f_eject_media_supported) {
21459 			err = ENOTTY;
21460 		} else {
21461 			err = sr_eject(dev);
21462 		}
21463 		break;
21464 
21465 	case CDROMVOLCTRL:
21466 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21467 		if (!ISCD(un)) {
21468 			err = ENOTTY;
21469 		} else {
21470 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21471 		}
21472 		break;
21473 
21474 	case CDROMSUBCHNL:
21475 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21476 		if (!ISCD(un)) {
21477 			err = ENOTTY;
21478 		} else {
21479 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21480 		}
21481 		break;
21482 
21483 	case CDROMREADMODE2:
21484 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21485 		if (!ISCD(un)) {
21486 			err = ENOTTY;
21487 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21488 			/*
21489 			 * If the drive supports READ CD, use that instead of
21490 			 * switching the LBA size via a MODE SELECT
21491 			 * Block Descriptor
21492 			 */
21493 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21494 		} else {
21495 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21496 		}
21497 		break;
21498 
21499 	case CDROMREADMODE1:
21500 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21501 		if (!ISCD(un)) {
21502 			err = ENOTTY;
21503 		} else {
21504 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21505 		}
21506 		break;
21507 
21508 	case CDROMREADOFFSET:
21509 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21510 		if (!ISCD(un)) {
21511 			err = ENOTTY;
21512 		} else {
21513 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21514 			    flag);
21515 		}
21516 		break;
21517 
21518 	case CDROMSBLKMODE:
21519 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21520 		/*
21521 		 * There is no means of changing the block size on ATAPI
21522 		 * drives, so return ENOTTY if the drive type is ATAPI.
21523 		 */
21524 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21525 			err = ENOTTY;
21526 		} else if (un->un_f_mmc_cap == TRUE) {
21527 
21528 			/*
21529 			 * MMC Devices do not support changing the
21530 			 * logical block size
21531 			 *
21532 			 * Note: EINVAL is being returned instead of ENOTTY to
21533 			 * maintain consistancy with the original mmc
21534 			 * driver update.
21535 			 */
21536 			err = EINVAL;
21537 		} else {
21538 			mutex_enter(SD_MUTEX(un));
21539 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21540 			    (un->un_ncmds_in_transport > 0)) {
21541 				mutex_exit(SD_MUTEX(un));
21542 				err = EINVAL;
21543 			} else {
21544 				mutex_exit(SD_MUTEX(un));
21545 				err = sr_change_blkmode(dev, cmd, arg, flag);
21546 			}
21547 		}
21548 		break;
21549 
21550 	case CDROMGBLKMODE:
21551 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21552 		if (!ISCD(un)) {
21553 			err = ENOTTY;
21554 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21555 		    (un->un_f_blockcount_is_valid != FALSE)) {
21556 			/*
21557 			 * Drive is ATAPI, so return the target block size;
21558 			 * the block size cannot be changed on ATAPI drives.
21559 			 * Used primarily to detect whether an ATAPI cdrom
21560 			 * is present.
21561 			 */
21562 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21563 			    sizeof (int), flag) != 0) {
21564 				err = EFAULT;
21565 			} else {
21566 				err = 0;
21567 			}
21568 
21569 		} else {
21570 			/*
21571 			 * Drive supports changing block sizes via a Mode
21572 			 * Select.
21573 			 */
21574 			err = sr_change_blkmode(dev, cmd, arg, flag);
21575 		}
21576 		break;
21577 
21578 	case CDROMGDRVSPEED:
21579 	case CDROMSDRVSPEED:
21580 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21581 		if (!ISCD(un)) {
21582 			err = ENOTTY;
21583 		} else if (un->un_f_mmc_cap == TRUE) {
21584 			/*
21585 			 * Note: In the future the driver implementation
21586 			 * for getting and setting cd speed should entail
21587 			 * the following:
21588 			 * 1) If non-mmc try the Toshiba mode page
21589 			 *    (sr_change_speed)
21590 			 * 2) If mmc but no support for Real Time Streaming try
21591 			 *    the SET CD SPEED (0xBB) command
21592 			 *    (sr_atapi_change_speed)
21593 			 * 3) If mmc and support for Real Time Streaming
21594 			 *    try the GET PERFORMANCE and SET STREAMING
21595 			 *    commands (not yet implemented, 4380808)
21596 			 */
21597 			/*
21598 			 * As per the recent MMC spec, CD-ROM speed varies
21599 			 * with the LBA. Since there is no longer any such
21600 			 * thing as a single drive speed, fail this ioctl.
21601 			 *
21602 			 * Note: EINVAL is returned for consistency with the
21603 			 * original implementation, which supported getting
21604 			 * the drive speed of mmc devices but not setting
21605 			 * it; thus EINVAL was returned when a set request
21606 			 * was made for an mmc device. We no longer support
21607 			 * get or set speed for mmc devices, but we need to
21608 			 * remain consistent with regard to the error code
21609 			 * returned.
21610 			 */
21611 			err = EINVAL;
21612 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21613 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21614 		} else {
21615 			err = sr_change_speed(dev, cmd, arg, flag);
21616 		}
21617 		break;
21618 
21619 	case CDROMCDDA:
21620 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21621 		if (!ISCD(un)) {
21622 			err = ENOTTY;
21623 		} else {
21624 			err = sr_read_cdda(dev, (void *)arg, flag);
21625 		}
21626 		break;
21627 
21628 	case CDROMCDXA:
21629 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21630 		if (!ISCD(un)) {
21631 			err = ENOTTY;
21632 		} else {
21633 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21634 		}
21635 		break;
21636 
21637 	case CDROMSUBCODE:
21638 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21639 		if (!ISCD(un)) {
21640 			err = ENOTTY;
21641 		} else {
21642 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21643 		}
21644 		break;
21645 
21646 	case DKIOCPARTINFO: {
21647 		/*
21648 		 * Return parameters describing the selected disk slice.
21649 		 * Note: this ioctl is for the x86 (i386/amd64) platform only.
21650 		 */
21651 #if defined(__i386) || defined(__amd64)
21652 		int part;
21653 
21654 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21655 		part = SDPART(dev);
21656 
21657 		/* don't check un_solaris_size for pN */
21658 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21659 			err = EIO;
21660 		} else {
21661 			struct part_info p;
21662 
21663 			p.p_start = (daddr_t)un->un_offset[part];
21664 			p.p_length = (int)un->un_map[part].dkl_nblk;
21665 #ifdef _MULTI_DATAMODEL
21666 			switch (ddi_model_convert_from(flag & FMODELS)) {
21667 			case DDI_MODEL_ILP32:
21668 			{
21669 				struct part_info32 p32;
21670 
21671 				p32.p_start = (daddr32_t)p.p_start;
21672 				p32.p_length = p.p_length;
21673 				if (ddi_copyout(&p32, (void *)arg,
21674 				    sizeof (p32), flag))
21675 					err = EFAULT;
21676 				break;
21677 			}
21678 
21679 			case DDI_MODEL_NONE:
21680 			{
21681 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21682 				    flag))
21683 					err = EFAULT;
21684 				break;
21685 			}
21686 			}
21687 #else /* ! _MULTI_DATAMODEL */
21688 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21689 				err = EFAULT;
21690 #endif /* _MULTI_DATAMODEL */
21691 		}
21692 #else
21693 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21694 		err = ENOTTY;
21695 #endif
21696 		break;
21697 	}
21698 
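	/*
	 * Editor's illustration (hypothetical, not part of the original
	 * driver): a userland caller on x86 could fetch the slice
	 * parameters roughly as follows ("fd" is an open raw-device fd):
	 *
	 *	struct part_info pi;
	 *
	 *	if (ioctl(fd, DKIOCPARTINFO, &pi) == 0)
	 *		(void) printf("start=%ld length=%d\n",
	 *		    (long)pi.p_start, pi.p_length);
	 */
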
21699 	case DKIOCG_PHYGEOM: {
21700 		/* Return the driver's notion of the media physical geometry */
21701 #if defined(__i386) || defined(__amd64)
21702 		struct dk_geom	disk_geom;
21703 		struct dk_geom	*dkgp = &disk_geom;
21704 
21705 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21706 		mutex_enter(SD_MUTEX(un));
21707 
21708 		if (un->un_g.dkg_nhead != 0 &&
21709 		    un->un_g.dkg_nsect != 0) {
21710 			/*
21711 			 * We succeeded in getting a geometry, but
21712 			 * right now it is being reported as just the
21713 			 * Solaris fdisk partition, just like for
21714 			 * DKIOCGGEOM. We need to change that to be
21715 			 * correct for the entire disk now.
21716 			 */
21717 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21718 			dkgp->dkg_acyl = 0;
21719 			dkgp->dkg_ncyl = un->un_blockcount /
21720 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21721 		} else {
21722 			bzero(dkgp, sizeof (struct dk_geom));
21723 			/*
21724 			 * This disk does not have a Solaris VTOC
21725 			 * so we must present a physical geometry
21726 			 * that will remain consistent regardless
21727 			 * of how the disk is used. This will ensure
21728 			 * that the geometry does not change regardless
21729 			 * of the fdisk partition type (ie. EFI, FAT32,
21730 			 * Solaris, etc).
21731 			 */
21732 			if (ISCD(un)) {
21733 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21734 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21735 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21736 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21737 			} else {
21738 				/*
21739 				 * An invalid un_blockcount can generate an
21740 				 * invalid dk_geom and may result in a
21741 				 * divide-by-zero system failure; make sure
21742 				 * the blockcount is valid before using it here.
21743 				 */
21744 				if (un->un_f_blockcount_is_valid == FALSE) {
21745 					mutex_exit(SD_MUTEX(un));
21746 					err = EIO;
21747 
21748 					break;
21749 				}
21750 				sd_convert_geometry(un->un_blockcount, dkgp);
21751 				dkgp->dkg_acyl = 0;
21752 				dkgp->dkg_ncyl = un->un_blockcount /
21753 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21754 			}
21755 		}
21756 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21757 
21758 		if (ddi_copyout(dkgp, (void *)arg,
21759 		    sizeof (struct dk_geom), flag)) {
21760 			mutex_exit(SD_MUTEX(un));
21761 			err = EFAULT;
21762 		} else {
21763 			mutex_exit(SD_MUTEX(un));
21764 			err = 0;
21765 		}
21766 #else
21767 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21768 		err = ENOTTY;
21769 #endif
21770 		break;
21771 	}
21772 
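	/*
	 * Editor's worked example for the DKIOCG_PHYGEOM computation above
	 * (values are illustrative only): a disk of 8388608 blocks
	 * reporting 255 heads and 63 sectors/track yields
	 * dkg_ncyl = 8388608 / (255 * 63) = 522, and with dkg_acyl = 0
	 * the reported dkg_pcyl is likewise 522.
	 */
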
21773 	case DKIOCG_VIRTGEOM: {
21774 		/* Return the driver's notion of the media's logical geometry */
21775 #if defined(__i386) || defined(__amd64)
21776 		struct dk_geom	disk_geom;
21777 		struct dk_geom	*dkgp = &disk_geom;
21778 
21779 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21780 		mutex_enter(SD_MUTEX(un));
21781 		/*
21782 		 * If there is no HBA geometry available, or
21783 		 * if the HBA returned us something that doesn't
21784 		 * really fit into an Int 13/function 8 geometry
21785 		 * result, just fail the ioctl.  See PSARC 1998/313.
21786 		 */
21787 		if (un->un_lgeom.g_nhead == 0 ||
21788 		    un->un_lgeom.g_nsect == 0 ||
21789 		    un->un_lgeom.g_ncyl > 1024) {
21790 			mutex_exit(SD_MUTEX(un));
21791 			err = EINVAL;
21792 		} else {
21793 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21794 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21795 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21796 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21797 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21798 
21799 			if (ddi_copyout(dkgp, (void *)arg,
21800 			    sizeof (struct dk_geom), flag)) {
21801 				mutex_exit(SD_MUTEX(un));
21802 				err = EFAULT;
21803 			} else {
21804 				mutex_exit(SD_MUTEX(un));
21805 				err = 0;
21806 			}
21807 		}
21808 #else
21809 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21810 		err = ENOTTY;
21811 #endif
21812 		break;
21813 	}
21814 #ifdef SDDEBUG
21815 /* RESET/ABORTS testing ioctls */
21816 	case DKIOCRESET: {
21817 		int	reset_level;
21818 
21819 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21820 			err = EFAULT;
21821 		} else {
21822 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21823 			    "reset_level = 0x%x\n", reset_level);
21824 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21825 				err = 0;
21826 			} else {
21827 				err = EIO;
21828 			}
21829 		}
21830 		break;
21831 	}
21832 
21833 	case DKIOCABORT:
21834 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21835 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21836 			err = 0;
21837 		} else {
21838 			err = EIO;
21839 		}
21840 		break;
21841 #endif
21842 
21843 #ifdef SD_FAULT_INJECTION
21844 /* SDIOC FaultInjection testing ioctls */
21845 	case SDIOCSTART:
21846 	case SDIOCSTOP:
21847 	case SDIOCINSERTPKT:
21848 	case SDIOCINSERTXB:
21849 	case SDIOCINSERTUN:
21850 	case SDIOCINSERTARQ:
21851 	case SDIOCPUSH:
21852 	case SDIOCRETRIEVE:
21853 	case SDIOCRUN:
21854 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
21855 		    "SDIOC detected cmd:0x%X:\n", cmd);
21856 		/* call error generator */
21857 		sd_faultinjection_ioctl(cmd, arg, un);
21858 		err = 0;
21859 		break;
21860 
21861 #endif /* SD_FAULT_INJECTION */
21862 
21863 	case DKIOCFLUSHWRITECACHE:
21864 		{
21865 			struct dk_callback *dkc = (struct dk_callback *)arg;
21866 
21867 			mutex_enter(SD_MUTEX(un));
21868 			if (!un->un_f_sync_cache_supported ||
21869 			    !un->un_f_write_cache_enabled) {
21870 				err = un->un_f_sync_cache_supported ?
21871 				    0 : ENOTSUP;
21872 				mutex_exit(SD_MUTEX(un));
21873 				if ((flag & FKIOCTL) && dkc != NULL &&
21874 				    dkc->dkc_callback != NULL) {
21875 					(*dkc->dkc_callback)(dkc->dkc_cookie,
21876 					    err);
21877 					/*
21878 					 * Did callback and reported error.
21879 					 * Since we did a callback, ioctl
21880 					 * should return 0.
21881 					 */
21882 					err = 0;
21883 				}
21884 				break;
21885 			}
21886 			mutex_exit(SD_MUTEX(un));
21887 
21888 			if ((flag & FKIOCTL) && dkc != NULL &&
21889 			    dkc->dkc_callback != NULL) {
21890 				/* async SYNC CACHE request */
21891 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
21892 			} else {
21893 				/* synchronous SYNC CACHE request */
21894 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
21895 			}
21896 		}
21897 		break;
21898 
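	/*
	 * Editor's illustration (hypothetical, not part of the original
	 * driver): an in-kernel consumer could request an asynchronous
	 * flush of the write cache through the layered-driver interface
	 * roughly as follows; "lh", "my_flush_done" and "cookie" are
	 * made-up names:
	 *
	 *	struct dk_callback dkc;
	 *
	 *	dkc.dkc_callback = my_flush_done;
	 *	dkc.dkc_cookie = cookie;
	 *	(void) ldi_ioctl(lh, DKIOCFLUSHWRITECACHE, (intptr_t)&dkc,
	 *	    FKIOCTL | FWRITE, kcred, NULL);
	 */
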
21899 	case DKIOCGETWCE: {
21900 
21901 		int wce;
21902 
21903 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
21904 			break;
21905 		}
21906 
21907 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
21908 			err = EFAULT;
21909 		}
21910 		break;
21911 	}
21912 
21913 	case DKIOCSETWCE: {
21914 
21915 		int wce, sync_supported;
21916 
21917 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
21918 			err = EFAULT;
21919 			break;
21920 		}
21921 
21922 		/*
21923 		 * Synchronize multiple threads trying to enable
21924 		 * or disable the cache via the un_f_wcc_cv
21925 		 * condition variable.
21926 		 */
21927 		mutex_enter(SD_MUTEX(un));
21928 
21929 		/*
21930 		 * Don't allow the cache to be enabled if the
21931 		 * config file has it disabled.
21932 		 */
21933 		if (un->un_f_opt_disable_cache && wce) {
21934 			mutex_exit(SD_MUTEX(un));
21935 			err = EINVAL;
21936 			break;
21937 		}
21938 
21939 		/*
21940 		 * Wait for write cache change in progress
21941 		 * bit to be clear before proceeding.
21942 		 */
21943 		while (un->un_f_wcc_inprog)
21944 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
21945 
21946 		un->un_f_wcc_inprog = 1;
21947 
21948 		if (un->un_f_write_cache_enabled && wce == 0) {
21949 			/*
21950 			 * Disable the write cache.  Don't clear
21951 			 * un_f_write_cache_enabled until after
21952 			 * the mode select and flush are complete.
21953 			 */
21954 			sync_supported = un->un_f_sync_cache_supported;
21955 			mutex_exit(SD_MUTEX(un));
21956 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
21957 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
21958 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
21959 			}
21960 
21961 			mutex_enter(SD_MUTEX(un));
21962 			if (err == 0) {
21963 				un->un_f_write_cache_enabled = 0;
21964 			}
21965 
21966 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
21967 			/*
21968 			 * Set un_f_write_cache_enabled first, so there is
21969 			 * no window where the cache is enabled, but the
21970 			 * bit says it isn't.
21971 			 */
21972 			un->un_f_write_cache_enabled = 1;
21973 			mutex_exit(SD_MUTEX(un));
21974 
21975 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
21976 			    SD_CACHE_ENABLE);
21977 
21978 			mutex_enter(SD_MUTEX(un));
21979 
21980 			if (err) {
21981 				un->un_f_write_cache_enabled = 0;
21982 			}
21983 		}
21984 
21985 		un->un_f_wcc_inprog = 0;
21986 		cv_broadcast(&un->un_wcc_cv);
21987 		mutex_exit(SD_MUTEX(un));
21988 		break;
21989 	}
21990 
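	/*
	 * Editor's illustration (hypothetical): enabling the write cache
	 * from userland is simply
	 *
	 *	int wce = 1;
	 *
	 *	if (ioctl(fd, DKIOCSETWCE, &wce) != 0)
	 *		perror("DKIOCSETWCE");
	 */
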
21991 	default:
21992 		err = ENOTTY;
21993 		break;
21994 	}
21995 	mutex_enter(SD_MUTEX(un));
21996 	un->un_ncmds_in_driver--;
21997 	ASSERT(un->un_ncmds_in_driver >= 0);
21998 	mutex_exit(SD_MUTEX(un));
21999 
22000 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22001 	return (err);
22002 }
22003 
22004 
22005 /*
22006  *    Function: sd_uscsi_ioctl
22007  *
22008  * Description: This routine is the driver entry point for handling USCSI ioctl
22009  *		requests (USCSICMD).
22010  *
22011  *   Arguments: dev	- the device number
22012  *		arg	- user provided scsi command
22013  *		flag	- this argument is a pass through to ddi_copyxxx()
22014  *			  directly from the mode argument of ioctl().
22015  *
22016  * Return Code: code returned by sd_send_scsi_cmd
22017  *		ENXIO
22018  *		EFAULT
22019  *		EAGAIN
22020  */
22021 
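/*
 * Editor's illustration (hypothetical, not part of the original driver):
 * a userland caller would typically issue a USCSICMD against the raw
 * device roughly as follows (error handling omitted; "fd" is an open
 * raw-device file descriptor):
 *
 *	struct uscsi_cmd ucmd;
 *	union scsi_cdb cdb;
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	bzero(&cdb, sizeof (cdb));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	if (ioctl(fd, USCSICMD, &ucmd) != 0)
 *		perror("USCSICMD");
 */
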
22022 static int
22023 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
22024 {
22025 #ifdef _MULTI_DATAMODEL
22026 	/*
22027 	 * For use when a 32-bit app makes a call into a
22028 	 * 64-bit ioctl.
22029 	 */
22030 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
22031 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
22032 	model_t			model;
22033 #endif /* _MULTI_DATAMODEL */
22034 	struct uscsi_cmd	*scmd = NULL;
22035 	struct sd_lun		*un = NULL;
22036 	enum uio_seg		uioseg;
22037 	char			cdb[CDB_GROUP0];
22038 	int			rval = 0;
22039 
22040 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22041 		return (ENXIO);
22042 	}
22043 
22044 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
22045 
22046 	scmd = (struct uscsi_cmd *)
22047 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
22048 
22049 #ifdef _MULTI_DATAMODEL
22050 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
22051 	case DDI_MODEL_ILP32:
22052 	{
22053 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
22054 			rval = EFAULT;
22055 			goto done;
22056 		}
22057 		/*
22058 		 * Convert the ILP32 uscsi data from the
22059 		 * application to LP64 for internal use.
22060 		 */
22061 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
22062 		break;
22063 	}
22064 	case DDI_MODEL_NONE:
22065 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22066 			rval = EFAULT;
22067 			goto done;
22068 		}
22069 		break;
22070 	}
22071 #else /* ! _MULTI_DATAMODEL */
22072 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22073 		rval = EFAULT;
22074 		goto done;
22075 	}
22076 #endif /* _MULTI_DATAMODEL */
22077 
22078 	scmd->uscsi_flags &= ~USCSI_NOINTR;
22079 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
22080 	if (un->un_f_format_in_progress == TRUE) {
22081 		rval = EAGAIN;
22082 		goto done;
22083 	}
22084 
22085 	/*
22086 	 * We have to do the ddi_copyin() here on the uscsi_cdb so that
22087 	 * we will have a valid cdb[0] to test.
22088 	 */
22089 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
22090 	    (cdb[0] == SCMD_FORMAT)) {
22091 		SD_TRACE(SD_LOG_IOCTL, un,
22092 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22093 		mutex_enter(SD_MUTEX(un));
22094 		un->un_f_format_in_progress = TRUE;
22095 		mutex_exit(SD_MUTEX(un));
22096 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22097 		    SD_PATH_STANDARD);
22098 		mutex_enter(SD_MUTEX(un));
22099 		un->un_f_format_in_progress = FALSE;
22100 		mutex_exit(SD_MUTEX(un));
22101 	} else {
22102 		SD_TRACE(SD_LOG_IOCTL, un,
22103 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22104 		/*
22105 		 * It's OK to fall into here even if the ddi_copyin()
22106 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
22107 		 * does this same copyin and will return the EFAULT
22108 		 * if it fails.
22109 		 */
22110 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22111 		    SD_PATH_STANDARD);
22112 	}
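	/*
	 * Copy the (possibly updated) uscsi_cmd back to the caller.  Note
	 * that a failed ddi_copyout() below is reported as EFAULT only
	 * when the command itself has already failed (rval != 0); a
	 * command that succeeded on the device is still reported as
	 * successful even if this status copyout fails.
	 */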
22113 #ifdef _MULTI_DATAMODEL
22114 	switch (model) {
22115 	case DDI_MODEL_ILP32:
22116 		/*
22117 		 * Convert back to ILP32 before copyout to the
22118 		 * application
22119 		 */
22120 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
22121 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
22122 			if (rval != 0) {
22123 				rval = EFAULT;
22124 			}
22125 		}
22126 		break;
22127 	case DDI_MODEL_NONE:
22128 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22129 			if (rval != 0) {
22130 				rval = EFAULT;
22131 			}
22132 		}
22133 		break;
22134 	}
22135 #else /* ! _MULTI_DATAMODEL */
22136 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22137 		if (rval != 0) {
22138 			rval = EFAULT;
22139 		}
22140 	}
22141 #endif /* _MULTI_DATAMODEL */
22142 done:
22143 	kmem_free(scmd, sizeof (struct uscsi_cmd));
22144 
22145 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
22146 
22147 	return (rval);
22148 }
22149 
22150 
22151 /*
22152  *    Function: sd_dkio_ctrl_info
22153  *
22154  * Description: This routine is the driver entry point for handling controller
22155  *		information ioctl requests (DKIOCINFO).
22156  *
22157  *   Arguments: dev  - the device number
22158  *		arg  - pointer to user provided dk_cinfo structure
22159  *		       specifying the controller type and attributes.
22160  *		flag - this argument is a pass through to ddi_copyxxx()
22161  *		       directly from the mode argument of ioctl().
22162  *
22163  * Return Code: 0
22164  *		EFAULT
22165  *		ENXIO
22166  */
22167 
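/*
 * Editor's illustration (hypothetical): a userland caller would fetch
 * the controller information roughly as follows:
 *
 *	struct dk_cinfo ci;
 *
 *	if (ioctl(fd, DKIOCINFO, &ci) == 0)
 *		(void) printf("%s%d: unit %d\n",
 *		    ci.dki_cname, ci.dki_cnum, ci.dki_unit);
 */
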
22168 static int
22169 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22170 {
22171 	struct sd_lun	*un = NULL;
22172 	struct dk_cinfo	*info;
22173 	dev_info_t	*pdip;
22174 	int		lun, tgt;
22175 
22176 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22177 		return (ENXIO);
22178 	}
22179 
22180 	info = (struct dk_cinfo *)
22181 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22182 
22183 	switch (un->un_ctype) {
22184 	case CTYPE_CDROM:
22185 		info->dki_ctype = DKC_CDROM;
22186 		break;
22187 	default:
22188 		info->dki_ctype = DKC_SCSI_CCS;
22189 		break;
22190 	}
22191 	pdip = ddi_get_parent(SD_DEVINFO(un));
22192 	info->dki_cnum = ddi_get_instance(pdip);
22193 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22194 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22195 	} else {
22196 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22197 		    DK_DEVLEN - 1);
22198 	}
22199 
22200 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22201 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22202 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22203 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22204 
22205 	/* Unit Information */
22206 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22207 	info->dki_slave = ((tgt << 3) | lun);
22208 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22209 	    DK_DEVLEN - 1);
22210 	info->dki_flags = DKI_FMTVOL;
22211 	info->dki_partition = SDPART(dev);
22212 
22213 	/* Max Transfer size of this device in blocks */
22214 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22215 	info->dki_addr = 0;
22216 	info->dki_space = 0;
22217 	info->dki_prio = 0;
22218 	info->dki_vec = 0;
22219 
22220 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22221 		kmem_free(info, sizeof (struct dk_cinfo));
22222 		return (EFAULT);
22223 	} else {
22224 		kmem_free(info, sizeof (struct dk_cinfo));
22225 		return (0);
22226 	}
22227 }
22228 
22229 
22230 /*
22231  *    Function: sd_get_media_info
22232  *
22233  * Description: This routine is the driver entry point for handling ioctl
22234  *		requests for the media type or command set profile used by the
22235  *		drive to operate on the media (DKIOCGMEDIAINFO).
22236  *
22237  *   Arguments: dev	- the device number
22238  *		arg	- pointer to user provided dk_minfo structure
22239  *			  specifying the media type, logical block size and
22240  *			  drive capacity.
22241  *		flag	- this argument is a pass through to ddi_copyxxx()
22242  *			  directly from the mode argument of ioctl().
22243  *
22244  * Return Code: 0
22245  *		EACCES
22246  *		EFAULT
22247  *		ENXIO
22248  *		EIO
22249  */
22250 
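/*
 * Editor's illustration (hypothetical): the media type, logical block
 * size and capacity can be fetched from userland roughly as follows:
 *
 *	struct dk_minfo mi;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &mi) == 0)
 *		(void) printf("type=%u lbsize=%u cap=%llu\n",
 *		    mi.dki_media_type, mi.dki_lbsize,
 *		    (unsigned long long)mi.dki_capacity);
 */
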
22251 static int
22252 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22253 {
22254 	struct sd_lun		*un = NULL;
22255 	struct uscsi_cmd	com;
22256 	struct scsi_inquiry	*sinq;
22257 	struct dk_minfo		media_info;
22258 	u_longlong_t		media_capacity;
22259 	uint64_t		capacity;
22260 	uint_t			lbasize;
22261 	uchar_t			*out_data;
22262 	uchar_t			*rqbuf;
22263 	int			rval = 0;
22264 	int			rtn;
22265 
22266 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22267 	    (un->un_state == SD_STATE_OFFLINE)) {
22268 		return (ENXIO);
22269 	}
22270 
22271 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22272 
22273 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22274 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22275 
22276 	/* Issue a TUR to determine if the drive is ready with media present */
22277 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
22278 	if (rval == ENXIO) {
22279 		goto done;
22280 	}
22281 
22282 	/* Now get configuration data */
22283 	if (ISCD(un)) {
22284 		media_info.dki_media_type = DK_CDROM;
22285 
22286 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22287 		if (un->un_f_mmc_cap == TRUE) {
22288 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
22289 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
22290 
22291 			if (rtn) {
22292 				/*
22293 				 * Failed for other than an illegal request
22294 				 * or command not supported
22295 				 */
22296 				if ((com.uscsi_status == STATUS_CHECK) &&
22297 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22298 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22299 					    (rqbuf[12] != 0x20)) {
22300 						rval = EIO;
22301 						goto done;
22302 					}
22303 				}
22304 			} else {
22305 				/*
22306 				 * The GET CONFIGURATION command succeeded
22307 				 * so set the media type according to the
22308 				 * returned data
22309 				 */
22310 				media_info.dki_media_type = out_data[6];
22311 				media_info.dki_media_type <<= 8;
22312 				media_info.dki_media_type |= out_data[7];
22313 			}
22314 		}
22315 	} else {
22316 		/*
22317 		 * The profile list is not available, so we attempt to identify
22318 		 * the media type based on the inquiry data
22319 		 */
22320 		sinq = un->un_sd->sd_inq;
22321 		if (sinq->inq_qual == 0) {
22322 			/* This is a direct access device */
22323 			media_info.dki_media_type = DK_FIXED_DISK;
22324 
22325 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22326 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22327 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22328 					media_info.dki_media_type = DK_ZIP;
22329 				} else if (bcmp(sinq->inq_pid,
22330 				    "jaz", 3) == 0) {
22331 					media_info.dki_media_type = DK_JAZ;
22332 				}
22333 			}
22334 		} else {
22335 			/* Not a CD or direct access so return unknown media */
22336 			media_info.dki_media_type = DK_UNKNOWN;
22337 		}
22338 	}
22339 
22340 	/* Now read the capacity so we can provide the lbasize and capacity */
22341 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
22342 	    SD_PATH_DIRECT)) {
22343 	case 0:
22344 		break;
22345 	case EACCES:
22346 		rval = EACCES;
22347 		goto done;
22348 	default:
22349 		rval = EIO;
22350 		goto done;
22351 	}
22352 
22353 	media_info.dki_lbsize = lbasize;
22354 	media_capacity = capacity;
22355 
22356 	/*
22357 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
22358 	 * un->un_sys_blocksize chunks, so we need to convert it into
22359 	 * lbasize chunks.
22360 	 */
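	/*
	 * Editor's worked example (values are illustrative only): for a
	 * CD-ROM with lbasize 2048 and un_sys_blocksize 512, a READ
	 * CAPACITY result of 1200000 system blocks converts to
	 * 1200000 * 512 / 2048 = 300000 2K media blocks.
	 */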
22361 	media_capacity *= un->un_sys_blocksize;
22362 	media_capacity /= lbasize;
22363 	media_info.dki_capacity = media_capacity;
22364 
22365 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22366 		rval = EFAULT;
22367 		/* Keep the goto so any code added below still runs the cleanup */
22368 		goto done;
22369 	}
22370 done:
22371 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22372 	kmem_free(rqbuf, SENSE_LENGTH);
22373 	return (rval);
22374 }
22375 
22376 
22377 /*
22378  *    Function: sd_dkio_get_geometry
22379  *
22380  * Description: This routine is the driver entry point for handling user
22381  *		requests to get the device geometry (DKIOCGGEOM).
22382  *
22383  *   Arguments: dev  - the device number
22384  *		arg  - pointer to user provided dk_geom structure specifying
22385  *			the controller's notion of the current geometry.
22386  *		flag - this argument is a pass through to ddi_copyxxx()
22387  *		       directly from the mode argument of ioctl().
22388  *		geom_validated - flag indicating if the device geometry has been
22389  *				 previously validated in the sdioctl routine.
22390  *
22391  * Return Code: 0
22392  *		EFAULT
22393  *		ENXIO
22394  *		EIO
22395  */
22396 
22397 static int
22398 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22399 {
22400 	struct sd_lun	*un = NULL;
22401 	struct dk_geom	*tmp_geom = NULL;
22402 	int		rval = 0;
22403 
22404 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22405 		return (ENXIO);
22406 	}
22407 
22408 	if (geom_validated == FALSE) {
22409 		/*
22410 		 * sd_validate_geometry does not spin a disk up
22411 		 * if it was spun down. We need to make sure it
22412 		 * is ready.
22413 		 */
22414 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22415 			return (rval);
22416 		}
22417 		mutex_enter(SD_MUTEX(un));
22418 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22419 		mutex_exit(SD_MUTEX(un));
22420 	}
22421 	if (rval)
22422 		return (rval);
22423 
22424 	/*
22425 	 * It is possible that un_solaris_size is 0 (uninitialized)
22426 	 * after sd_unit_attach; a reservation conflict may cause this
22427 	 * situation. Thus, the zero check of un_solaris_size
22428 	 * should occur after the sd_validate_geometry() call.
22429 	 */
22430 #if defined(__i386) || defined(__amd64)
22431 	if (un->un_solaris_size == 0) {
22432 		return (EIO);
22433 	}
22434 #endif
22435 
22436 	/*
22437 	 * Make a local copy of the soft state geometry to avoid some potential
22438 	 * race conditions associated with holding the mutex and updating the
22439 	 * write_reinstruct value
22440 	 */
22441 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22442 	mutex_enter(SD_MUTEX(un));
22443 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22444 	mutex_exit(SD_MUTEX(un));
22445 
22446 	if (tmp_geom->dkg_write_reinstruct == 0) {
22447 		tmp_geom->dkg_write_reinstruct =
22448 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22449 		    sd_rot_delay) / (int)60000);
22450 	}
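	/*
	 * Editor's worked example for the computation above (values are
	 * illustrative only): with dkg_nsect = 63, dkg_rpm = 7200 and a
	 * rotational delay (sd_rot_delay) of 4 msec, dkg_write_reinstruct
	 * becomes (63 * 7200 * 4) / 60000 = 30 sectors.
	 */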
22451 
22452 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22453 	    flag);
22454 	if (rval != 0) {
22455 		rval = EFAULT;
22456 	}
22457 
22458 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22459 	return (rval);
22460 
22461 }
22462 
22463 
22464 /*
22465  *    Function: sd_dkio_set_geometry
22466  *
22467  * Description: This routine is the driver entry point for handling user
22468  *		requests to set the device geometry (DKIOCSGEOM). The actual
22469  *		device geometry is not updated, just the driver "notion" of it.
22470  *
22471  *   Arguments: dev  - the device number
22472  *		arg  - pointer to user provided dk_geom structure used to set
22473  *			the controller's notion of the current geometry.
22474  *		flag - this argument is a pass through to ddi_copyxxx()
22475  *		       directly from the mode argument of ioctl().
22476  *
22477  * Return Code: 0
22478  *		EFAULT
22479  *		ENXIO
22480  *		EIO
22481  */
22482 
22483 static int
22484 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22485 {
22486 	struct sd_lun	*un = NULL;
22487 	struct dk_geom	*tmp_geom;
22488 	struct dk_map	*lp;
22489 	int		rval = 0;
22490 	int		i;
22491 
22492 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22493 		return (ENXIO);
22494 	}
22495 
22496 	/*
22497 	 * Make sure there is no reservation conflict on the lun.
22498 	 */
22499 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22500 		return (EACCES);
22501 	}
22502 
22503 #if defined(__i386) || defined(__amd64)
22504 	if (un->un_solaris_size == 0) {
22505 		return (EIO);
22506 	}
22507 #endif
22508 
22509 	/*
22510 	 * We need to copy the user specified geometry into local
22511 	 * storage and then update the softstate. We don't want to hold
22512 	 * the mutex and copyin directly from the user to the soft state
22513 	 */
22514 	tmp_geom = (struct dk_geom *)
22515 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22516 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22517 	if (rval != 0) {
22518 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22519 		return (EFAULT);
22520 	}
22521 
22522 	mutex_enter(SD_MUTEX(un));
22523 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22524 	for (i = 0; i < NDKMAP; i++) {
22525 		lp  = &un->un_map[i];
22526 		un->un_offset[i] =
22527 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22528 #if defined(__i386) || defined(__amd64)
22529 		un->un_offset[i] += un->un_solaris_offset;
22530 #endif
22531 	}
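	/*
	 * Editor's worked example for the loop above (values are
	 * illustrative only): with dkg_nhead = 255 and dkg_nsect = 63, a
	 * slice whose dkl_cylno is 10 starts at block
	 * 255 * 63 * 10 = 160650 (plus un_solaris_offset on x86).
	 */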
22532 	un->un_f_geometry_is_valid = FALSE;
22533 	mutex_exit(SD_MUTEX(un));
22534 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22535 
22536 	return (rval);
22537 }
22538 
22539 
22540 /*
22541  *    Function: sd_dkio_get_partition
22542  *
22543  * Description: This routine is the driver entry point for handling user
22544  *		requests to get the partition table (DKIOCGAPART).
22545  *
22546  *   Arguments: dev  - the device number
22547  *		arg  - pointer to user provided dk_allmap structure specifying
22548  *			the controller's notion of the current partition table.
22549  *		flag - this argument is a pass through to ddi_copyxxx()
22550  *		       directly from the mode argument of ioctl().
22551  *		geom_validated - flag indicating if the device geometry has been
22552  *				 previously validated in the sdioctl routine.
22553  *
22554  * Return Code: 0
22555  *		EFAULT
22556  *		ENXIO
22557  *		EIO
22558  */
22559 
22560 static int
22561 sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22562 {
22563 	struct sd_lun	*un = NULL;
22564 	int		rval = 0;
22565 	int		size;
22566 
22567 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22568 		return (ENXIO);
22569 	}
22570 
22571 	/*
22572 	 * Make sure the geometry is valid before getting the partition
22573 	 * information.
22574 	 */
22575 	mutex_enter(SD_MUTEX(un));
22576 	if (geom_validated == FALSE) {
22577 		/*
22578 		 * sd_validate_geometry does not spin a disk up
22579 		 * if it was spun down. We need to make sure it
22580 		 * is ready before validating the geometry.
22581 		 */
22582 		mutex_exit(SD_MUTEX(un));
22583 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22584 			return (rval);
22585 		}
22586 		mutex_enter(SD_MUTEX(un));
22587 
22588 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22589 			mutex_exit(SD_MUTEX(un));
22590 			return (rval);
22591 		}
22592 	}
22593 	mutex_exit(SD_MUTEX(un));
22594 
22595 	/*
22596 	 * It is possible that un_solaris_size is 0 (uninitialized)
22597 	 * after sd_unit_attach; a reservation conflict may cause this
22598 	 * situation. Thus, the zero check of un_solaris_size
22599 	 * should occur after the sd_validate_geometry() call.
22600 	 */
22601 #if defined(__i386) || defined(__amd64)
22602 	if (un->un_solaris_size == 0) {
22603 		return (EIO);
22604 	}
22605 #endif
22606 
22607 #ifdef _MULTI_DATAMODEL
22608 	switch (ddi_model_convert_from(flag & FMODELS)) {
22609 	case DDI_MODEL_ILP32: {
22610 		struct dk_map32 dk_map32[NDKMAP];
22611 		int		i;
22612 
22613 		for (i = 0; i < NDKMAP; i++) {
22614 			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
22615 			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
22616 		}
22617 		size = NDKMAP * sizeof (struct dk_map32);
22618 		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
22619 		if (rval != 0) {
22620 			rval = EFAULT;
22621 		}
22622 		break;
22623 	}
22624 	case DDI_MODEL_NONE:
22625 		size = NDKMAP * sizeof (struct dk_map);
22626 		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22627 		if (rval != 0) {
22628 			rval = EFAULT;
22629 		}
22630 		break;
22631 	}
22632 #else /* ! _MULTI_DATAMODEL */
22633 	size = NDKMAP * sizeof (struct dk_map);
22634 	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22635 	if (rval != 0) {
22636 		rval = EFAULT;
22637 	}
22638 #endif /* _MULTI_DATAMODEL */
22639 	return (rval);
22640 }
22641 
22642 
22643 /*
22644  *    Function: sd_dkio_set_partition
22645  *
22646  * Description: This routine is the driver entry point for handling user
22647  *		requests to set the partition table (DKIOCSAPART). The actual
22648  *		device partition is not updated.
22649  *
22650  *   Arguments: dev  - the device number
22651  *		arg  - pointer to user provided dk_allmap structure used to set
22652  *			the controller's notion of the partition table.
22653  *		flag - this argument is a pass through to ddi_copyxxx()
22654  *		       directly from the mode argument of ioctl().
22655  *
22656  * Return Code: 0
22657  *		EINVAL
22658  *		EFAULT
22659  *		ENXIO
22660  *		EIO
22661  */
22662 
22663 static int
22664 sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
22665 {
22666 	struct sd_lun	*un = NULL;
22667 	struct dk_map	dk_map[NDKMAP];
22668 	struct dk_map	*lp;
22669 	int		rval = 0;
22670 	int		size;
22671 	int		i;
22672 #if defined(_SUNOS_VTOC_16)
22673 	struct dkl_partition	*vp;
22674 #endif
22675 
22676 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22677 		return (ENXIO);
22678 	}
22679 
22680 	/*
22681 	 * Set the map for all logical partitions.  We hold
22682 	 * SD_MUTEX so that nothing else can observe the map
22683 	 * while it is half updated.
22684 	 */
22685 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
22686 	mutex_enter(SD_MUTEX(un));
22687 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22688 		mutex_exit(SD_MUTEX(un));
22689 		return (ENOTSUP);
22690 	}
22691 	mutex_exit(SD_MUTEX(un));
22692 
22693 	/*
22694 	 * Make sure there is no reservation conflict on the lun.
22695 	 */
22696 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22697 		return (EACCES);
22698 	}
22699 
22700 #if defined(__i386) || defined(__amd64)
22701 	if (un->un_solaris_size == 0) {
22702 		return (EIO);
22703 	}
22704 #endif
22705 
22706 #ifdef _MULTI_DATAMODEL
22707 	switch (ddi_model_convert_from(flag & FMODELS)) {
22708 	case DDI_MODEL_ILP32: {
22709 		struct dk_map32 dk_map32[NDKMAP];
22710 
22711 		size = NDKMAP * sizeof (struct dk_map32);
22712 		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
22713 		if (rval != 0) {
22714 			return (EFAULT);
22715 		}
22716 		for (i = 0; i < NDKMAP; i++) {
22717 			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
22718 			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
22719 		}
22720 		break;
22721 	}
22722 	case DDI_MODEL_NONE:
22723 		size = NDKMAP * sizeof (struct dk_map);
22724 		rval = ddi_copyin((void *)arg, dk_map, size, flag);
22725 		if (rval != 0) {
22726 			return (EFAULT);
22727 		}
22728 		break;
22729 	}
22730 #else /* ! _MULTI_DATAMODEL */
22731 	size = NDKMAP * sizeof (struct dk_map);
22732 	rval = ddi_copyin((void *)arg, dk_map, size, flag);
22733 	if (rval != 0) {
22734 		return (EFAULT);
22735 	}
22736 #endif /* _MULTI_DATAMODEL */
22737 
22738 	mutex_enter(SD_MUTEX(un));
22739 	/* Note: The size used in this bcopy is set based upon the data model */
22740 	bcopy(dk_map, un->un_map, size);
22741 #if defined(_SUNOS_VTOC_16)
22742 	vp = (struct dkl_partition *)&(un->un_vtoc);
22743 #endif	/* defined(_SUNOS_VTOC_16) */
22744 	for (i = 0; i < NDKMAP; i++) {
22745 		lp  = &un->un_map[i];
22746 		un->un_offset[i] =
22747 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22748 #if defined(_SUNOS_VTOC_16)
22749 		vp->p_start = un->un_offset[i];
22750 		vp->p_size = lp->dkl_nblk;
22751 		vp++;
22752 #endif	/* defined(_SUNOS_VTOC_16) */
22753 #if defined(__i386) || defined(__amd64)
22754 		un->un_offset[i] += un->un_solaris_offset;
22755 #endif
22756 	}
22757 	mutex_exit(SD_MUTEX(un));
22758 	return (rval);
22759 }
22760 
22761 
22762 /*
22763  *    Function: sd_dkio_get_vtoc
22764  *
22765  * Description: This routine is the driver entry point for handling user
22766  *		requests to get the current volume table of contents
22767  *		(DKIOCGVTOC).
22768  *
22769  *   Arguments: dev  - the device number
22770  *		arg  - pointer to user provided vtoc structure specifying
22771  *			the current vtoc.
22772  *		flag - this argument is a pass through to ddi_copyxxx()
22773  *		       directly from the mode argument of ioctl().
22774  *		geom_validated - flag indicating if the device geometry has been
22775  *				 previously validated in the sdioctl routine.
22776  *
22777  * Return Code: 0
22778  *		EFAULT
22779  *		ENXIO
22780  *		EIO
22781  */
22782 
22783 static int
22784 sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
22785 {
22786 	struct sd_lun	*un = NULL;
22787 #if defined(_SUNOS_VTOC_8)
22788 	struct vtoc	user_vtoc;
22789 #endif	/* defined(_SUNOS_VTOC_8) */
22790 	int		rval = 0;
22791 
22792 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22793 		return (ENXIO);
22794 	}
22795 
22796 	mutex_enter(SD_MUTEX(un));
22797 	if (geom_validated == FALSE) {
22798 		/*
22799 		 * sd_validate_geometry does not spin a disk up
22800 		 * if it was spun down. We need to make sure it
22801 		 * is ready.
22802 		 */
22803 		mutex_exit(SD_MUTEX(un));
22804 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22805 			return (rval);
22806 		}
22807 		mutex_enter(SD_MUTEX(un));
22808 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22809 			mutex_exit(SD_MUTEX(un));
22810 			return (rval);
22811 		}
22812 	}
22813 
22814 #if defined(_SUNOS_VTOC_8)
22815 	sd_build_user_vtoc(un, &user_vtoc);
22816 	mutex_exit(SD_MUTEX(un));
22817 
22818 #ifdef _MULTI_DATAMODEL
22819 	switch (ddi_model_convert_from(flag & FMODELS)) {
22820 	case DDI_MODEL_ILP32: {
22821 		struct vtoc32 user_vtoc32;
22822 
22823 		vtoctovtoc32(user_vtoc, user_vtoc32);
22824 		if (ddi_copyout(&user_vtoc32, (void *)arg,
22825 		    sizeof (struct vtoc32), flag)) {
22826 			return (EFAULT);
22827 		}
22828 		break;
22829 	}
22830 
22831 	case DDI_MODEL_NONE:
22832 		if (ddi_copyout(&user_vtoc, (void *)arg,
22833 		    sizeof (struct vtoc), flag)) {
22834 			return (EFAULT);
22835 		}
22836 		break;
22837 	}
22838 #else /* ! _MULTI_DATAMODEL */
22839 	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
22840 		return (EFAULT);
22841 	}
22842 #endif /* _MULTI_DATAMODEL */
22843 
22844 #elif defined(_SUNOS_VTOC_16)
22845 	mutex_exit(SD_MUTEX(un));
22846 
22847 #ifdef _MULTI_DATAMODEL
22848 	/*
22849 	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
22850 	 * 32-bit to maintain compatibility with existing on-disk
22851 	 * structures.  Thus, we need to convert the structure when copying
22852 	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
22853 	 * program.  If the target is a 32-bit program, then no conversion
22854 	 * is necessary.
22855 	 */
22856 	/* LINTED: logical expression always true: op "||" */
22857 	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
22858 	switch (ddi_model_convert_from(flag & FMODELS)) {
22859 	case DDI_MODEL_ILP32:
22860 		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
22861 		    sizeof (un->un_vtoc), flag)) {
22862 			return (EFAULT);
22863 		}
22864 		break;
22865 
22866 	case DDI_MODEL_NONE: {
22867 		struct vtoc user_vtoc;
22868 
22869 		vtoc32tovtoc(un->un_vtoc, user_vtoc);
22870 		if (ddi_copyout(&user_vtoc, (void *)arg,
22871 		    sizeof (struct vtoc), flag)) {
22872 			return (EFAULT);
22873 		}
22874 		break;
22875 	}
22876 	}
22877 #else /* ! _MULTI_DATAMODEL */
22878 	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
22879 	    flag)) {
22880 		return (EFAULT);
22881 	}
22882 #endif /* _MULTI_DATAMODEL */
22883 #else
22884 #error "No VTOC format defined."
22885 #endif
22886 
22887 	return (rval);
22888 }
22889 
22890 static int
22891 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
22892 {
22893 	struct sd_lun	*un = NULL;
22894 	dk_efi_t	user_efi;
22895 	int		rval = 0;
22896 	void		*buffer;
22897 
22898 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
22899 		return (ENXIO);
22900 
22901 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
22902 		return (EFAULT);
22903 
22904 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
22905 
22906 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
22907 	    (user_efi.dki_length > un->un_max_xfer_size))
22908 		return (EINVAL);
22909 
22910 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
22911 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
22912 	    user_efi.dki_lba, SD_PATH_DIRECT);
22913 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
22914 	    user_efi.dki_length, flag) != 0)
22915 		rval = EFAULT;
22916 
22917 	kmem_free(buffer, user_efi.dki_length);
22918 	return (rval);
22919 }
22920 
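/*
 * Editor's illustration (hypothetical): reading the GPT header from
 * userland via DKIOCGETEFI, assuming a 512-byte logical block size:
 *
 *	dk_efi_t de;
 *	char buf[512];
 *
 *	de.dki_lba = 1;
 *	de.dki_length = sizeof (buf);
 *	de.dki_data = (efi_gpt_t *)buf;
 *	if (ioctl(fd, DKIOCGETEFI, &de) != 0)
 *		perror("DKIOCGETEFI");
 */
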
22921 /*
22922  *    Function: sd_build_user_vtoc
22923  *
22924  * Description: This routine populates a pass by reference variable with the
22925  *		current volume table of contents.
22926  *
22927  *   Arguments: un - driver soft state (unit) structure
22928  *		user_vtoc - pointer to vtoc structure to be populated
22929  */
22930 
22931 static void
22932 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
22933 {
22934 	struct dk_map2		*lpart;
22935 	struct dk_map		*lmap;
22936 	struct partition	*vpart;
22937 	int			nblks;
22938 	int			i;
22939 
22940 	ASSERT(mutex_owned(SD_MUTEX(un)));
22941 
22942 	/*
22943 	 * Return vtoc structure fields in the provided VTOC area, addressed
22944 	 * by *user_vtoc.
22945 	 */
22946 	bzero(user_vtoc, sizeof (struct vtoc));
22947 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
22948 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
22949 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
22950 	user_vtoc->v_sanity	= VTOC_SANE;
22951 	user_vtoc->v_version	= un->un_vtoc.v_version;
22952 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
22953 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
22954 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
22955 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
22956 	    sizeof (un->un_vtoc.v_reserved));
22957 	/*
22958 	 * Convert partitioning information.
22959 	 *
22960 	 * Note the conversion from starting cylinder number
22961 	 * to starting sector number.
22962 	 */
22963 	lmap = un->un_map;
22964 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
22965 	vpart = user_vtoc->v_part;
22966 
22967 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
22968 
22969 	for (i = 0; i < V_NUMPAR; i++) {
22970 		vpart->p_tag	= lpart->p_tag;
22971 		vpart->p_flag	= lpart->p_flag;
22972 		vpart->p_start	= lmap->dkl_cylno * nblks;
22973 		vpart->p_size	= lmap->dkl_nblk;
22974 		lmap++;
22975 		lpart++;
22976 		vpart++;
22977 
22978 		/* (4364927) */
22979 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
22980 	}
22981 
22982 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
22983 }
22984 
22985 static int
22986 sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
22987 {
22988 	struct sd_lun		*un = NULL;
22989 	struct partition64	p64;
22990 	int			rval = 0;
22991 	uint_t			nparts;
22992 	efi_gpe_t		*partitions;
22993 	efi_gpt_t		*buffer;
22994 	diskaddr_t		gpe_lba;
22995 
22996 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22997 		return (ENXIO);
22998 	}
22999 
23000 	if (ddi_copyin((const void *)arg, &p64,
23001 	    sizeof (struct partition64), flag)) {
23002 		return (EFAULT);
23003 	}
23004 
23005 	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
23006 	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
23007 	    1, SD_PATH_DIRECT);
23008 	if (rval != 0)
23009 		goto done_error;
23010 
23011 	sd_swap_efi_gpt(buffer);
23012 
23013 	if ((rval = sd_validate_efi(buffer)) != 0)
23014 		goto done_error;
23015 
23016 	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
23017 	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
23018 	if (p64.p_partno > nparts) {
23019 		/* couldn't find it */
23020 		rval = ESRCH;
23021 		goto done_error;
23022 	}
23023 	/*
23024 	 * If we're dealing with a partition entry that lies beyond the
23025 	 * normal 16K block read above, adjust gpe_lba accordingly.
23026 	 */
23027 	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
23028 	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
23029 	    gpe_lba, SD_PATH_DIRECT);
23030 	if (rval) {
23031 		goto done_error;
23032 	}
23033 	partitions = (efi_gpe_t *)buffer;
23034 
23035 	sd_swap_efi_gpe(nparts, partitions);
23036 
23037 	partitions += p64.p_partno;
23038 	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
23039 	    sizeof (struct uuid));
23040 	p64.p_start = partitions->efi_gpe_StartingLBA;
23041 	p64.p_size = partitions->efi_gpe_EndingLBA -
23042 	    p64.p_start + 1;
23043 
23044 	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
23045 		rval = EFAULT;
23046 
23047 done_error:
23048 	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
23049 	return (rval);
23050 }
23051 
23052 
23053 /*
23054  *    Function: sd_dkio_set_vtoc
23055  *
23056  * Description: This routine is the driver entry point for handling user
23057  *		requests to set the current volume table of contents
23058  *		(DKIOCSVTOC).
23059  *
23060  *   Arguments: dev  - the device number
23061  *		arg  - pointer to user provided vtoc structure used to set the
23062  *			current vtoc.
23063  *		flag - this argument is a pass through to ddi_copyxxx()
23064  *		       directly from the mode argument of ioctl().
23065  *
23066  * Return Code: 0
23067  *		EFAULT
23068  *		ENXIO
23069  *		EINVAL
23070  *		ENOTSUP
23071  */
23072 
23073 static int
23074 sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
23075 {
23076 	struct sd_lun	*un = NULL;
23077 	struct vtoc	user_vtoc;
23078 	int		rval = 0;
23079 
23080 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23081 		return (ENXIO);
23082 	}
23083 
23084 #if defined(__i386) || defined(__amd64)
23085 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
23086 		return (EINVAL);
23087 	}
23088 #endif
23089 
23090 #ifdef _MULTI_DATAMODEL
23091 	switch (ddi_model_convert_from(flag & FMODELS)) {
23092 	case DDI_MODEL_ILP32: {
23093 		struct vtoc32 user_vtoc32;
23094 
23095 		if (ddi_copyin((const void *)arg, &user_vtoc32,
23096 		    sizeof (struct vtoc32), flag)) {
23097 			return (EFAULT);
23098 		}
23099 		vtoc32tovtoc(user_vtoc32, user_vtoc);
23100 		break;
23101 	}
23102 
23103 	case DDI_MODEL_NONE:
23104 		if (ddi_copyin((const void *)arg, &user_vtoc,
23105 		    sizeof (struct vtoc), flag)) {
23106 			return (EFAULT);
23107 		}
23108 		break;
23109 	}
23110 #else /* ! _MULTI_DATAMODEL */
23111 	if (ddi_copyin((const void *)arg, &user_vtoc,
23112 	    sizeof (struct vtoc), flag)) {
23113 		return (EFAULT);
23114 	}
23115 #endif /* _MULTI_DATAMODEL */
23116 
23117 	mutex_enter(SD_MUTEX(un));
23118 	if (un->un_blockcount > DK_MAX_BLOCKS) {
23119 		mutex_exit(SD_MUTEX(un));
23120 		return (ENOTSUP);
23121 	}
23122 	if (un->un_g.dkg_ncyl == 0) {
23123 		mutex_exit(SD_MUTEX(un));
23124 		return (EINVAL);
23125 	}
23126 
23127 	mutex_exit(SD_MUTEX(un));
23128 	sd_clear_efi(un);
23129 	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
23130 	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
23131 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
23132 	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23133 	    un->un_node_type, NULL);
23134 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
23135 	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23136 	    un->un_node_type, NULL);
23137 	mutex_enter(SD_MUTEX(un));
23138 
23139 	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
23140 		if ((rval = sd_write_label(dev)) == 0) {
23141 			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
23142 			    != 0) {
23143 				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
23144 				    "sd_dkio_set_vtoc: "
23145 				    "Failed validate geometry\n");
23146 			}
23147 		}
23148 	}
23149 
23150 	/*
23151 	 * Even if sd_build_label_vtoc or sd_write_label failed above,
23152 	 * write the devid anyway; it does no harm. Also preserve the
23153 	 * device id by writing to the disk acyl for the case where a
23154 	 * devid has been fabricated.
23155 	 */
23156 	if (un->un_f_devid_supported &&
23157 	    (un->un_f_opt_fab_devid == TRUE)) {
23158 		if (un->un_devid == NULL) {
23159 			sd_register_devid(un, SD_DEVINFO(un),
23160 			    SD_TARGET_IS_UNRESERVED);
23161 		} else {
23162 			/*
23163 			 * The device id for this disk has been
23164 			 * fabricated. Fabricated device id's are
23165 			 * managed by storing them in the last 2
23166 			 * available sectors on the drive. The device
23167 			 * id must be preserved by writing it back out
23168 			 * to this location.
23169 			 */
23170 			if (sd_write_deviceid(un) != 0) {
23171 				ddi_devid_free(un->un_devid);
23172 				un->un_devid = NULL;
23173 			}
23174 		}
23175 	}
23176 	mutex_exit(SD_MUTEX(un));
23177 	return (rval);
23178 }
23179 
23180 
23181 /*
23182  *    Function: sd_build_label_vtoc
23183  *
23184  * Description: This routine updates the driver soft state current volume table
23185  *		of contents based on a user specified vtoc.
23186  *
23187  *   Arguments: un - driver soft state (unit) structure
23188  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23189  *			    to update the driver soft state.
23190  *
23191  * Return Code: 0
23192  *		EINVAL
23193  */
23194 
23195 static int
23196 sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23197 {
23198 	struct dk_map		*lmap;
23199 	struct partition	*vpart;
23200 	int			nblks;
23201 #if defined(_SUNOS_VTOC_8)
23202 	int			ncyl;
23203 	struct dk_map2		*lpart;
23204 #endif	/* defined(_SUNOS_VTOC_8) */
23205 	int			i;
23206 
23207 	ASSERT(mutex_owned(SD_MUTEX(un)));
23208 
23209 	/* Sanity-check the vtoc */
23210 	if (user_vtoc->v_sanity != VTOC_SANE ||
23211 	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
23212 	    user_vtoc->v_nparts != V_NUMPAR) {
23213 		return (EINVAL);
23214 	}
23215 
23216 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23217 	if (nblks == 0) {
23218 		return (EINVAL);
23219 	}
23220 
23221 #if defined(_SUNOS_VTOC_8)
23222 	vpart = user_vtoc->v_part;
23223 	for (i = 0; i < V_NUMPAR; i++) {
23224 		if ((vpart->p_start % nblks) != 0) {
23225 			return (EINVAL);
23226 		}
23227 		ncyl = vpart->p_start / nblks;
23228 		ncyl += vpart->p_size / nblks;
23229 		if ((vpart->p_size % nblks) != 0) {
23230 			ncyl++;
23231 		}
23232 		if (ncyl > (int)un->un_g.dkg_ncyl) {
23233 			return (EINVAL);
23234 		}
23235 		vpart++;
23236 	}
23237 #endif	/* defined(_SUNOS_VTOC_8) */
23238 
23239 	/* Put appropriate vtoc structure fields into the disk label */
23240 #if defined(_SUNOS_VTOC_16)
23241 	/*
23242 	 * The vtoc is always a 32bit data structure to maintain the
23243 	 * on-disk format. Convert "in place" instead of bcopying it.
23244 	 */
23245 	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));
23246 
23247 	/*
23248 	 * in the 16-slice vtoc, starting sectors are expressed in
23249 	 * numbers *relative* to the start of the Solaris fdisk partition.
23250 	 */
23251 	lmap = un->un_map;
23252 	vpart = user_vtoc->v_part;
23253 
23254 	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
23255 		lmap->dkl_cylno = vpart->p_start / nblks;
23256 		lmap->dkl_nblk = vpart->p_size;
23257 	}
23258 
23259 #elif defined(_SUNOS_VTOC_8)
23260 
23261 	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
23262 	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
23263 	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];
23264 
23265 	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
23266 	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;
23267 
23268 	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);
23269 
23270 	un->un_vtoc.v_nparts = user_vtoc->v_nparts;
23271 
23272 	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
23273 	    sizeof (un->un_vtoc.v_reserved));
23274 
23275 	/*
23276 	 * Note the conversion from starting sector number
23277 	 * to starting cylinder number.
23278 	 * Return error if division results in a remainder.
23279 	 */
23280 	lmap = un->un_map;
23281 	lpart = un->un_vtoc.v_part;
23282 	vpart = user_vtoc->v_part;
23283 
23284 	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
23285 		lpart->p_tag  = vpart->p_tag;
23286 		lpart->p_flag = vpart->p_flag;
23287 		lmap->dkl_cylno = vpart->p_start / nblks;
23288 		lmap->dkl_nblk = vpart->p_size;
23289 
23290 		lmap++;
23291 		lpart++;
23292 		vpart++;
23293 
23294 		/* (4387723) */
23295 #ifdef _LP64
23296 		if (user_vtoc->timestamp[i] > TIME32_MAX) {
23297 			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
23298 		} else {
23299 			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23300 		}
23301 #else
23302 		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23303 #endif
23304 	}
23305 
23306 	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
23307 #else
23308 #error "No VTOC format defined."
23309 #endif
23310 	return (0);
23311 }
23312 
23313 /*
23314  *    Function: sd_clear_efi
23315  *
23316  * Description: This routine clears all EFI labels.
23317  *
23318  *   Arguments: un - driver soft state (unit) structure
23319  *
23320  * Return Code: void
23321  */
23322 
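/*
 * Editor's note: per the EFI/GPT specification, the primary GPT header
 * lives at LBA 1 (LBA 0 holds the protective MBR) and the backup header
 * at the last LBA of the device, which is why the routine below clears
 * block 1 first and then block (capacity - 1).
 */
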
23323 static void
23324 sd_clear_efi(struct sd_lun *un)
23325 {
23326 	efi_gpt_t	*gpt;
23327 	uint_t		lbasize;
23328 	uint64_t	cap;
23329 	int rval;
23330 
23331 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23332 
23333 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
23334 
23335 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23336 		goto done;
23337 	}
23338 
23339 	sd_swap_efi_gpt(gpt);
23340 	rval = sd_validate_efi(gpt);
23341 	if (rval == 0) {
23342 		/* clear primary */
23343 		bzero(gpt, sizeof (efi_gpt_t));
23344 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
23345 		    SD_PATH_DIRECT))) {
23346 			SD_INFO(SD_LOG_IO_PARTITION, un,
23347 			    "sd_clear_efi: clear primary label failed\n");
23348 		}
23349 	}
23350 	/* the backup */
23351 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23352 	    SD_PATH_DIRECT);
23353 	if (rval) {
23354 		goto done;
23355 	}
23356 	/*
23357 	 * The MMC standard allows READ CAPACITY to be
23358 	 * inaccurate by a bounded amount (in the interest of
23359 	 * response latency).  As a result, failed READs are
23360 	 * commonplace (due to the reading of metadata and not
23361 	 * data). Depending on the per-Vendor/drive Sense data,
23362 	 * the failed READ can cause many (unnecessary) retries.
23363 	 */
23364 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23365 	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23366 		SD_PATH_DIRECT)) != 0) {
23367 		goto done;
23368 	}
23369 	sd_swap_efi_gpt(gpt);
23370 	rval = sd_validate_efi(gpt);
23371 	if (rval == 0) {
23372 		/* clear backup */
23373 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
23374 			cap-1);
23375 		bzero(gpt, sizeof (efi_gpt_t));
23376 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23377 		    cap-1, SD_PATH_DIRECT))) {
23378 			SD_INFO(SD_LOG_IO_PARTITION, un,
23379 				"sd_clear_efi: clear backup label failed\n");
23380 		}
23381 	}
23382 
23383 done:
23384 	kmem_free(gpt, sizeof (efi_gpt_t));
23385 }
23386 
23387 /*
23388  *    Function: sd_set_vtoc
23389  *
23390  * Description: This routine writes the disk label and its backup copies.
23391  *
23392  *   Arguments: un  - driver soft state (unit) structure
23393  *		dkl - the label data to be written
23394  *
23395  * Return Code: 0 for success, or an errno-type value on failure
23396  */
23397 
23398 static int
23399 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23400 {
23401 	void			*shadow_buf;
23402 	uint_t			label_addr;
23403 	int			sec;
23404 	int			blk;
23405 	int			head;
23406 	int			cyl;
23407 	int			rval;
23408 
23409 #if defined(__i386) || defined(__amd64)
23410 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23411 #else
23412 	/* Write the primary label at block 0 of the solaris partition. */
23413 	label_addr = 0;
23414 #endif
23415 
23416 	if (NOT_DEVBSIZE(un)) {
23417 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23418 		/*
23419 		 * Read the target's first block.
23420 		 */
23421 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23422 		    un->un_tgt_blocksize, label_addr,
23423 		    SD_PATH_STANDARD)) != 0) {
23424 			goto exit;
23425 		}
23426 		/*
23427 		 * Copy the contents of the label into the shadow buffer
23428 		 * which is of the size of target block size.
23429 		 */
23430 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23431 	}
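	/*
	 * Illustrative example (hypothetical numbers): with a 2048-byte
	 * target block, the read-modify-write above reads the full
	 * 2048-byte block, overlays the 512-byte dk_label at its start,
	 * and later writes the whole block back, preserving the remaining
	 * 1536 bytes of the target block.
	 */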
23432 
23433 	/* Write the primary label */
23434 	if (NOT_DEVBSIZE(un)) {
23435 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23436 		    label_addr, SD_PATH_STANDARD);
23437 	} else {
23438 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23439 		    label_addr, SD_PATH_STANDARD);
23440 	}
23441 	if (rval != 0) {
23442 		return (rval);
23443 	}
23444 
23445 	/*
23446 	 * Calculate where the backup labels go.  They are always on
23447 	 * the last alternate cylinder, but some older drives put them
23448 	 * on head 2 instead of the last head.	They are always on the
23449 	 * first 5 odd sectors of the appropriate track.
23450 	 *
23451 	 * We have no choice at this point, but to believe that the
23452 	 * disk label is valid.	 Use the geometry of the disk
23453 	 * as described in the label.
23454 	 */
23455 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23456 	head = dkl->dkl_nhead - 1;
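	/*
	 * Worked example (hypothetical geometry): for ncyl = 522, acyl = 2,
	 * nhead = 255, nsect = 63 and apc = 0, the backup labels land on
	 * cylinder 523, head 254, i.e. at blocks
	 * 523 * (255 * 63) + 254 * 63 + sec = 8417997 + sec,
	 * for sec in {1, 3, 5, 7, 9}.
	 */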
23457 
23458 	/*
23459 	 * Write and verify the backup labels. Make sure we don't try to
23460 	 * write past the last cylinder.
23461 	 */
23462 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23463 		blk = (daddr_t)(
23464 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23465 		    (head * dkl->dkl_nsect) + sec);
23466 #if defined(__i386) || defined(__amd64)
23467 		blk += un->un_solaris_offset;
23468 #endif
23469 		if (NOT_DEVBSIZE(un)) {
23470 			uint64_t	tblk;
23471 			/*
23472 			 * Need to read the block first for read modify write.
23473 			 */
23474 			tblk = (uint64_t)blk;
23475 			blk = (int)((tblk * un->un_sys_blocksize) /
23476 			    un->un_tgt_blocksize);
23477 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23478 			    un->un_tgt_blocksize, blk,
23479 			    SD_PATH_STANDARD)) != 0) {
23480 				goto exit;
23481 			}
23482 			/*
23483 			 * Modify the shadow buffer with the label.
23484 			 */
23485 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23486 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23487 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23488 		} else {
23489 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23490 			    blk, SD_PATH_STANDARD);
23491 			SD_INFO(SD_LOG_IO_PARTITION, un,
23492 			    "sd_set_vtoc: wrote backup label %d\n", blk);
23493 		}
23494 		if (rval != 0) {
23495 			goto exit;
23496 		}
23497 	}
23498 exit:
23499 	if (NOT_DEVBSIZE(un)) {
23500 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23501 	}
23502 	return (rval);
23503 }
23504 
23505 /*
23506  *    Function: sd_clear_vtoc
23507  *
23508  * Description: This routine clears out the VTOC labels.
23509  *
23510  *   Arguments: un - driver soft state (unit) structure
23511  *
23512  * Return: void
23513  */
23514 
23515 static void
23516 sd_clear_vtoc(struct sd_lun *un)
23517 {
23518 	struct dk_label		*dkl;
23519 
23520 	mutex_exit(SD_MUTEX(un));
23521 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23522 	mutex_enter(SD_MUTEX(un));
23523 	/*
23524 	 * sd_set_vtoc uses these fields in order to figure out
23525 	 * where to overwrite the backup labels
23526 	 */
23527 	dkl->dkl_apc    = un->un_g.dkg_apc;
23528 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23529 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23530 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23531 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23532 	mutex_exit(SD_MUTEX(un));
23533 	(void) sd_set_vtoc(un, dkl);
23534 	kmem_free(dkl, sizeof (struct dk_label));
23535 
23536 	mutex_enter(SD_MUTEX(un));
23537 }
23538 
23539 /*
23540  *    Function: sd_write_label
23541  *
23542  * Description: This routine will validate and write the driver soft state vtoc
23543  *		contents to the device.
23544  *
23545  *   Arguments: dev - the device number
23546  *
23547  * Return Code: the code returned by sd_send_scsi_cmd()
23548  *		0
23549  *		EINVAL
23550  *		ENXIO
23551  *		ENOMEM
23552  */
23553 
23554 static int
23555 sd_write_label(dev_t dev)
23556 {
23557 	struct sd_lun		*un;
23558 	struct dk_label		*dkl;
23559 	short			sum;
23560 	short			*sp;
23561 	int			i;
23562 	int			rval;
23563 
23564 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23565 	    (un->un_state == SD_STATE_OFFLINE)) {
23566 		return (ENXIO);
23567 	}
23568 	ASSERT(mutex_owned(SD_MUTEX(un)));
23569 	mutex_exit(SD_MUTEX(un));
23570 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23571 	mutex_enter(SD_MUTEX(un));
23572 
23573 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23574 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23575 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23576 	dkl->dkl_apc	= un->un_g.dkg_apc;
23577 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23578 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23579 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23580 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23581 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23582 
23583 #if defined(_SUNOS_VTOC_8)
23584 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23585 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23586 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23587 	for (i = 0; i < NDKMAP; i++) {
23588 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23589 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23590 	}
23591 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23592 #elif defined(_SUNOS_VTOC_16)
23593 	dkl->dkl_skew	= un->un_dkg_skew;
23594 #else
23595 #error "No VTOC format defined."
23596 #endif
23597 
23598 	dkl->dkl_magic			= DKL_MAGIC;
23599 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23600 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23601 
23602 	/* Construct checksum for the new disk label */
23603 	sum = 0;
23604 	sp = (short *)dkl;
23605 	i = sizeof (struct dk_label) / sizeof (short);
23606 	while (i--) {
23607 		sum ^= *sp++;
23608 	}
23609 	dkl->dkl_cksum = sum;
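	/*
	 * Since dkl was zero-filled on allocation, XOR-ing every 16-bit
	 * word of the finished label -- dkl_cksum included -- yields 0,
	 * which is the property label readers verify.
	 */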
23610 
23611 	mutex_exit(SD_MUTEX(un));
23612 
23613 	rval = sd_set_vtoc(un, dkl);
23614 exit:
23615 	kmem_free(dkl, sizeof (struct dk_label));
23616 	mutex_enter(SD_MUTEX(un));
23617 	return (rval);
23618 }
23619 
23620 static int
23621 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23622 {
23623 	struct sd_lun	*un = NULL;
23624 	dk_efi_t	user_efi;
23625 	int		rval = 0;
23626 	void		*buffer;
23627 
23628 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23629 		return (ENXIO);
23630 
23631 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23632 		return (EFAULT);
23633 
23634 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23635 
23636 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23637 	    (user_efi.dki_length > un->un_max_xfer_size))
23638 		return (EINVAL);
23639 
23640 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23641 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23642 		rval = EFAULT;
23643 	} else {
23644 		/*
23645 		 * let's clear the vtoc labels and clear the softstate
23646 		 * vtoc.
23647 		 */
23648 		mutex_enter(SD_MUTEX(un));
23649 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23650 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23651 				"sd_dkio_set_efi: CLEAR VTOC\n");
23652 			sd_clear_vtoc(un);
23653 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23654 			mutex_exit(SD_MUTEX(un));
23655 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23656 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23657 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23658 			    S_IFBLK,
23659 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23660 			    un->un_node_type, NULL);
23661 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23662 			    S_IFCHR,
23663 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23664 			    un->un_node_type, NULL);
23665 		} else
23666 			mutex_exit(SD_MUTEX(un));
23667 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23668 		    user_efi.dki_lba, SD_PATH_DIRECT);
23669 		if (rval == 0) {
23670 			mutex_enter(SD_MUTEX(un));
23671 			un->un_f_geometry_is_valid = FALSE;
23672 			mutex_exit(SD_MUTEX(un));
23673 		}
23674 	}
23675 	kmem_free(buffer, user_efi.dki_length);
23676 	return (rval);
23677 }
23678 
23679 /*
23680  *    Function: sd_dkio_get_mboot
23681  *
23682  * Description: This routine is the driver entry point for handling user
23683  *		requests to get the current device mboot (DKIOCGMBOOT)
23684  *
23685  *   Arguments: dev  - the device number
23686  *		arg  - pointer to user provided mboot structure specifying
23687  *			the current mboot.
23688  *		flag - this argument is a pass through to ddi_copyxxx()
23689  *		       directly from the mode argument of ioctl().
23690  *
23691  * Return Code: 0
23692  *		EINVAL
23693  *		EFAULT
23694  *		ENXIO
23695  */
23696 
23697 static int
23698 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23699 {
23700 	struct sd_lun	*un;
23701 	struct mboot	*mboot;
23702 	int		rval;
23703 	size_t		buffer_size;
23704 
23705 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23706 	    (un->un_state == SD_STATE_OFFLINE)) {
23707 		return (ENXIO);
23708 	}
23709 
23710 	if (!un->un_f_mboot_supported || arg == NULL) {
23711 		return (EINVAL);
23712 	}
23713 
23714 	/*
23715 	 * Read the mboot block, located at absolute block 0 on the target.
23716 	 */
23717 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23718 
23719 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23720 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23721 
23722 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23723 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23724 	    SD_PATH_STANDARD)) == 0) {
23725 		if (ddi_copyout(mboot, (void *)arg,
23726 		    sizeof (struct mboot), flag) != 0) {
23727 			rval = EFAULT;
23728 		}
23729 	}
23730 	kmem_free(mboot, buffer_size);
23731 	return (rval);
23732 }
23733 
23734 
23735 /*
23736  *    Function: sd_dkio_set_mboot
23737  *
23738  * Description: This routine is the driver entry point for handling user
23739  *		requests to validate and set the device master boot
23740  *		(DKIOCSMBOOT).
23741  *
23742  *   Arguments: dev  - the device number
23743  *		arg  - pointer to user provided mboot structure used to set the
23744  *			master boot.
23745  *		flag - this argument is a pass through to ddi_copyxxx()
23746  *		       directly from the mode argument of ioctl().
23747  *
23748  * Return Code: 0
23749  *		EINVAL
23750  *		EFAULT
23751  *		ENXIO
23752  */
23753 
23754 static int
23755 sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
23756 {
23757 	struct sd_lun	*un = NULL;
23758 	struct mboot	*mboot = NULL;
23759 	int		rval;
23760 	ushort_t	magic;
23761 
23762 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23763 		return (ENXIO);
23764 	}
23765 
23766 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23767 
23768 	if (!un->un_f_mboot_supported) {
23769 		return (EINVAL);
23770 	}
23771 
23772 	if (arg == NULL) {
23773 		return (EINVAL);
23774 	}
23775 
23776 	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
23777 
23778 	if (ddi_copyin((const void *)arg, mboot,
23779 	    sizeof (struct mboot), flag) != 0) {
23780 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23781 		return (EFAULT);
23782 	}
23783 
23784 	/* Is this really a master boot record? */
23785 	magic = LE_16(mboot->signature);
23786 	if (magic != MBB_MAGIC) {
23787 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23788 		return (EINVAL);
23789 	}
23790 
23791 	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
23792 	    SD_PATH_STANDARD);
23793 
23794 	mutex_enter(SD_MUTEX(un));
23795 #if defined(__i386) || defined(__amd64)
23796 	if (rval == 0) {
23797 		/*
23798 		 * mboot has been written successfully.
23799 		 * update the fdisk and vtoc tables in memory
23800 		 */
23801 		rval = sd_update_fdisk_and_vtoc(un);
23802 		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
23803 			mutex_exit(SD_MUTEX(un));
23804 			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23805 			return (rval);
23806 		}
23807 	}
23808 
23809 	/*
23810 	 * If the mboot write fails, write the devid anyway, what can it hurt?
23811 	 * Also preserve the device id by writing to the disk acyl for the case
23812 	 * where a devid has been fabricated.
23813 	 */
23814 	if (un->un_f_devid_supported && un->un_f_opt_fab_devid) {
23815 		if (un->un_devid == NULL) {
23816 			sd_register_devid(un, SD_DEVINFO(un),
23817 			    SD_TARGET_IS_UNRESERVED);
23818 		} else {
23819 			/*
23820 			 * The device id for this disk has been
23821 			 * fabricated. Fabricated device id's are
23822 			 * managed by storing them in the last 2
23823 			 * available sectors on the drive. The device
23824 			 * id must be preserved by writing it back out
23825 			 * to this location.
23826 			 */
23827 			if (sd_write_deviceid(un) != 0) {
23828 				ddi_devid_free(un->un_devid);
23829 				un->un_devid = NULL;
23830 			}
23831 		}
23832 	}
23833 
23834 #ifdef __lock_lint
23835 	sd_setup_default_geometry(un);
23836 #endif
23837 
23838 #else
23839 	if (rval == 0) {
23840 		/*
23841 		 * mboot has been written successfully.
23842 		 * set up the default geometry and VTOC
23843 		 */
23844 		if (un->un_blockcount <= DK_MAX_BLOCKS)
23845 			sd_setup_default_geometry(un);
23846 	}
23847 #endif
23848 	mutex_exit(SD_MUTEX(un));
23849 	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23850 	return (rval);
23851 }
23852 
23853 
23854 /*
23855  *    Function: sd_setup_default_geometry
23856  *
23857  * Description: This local utility routine sets the default geometry as part of
23858  *		setting the device mboot.
23859  *
23860  *   Arguments: un - driver soft state (unit) structure
23861  *
23862  * Note: This may be redundant with sd_build_default_label.
23863  */
23864 
23865 static void
23866 sd_setup_default_geometry(struct sd_lun *un)
23867 {
23868 	/* zero out the soft state geometry and partition table. */
23869 	bzero(&un->un_g, sizeof (struct dk_geom));
23870 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23871 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
23872 	un->un_asciilabel[0] = '\0';
23873 
23874 	/*
23875 	 * For the rpm, we use the minimum for the disk.
23876 	 * For the heads, cylinders, and sectors per track:
23877 	 * if the capacity is <= 1GB, use head = 64, sect = 32;
23878 	 * else use head = 255, sect = 63.
23879 	 * Note: the capacity should equal the C*H*S product, so
23880 	 * rounding the cylinder count down truncates the usable
23881 	 * size slightly. For CD-ROMs this truncation can have
23882 	 * adverse side effects, so ncyl and nhead are returned
23883 	 * as 1 instead. nsect would overflow for most CD-ROM
23884 	 * capacities, as nsect is of type ushort.
23885 	 */
23886 	if (ISCD(un)) {
23887 		un->un_g.dkg_ncyl = 1;
23888 		un->un_g.dkg_nhead = 1;
23889 		un->un_g.dkg_nsect = un->un_blockcount;
23890 	} else {
23891 		if (un->un_blockcount <= 0x1000) {
23892 			/* Needed for unlabeled SCSI floppies. */
23893 			un->un_g.dkg_nhead = 2;
23894 			un->un_g.dkg_ncyl = 80;
23895 			un->un_g.dkg_pcyl = 80;
23896 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
23897 		} else if (un->un_blockcount <= 0x200000) {
23898 			un->un_g.dkg_nhead = 64;
23899 			un->un_g.dkg_nsect = 32;
23900 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
23901 		} else {
23902 			un->un_g.dkg_nhead = 255;
23903 			un->un_g.dkg_nsect = 63;
23904 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
23905 		}
23906 		un->un_blockcount = un->un_g.dkg_ncyl *
23907 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
23908 	}
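	/*
	 * Worked example (hypothetical capacity): a disk of 0x800000
	 * (8388608) blocks gets nhead = 255 and nsect = 63, so
	 * ncyl = 8388608 / 16065 = 522 and un_blockcount is rounded
	 * down to 522 * 16065 = 8385930 blocks.
	 */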
23909 	un->un_g.dkg_acyl = 0;
23910 	un->un_g.dkg_bcyl = 0;
23911 	un->un_g.dkg_intrlv = 1;
23912 	un->un_g.dkg_rpm = 200;
23913 	un->un_g.dkg_read_reinstruct = 0;
23914 	un->un_g.dkg_write_reinstruct = 0;
23915 	if (un->un_g.dkg_pcyl == 0) {
23916 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
23917 	}
23918 
23919 	un->un_map['a'-'a'].dkl_cylno = 0;
23920 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
23921 	un->un_map['c'-'a'].dkl_cylno = 0;
23922 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
23923 	un->un_f_geometry_is_valid = FALSE;
23924 }
23925 
23926 
23927 #if defined(__i386) || defined(__amd64)
23928 /*
23929  *    Function: sd_update_fdisk_and_vtoc
23930  *
23931  * Description: This local utility routine updates the device fdisk and vtoc
23932  *		as part of setting the device mboot.
23933  *
23934  *   Arguments: un - driver soft state (unit) structure
23935  *
23936  * Return Code: 0 for success or errno-type return code.
23937  *
23938  *    Note:x86: This looks like a duplicate of sd_validate_geometry(), but
23939  *		the two did exist separately in the x86 sd.c.
23940  */
23941 
23942 static int
23943 sd_update_fdisk_and_vtoc(struct sd_lun *un)
23944 {
23945 	static char	labelstring[128];
23946 	static char	buf[256];
23947 	char		*label = 0;
23948 	int		count;
23949 	int		label_rc = 0;
23950 	int		gvalid = un->un_f_geometry_is_valid;
23951 	int		fdisk_rval;
23952 	int		lbasize;
23953 	int		capacity;
23954 
23955 	ASSERT(mutex_owned(SD_MUTEX(un)));
23956 
23957 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
23958 		return (EINVAL);
23959 	}
23960 
23961 	if (un->un_f_blockcount_is_valid == FALSE) {
23962 		return (EINVAL);
23963 	}
23964 
23965 #if defined(_SUNOS_VTOC_16)
23966 	/*
23967 	 * Set up the "whole disk" fdisk partition; this should always
23968 	 * exist, regardless of whether the disk contains an fdisk table
23969 	 * or vtoc.
23970 	 */
23971 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
23972 	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
23973 #endif	/* defined(_SUNOS_VTOC_16) */
23974 
23975 	/*
23976 	 * copy the lbasize and capacity so that if they're
23977 	 * reset while we're not holding the SD_MUTEX(un), we will
23978 	 * continue to use valid values after the SD_MUTEX(un) is
23979 	 * reacquired.
23980 	 */
23981 	lbasize  = un->un_tgt_blocksize;
23982 	capacity = un->un_blockcount;
23983 
23984 	/*
23985 	 * refresh the logical and physical geometry caches.
23986 	 * (data from mode sense format/rigid disk geometry pages,
23987 	 * and scsi_ifgetcap("geometry").
23988 	 */
23989 	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
23990 
23991 	/*
23992 	 * Only DIRECT ACCESS devices will have Sun labels.
23993 	 * CD's supposedly have a Sun label, too
23994 	 */
23995 	if (un->un_f_vtoc_label_supported) {
23996 		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
23997 		    SD_PATH_DIRECT);
23998 		if (fdisk_rval == SD_CMD_FAILURE) {
23999 			ASSERT(mutex_owned(SD_MUTEX(un)));
24000 			return (EIO);
24001 		}
24002 
24003 		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
24004 			ASSERT(mutex_owned(SD_MUTEX(un)));
24005 			return (EACCES);
24006 		}
24007 
24008 		if (un->un_solaris_size <= DK_LABEL_LOC) {
24009 			/*
24010 			 * Found fdisk table but no Solaris partition entry,
24011 			 * so don't call sd_uselabel() and don't create
24012 			 * a default label.
24013 			 */
24014 			label_rc = 0;
24015 			un->un_f_geometry_is_valid = TRUE;
24016 			goto no_solaris_partition;
24017 		}
24018 
24019 #if defined(_SUNOS_VTOC_8)
24020 		label = (char *)un->un_asciilabel;
24021 #elif defined(_SUNOS_VTOC_16)
24022 		label = (char *)un->un_vtoc.v_asciilabel;
24023 #else
24024 #error "No VTOC format defined."
24025 #endif
24026 	} else if (capacity < 0) {
24027 		ASSERT(mutex_owned(SD_MUTEX(un)));
24028 		return (EINVAL);
24029 	}
24030 
24031 	/*
24032 	 * For removable media, we reach here only if a Solaris
24033 	 * partition has been found.
24034 	 * If un_f_geometry_is_valid is FALSE, the Solaris partition
24035 	 * has changed from the previous one, so we set up a
24036 	 * default VTOC in this case.
24037 	 */
24038 	if (un->un_f_geometry_is_valid == FALSE) {
24039 		sd_build_default_label(un);
24040 		label_rc = 0;
24041 	}
24042 
24043 no_solaris_partition:
24044 	if ((!un->un_f_has_removable_media ||
24045 	    (un->un_f_has_removable_media &&
24046 	    un->un_mediastate == DKIO_EJECTED)) &&
24047 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
24048 		/*
24049 		 * Print out a message indicating who and what we are.
24050 		 * We do this only when we actually validate the
24051 		 * geometry; sd_validate_geometry() may be called at
24052 		 * other times (e.g. for ioctls like Get VTOC), in
24053 		 * which case we don't want to print the label.
24054 		 * If the geometry is valid, print the label string;
24055 		 * otherwise print vendor and product info, if available.
24056 		 */
24057 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
24058 			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
24059 		} else {
24060 			mutex_enter(&sd_label_mutex);
24061 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
24062 			    labelstring);
24063 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
24064 			    &labelstring[64]);
24065 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
24066 			    labelstring, &labelstring[64]);
24067 			if (un->un_f_blockcount_is_valid == TRUE) {
24068 				(void) sprintf(&buf[strlen(buf)],
24069 				    ", %" PRIu64 " %u byte blocks\n",
24070 				    un->un_blockcount,
24071 				    un->un_tgt_blocksize);
24072 			} else {
24073 				(void) sprintf(&buf[strlen(buf)],
24074 				    ", (unknown capacity)\n");
24075 			}
24076 			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
24077 			mutex_exit(&sd_label_mutex);
24078 		}
24079 	}
24080 
24081 #if defined(_SUNOS_VTOC_16)
24082 	/*
24083 	 * If we have valid geometry, set up the remaining fdisk partitions.
24084 	 * Note that dkl_cylno is not used for the fdisk map entries, so
24085 	 * we set it to an entirely bogus value.
24086 	 */
24087 	for (count = 0; count < FD_NUMPART; count++) {
24088 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
24089 		un->un_map[FDISK_P1 + count].dkl_nblk =
24090 		    un->un_fmap[count].fmap_nblk;
24091 		un->un_offset[FDISK_P1 + count] =
24092 		    un->un_fmap[count].fmap_start;
24093 	}
24094 #endif
24095 
24096 	for (count = 0; count < NDKMAP; count++) {
24097 #if defined(_SUNOS_VTOC_8)
24098 		struct dk_map *lp  = &un->un_map[count];
24099 		un->un_offset[count] =
24100 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
24101 #elif defined(_SUNOS_VTOC_16)
24102 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
24103 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
24104 #else
24105 #error "No VTOC format defined."
24106 #endif
24107 	}
24108 
24109 	ASSERT(mutex_owned(SD_MUTEX(un)));
24110 	return (label_rc);
24111 }
24112 #endif
24113 
24114 
24115 /*
24116  *    Function: sd_check_media
24117  *
24118  * Description: This utility routine implements the functionality for the
24119  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24120  *		driver state changes from that specified by the user
24121  *		(inserted or ejected). For example, if the user specifies
24122  *		DKIO_EJECTED and the current media state is inserted this
24123  *		routine will immediately return DKIO_INSERTED. However, if the
24124  *		current media state is not inserted the user thread will be
24125  *		blocked until the drive state changes. If DKIO_NONE is specified
24126  *		the user thread will block until a drive state change occurs.
24127  *
24128  *   Arguments: dev  - the device number
24129  *		state  - user pointer to a dkio_state, updated with the current
24130  *			drive state at return.
24131  *
24132  * Return Code: ENXIO
24133  *		EIO
24134  *		EAGAIN
24135  *		EINTR
24136  */
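/*
 * Illustrative userland usage (a sketch, not from this driver): a media
 * watcher would typically loop on DKIOCSTATE, passing in the last state it
 * saw so that the ioctl blocks until the state changes:
 *
 *	enum dkio_state state = DKIO_NONE;
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		if (state == DKIO_INSERTED)
 *			handle_insert();	-- hypothetical helper
 *		else if (state == DKIO_EJECTED)
 *			handle_eject();		-- hypothetical helper
 *	}
 */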
24137 
24138 static int
24139 sd_check_media(dev_t dev, enum dkio_state state)
24140 {
24141 	struct sd_lun		*un = NULL;
24142 	enum dkio_state		prev_state;
24143 	opaque_t		token = NULL;
24144 	int			rval = 0;
24145 
24146 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24147 		return (ENXIO);
24148 	}
24149 
24150 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24151 
24152 	mutex_enter(SD_MUTEX(un));
24153 
24154 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24155 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24156 
24157 	prev_state = un->un_mediastate;
24158 
24159 	/* is there anything to do? */
24160 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24161 		/*
24162 		 * submit the request to the scsi_watch service;
24163 		 * scsi_media_watch_cb() does the real work
24164 		 */
24165 		mutex_exit(SD_MUTEX(un));
24166 
24167 		/*
24168 		 * This change handles the case where a scsi watch request is
24169 		 * added to a device that is powered down. To accomplish this
24170 		 * we power up the device before adding the scsi watch request,
24171 		 * since the scsi watch sends a TUR directly to the device
24172 		 * which the device cannot handle if it is powered down.
24173 		 */
24174 		if (sd_pm_entry(un) != DDI_SUCCESS) {
24175 			mutex_enter(SD_MUTEX(un));
24176 			goto done;
24177 		}
24178 
24179 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
24180 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24181 		    (caddr_t)dev);
24182 
24183 		sd_pm_exit(un);
24184 
24185 		mutex_enter(SD_MUTEX(un));
24186 		if (token == NULL) {
24187 			rval = EAGAIN;
24188 			goto done;
24189 		}
24190 
24191 		/*
24192 		 * This is a special case IOCTL that doesn't return
24193 		 * until the media state changes. Routine sdpower
24194 		 * knows about and handles this so don't count it
24195 		 * as an active cmd in the driver, which would
24196 		 * keep the device busy to the pm framework.
24197 		 * If the count isn't decremented the device can't
24198 		 * be powered down.
24199 		 */
24200 		un->un_ncmds_in_driver--;
24201 		ASSERT(un->un_ncmds_in_driver >= 0);
24202 
24203 		/*
24204 		 * if a prior request had been made, this will be the same
24205 		 * token, as scsi_watch was designed that way.
24206 		 */
24207 		un->un_swr_token = token;
24208 		un->un_specified_mediastate = state;
24209 
24210 		/*
24211 		 * now wait for media change
24212 		 * we will not be signalled until mediastate != state, but it
24213 		 * is still better to re-test the condition, since there is a
24214 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
24215 		 */
24216 		SD_TRACE(SD_LOG_COMMON, un,
24217 		    "sd_check_media: waiting for media state change\n");
24218 		while (un->un_mediastate == state) {
24219 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24220 				SD_TRACE(SD_LOG_COMMON, un,
24221 				    "sd_check_media: waiting for media state "
24222 				    "was interrupted\n");
24223 				un->un_ncmds_in_driver++;
24224 				rval = EINTR;
24225 				goto done;
24226 			}
24227 			SD_TRACE(SD_LOG_COMMON, un,
24228 			    "sd_check_media: received signal, state=%x\n",
24229 			    un->un_mediastate);
24230 		}
24231 		/*
24232 		 * Inc the counter to indicate the device once again
24233 		 * has an active outstanding cmd.
24234 		 */
24235 		un->un_ncmds_in_driver++;
24236 	}
24237 
24238 	/* invalidate geometry */
24239 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24240 		sr_ejected(un);
24241 	}
24242 
24243 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24244 		uint64_t	capacity;
24245 		uint_t		lbasize;
24246 
24247 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24248 		mutex_exit(SD_MUTEX(un));
24249 		/*
24250 		 * Since the following routines use SD_PATH_DIRECT, we must
24251 		 * call PM directly before the upcoming disk accesses. This
24252 		 * may cause the disk to be powered up and spun up.
24253 		 */
24254 
24255 		if (sd_pm_entry(un) == DDI_SUCCESS) {
24256 			rval = sd_send_scsi_READ_CAPACITY(un,
24257 			    &capacity,
24258 			    &lbasize, SD_PATH_DIRECT);
24259 			if (rval != 0) {
24260 				sd_pm_exit(un);
24261 				mutex_enter(SD_MUTEX(un));
24262 				goto done;
24263 			}
24264 		} else {
24265 			rval = EIO;
24266 			mutex_enter(SD_MUTEX(un));
24267 			goto done;
24268 		}
24269 		mutex_enter(SD_MUTEX(un));
24270 
24271 		sd_update_block_info(un, lbasize, capacity);
24272 
24273 		un->un_f_geometry_is_valid	= FALSE;
24274 		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24275 
24276 		mutex_exit(SD_MUTEX(un));
24277 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24278 		    SD_PATH_DIRECT);
24279 		sd_pm_exit(un);
24280 
24281 		mutex_enter(SD_MUTEX(un));
24282 	}
24283 done:
24284 	un->un_f_watcht_stopped = FALSE;
24285 	if (un->un_swr_token) {
24286 		/*
24287 		 * Use of this local token and the mutex ensures that we avoid
24288 		 * some race conditions associated with terminating the
24289 		 * scsi watch.
24290 		 */
24291 		token = un->un_swr_token;
24292 		un->un_swr_token = (opaque_t)NULL;
24293 		mutex_exit(SD_MUTEX(un));
24294 		(void) scsi_watch_request_terminate(token,
24295 		    SCSI_WATCH_TERMINATE_WAIT);
24296 		mutex_enter(SD_MUTEX(un));
24297 	}
24298 
24299 	/*
24300 	 * Update the capacity kstat value, if no media previously
24301 	 * (capacity kstat is 0) and a media has been inserted
24302 	 * (un_f_blockcount_is_valid == TRUE)
24303 	 */
24304 	if (un->un_errstats) {
24305 		struct sd_errstats	*stp = NULL;
24306 
24307 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24308 		if ((stp->sd_capacity.value.ui64 == 0) &&
24309 		    (un->un_f_blockcount_is_valid == TRUE)) {
24310 			stp->sd_capacity.value.ui64 =
24311 			    (uint64_t)((uint64_t)un->un_blockcount *
24312 			    un->un_sys_blocksize);
24313 		}
24314 	}
24315 	mutex_exit(SD_MUTEX(un));
24316 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24317 	return (rval);
24318 }
24319 
24320 
24321 /*
24322  *    Function: sd_delayed_cv_broadcast
24323  *
24324  * Description: Delayed cv_broadcast to allow for target to recover from media
24325  *		insertion.
24326  *
24327  *   Arguments: arg - driver soft state (unit) structure
24328  */
24329 
24330 static void
24331 sd_delayed_cv_broadcast(void *arg)
24332 {
24333 	struct sd_lun *un = arg;
24334 
24335 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24336 
24337 	mutex_enter(SD_MUTEX(un));
24338 	un->un_dcvb_timeid = NULL;
24339 	cv_broadcast(&un->un_state_cv);
24340 	mutex_exit(SD_MUTEX(un));
24341 }
24342 
24343 
24344 /*
24345  *    Function: sd_media_watch_cb
24346  *
24347  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24348  *		routine processes the TUR sense data and updates the driver
24349  *		state if a transition has occurred. The user thread
24350  *		(sd_check_media) is then signalled.
24351  *
24352  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24353  *			among multiple watches that share this callback function
24354  *		resultp - scsi watch facility result packet containing scsi
24355  *			  packet, status byte and sense data
24356  *
24357  * Return Code: 0 for success, -1 for failure
24358  */
24359 
24360 static int
24361 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24362 {
24363 	struct sd_lun			*un;
24364 	struct scsi_status		*statusp = resultp->statusp;
24365 	struct scsi_extended_sense	*sensep = resultp->sensep;
24366 	enum dkio_state			state = DKIO_NONE;
24367 	dev_t				dev = (dev_t)arg;
24368 	uchar_t				actual_sense_length;
24369 
24370 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24371 		return (-1);
24372 	}
24373 	actual_sense_length = resultp->actual_sense_length;
24374 
24375 	mutex_enter(SD_MUTEX(un));
24376 	SD_TRACE(SD_LOG_COMMON, un,
24377 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24378 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24379 
24380 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24381 		un->un_mediastate = DKIO_DEV_GONE;
24382 		cv_broadcast(&un->un_state_cv);
24383 		mutex_exit(SD_MUTEX(un));
24384 
24385 		return (0);
24386 	}
24387 
24388 	/*
24389 	 * If there was a check condition then sensep points to valid sense data
24390 	 * If status was not a check condition but a reservation or busy status
24391 	 * then the new state is DKIO_NONE
24392 	 */
24393 	if (sensep != NULL) {
24394 		SD_INFO(SD_LOG_COMMON, un,
24395 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24396 		    sensep->es_key, sensep->es_add_code, sensep->es_qual_code);
24397 		/* This routine only uses up to 13 bytes of sense data. */
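		/*
		 * For reference, the ASC/ASCQ values tested below carry the
		 * standard SCSI meanings: 0x28 = "not ready to ready change,
		 * medium may have changed"; 0x3a = "medium not present";
		 * 0x04 with qualifier 0x02/0x07/0x08 = "logical unit not
		 * ready" (initializing command required, operation in
		 * progress, long write in progress, respectively).
		 */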
24398 		if (actual_sense_length >= 13) {
24399 			if (sensep->es_key == KEY_UNIT_ATTENTION) {
24400 				if (sensep->es_add_code == 0x28) {
24401 					state = DKIO_INSERTED;
24402 				}
24403 			} else {
24404 				/*
24405 				 * A 02/04/02 sense means that the host
24406 				 * should send a start command. Explicitly
24407 				 * leave the media state as is
24408 				 * (inserted), since the media is present
24409 				 * and the host has stopped the device
24410 				 * for PM reasons. The next true
24411 				 * read/write to this media will bring
24412 				 * the device to the right state for
24413 				 * media access.
24414 				 */
24415 				if ((sensep->es_key == KEY_NOT_READY) &&
24416 				    (sensep->es_add_code == 0x3a)) {
24417 					state = DKIO_EJECTED;
24418 				}
24419 
24420 				/*
24421 				 * If the drive is busy with an operation
24422 				 * or long write, keep the media in an
24423 				 * inserted state.
24424 				 */
24425 
24426 				if ((sensep->es_key == KEY_NOT_READY) &&
24427 				    (sensep->es_add_code == 0x04) &&
24428 				    ((sensep->es_qual_code == 0x02) ||
24429 				    (sensep->es_qual_code == 0x07) ||
24430 				    (sensep->es_qual_code == 0x08))) {
24431 					state = DKIO_INSERTED;
24432 				}
24433 			}
24434 		}
24435 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24436 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24437 		state = DKIO_INSERTED;
24438 	}
24439 
24440 	SD_TRACE(SD_LOG_COMMON, un,
24441 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24442 	    state, un->un_specified_mediastate);
24443 
24444 	/*
24445 	 * now signal the waiting thread if this is *not* the specified state;
24446 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24447 	 * to recover
24448 	 */
24449 	if (state != un->un_specified_mediastate) {
24450 		un->un_mediastate = state;
24451 		if (state == DKIO_INSERTED) {
24452 			/*
24453 			 * delay the signal to give the drive a chance
24454 			 * to do what it apparently needs to do
24455 			 */
24456 			SD_TRACE(SD_LOG_COMMON, un,
24457 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24458 			if (un->un_dcvb_timeid == NULL) {
24459 				un->un_dcvb_timeid =
24460 				    timeout(sd_delayed_cv_broadcast, un,
24461 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24462 			}
24463 		} else {
24464 			SD_TRACE(SD_LOG_COMMON, un,
24465 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24466 			cv_broadcast(&un->un_state_cv);
24467 		}
24468 	}
24469 	mutex_exit(SD_MUTEX(un));
24470 	return (0);
24471 }
24472 
24473 
24474 /*
24475  *    Function: sd_dkio_get_temp
24476  *
24477  * Description: This routine is the driver entry point for handling ioctl
24478  *		requests to get the disk temperature.
24479  *
24480  *   Arguments: dev  - the device number
24481  *		arg  - pointer to user provided dk_temperature structure.
24482  *		flag - this argument is a pass through to ddi_copyxxx()
24483  *		       directly from the mode argument of ioctl().
24484  *
24485  * Return Code: 0
24486  *		EFAULT
24487  *		ENXIO
24488  *		EAGAIN
24489  */
24490 
24491 static int
24492 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24493 {
24494 	struct sd_lun		*un = NULL;
24495 	struct dk_temperature	*dktemp = NULL;
24496 	uchar_t			*temperature_page;
24497 	int			rval = 0;
24498 	int			path_flag = SD_PATH_STANDARD;
24499 
24500 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24501 		return (ENXIO);
24502 	}
24503 
24504 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24505 
24506 	/* copyin the disk temp argument to get the user flags */
24507 	if (ddi_copyin((void *)arg, dktemp,
24508 	    sizeof (struct dk_temperature), flag) != 0) {
24509 		rval = EFAULT;
24510 		goto done;
24511 	}
24512 
24513 	/* Initialize the temperature to invalid. */
24514 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24515 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24516 
24517 	/*
24518 	 * Note: Investigate removing the "bypass pm" semantic.
24519 	 * Can we just bypass PM always?
24520 	 */
24521 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24522 		path_flag = SD_PATH_DIRECT;
24523 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24524 		mutex_enter(&un->un_pm_mutex);
24525 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24526 			/*
24527 			 * If DKT_BYPASS_PM is set, and the drive happens to be
24528 			 * in low-power mode, we cannot wake it up; we need to
24529 			 * return EAGAIN.
24530 			 */
24531 			mutex_exit(&un->un_pm_mutex);
24532 			rval = EAGAIN;
24533 			goto done;
24534 		} else {
24535 			/*
24536 			 * Indicate to PM the device is busy. This is required
24537 			 * to avoid a race - i.e. the ioctl is issuing a
24538 			 * command and the pm framework brings down the device
24539 			 * to low power mode (possible power cut-off on some
24540 			 * platforms).
24541 			 */
24542 			mutex_exit(&un->un_pm_mutex);
24543 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24544 				rval = EAGAIN;
24545 				goto done;
24546 			}
24547 		}
24548 	}
24549 
24550 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24551 
24552 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
24553 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
24554 		goto done2;
24555 	}
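	/*
	 * Layout note: each log parameter in the temperature page starts
	 * with a four-byte header (two bytes of parameter code, a control
	 * byte, and a length byte) followed by the parameter value. Hence
	 * the checks below examine bytes 4-9 for the current temperature
	 * (parameter code 0x0000) and bytes 10-15 for the reference
	 * temperature (parameter code 0x0001).
	 */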
24556 
24557 	/*
24558 	 * For the current temperature verify that the parameter length is 0x02
24559 	 * and the parameter code is 0x00
24560 	 */
24561 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24562 	    (temperature_page[5] == 0x00)) {
24563 		if (temperature_page[9] == 0xFF) {
24564 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24565 		} else {
24566 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24567 		}
24568 	}
24569 
24570 	/*
24571 	 * For the reference temperature verify that the parameter
24572 	 * length is 0x02 and the parameter code is 0x01
24573 	 */
24574 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24575 	    (temperature_page[11] == 0x01)) {
24576 		if (temperature_page[15] == 0xFF) {
24577 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24578 		} else {
24579 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24580 		}
24581 	}
24582 
24583 	/* Do the copyout regardless of the temperature commands status. */
24584 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24585 	    flag) != 0) {
24586 		rval = EFAULT;
24587 	}
24588 
24589 done2:
24590 	if (path_flag == SD_PATH_DIRECT) {
24591 		sd_pm_exit(un);
24592 	}
24593 
24594 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24595 done:
24596 	if (dktemp != NULL) {
24597 		kmem_free(dktemp, sizeof (struct dk_temperature));
24598 	}
24599 
24600 	return (rval);
24601 }
24602 
24603 
24604 /*
24605  *    Function: sd_log_page_supported
24606  *
24607  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24608  *		supported log pages.
24609  *
24610  *   Arguments: un       - driver soft state (unit) structure
24611  *		log_page - the code of the log page to search for
24612  *
24613  * Return Code: -1 - on error (log sense is optional and may not be supported).
24614  *		0  - log page not found.
24615  *		1  - log page found.
24616  */
24617 
24618 static int
24619 sd_log_page_supported(struct sd_lun *un, int log_page)
24620 {
24621 	uchar_t *log_page_data;
24622 	int	i;
24623 	int	match = 0;
24624 	int	log_size;
24625 
24626 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24627 
24628 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24629 	    SD_PATH_DIRECT) != 0) {
24630 		SD_ERROR(SD_LOG_COMMON, un,
24631 		    "sd_log_page_supported: failed log page retrieval\n");
24632 		kmem_free(log_page_data, 0xFF);
24633 		return (-1);
24634 	}
24635 	log_size = log_page_data[3];
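	/*
	 * Hypothetical example: a supported-pages response beginning
	 * 00 00 00 03 0d 0e 2f has log_size = 3 and advertises log pages
	 * 0x0d (temperature), 0x0e (start-stop cycle counter) and
	 * 0x2f (informational exceptions).
	 */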
24636 
24637 	/*
24638 	 * The list of supported log pages starts at the fourth byte. Check
24639 	 * until we run out of log pages or a match is found.
24640 	 */
24641 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24642 		if (log_page_data[i] == log_page) {
24643 			match++;
24644 		}
24645 	}
24646 	kmem_free(log_page_data, 0xFF);
24647 	return (match);
24648 }
24649 
24650 
24651 /*
24652  *    Function: sd_mhdioc_failfast
24653  *
24654  * Description: This routine is the driver entry point for handling ioctl
24655  *		requests to enable/disable the multihost failfast option.
24656  *		(MHIOCENFAILFAST)
24657  *
24658  *   Arguments: dev	- the device number
24659  *		arg	- user specified probing interval.
24660  *		flag	- this argument is a pass through to ddi_copyxxx()
24661  *			  directly from the mode argument of ioctl().
24662  *
24663  * Return Code: 0
24664  *		EFAULT
24665  *		ENXIO
24666  */
24667 
24668 static int
24669 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24670 {
24671 	struct sd_lun	*un = NULL;
24672 	int		mh_time;
24673 	int		rval = 0;
24674 
24675 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24676 		return (ENXIO);
24677 	}
24678 
24679 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24680 		return (EFAULT);
24681 
24682 	if (mh_time) {
24683 		mutex_enter(SD_MUTEX(un));
24684 		un->un_resvd_status |= SD_FAILFAST;
24685 		mutex_exit(SD_MUTEX(un));
24686 		/*
24687 		 * If mh_time is INT_MAX, then this ioctl is being used for
24688 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24689 		 */
24690 		if (mh_time != INT_MAX) {
24691 			rval = sd_check_mhd(dev, mh_time);
24692 		}
24693 	} else {
24694 		(void) sd_check_mhd(dev, 0);
24695 		mutex_enter(SD_MUTEX(un));
24696 		un->un_resvd_status &= ~SD_FAILFAST;
24697 		mutex_exit(SD_MUTEX(un));
24698 	}
24699 	return (rval);
24700 }
24701 
24702 
24703 /*
24704  *    Function: sd_mhdioc_takeown
24705  *
24706  * Description: This routine is the driver entry point for handling ioctl
24707  *		requests to forcefully acquire exclusive access rights to the
24708  *		multihost disk (MHIOCTKOWN).
24709  *
24710  *   Arguments: dev	- the device number
24711  *		arg	- user provided structure specifying the delay
24712  *			  parameters in milliseconds
24713  *		flag	- this argument is a pass through to ddi_copyxxx()
24714  *			  directly from the mode argument of ioctl().
24715  *
24716  * Return Code: 0
24717  *		EFAULT
24718  *		ENXIO
24719  */
24720 
24721 static int
24722 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24723 {
24724 	struct sd_lun		*un = NULL;
24725 	struct mhioctkown	*tkown = NULL;
24726 	int			rval = 0;
24727 
24728 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24729 		return (ENXIO);
24730 	}
24731 
24732 	if (arg != NULL) {
24733 		tkown = (struct mhioctkown *)
24734 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24735 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24736 		if (rval != 0) {
24737 			rval = EFAULT;
24738 			goto error;
24739 		}
24740 	}
24741 
24742 	rval = sd_take_ownership(dev, tkown);
24743 	mutex_enter(SD_MUTEX(un));
24744 	if (rval == 0) {
24745 		un->un_resvd_status |= SD_RESERVE;
24746 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24747 			sd_reinstate_resv_delay =
24748 			    tkown->reinstate_resv_delay * 1000;
24749 		} else {
24750 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24751 		}
24752 		/*
24753 		 * Give the scsi_watch routine interval set by
24754 		 * the MHIOCENFAILFAST ioctl precedence here.
24755 		 */
24756 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24757 			mutex_exit(SD_MUTEX(un));
24758 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
24759 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24760 			    "sd_mhdioc_takeown : %d\n",
24761 			    sd_reinstate_resv_delay);
24762 		} else {
24763 			mutex_exit(SD_MUTEX(un));
24764 		}
24765 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24766 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24767 	} else {
24768 		un->un_resvd_status &= ~SD_RESERVE;
24769 		mutex_exit(SD_MUTEX(un));
24770 	}
24771 
24772 error:
24773 	if (tkown != NULL) {
24774 		kmem_free(tkown, sizeof (struct mhioctkown));
24775 	}
24776 	return (rval);
24777 }
24778 
24779 
24780 /*
24781  *    Function: sd_mhdioc_release
24782  *
24783  * Description: This routine is the driver entry point for handling ioctl
24784  *		requests to release exclusive access rights to the multihost
24785  *		disk (MHIOCRELEASE).
24786  *
24787  *   Arguments: dev	- the device number
24788  *
24789  * Return Code: 0
24790  *		ENXIO
24791  */
24792 
24793 static int
24794 sd_mhdioc_release(dev_t dev)
24795 {
24796 	struct sd_lun		*un = NULL;
24797 	timeout_id_t		resvd_timeid_save;
24798 	int			resvd_status_save;
24799 	int			rval = 0;
24800 
24801 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24802 		return (ENXIO);
24803 	}
24804 
24805 	mutex_enter(SD_MUTEX(un));
24806 	resvd_status_save = un->un_resvd_status;
24807 	un->un_resvd_status &=
24808 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24809 	if (un->un_resvd_timeid) {
24810 		resvd_timeid_save = un->un_resvd_timeid;
24811 		un->un_resvd_timeid = NULL;
24812 		mutex_exit(SD_MUTEX(un));
24813 		(void) untimeout(resvd_timeid_save);
24814 	} else {
24815 		mutex_exit(SD_MUTEX(un));
24816 	}
24817 
24818 	/*
24819 	 * destroy any pending timeout thread that may be attempting to
24820 	 * reinstate reservation on this device.
24821 	 */
24822 	sd_rmv_resv_reclaim_req(dev);
24823 
24824 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24825 		mutex_enter(SD_MUTEX(un));
24826 		if ((un->un_mhd_token) &&
24827 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24828 			mutex_exit(SD_MUTEX(un));
24829 			(void) sd_check_mhd(dev, 0);
24830 		} else {
24831 			mutex_exit(SD_MUTEX(un));
24832 		}
24833 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24834 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24835 	} else {
24836 		/*
24837 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
24838 		 */
24839 		mutex_enter(SD_MUTEX(un));
24840 		un->un_resvd_status = resvd_status_save;
24841 		mutex_exit(SD_MUTEX(un));
24842 	}
24843 	return (rval);
24844 }
24845 
24846 
24847 /*
24848  *    Function: sd_mhdioc_register_devid
24849  *
24850  * Description: This routine is the driver entry point for handling ioctl
24851  *		requests to register the device id (MHIOCREREGISTERDEVID).
24852  *
24853  *		Note: The implementation for this ioctl has been updated to
24854  *		be consistent with the original PSARC case (1999/357)
24855  *		(4375899, 4241671, 4220005)
24856  *
24857  *   Arguments: dev	- the device number
24858  *
24859  * Return Code: 0
24860  *		ENXIO
24861  */
24862 
24863 static int
24864 sd_mhdioc_register_devid(dev_t dev)
24865 {
24866 	struct sd_lun	*un = NULL;
24867 	int		rval = 0;
24868 
24869 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24870 		return (ENXIO);
24871 	}
24872 
24873 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24874 
24875 	mutex_enter(SD_MUTEX(un));
24876 
24877 	/* If a devid already exists, de-register it */
24878 	if (un->un_devid != NULL) {
24879 		ddi_devid_unregister(SD_DEVINFO(un));
24880 		/*
24881 		 * After unregistering the devid, free the devid memory
24882 		 */
24883 		ddi_devid_free(un->un_devid);
24884 		un->un_devid = NULL;
24885 	}
24886 
24887 	/* Check for reservation conflict */
24888 	mutex_exit(SD_MUTEX(un));
24889 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
24890 	mutex_enter(SD_MUTEX(un));
24891 
24892 	switch (rval) {
24893 	case 0:
24894 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24895 		break;
24896 	case EACCES:
24897 		break;
24898 	default:
24899 		rval = EIO;
24900 	}
24901 
24902 	mutex_exit(SD_MUTEX(un));
24903 	return (rval);
24904 }
24905 
24906 
24907 /*
24908  *    Function: sd_mhdioc_inkeys
24909  *
24910  * Description: This routine is the driver entry point for handling ioctl
24911  *		requests to issue the SCSI-3 Persistent In Read Keys command
24912  *		to the device (MHIOCGRP_INKEYS).
24913  *
24914  *   Arguments: dev	- the device number
24915  *		arg	- user provided in_keys structure
24916  *		flag	- this argument is a pass through to ddi_copyxxx()
24917  *			  directly from the mode argument of ioctl().
24918  *
24919  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24920  *		ENXIO
24921  *		EFAULT
24922  */
24923 
24924 static int
24925 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
24926 {
24927 	struct sd_lun		*un;
24928 	mhioc_inkeys_t		inkeys;
24929 	int			rval = 0;
24930 
24931 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24932 		return (ENXIO);
24933 	}
24934 
24935 #ifdef _MULTI_DATAMODEL
24936 	switch (ddi_model_convert_from(flag & FMODELS)) {
24937 	case DDI_MODEL_ILP32: {
24938 		struct mhioc_inkeys32	inkeys32;
24939 
24940 		if (ddi_copyin(arg, &inkeys32,
24941 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
24942 			return (EFAULT);
24943 		}
24944 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
24945 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24946 		    &inkeys, flag)) != 0) {
24947 			return (rval);
24948 		}
24949 		inkeys32.generation = inkeys.generation;
24950 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
24951 		    flag) != 0) {
24952 			return (EFAULT);
24953 		}
24954 		break;
24955 	}
24956 	case DDI_MODEL_NONE:
24957 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
24958 		    flag) != 0) {
24959 			return (EFAULT);
24960 		}
24961 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24962 		    &inkeys, flag)) != 0) {
24963 			return (rval);
24964 		}
24965 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
24966 		    flag) != 0) {
24967 			return (EFAULT);
24968 		}
24969 		break;
24970 	}
24971 
24972 #else /* ! _MULTI_DATAMODEL */
24973 
24974 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
24975 		return (EFAULT);
24976 	}
24977 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
24978 	if (rval != 0) {
24979 		return (rval);
24980 	}
24981 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
24982 		return (EFAULT);
24983 	}
24984 
24985 #endif /* _MULTI_DATAMODEL */
24986 
24987 	return (rval);
24988 }
24989 
24990 
24991 /*
24992  *    Function: sd_mhdioc_inresv
24993  *
24994  * Description: This routine is the driver entry point for handling ioctl
24995  *		requests to issue the SCSI-3 Persistent In Read Reservations
24996  *		command to the device (MHIOCGRP_INRESV).
24997  *
24998  *   Arguments: dev	- the device number
24999  *		arg	- user provided in_resv structure
25000  *		flag	- this argument is a pass through to ddi_copyxxx()
25001  *			  directly from the mode argument of ioctl().
25002  *
25003  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25004  *		ENXIO
25005  *		EFAULT
25006  */
25007 
25008 static int
25009 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25010 {
25011 	struct sd_lun		*un;
25012 	mhioc_inresvs_t		inresvs;
25013 	int			rval = 0;
25014 
25015 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25016 		return (ENXIO);
25017 	}
25018 
25019 #ifdef _MULTI_DATAMODEL
25020 
25021 	switch (ddi_model_convert_from(flag & FMODELS)) {
25022 	case DDI_MODEL_ILP32: {
25023 		struct mhioc_inresvs32	inresvs32;
25024 
25025 		if (ddi_copyin(arg, &inresvs32,
25026 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25027 			return (EFAULT);
25028 		}
25029 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25030 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25031 		    &inresvs, flag)) != 0) {
25032 			return (rval);
25033 		}
25034 		inresvs32.generation = inresvs.generation;
25035 		if (ddi_copyout(&inresvs32, arg,
25036 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25037 			return (EFAULT);
25038 		}
25039 		break;
25040 	}
25041 	case DDI_MODEL_NONE:
25042 		if (ddi_copyin(arg, &inresvs,
25043 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25044 			return (EFAULT);
25045 		}
25046 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25047 		    &inresvs, flag)) != 0) {
25048 			return (rval);
25049 		}
25050 		if (ddi_copyout(&inresvs, arg,
25051 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25052 			return (EFAULT);
25053 		}
25054 		break;
25055 	}
25056 
25057 #else /* ! _MULTI_DATAMODEL */
25058 
25059 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25060 		return (EFAULT);
25061 	}
25062 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25063 	if (rval != 0) {
25064 		return (rval);
25065 	}
25066 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
25067 		return (EFAULT);
25068 	}
25069 
25070 #endif /* ! _MULTI_DATAMODEL */
25071 
25072 	return (rval);
25073 }
25074 
25075 
25076 /*
25077  * The following routines support the clustering functionality described below
25078  * and implement lost reservation reclaim functionality.
25079  *
25080  * Clustering
25081  * ----------
25082  * The clustering code uses two different, independent forms of SCSI
25083  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25084  * Persistent Group Reservations. For any particular disk, it will use either
25085  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25086  *
25087  * SCSI-2
25088  * The cluster software takes ownership of a multi-hosted disk by issuing the
25089  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
25090  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl -- a cluster,
25091  * just after taking ownership of the disk with the MHIOCTKOWN ioctl, then issues
25092  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
25093  * meaning of failfast is that if the driver (on this host) ever encounters the
25094  * scsi error return code RESERVATION_CONFLICT from the device, it should
25095  * immediately panic the host. The motivation for this ioctl is that if this
25096  * host does encounter reservation conflict, the underlying cause is that some
25097  * other host of the cluster has decided that this host is no longer in the
25098  * cluster and has seized control of the disks for itself. Since this host is no
25099  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25100  * does two things:
25101  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25102  *      error to panic the host
25103  *      (b) it sets up a periodic timer to test whether this host still has
25104  *      "access" (in that no other host has reserved the device):  if the
25105  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25106  *      purpose of that periodic timer is to handle scenarios where the host is
25107  *      otherwise temporarily quiescent, temporarily doing no real i/o.
25108  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25109  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25110  * the device itself.
25111  *
25112  * SCSI-3 PGR
25113  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25114  * facility is supported through the shared multihost disk ioctls
25115  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
 * MHIOCGRP_PREEMPTANDABORT).
25117  *
25118  * Reservation Reclaim:
25119  * --------------------
 * To support the lost reservation reclaim operations this driver creates a
 * single thread to handle reinstating reservations on all devices that have
 * lost reservations.  sd_resv_reclaim_requests are logged for all devices
 * that have LOST RESERVATIONS when the scsi watch facility calls back
 * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
 * requests to regain the lost reservations.
25126  */
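
/*
 * As a concrete illustration of the SCSI-2 sequence described above, a
 * cluster agent in user space would typically take ownership along the
 * following lines.  This is a minimal sketch: the device path, the
 * one-second failfast interval, and the omitted error handling are
 * illustrative, not requirements of the interface.
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *	int mh_time = 1000;			(failfast interval, msec)
 *
 *	(void) ioctl(fd, MHIOCTKOWN, NULL);	(break and reserve)
 *	(void) ioctl(fd, MHIOCENFAILFAST, &mh_time);
 *	...
 *	(void) ioctl(fd, MHIOCRELEASE, NULL);	(give up ownership)
 */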
25127 
25128 /*
25129  *    Function: sd_check_mhd()
25130  *
25131  * Description: This function sets up and submits a scsi watch request or
25132  *		terminates an existing watch request. This routine is used in
25133  *		support of reservation reclaim.
25134  *
25135  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25136  *			 among multiple watches that share the callback function
 *		interval - the number of milliseconds specifying the watch
 *			   interval for issuing TEST UNIT READY commands. If
25139  *			   set to 0 the watch should be terminated. If the
25140  *			   interval is set to 0 and if the device is required
25141  *			   to hold reservation while disabling failfast, the
25142  *			   watch is restarted with an interval of
25143  *			   reinstate_resv_delay.
25144  *
25145  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25146  *		ENXIO      - Indicates an invalid device was specified
25147  *		EAGAIN     - Unable to submit the scsi watch request
25148  */
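
/*
 * A quick unit check with illustrative numbers: a caller passing
 * interval = 6000 (msec) clears the one-second floor below and is handed
 * to scsi_watch_request_submit() as 6000 * 1000 = 6000000 usec, i.e. one
 * TEST UNIT READY probe every six seconds.
 */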
25149 
25150 static int
25151 sd_check_mhd(dev_t dev, int interval)
25152 {
25153 	struct sd_lun	*un;
25154 	opaque_t	token;
25155 
25156 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25157 		return (ENXIO);
25158 	}
25159 
25160 	/* is this a watch termination request? */
25161 	if (interval == 0) {
25162 		mutex_enter(SD_MUTEX(un));
25163 		/* if there is an existing watch task then terminate it */
25164 		if (un->un_mhd_token) {
25165 			token = un->un_mhd_token;
25166 			un->un_mhd_token = NULL;
25167 			mutex_exit(SD_MUTEX(un));
25168 			(void) scsi_watch_request_terminate(token,
25169 			    SCSI_WATCH_TERMINATE_WAIT);
25170 			mutex_enter(SD_MUTEX(un));
25171 		} else {
25172 			mutex_exit(SD_MUTEX(un));
25173 			/*
25174 			 * Note: If we return here we don't check for the
25175 			 * failfast case. This is the original legacy
25176 			 * implementation but perhaps we should be checking
25177 			 * the failfast case.
25178 			 */
25179 			return (0);
25180 		}
25181 		/*
25182 		 * If the device is required to hold reservation while
25183 		 * disabling failfast, we need to restart the scsi_watch
25184 		 * routine with an interval of reinstate_resv_delay.
25185 		 */
25186 		if (un->un_resvd_status & SD_RESERVE) {
25187 			interval = sd_reinstate_resv_delay/1000;
25188 		} else {
25189 			/* no failfast so bail */
25190 			mutex_exit(SD_MUTEX(un));
25191 			return (0);
25192 		}
25193 		mutex_exit(SD_MUTEX(un));
25194 	}
25195 
25196 	/*
25197 	 * adjust minimum time interval to 1 second,
25198 	 * and convert from msecs to usecs
25199 	 */
25200 	if (interval > 0 && interval < 1000) {
25201 		interval = 1000;
25202 	}
25203 	interval *= 1000;
25204 
25205 	/*
25206 	 * submit the request to the scsi_watch service
25207 	 */
25208 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25209 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25210 	if (token == NULL) {
25211 		return (EAGAIN);
25212 	}
25213 
25214 	/*
25215 	 * save token for termination later on
25216 	 */
25217 	mutex_enter(SD_MUTEX(un));
25218 	un->un_mhd_token = token;
25219 	mutex_exit(SD_MUTEX(un));
25220 	return (0);
25221 }
25222 
25223 
25224 /*
25225  *    Function: sd_mhd_watch_cb()
25226  *
25227  * Description: This function is the call back function used by the scsi watch
25228  *		facility. The scsi watch facility sends the "Test Unit Ready"
25229  *		and processes the status. If applicable (i.e. a "Unit Attention"
25230  *		status and automatic "Request Sense" not used) the scsi watch
25231  *		facility will send a "Request Sense" and retrieve the sense data
 *		to be passed to this callback function. In either case, whether
 *		via the automatic "Request Sense" or one submitted by the
 *		facility, this callback is passed the status and sense data.
25235  *
25236  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25237  *			among multiple watches that share this callback function
25238  *		resultp - scsi watch facility result packet containing scsi
25239  *			  packet, status byte and sense data
25240  *
25241  * Return Code: 0 - continue the watch task
25242  *		non-zero - terminate the watch task
25243  */
25244 
25245 static int
25246 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
25247 {
25248 	struct sd_lun			*un;
25249 	struct scsi_status		*statusp;
25250 	struct scsi_extended_sense	*sensep;
25251 	struct scsi_pkt			*pkt;
25252 	uchar_t				actual_sense_length;
25253 	dev_t  				dev = (dev_t)arg;
25254 
25255 	ASSERT(resultp != NULL);
25256 	statusp			= resultp->statusp;
25257 	sensep			= resultp->sensep;
25258 	pkt			= resultp->pkt;
25259 	actual_sense_length	= resultp->actual_sense_length;
25260 
25261 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25262 		return (ENXIO);
25263 	}
25264 
25265 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25266 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25267 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25268 
25269 	/* Begin processing of the status and/or sense data */
25270 	if (pkt->pkt_reason != CMD_CMPLT) {
25271 		/* Handle the incomplete packet */
25272 		sd_mhd_watch_incomplete(un, pkt);
25273 		return (0);
25274 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25275 		if (*((unsigned char *)statusp)
25276 		    == STATUS_RESERVATION_CONFLICT) {
25277 			/*
25278 			 * Handle a reservation conflict by panicking if
25279 			 * configured for failfast or by logging the conflict
25280 			 * and updating the reservation status
25281 			 */
25282 			mutex_enter(SD_MUTEX(un));
25283 			if ((un->un_resvd_status & SD_FAILFAST) &&
25284 			    (sd_failfast_enable)) {
25285 				sd_panic_for_res_conflict(un);
25286 				/*NOTREACHED*/
25287 			}
25288 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25289 			    "sd_mhd_watch_cb: Reservation Conflict\n");
25290 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25291 			mutex_exit(SD_MUTEX(un));
25292 		}
25293 	}
25294 
25295 	if (sensep != NULL) {
25296 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25297 			mutex_enter(SD_MUTEX(un));
25298 			if ((sensep->es_add_code == SD_SCSI_RESET_SENSE_CODE) &&
25299 			    (un->un_resvd_status & SD_RESERVE)) {
25300 				/*
25301 				 * The additional sense code indicates a power
25302 				 * on or bus device reset has occurred; update
25303 				 * the reservation status.
25304 				 */
25305 				un->un_resvd_status |=
25306 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25307 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25308 				    "sd_mhd_watch_cb: Lost Reservation\n");
25309 			}
25310 		} else {
25311 			return (0);
25312 		}
25313 	} else {
25314 		mutex_enter(SD_MUTEX(un));
25315 	}
25316 
25317 	if ((un->un_resvd_status & SD_RESERVE) &&
25318 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25319 		if (un->un_resvd_status & SD_WANT_RESERVE) {
25320 			/*
			 * A reset occurred in between the last probe and this
			 * one, so if a timeout is pending, cancel it.
25323 			 */
25324 			if (un->un_resvd_timeid) {
25325 				timeout_id_t temp_id = un->un_resvd_timeid;
25326 				un->un_resvd_timeid = NULL;
25327 				mutex_exit(SD_MUTEX(un));
25328 				(void) untimeout(temp_id);
25329 				mutex_enter(SD_MUTEX(un));
25330 			}
25331 			un->un_resvd_status &= ~SD_WANT_RESERVE;
25332 		}
25333 		if (un->un_resvd_timeid == 0) {
25334 			/* Schedule a timeout to handle the lost reservation */
25335 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25336 			    (void *)dev,
25337 			    drv_usectohz(sd_reinstate_resv_delay));
25338 		}
25339 	}
25340 	mutex_exit(SD_MUTEX(un));
25341 	return (0);
25342 }
25343 
25344 
25345 /*
25346  *    Function: sd_mhd_watch_incomplete()
25347  *
25348  * Description: This function is used to find out why a scsi pkt sent by the
 *		scsi watch facility was not completed. Under some scenarios
 *		this routine simply returns; otherwise it resets the lun,
 *		target, or bus to see if the drive is still online.
25352  *
25353  *   Arguments: un  - driver soft state (unit) structure
25354  *		pkt - incomplete scsi pkt
25355  */
25356 
25357 static void
25358 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25359 {
25360 	int	be_chatty;
25361 	int	perr;
25362 
25363 	ASSERT(pkt != NULL);
25364 	ASSERT(un != NULL);
25365 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25366 	perr		= (pkt->pkt_statistics & STAT_PERR);
25367 
25368 	mutex_enter(SD_MUTEX(un));
25369 	if (un->un_state == SD_STATE_DUMPING) {
25370 		mutex_exit(SD_MUTEX(un));
25371 		return;
25372 	}
25373 
25374 	switch (pkt->pkt_reason) {
25375 	case CMD_UNX_BUS_FREE:
25376 		/*
25377 		 * If we had a parity error that caused the target to drop BSY*,
25378 		 * don't be chatty about it.
25379 		 */
25380 		if (perr && be_chatty) {
25381 			be_chatty = 0;
25382 		}
25383 		break;
25384 	case CMD_TAG_REJECT:
25385 		/*
25386 		 * The SCSI-2 spec states that a tag reject will be sent by the
25387 		 * target if tagged queuing is not supported. A tag reject may
25388 		 * also be sent during certain initialization periods or to
25389 		 * control internal resources. For the latter case the target
25390 		 * may also return Queue Full.
25391 		 *
25392 		 * If this driver receives a tag reject from a target that is
25393 		 * going through an init period or controlling internal
25394 		 * resources tagged queuing will be disabled. This is a less
25395 		 * than optimal behavior but the driver is unable to determine
 *		the target state and assumes tagged queuing is not supported.
25397 		 */
25398 		pkt->pkt_flags = 0;
25399 		un->un_tagflags = 0;
25400 
25401 		if (un->un_f_opt_queueing == TRUE) {
25402 			un->un_throttle = min(un->un_throttle, 3);
25403 		} else {
25404 			un->un_throttle = 1;
25405 		}
25406 		mutex_exit(SD_MUTEX(un));
25407 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25408 		mutex_enter(SD_MUTEX(un));
25409 		break;
25410 	case CMD_INCOMPLETE:
25411 		/*
		 * The transport stopped in an abnormal state; fall through
		 * and reset the target and/or bus, unless selection did not
		 * complete (indicated by STATE_GOT_BUS), in which case we
		 * don't want to go through a target/bus reset.
25416 		 */
25417 		if (pkt->pkt_state == STATE_GOT_BUS) {
25418 			break;
25419 		}
25420 		/*FALLTHROUGH*/
25421 
25422 	case CMD_TIMEOUT:
25423 	default:
25424 		/*
25425 		 * The lun may still be running the command, so a lun reset
25426 		 * should be attempted. If the lun reset fails or cannot be
		 * issued, then try a target reset. Lastly try a bus reset.
25428 		 */
25429 		if ((pkt->pkt_statistics &
25430 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25431 			int reset_retval = 0;
25432 			mutex_exit(SD_MUTEX(un));
25433 			if (un->un_f_allow_bus_device_reset == TRUE) {
25434 				if (un->un_f_lun_reset_enabled == TRUE) {
25435 					reset_retval =
25436 					    scsi_reset(SD_ADDRESS(un),
25437 					    RESET_LUN);
25438 				}
25439 				if (reset_retval == 0) {
25440 					reset_retval =
25441 					    scsi_reset(SD_ADDRESS(un),
25442 					    RESET_TARGET);
25443 				}
25444 			}
25445 			if (reset_retval == 0) {
25446 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25447 			}
25448 			mutex_enter(SD_MUTEX(un));
25449 		}
25450 		break;
25451 	}
25452 
25453 	/* A device/bus reset has occurred; update the reservation status. */
25454 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25455 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25456 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25457 			un->un_resvd_status |=
25458 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25459 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25460 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25461 		}
25462 	}
25463 
25464 	/*
	 * The disk has been turned off; update the device state.
25466 	 *
25467 	 * Note: Should we be offlining the disk here?
25468 	 */
25469 	if (pkt->pkt_state == STATE_GOT_BUS) {
25470 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25471 		    "Disk not responding to selection\n");
25472 		if (un->un_state != SD_STATE_OFFLINE) {
25473 			New_state(un, SD_STATE_OFFLINE);
25474 		}
25475 	} else if (be_chatty) {
25476 		/*
25477 		 * suppress messages if they are all the same pkt reason;
25478 		 * with TQ, many (up to 256) are returned with the same
25479 		 * pkt_reason
25480 		 */
25481 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25482 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25483 			    "sd_mhd_watch_incomplete: "
25484 			    "SCSI transport failed: reason '%s'\n",
25485 			    scsi_rname(pkt->pkt_reason));
25486 		}
25487 	}
25488 	un->un_last_pkt_reason = pkt->pkt_reason;
25489 	mutex_exit(SD_MUTEX(un));
25490 }
25491 
25492 
25493 /*
25494  *    Function: sd_sname()
25495  *
25496  * Description: This is a simple little routine to return a string containing
 *		a printable description of the command status byte for use in
25498  *		logging.
25499  *
25500  *   Arguments: status - pointer to a status byte
25501  *
25502  * Return Code: char * - string containing status description.
25503  */
25504 
25505 static char *
25506 sd_sname(uchar_t status)
25507 {
25508 	switch (status & STATUS_MASK) {
25509 	case STATUS_GOOD:
25510 		return ("good status");
25511 	case STATUS_CHECK:
25512 		return ("check condition");
25513 	case STATUS_MET:
25514 		return ("condition met");
25515 	case STATUS_BUSY:
25516 		return ("busy");
25517 	case STATUS_INTERMEDIATE:
25518 		return ("intermediate");
25519 	case STATUS_INTERMEDIATE_MET:
25520 		return ("intermediate - condition met");
25521 	case STATUS_RESERVATION_CONFLICT:
25522 		return ("reservation_conflict");
25523 	case STATUS_TERMINATED:
25524 		return ("command terminated");
25525 	case STATUS_QFULL:
25526 		return ("queue full");
25527 	default:
25528 		return ("<unknown status>");
25529 	}
25530 }
25531 
25532 
25533 /*
25534  *    Function: sd_mhd_resvd_recover()
25535  *
25536  * Description: This function adds a reservation entry to the
25537  *		sd_resv_reclaim_request list and signals the reservation
25538  *		reclaim thread that there is work pending. If the reservation
25539  *		reclaim thread has not been previously created this function
25540  *		will kick it off.
25541  *
25542  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25543  *			among multiple watches that share this callback function
25544  *
25545  *     Context: This routine is called by timeout() and is run in interrupt
25546  *		context. It must not sleep or call other functions which may
25547  *		sleep.
25548  */
25549 
25550 static void
25551 sd_mhd_resvd_recover(void *arg)
25552 {
25553 	dev_t			dev = (dev_t)arg;
25554 	struct sd_lun		*un;
25555 	struct sd_thr_request	*sd_treq = NULL;
25556 	struct sd_thr_request	*sd_cur = NULL;
25557 	struct sd_thr_request	*sd_prev = NULL;
25558 	int			already_there = 0;
25559 
25560 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25561 		return;
25562 	}
25563 
25564 	mutex_enter(SD_MUTEX(un));
25565 	un->un_resvd_timeid = NULL;
25566 	if (un->un_resvd_status & SD_WANT_RESERVE) {
25567 		/*
25568 		 * There was a reset so don't issue the reserve, allow the
25569 		 * sd_mhd_watch_cb callback function to notice this and
25570 		 * reschedule the timeout for reservation.
25571 		 */
25572 		mutex_exit(SD_MUTEX(un));
25573 		return;
25574 	}
25575 	mutex_exit(SD_MUTEX(un));
25576 
25577 	/*
25578 	 * Add this device to the sd_resv_reclaim_request list and the
25579 	 * sd_resv_reclaim_thread should take care of the rest.
25580 	 *
25581 	 * Note: We can't sleep in this context so if the memory allocation
25582 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
25583 	 * reschedule the timeout for reservation.  (4378460)
25584 	 */
25585 	sd_treq = (struct sd_thr_request *)
25586 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25587 	if (sd_treq == NULL) {
25588 		return;
25589 	}
25590 
25591 	sd_treq->sd_thr_req_next = NULL;
25592 	sd_treq->dev = dev;
25593 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25594 	if (sd_tr.srq_thr_req_head == NULL) {
25595 		sd_tr.srq_thr_req_head = sd_treq;
25596 	} else {
25597 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25598 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25599 			if (sd_cur->dev == dev) {
25600 				/*
				 * already in the queue, so don't log
				 * another request for this device
25603 				 */
25604 				already_there = 1;
25605 				break;
25606 			}
25607 			sd_prev = sd_cur;
25608 		}
25609 		if (!already_there) {
25610 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25611 			    "logging request for %lx\n", dev);
25612 			sd_prev->sd_thr_req_next = sd_treq;
25613 		} else {
25614 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25615 		}
25616 	}
25617 
25618 	/*
25619 	 * Create a kernel thread to do the reservation reclaim and free up this
25620 	 * thread. We cannot block this thread while we go away to do the
	 * reservation reclaim.
25622 	 */
25623 	if (sd_tr.srq_resv_reclaim_thread == NULL)
25624 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25625 		    sd_resv_reclaim_thread, NULL,
25626 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
25627 
25628 	/* Tell the reservation reclaim thread that it has work to do */
25629 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25630 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25631 }
25632 
25633 /*
25634  *    Function: sd_resv_reclaim_thread()
25635  *
25636  * Description: This function implements the reservation reclaim operations
25637  *
 *   Arguments: none; the thread is created by sd_mhd_resvd_recover() and
 *		services the global sd_tr request queue.
25640  */
25641 
25642 static void
25643 sd_resv_reclaim_thread()
25644 {
25645 	struct sd_lun		*un;
25646 	struct sd_thr_request	*sd_mhreq;
25647 
25648 	/* Wait for work */
25649 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25650 	if (sd_tr.srq_thr_req_head == NULL) {
25651 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25652 		    &sd_tr.srq_resv_reclaim_mutex);
25653 	}
25654 
25655 	/* Loop while we have work */
25656 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25657 		un = ddi_get_soft_state(sd_state,
25658 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25659 		if (un == NULL) {
25660 			/*
25661 			 * softstate structure is NULL so just
25662 			 * dequeue the request and continue
25663 			 */
25664 			sd_tr.srq_thr_req_head =
25665 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25666 			kmem_free(sd_tr.srq_thr_cur_req,
25667 			    sizeof (struct sd_thr_request));
25668 			continue;
25669 		}
25670 
25671 		/* dequeue the request */
25672 		sd_mhreq = sd_tr.srq_thr_cur_req;
25673 		sd_tr.srq_thr_req_head =
25674 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25675 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25676 
25677 		/*
25678 		 * Reclaim reservation only if SD_RESERVE is still set. There
25679 		 * may have been a call to MHIOCRELEASE before we got here.
25680 		 */
25681 		mutex_enter(SD_MUTEX(un));
25682 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25683 			/*
25684 			 * Note: The SD_LOST_RESERVE flag is cleared before
25685 			 * reclaiming the reservation. If this is done after the
25686 			 * call to sd_reserve_release a reservation loss in the
25687 			 * window between pkt completion of reserve cmd and
25688 			 * mutex_enter below may not be recognized
25689 			 */
25690 			un->un_resvd_status &= ~SD_LOST_RESERVE;
25691 			mutex_exit(SD_MUTEX(un));
25692 
25693 			if (sd_reserve_release(sd_mhreq->dev,
25694 			    SD_RESERVE) == 0) {
25695 				mutex_enter(SD_MUTEX(un));
25696 				un->un_resvd_status |= SD_RESERVE;
25697 				mutex_exit(SD_MUTEX(un));
25698 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25699 				    "sd_resv_reclaim_thread: "
25700 				    "Reservation Recovered\n");
25701 			} else {
25702 				mutex_enter(SD_MUTEX(un));
25703 				un->un_resvd_status |= SD_LOST_RESERVE;
25704 				mutex_exit(SD_MUTEX(un));
25705 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25706 				    "sd_resv_reclaim_thread: Failed "
25707 				    "Reservation Recovery\n");
25708 			}
25709 		} else {
25710 			mutex_exit(SD_MUTEX(un));
25711 		}
25712 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25713 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25714 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25715 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25716 		/*
25717 		 * wakeup the destroy thread if anyone is waiting on
25718 		 * us to complete.
25719 		 */
25720 		cv_signal(&sd_tr.srq_inprocess_cv);
25721 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25722 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
25723 	}
25724 
25725 	/*
	 * clean up the sd_tr structure now that this thread is exiting
25727 	 */
25728 	ASSERT(sd_tr.srq_thr_req_head == NULL);
25729 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25730 	sd_tr.srq_resv_reclaim_thread = NULL;
25731 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25732 	thread_exit();
25733 }
25734 
25735 
25736 /*
25737  *    Function: sd_rmv_resv_reclaim_req()
25738  *
25739  * Description: This function removes any pending reservation reclaim requests
25740  *		for the specified device.
25741  *
25742  *   Arguments: dev - the device 'dev_t'
25743  */
25744 
25745 static void
25746 sd_rmv_resv_reclaim_req(dev_t dev)
25747 {
25748 	struct sd_thr_request *sd_mhreq;
25749 	struct sd_thr_request *sd_prev;
25750 
25751 	/* Remove a reservation reclaim request from the list */
25752 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25753 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25754 		/*
25755 		 * We are attempting to reinstate reservation for
25756 		 * this device. We wait for sd_reserve_release()
25757 		 * to return before we return.
25758 		 */
25759 		cv_wait(&sd_tr.srq_inprocess_cv,
25760 		    &sd_tr.srq_resv_reclaim_mutex);
25761 	} else {
25762 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25763 		if (sd_mhreq && sd_mhreq->dev == dev) {
25764 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25765 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25766 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25767 			return;
25768 		}
25769 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25770 			if (sd_mhreq && sd_mhreq->dev == dev) {
25771 				break;
25772 			}
25773 			sd_prev = sd_mhreq;
25774 		}
25775 		if (sd_mhreq != NULL) {
25776 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25777 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25778 		}
25779 	}
25780 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25781 }
25782 
25783 
25784 /*
25785  *    Function: sd_mhd_reset_notify_cb()
25786  *
25787  * Description: This is a call back function for scsi_reset_notify. This
25788  *		function updates the softstate reserved status and logs the
25789  *		reset. The driver scsi watch facility callback function
25790  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25791  *		will reclaim the reservation.
25792  *
25793  *   Arguments: arg  - driver soft state (unit) structure
25794  */
25795 
25796 static void
25797 sd_mhd_reset_notify_cb(caddr_t arg)
25798 {
25799 	struct sd_lun *un = (struct sd_lun *)arg;
25800 
25801 	mutex_enter(SD_MUTEX(un));
25802 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25803 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25804 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25805 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25806 	}
25807 	mutex_exit(SD_MUTEX(un));
25808 }
25809 
25810 
25811 /*
25812  *    Function: sd_take_ownership()
25813  *
25814  * Description: This routine implements an algorithm to achieve a stable
25815  *		reservation on disks which don't implement priority reserve,
 *		and makes sure that other hosts lose any re-reservation
 *		attempts. This algorithm consists of a loop that keeps issuing
 *		the RESERVE for some period of time (min_ownership_delay,
 *		default 6 seconds). During that loop, it looks to see if there
 *		has been a bus device reset or bus reset (both of which cause
 *		an existing reservation to be lost). If the reservation is
 *		lost, it reissues the RESERVE until a
25822  *		period of min_ownership_delay with no resets has gone by, or
25823  *		until max_ownership_delay has expired. This loop ensures that
25824  *		the host really did manage to reserve the device, in spite of
25825  *		resets. The looping for min_ownership_delay (default six
25826  *		seconds) is important to early generation clustering products,
25827  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25828  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25829  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25830  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25831  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25832  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25833  *		no longer "owns" the disk and will have panicked itself.  Thus,
25834  *		the host issuing the MHIOCTKOWN is assured (with timing
25835  *		dependencies) that by the time it actually starts to use the
25836  *		disk for real work, the old owner is no longer accessing it.
25837  *
25838  *		min_ownership_delay is the minimum amount of time for which the
25839  *		disk must be reserved continuously devoid of resets before the
25840  *		MHIOCTKOWN ioctl will return success.
25841  *
 *		max_ownership_delay is the amount of time within which the
 *		take-ownership must succeed or time out with an error.
25844  *
25845  *   Arguments: dev - the device 'dev_t'
25846  *		*p  - struct containing timing info.
25847  *
25848  * Return Code: 0 for success or error code
25849  */
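
/*
 * For illustration, a caller wanting a 10 second stable-ownership window
 * and a 60 second overall limit would pass the delays in milliseconds (a
 * hypothetical invocation, not driver code):
 *
 *	struct mhioctkown t = { 0 };
 *
 *	t.min_ownership_delay = 10000;		(scaled to usec below)
 *	t.max_ownership_delay = 60000;
 *	(void) ioctl(fd, MHIOCTKOWN, &t);
 */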
25850 
25851 static int
25852 sd_take_ownership(dev_t dev, struct mhioctkown *p)
25853 {
25854 	struct sd_lun	*un;
25855 	int		rval;
25856 	int		err;
25857 	int		reservation_count   = 0;
25858 	int		min_ownership_delay =  6000000; /* in usec */
25859 	int		max_ownership_delay = 30000000; /* in usec */
25860 	clock_t		start_time;	/* starting time of this algorithm */
25861 	clock_t		end_time;	/* time limit for giving up */
25862 	clock_t		ownership_time;	/* time limit for stable ownership */
25863 	clock_t		current_time;
25864 	clock_t		previous_current_time;
25865 
25866 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25867 		return (ENXIO);
25868 	}
25869 
25870 	/*
25871 	 * Attempt a device reservation. A priority reservation is requested.
25872 	 */
25873 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
25874 	    != SD_SUCCESS) {
25875 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25876 		    "sd_take_ownership: return(1)=%d\n", rval);
25877 		return (rval);
25878 	}
25879 
25880 	/* Update the softstate reserved status to indicate the reservation */
25881 	mutex_enter(SD_MUTEX(un));
25882 	un->un_resvd_status |= SD_RESERVE;
25883 	un->un_resvd_status &=
25884 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
25885 	mutex_exit(SD_MUTEX(un));
25886 
25887 	if (p != NULL) {
25888 		if (p->min_ownership_delay != 0) {
25889 			min_ownership_delay = p->min_ownership_delay * 1000;
25890 		}
25891 		if (p->max_ownership_delay != 0) {
25892 			max_ownership_delay = p->max_ownership_delay * 1000;
25893 		}
25894 	}
25895 	SD_INFO(SD_LOG_IOCTL_MHD, un,
25896 	    "sd_take_ownership: min, max delays: %d, %d\n",
25897 	    min_ownership_delay, max_ownership_delay);
25898 
25899 	start_time = ddi_get_lbolt();
25900 	current_time	= start_time;
25901 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
25902 	end_time	= start_time + drv_usectohz(max_ownership_delay);
25903 
25904 	while (current_time - end_time < 0) {
25905 		delay(drv_usectohz(500000));
25906 
25907 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
25908 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
25909 				mutex_enter(SD_MUTEX(un));
25910 				rval = (un->un_resvd_status &
25911 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
25912 				mutex_exit(SD_MUTEX(un));
25913 				break;
25914 			}
25915 		}
25916 		previous_current_time = current_time;
25917 		current_time = ddi_get_lbolt();
25918 		mutex_enter(SD_MUTEX(un));
25919 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
25920 			ownership_time = ddi_get_lbolt() +
25921 			    drv_usectohz(min_ownership_delay);
25922 			reservation_count = 0;
25923 		} else {
25924 			reservation_count++;
25925 		}
25926 		un->un_resvd_status |= SD_RESERVE;
25927 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
25928 		mutex_exit(SD_MUTEX(un));
25929 
25930 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25931 		    "sd_take_ownership: ticks for loop iteration=%ld, "
25932 		    "reservation=%s\n", (current_time - previous_current_time),
25933 		    reservation_count ? "ok" : "reclaimed");
25934 
25935 		if (current_time - ownership_time >= 0 &&
25936 		    reservation_count >= 4) {
25937 			rval = 0; /* Achieved a stable ownership */
25938 			break;
25939 		}
25940 		if (current_time - end_time >= 0) {
25941 			rval = EACCES; /* No ownership in max possible time */
25942 			break;
25943 		}
25944 	}
25945 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25946 	    "sd_take_ownership: return(2)=%d\n", rval);
25947 	return (rval);
25948 }
25949 
25950 
25951 /*
25952  *    Function: sd_reserve_release()
25953  *
25954  * Description: This function builds and sends scsi RESERVE, RELEASE, and
25955  *		PRIORITY RESERVE commands based on a user specified command type
25956  *
25957  *   Arguments: dev - the device 'dev_t'
25958  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25959  *		      SD_RESERVE, SD_RELEASE
25960  *
25961  * Return Code: 0 or Error Code
25962  */
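
/*
 * For reference, the 6-byte CDB built below is all zeros except for the
 * opcode byte: 0x16 (RESERVE(6)) for SD_RESERVE and SD_PRIORITY_RESERVE,
 * or 0x17 (RELEASE(6)) for SD_RELEASE.
 */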
25963 
25964 static int
25965 sd_reserve_release(dev_t dev, int cmd)
25966 {
25967 	struct uscsi_cmd	*com = NULL;
25968 	struct sd_lun		*un = NULL;
25969 	char			cdb[CDB_GROUP0];
25970 	int			rval;
25971 
25972 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
25973 	    (cmd == SD_PRIORITY_RESERVE));
25974 
25975 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25976 		return (ENXIO);
25977 	}
25978 
25979 	/* instantiate and initialize the command and cdb */
25980 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25981 	bzero(cdb, CDB_GROUP0);
25982 	com->uscsi_flags   = USCSI_SILENT;
25983 	com->uscsi_timeout = un->un_reserve_release_time;
25984 	com->uscsi_cdblen  = CDB_GROUP0;
25985 	com->uscsi_cdb	   = cdb;
25986 	if (cmd == SD_RELEASE) {
25987 		cdb[0] = SCMD_RELEASE;
25988 	} else {
25989 		cdb[0] = SCMD_RESERVE;
25990 	}
25991 
25992 	/* Send the command. */
25993 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
25994 	    UIO_SYSSPACE, SD_PATH_STANDARD);
25995 
25996 	/*
	 * "Break" a reservation held by another host by issuing a reset, if
	 * a priority reserve is desired and we could not get the device.
26000 	 */
26001 	if ((cmd == SD_PRIORITY_RESERVE) &&
26002 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26003 		/*
26004 		 * First try to reset the LUN. If we cannot, then try a target
26005 		 * reset, followed by a bus reset if the target reset fails.
26006 		 */
26007 		int reset_retval = 0;
26008 		if (un->un_f_lun_reset_enabled == TRUE) {
26009 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
26010 		}
26011 		if (reset_retval == 0) {
26012 			/* The LUN reset either failed or was not issued */
26013 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26014 		}
26015 		if ((reset_retval == 0) &&
26016 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
26017 			rval = EIO;
26018 			kmem_free(com, sizeof (*com));
26019 			return (rval);
26020 		}
26021 
26022 		bzero(com, sizeof (struct uscsi_cmd));
26023 		com->uscsi_flags   = USCSI_SILENT;
26024 		com->uscsi_cdb	   = cdb;
26025 		com->uscsi_cdblen  = CDB_GROUP0;
26026 		com->uscsi_timeout = 5;
26027 
26028 		/*
26029 		 * Reissue the last reserve command, this time without request
26030 		 * sense.  Assume that it is just a regular reserve command.
26031 		 */
26032 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26033 		    UIO_SYSSPACE, SD_PATH_STANDARD);
26034 	}
26035 
26036 	/* Return an error if still getting a reservation conflict. */
26037 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26038 		rval = EACCES;
26039 	}
26040 
26041 	kmem_free(com, sizeof (*com));
26042 	return (rval);
26043 }
26044 
26045 
26046 #define	SD_NDUMP_RETRIES	12
26047 /*
26048  *	System Crash Dump routine
26049  */
26050 
26051 static int
26052 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
26053 {
26054 	int		instance;
26055 	int		partition;
26056 	int		i;
26057 	int		err;
26058 	struct sd_lun	*un;
26059 	struct dk_map	*lp;
26060 	struct scsi_pkt *wr_pktp;
26061 	struct buf	*wr_bp;
26062 	struct buf	wr_buf;
26063 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
26064 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
26065 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
26066 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
26067 	size_t		io_start_offset;
26068 	int		doing_rmw = FALSE;
26069 	int		rval;
26070 #if defined(__i386) || defined(__amd64)
26071 	ssize_t dma_resid;
26072 	daddr_t oblkno;
26073 #endif
26074 
26075 	instance = SDUNIT(dev);
26076 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
26077 	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
26078 		return (ENXIO);
26079 	}
26080 
26081 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
26082 
26083 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
26084 
26085 	partition = SDPART(dev);
26086 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
26087 
	/* Validate the blocks to dump against the partition size. */
26089 	lp = &un->un_map[partition];
26090 	if ((blkno + nblk) > lp->dkl_nblk) {
26091 		SD_TRACE(SD_LOG_DUMP, un,
26092 		    "sddump: dump range larger than partition: "
26093 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26094 		    blkno, nblk, lp->dkl_nblk);
26095 		return (EINVAL);
26096 	}
26097 
26098 	mutex_enter(&un->un_pm_mutex);
26099 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
26100 		struct scsi_pkt *start_pktp;
26101 
26102 		mutex_exit(&un->un_pm_mutex);
26103 
26104 		/*
26105 		 * use pm framework to power on HBA 1st
26106 		 */
26107 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
26108 
26109 		/*
		 * Dump no longer uses sdpower to power on a device; it's
		 * done in-line here so it can run in polled mode.
26112 		 */
26113 
26114 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
26115 
26116 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
26117 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
26118 
26119 		if (start_pktp == NULL) {
26120 			/* We were not given a SCSI packet, fail. */
26121 			return (EIO);
26122 		}
26123 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
26124 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
26125 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
26126 		start_pktp->pkt_flags = FLAG_NOINTR;
26127 
26128 		mutex_enter(SD_MUTEX(un));
26129 		SD_FILL_SCSI1_LUN(un, start_pktp);
26130 		mutex_exit(SD_MUTEX(un));
26131 		/*
26132 		 * Scsi_poll returns 0 (success) if the command completes and
26133 		 * the status block is STATUS_GOOD.
26134 		 */
26135 		if (sd_scsi_poll(un, start_pktp) != 0) {
26136 			scsi_destroy_pkt(start_pktp);
26137 			return (EIO);
26138 		}
26139 		scsi_destroy_pkt(start_pktp);
26140 		(void) sd_ddi_pm_resume(un);
26141 	} else {
26142 		mutex_exit(&un->un_pm_mutex);
26143 	}
26144 
26145 	mutex_enter(SD_MUTEX(un));
26146 	un->un_throttle = 0;
26147 
26148 	/*
26149 	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in
	 * a good state, so no bus reset is required.
	 * Clear sense data via a Request Sense cmd.
	 * In sddump we don't care about allow_bus_device_reset anymore.
26154 	 */
26155 
26156 	if ((un->un_state != SD_STATE_SUSPENDED) &&
26157 	    (un->un_state != SD_STATE_DUMPING)) {
26158 
26159 		New_state(un, SD_STATE_DUMPING);
26160 
26161 		if (un->un_f_is_fibre == FALSE) {
26162 			mutex_exit(SD_MUTEX(un));
26163 			/*
26164 			 * Attempt a bus reset for parallel scsi.
26165 			 *
26166 			 * Note: A bus reset is required because on some host
26167 			 * systems (i.e. E420R) a bus device reset is
26168 			 * insufficient to reset the state of the target.
26169 			 *
26170 			 * Note: Don't issue the reset for fibre-channel,
26171 			 * because this tends to hang the bus (loop) for
26172 			 * too long while everyone is logging out and in
26173 			 * and the deadman timer for dumping will fire
26174 			 * before the dump is complete.
26175 			 */
26176 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
26177 				mutex_enter(SD_MUTEX(un));
26178 				Restore_state(un);
26179 				mutex_exit(SD_MUTEX(un));
26180 				return (EIO);
26181 			}
26182 
26183 			/* Delay to give the device some recovery time. */
26184 			drv_usecwait(10000);
26185 
26186 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
26187 				SD_INFO(SD_LOG_DUMP, un,
26188 					"sddump: sd_send_polled_RQS failed\n");
26189 			}
26190 			mutex_enter(SD_MUTEX(un));
26191 		}
26192 	}
26193 
26194 	/*
26195 	 * Convert the partition-relative block number to a
26196 	 * disk physical block number.
26197 	 */
26198 	blkno += un->un_offset[partition];
26199 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
26200 
26201 
26202 	/*
26203 	 * Check if the device has a non-512 block size.
26204 	 */
26205 	wr_bp = NULL;
26206 	if (NOT_DEVBSIZE(un)) {
26207 		tgt_byte_offset = blkno * un->un_sys_blocksize;
26208 		tgt_byte_count = nblk * un->un_sys_blocksize;
26209 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
26210 		    (tgt_byte_count % un->un_tgt_blocksize)) {
26211 			doing_rmw = TRUE;
26212 			/*
			 * Calculate the block number and number of blocks
26214 			 * in terms of the media block size.
26215 			 */
26216 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26217 			tgt_nblk =
26218 			    ((tgt_byte_offset + tgt_byte_count +
26219 				(un->un_tgt_blocksize - 1)) /
26220 				un->un_tgt_blocksize) - tgt_blkno;
26221 
26222 			/*
			 * Invoke the routine that does the read part
			 * of the read-modify-write.
26225 			 * Note that this routine returns a pointer to
26226 			 * a valid bp in wr_bp.
26227 			 */
26228 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
26229 			    &wr_bp);
26230 			if (err) {
26231 				mutex_exit(SD_MUTEX(un));
26232 				return (err);
26233 			}
26234 			/*
			 * The offset into the first target block is:
			 * (original block # * system block size) -
			 * (target block # * target block size)
26238 			 */
26239 			io_start_offset =
26240 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
26241 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
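			/*
			 * Worked example with illustrative sizes: for a
			 * 512-byte system block size, a 2048-byte target
			 * block size, and blkno = 1001, tgt_byte_offset is
			 * 1001 * 512 = 512512, tgt_blkno is 512512 / 2048 =
			 * 250, and io_start_offset is 512512 - (250 * 2048)
			 * = 512 bytes into the first target block.
			 */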
26242 
26243 			ASSERT((io_start_offset >= 0) &&
26244 			    (io_start_offset < un->un_tgt_blocksize));
26245 			/*
26246 			 * Do the modify portion of read modify write.
26247 			 */
26248 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
26249 			    (size_t)nblk * un->un_sys_blocksize);
26250 		} else {
26251 			doing_rmw = FALSE;
26252 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26253 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
26254 		}
26255 
26256 		/* Convert blkno and nblk to target blocks */
26257 		blkno = tgt_blkno;
26258 		nblk = tgt_nblk;
26259 	} else {
26260 		wr_bp = &wr_buf;
26261 		bzero(wr_bp, sizeof (struct buf));
26262 		wr_bp->b_flags		= B_BUSY;
26263 		wr_bp->b_un.b_addr	= addr;
26264 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
26265 		wr_bp->b_resid		= 0;
26266 	}
26267 
26268 	mutex_exit(SD_MUTEX(un));
26269 
26270 	/*
26271 	 * Obtain a SCSI packet for the write command.
26272 	 * It should be safe to call the allocator here without
26273 	 * worrying about being locked for DVMA mapping because
	 * the address we're passed is already a DVMA mapping.
26275 	 *
26276 	 * We are also not going to worry about semaphore ownership
26277 	 * in the dump buffer. Dumping is single threaded at present.
26278 	 */
26279 
26280 	wr_pktp = NULL;
26281 
26282 #if defined(__i386) || defined(__amd64)
26283 	dma_resid = wr_bp->b_bcount;
26284 	oblkno = blkno;
26285 	while (dma_resid != 0) {
26286 #endif
26287 
26288 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26289 		wr_bp->b_flags &= ~B_ERROR;
26290 
26291 #if defined(__i386) || defined(__amd64)
26292 		blkno = oblkno +
26293 			((wr_bp->b_bcount - dma_resid) /
26294 			    un->un_tgt_blocksize);
26295 		nblk = dma_resid / un->un_tgt_blocksize;
26296 
26297 		if (wr_pktp) {
26298 			/* Partial DMA transfers after initial transfer */
26299 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26300 			    blkno, nblk);
26301 		} else {
26302 			/* Initial transfer */
26303 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26304 			    un->un_pkt_flags, NULL_FUNC, NULL,
26305 			    blkno, nblk);
26306 		}
26307 #else
26308 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26309 		    0, NULL_FUNC, NULL, blkno, nblk);
26310 #endif
26311 
26312 		if (rval == 0) {
26313 			/* We were given a SCSI packet, continue. */
26314 			break;
26315 		}
26316 
26317 		if (i == 0) {
26318 			if (wr_bp->b_flags & B_ERROR) {
26319 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26320 				    "no resources for dumping; "
26321 				    "error code: 0x%x, retrying",
26322 				    geterror(wr_bp));
26323 			} else {
26324 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26325 				    "no resources for dumping; retrying");
26326 			}
26327 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26328 			if (wr_bp->b_flags & B_ERROR) {
26329 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26330 				    "no resources for dumping; error code: "
26331 				    "0x%x, retrying\n", geterror(wr_bp));
26332 			}
26333 		} else {
26334 			if (wr_bp->b_flags & B_ERROR) {
26335 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26336 				    "no resources for dumping; "
26337 				    "error code: 0x%x, retries failed, "
26338 				    "giving up.\n", geterror(wr_bp));
26339 			} else {
26340 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26341 				    "no resources for dumping; "
26342 				    "retries failed, giving up.\n");
26343 			}
26344 			mutex_enter(SD_MUTEX(un));
26345 			Restore_state(un);
26346 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26347 				mutex_exit(SD_MUTEX(un));
26348 				scsi_free_consistent_buf(wr_bp);
26349 			} else {
26350 				mutex_exit(SD_MUTEX(un));
26351 			}
26352 			return (EIO);
26353 		}
26354 		drv_usecwait(10000);
26355 	}
26356 
26357 #if defined(__i386) || defined(__amd64)
26358 	/*
26359 	 * save the resid from PARTIAL_DMA
26360 	 */
26361 	dma_resid = wr_pktp->pkt_resid;
26362 	if (dma_resid != 0)
26363 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26364 	wr_pktp->pkt_resid = 0;
26365 #endif
26366 
26367 	/* SunBug 1222170 */
26368 	wr_pktp->pkt_flags = FLAG_NOINTR;
26369 
26370 	err = EIO;
26371 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26372 
26373 		/*
26374 		 * Scsi_poll returns 0 (success) if the command completes and
26375 		 * the status block is STATUS_GOOD.  We should only check
26376 		 * errors if this condition is not true.  Even then we should
26377 		 * send our own request sense packet only if we have a check
26378 		 * condition and auto request sense has not been performed by
26379 		 * the hba.
26380 		 */
26381 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26382 
26383 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26384 		    (wr_pktp->pkt_resid == 0)) {
26385 			err = SD_SUCCESS;
26386 			break;
26387 		}
26388 
26389 		/*
26390 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
26391 		 */
26392 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26393 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26394 			    "Device is gone\n");
26395 			break;
26396 		}
26397 
26398 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26399 			SD_INFO(SD_LOG_DUMP, un,
26400 			    "sddump: write failed with CHECK, try # %d\n", i);
26401 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26402 				(void) sd_send_polled_RQS(un);
26403 			}
26404 
26405 			continue;
26406 		}
26407 
26408 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26409 			int reset_retval = 0;
26410 
26411 			SD_INFO(SD_LOG_DUMP, un,
26412 			    "sddump: write failed with BUSY, try # %d\n", i);
26413 
26414 			if (un->un_f_lun_reset_enabled == TRUE) {
26415 				reset_retval = scsi_reset(SD_ADDRESS(un),
26416 				    RESET_LUN);
26417 			}
26418 			if (reset_retval == 0) {
26419 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26420 			}
26421 			(void) sd_send_polled_RQS(un);
26422 
26423 		} else {
26424 			SD_INFO(SD_LOG_DUMP, un,
26425 			    "sddump: write failed with 0x%x, try # %d\n",
26426 			    SD_GET_PKT_STATUS(wr_pktp), i);
26427 			mutex_enter(SD_MUTEX(un));
26428 			sd_reset_target(un, wr_pktp);
26429 			mutex_exit(SD_MUTEX(un));
26430 		}
26431 
26432 		/*
26433 		 * If we are not getting anywhere with lun/target resets,
26434 		 * let's reset the bus.
26435 		 */
26436 		if (i == SD_NDUMP_RETRIES/2) {
26437 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26438 			(void) sd_send_polled_RQS(un);
26439 		}
26440 
26441 	}
26442 #if defined(__i386) || defined(__amd64)
26443 	}	/* dma_resid */
26444 #endif
26445 
26446 	scsi_destroy_pkt(wr_pktp);
26447 	mutex_enter(SD_MUTEX(un));
26448 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26449 		mutex_exit(SD_MUTEX(un));
26450 		scsi_free_consistent_buf(wr_bp);
26451 	} else {
26452 		mutex_exit(SD_MUTEX(un));
26453 	}
26454 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26455 	return (err);
26456 }
26457 
26458 /*
26459  *    Function: sd_scsi_poll()
26460  *
26461  * Description: This is a wrapper for the scsi_poll call.
26462  *
26463  *   Arguments: sd_lun - The unit structure
26464  *              scsi_pkt - The scsi packet being sent to the device.
26465  *
26466  * Return Code: 0 - Command completed successfully with good status
26467  *             -1 - Command failed.  This could indicate a check condition
26468  *                  or other status value requiring recovery action.
26469  *
26470  */
26471 
26472 static int
26473 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26474 {
26475 	int status;
26476 
26477 	ASSERT(un != NULL);
26478 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26479 	ASSERT(pktp != NULL);
26480 
26481 	status = SD_SUCCESS;
26482 
26483 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26484 		pktp->pkt_flags |= un->un_tagflags;
26485 		pktp->pkt_flags &= ~FLAG_NODISCON;
26486 	}
26487 
26488 	status = sd_ddi_scsi_poll(pktp);
26489 	/*
26490 	 * Scsi_poll returns 0 (success) if the command completes and the
26491 	 * status block is STATUS_GOOD.  We should only check errors if this
26492 	 * condition is not true.  Even then we should send our own request
26493 	 * sense packet only if we have a check condition and auto
26494 	 * request sense has not been performed by the hba.
26495 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26496 	 */
26497 	if ((status != SD_SUCCESS) &&
26498 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26499 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26500 	    (pktp->pkt_reason != CMD_DEV_GONE))
26501 		(void) sd_send_polled_RQS(un);
26502 
26503 	return (status);
26504 }
26505 
26506 /*
26507  *    Function: sd_send_polled_RQS()
26508  *
26509  * Description: This sends the request sense command to a device.
26510  *
26511  *   Arguments: sd_lun - The unit structure
26512  *
26513  * Return Code: 0 - Command completed successfully with good status
26514  *             -1 - Command failed.
26515  *
26516  */
26517 
26518 static int
26519 sd_send_polled_RQS(struct sd_lun *un)
26520 {
26521 	int	ret_val;
26522 	struct	scsi_pkt	*rqs_pktp;
26523 	struct	buf		*rqs_bp;
26524 
26525 	ASSERT(un != NULL);
26526 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26527 
26528 	ret_val = SD_SUCCESS;
26529 
26530 	rqs_pktp = un->un_rqs_pktp;
26531 	rqs_bp	 = un->un_rqs_bp;
26532 
26533 	mutex_enter(SD_MUTEX(un));
26534 
26535 	if (un->un_sense_isbusy) {
26536 		ret_val = SD_FAILURE;
26537 		mutex_exit(SD_MUTEX(un));
26538 		return (ret_val);
26539 	}
26540 
26541 	/*
26542 	 * If the request sense buffer (and packet) is not in use,
26543 	 * let's set the un_sense_isbusy and send our packet
26544 	 */
26545 	un->un_sense_isbusy 	= 1;
26546 	rqs_pktp->pkt_resid  	= 0;
26547 	rqs_pktp->pkt_reason 	= 0;
26548 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26549 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26550 
26551 	mutex_exit(SD_MUTEX(un));
26552 
26553 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26554 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26555 
26556 	/*
26557 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26558 	 * axle - it has a call into us!
26559 	 */
26560 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26561 		SD_INFO(SD_LOG_COMMON, un,
26562 		    "sd_send_polled_RQS: RQS failed\n");
26563 	}
26564 
26565 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26566 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26567 
26568 	mutex_enter(SD_MUTEX(un));
26569 	un->un_sense_isbusy = 0;
26570 	mutex_exit(SD_MUTEX(un));
26571 
26572 	return (ret_val);
26573 }
26574 
26575 /*
26576  * Defines needed for localized version of the scsi_poll routine.
26577  */
26578 #define	SD_CSEC		10000			/* usecs */
26579 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
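
/*
 * That is, SD_CSEC is one centisecond (10 msec) expressed in usec, and
 * SD_SEC_TO_CSEC is the 100 centiseconds per second used below to scale
 * pkt_time into polling-loop iterations.
 */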
26580 
26581 
26582 /*
26583  *    Function: sd_ddi_scsi_poll()
26584  *
26585  * Description: Localized version of the scsi_poll routine.  The purpose is to
26586  *		send a scsi_pkt to a device as a polled command.  This version
26587  *		is to ensure more robust handling of transport errors.
 *		Specifically, this routine handles the not-ready-to-ready
 *		transition during power up and reset of sonomas.  This can
 *		take up to 45 seconds for power-on and 20 seconds for reset
 *		of a sonoma lun.
26592  *
26593  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26594  *
26595  * Return Code: 0 - Command completed successfully with good status
26596  *             -1 - Command failed.
26597  *
26598  */
26599 
26600 static int
26601 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26602 {
26603 	int busy_count;
26604 	int timeout;
26605 	int rval = SD_FAILURE;
26606 	int savef;
26607 	struct scsi_extended_sense *sensep;
26608 	long savet;
26609 	void (*savec)();
26610 	/*
26611 	 * The following is defined in machdep.c and is used in determining if
26612 	 * the scsi transport system will do polled I/O instead of interrupt
26613 	 * I/O when called from xx_dump().
26614 	 */
26615 	extern int do_polled_io;
26616 
26617 	/*
26618 	 * save old flags in pkt, to restore at end
26619 	 */
26620 	savef = pkt->pkt_flags;
26621 	savec = pkt->pkt_comp;
26622 	savet = pkt->pkt_time;
26623 
26624 	pkt->pkt_flags |= FLAG_NOINTR;
26625 
26626 	/*
26627 	 * XXX there is nothing in the SCSA spec that states that we should not
26628 	 * do a callback for polled cmds; however, removing this will break sd
26629 	 * and probably other target drivers
26630 	 */
26631 	pkt->pkt_comp = NULL;
26632 
26633 	/*
	 * we don't like a polled command without a timeout;
26635 	 * 60 seconds seems long enough.
26636 	 */
26637 	if (pkt->pkt_time == 0) {
26638 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26639 	}
26640 
26641 	/*
26642 	 * Send polled cmd.
26643 	 *
26644 	 * We do some error recovery for various errors.  Tran_busy,
	 * queue full, and non-dispatched commands are retried every 10 msec,
	 * as they are typically transient failures.  Busy status and Not
26647 	 * Ready are retried every second as this status takes a while to
26648 	 * change.  Unit attention is retried for pkt_time (60) times
26649 	 * with no delay.
26650 	 */
26651 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
26652 
26653 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26654 		int rc;
26655 		int poll_delay;
26656 
26657 		/*
26658 		 * Initialize pkt status variables.
26659 		 */
26660 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26661 
26662 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26663 			if (rc != TRAN_BUSY) {
26664 				/* Transport failed - give up. */
26665 				break;
26666 			} else {
26667 				/* Transport busy - try again. */
26668 				poll_delay = 1 * SD_CSEC; /* 10 msec */
26669 			}
26670 		} else {
26671 			/*
26672 			 * Transport accepted - check pkt status.
26673 			 */
26674 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26675 			if (pkt->pkt_reason == CMD_CMPLT &&
26676 			    rc == STATUS_CHECK &&
26677 			    pkt->pkt_state & STATE_ARQ_DONE) {
26678 				struct scsi_arq_status *arqstat =
26679 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26680 
26681 				sensep = &arqstat->sts_sensedata;
26682 			} else {
26683 				sensep = NULL;
26684 			}
26685 
26686 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26687 			    (rc == STATUS_GOOD)) {
26688 				/* No error - we're done */
26689 				rval = SD_SUCCESS;
26690 				break;
26691 
26692 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26693 				/* Lost connection - give up */
26694 				break;
26695 
26696 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26697 			    (pkt->pkt_state == 0)) {
26698 				/* Pkt not dispatched - try again. */
26699 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26700 
26701 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26702 			    (rc == STATUS_QFULL)) {
26703 				/* Queue full - try again. */
26704 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26705 
26706 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26707 			    (rc == STATUS_BUSY)) {
26708 				/* Busy - try again. */
26709 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26710 				busy_count += (SD_SEC_TO_CSEC - 1);
26711 
26712 			} else if ((sensep != NULL) &&
26713 			    (sensep->es_key == KEY_UNIT_ATTENTION)) {
26714 				/* Unit Attention - try again */
26715 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
26716 				continue;
26717 
26718 			} else if ((sensep != NULL) &&
26719 			    (sensep->es_key == KEY_NOT_READY) &&
26720 			    (sensep->es_add_code == 0x04) &&
26721 			    (sensep->es_qual_code == 0x01)) {
26722 				/* Not ready -> ready - try again. */
26723 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26724 				busy_count += (SD_SEC_TO_CSEC - 1);
26725 
26726 			} else {
26727 				/* BAD status - give up. */
26728 				break;
26729 			}
26730 		}
26731 
26732 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
26733 		    !do_polled_io) {
26734 			delay(drv_usectohz(poll_delay));
26735 		} else {
26736 			/* we busy wait during cpr_dump or interrupt threads */
26737 			drv_usecwait(poll_delay);
26738 		}
26739 	}
26740 
26741 	pkt->pkt_flags = savef;
26742 	pkt->pkt_comp = savec;
26743 	pkt->pkt_time = savet;
26744 	return (rval);
26745 }
26746 
26747 
26748 /*
26749  *    Function: sd_persistent_reservation_in_read_keys
26750  *
 * Description: This routine is the driver entry point for handling
 *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26753  *		by sending the SCSI-3 PRIN commands to the device.
26754  *		Processes the read keys command response by copying the
26755  *		reservation key information into the user provided buffer.
26756  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26757  *
26758  *   Arguments: un   -  Pointer to soft state struct for the target.
26759  *		usrp -	user provided pointer to multihost Persistent In Read
26760  *			Keys structure (mhioc_inkeys_t)
26761  *		flag -	this argument is a pass through to ddi_copyxxx()
26762  *			directly from the mode argument of ioctl().
26763  *
26764  * Return Code: 0   - Success
26765  *		EACCES
26766  *		ENOTSUP
26767  *		errno return code from sd_send_scsi_cmd()
26768  *
26769  *     Context: Can sleep. Does not return until command is completed.
26770  */
26771 
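/*
 * Illustrative userland sketch of the MHIOCGRP_INKEYS path handled below;
 * the file descriptor fd and the key list size are assumptions, not part
 * of this driver:
 *
 *	mhioc_key_list_t	kl;
 *	mhioc_resv_key_t	keys[4];
 *	mhioc_inkeys_t		k;
 *
 *	kl.listsize = 4;
 *	kl.list = keys;
 *	k.li = &kl;
 *	(void) ioctl(fd, MHIOCGRP_INKEYS, &k);
 *
 * On success kl.listlen holds the number of keys registered with the
 * device, and the first min(kl.listsize, kl.listlen) entries of keys[]
 * are valid.
 */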
26772 static int
26773 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26774     mhioc_inkeys_t *usrp, int flag)
26775 {
26776 #ifdef _MULTI_DATAMODEL
26777 	struct mhioc_key_list32	li32;
26778 #endif
26779 	sd_prin_readkeys_t	*in;
26780 	mhioc_inkeys_t		*ptr;
26781 	mhioc_key_list_t	li;
26782 	uchar_t			*data_bufp = NULL;
26783 	int			data_len = 0;
26784 	int			rval;
26785 	size_t			copysz;
26786 
26787 	if ((ptr = usrp) == NULL) {
26788 		return (EINVAL);
26789 	}
26790 	bzero(&li, sizeof (mhioc_key_list_t));
26791 
26792 	/*
26793 	 * Get the listsize from user
26794 	 */
26795 #ifdef _MULTI_DATAMODEL
26796 
26797 	switch (ddi_model_convert_from(flag & FMODELS)) {
26798 	case DDI_MODEL_ILP32:
26799 		copysz = sizeof (struct mhioc_key_list32);
26800 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26801 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26802 			    "sd_persistent_reservation_in_read_keys: "
26803 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26804 			rval = EFAULT;
26805 			goto done;
26806 		}
26807 		li.listsize = li32.listsize;
26808 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26809 		break;
26810 
26811 	case DDI_MODEL_NONE:
26812 		copysz = sizeof (mhioc_key_list_t);
26813 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26814 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26815 			    "sd_persistent_reservation_in_read_keys: "
26816 			    "failed ddi_copyin: mhioc_key_list_t\n");
26817 			rval = EFAULT;
26818 			goto done;
26819 		}
26820 		break;
26821 	}
26822 
26823 #else /* ! _MULTI_DATAMODEL */
26824 	copysz = sizeof (mhioc_key_list_t);
26825 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26826 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26827 		    "sd_persistent_reservation_in_read_keys: "
26828 		    "failed ddi_copyin: mhioc_key_list_t\n");
26829 		rval = EFAULT;
26830 		goto done;
26831 	}
26832 #endif
26833 
26834 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26835 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26836 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26837 
26838 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
26839 	    data_len, data_bufp)) != 0) {
26840 		goto done;
26841 	}
26842 	in = (sd_prin_readkeys_t *)data_bufp;
26843 	ptr->generation = BE_32(in->generation);
26844 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26845 
26846 	/*
26847 	 * Return the min(listsize, listlen) keys
26848 	 */
26849 #ifdef _MULTI_DATAMODEL
26850 
26851 	switch (ddi_model_convert_from(flag & FMODELS)) {
26852 	case DDI_MODEL_ILP32:
26853 		li32.listlen = li.listlen;
26854 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26855 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26856 			    "sd_persistent_reservation_in_read_keys: "
26857 			    "failed ddi_copyout: mhioc_key_list32_t\n");
26858 			rval = EFAULT;
26859 			goto done;
26860 		}
26861 		break;
26862 
26863 	case DDI_MODEL_NONE:
26864 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26865 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26866 			    "sd_persistent_reservation_in_read_keys: "
26867 			    "failed ddi_copyout: mhioc_key_list_t\n");
26868 			rval = EFAULT;
26869 			goto done;
26870 		}
26871 		break;
26872 	}
26873 
26874 #else /* ! _MULTI_DATAMODEL */
26875 
26876 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26877 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26878 		    "sd_persistent_reservation_in_read_keys: "
26879 		    "failed ddi_copyout: mhioc_key_list_t\n");
26880 		rval = EFAULT;
26881 		goto done;
26882 	}
26883 
26884 #endif /* _MULTI_DATAMODEL */
26885 
26886 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
26887 	    li.listsize * MHIOC_RESV_KEY_SIZE);
26888 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
26889 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26890 		    "sd_persistent_reservation_in_read_keys: "
26891 		    "failed ddi_copyout: keylist\n");
26892 		rval = EFAULT;
26893 	}
26894 done:
26895 	if (data_bufp != NULL) {
		kmem_free(data_bufp, data_len);
	}
26896 	return (rval);
26897 }
26898 
26899 
26900 /*
26901  *    Function: sd_persistent_reservation_in_read_resv
26902  *
26903  * Description: This routine is the driver entry point for handling CD-ROM
26904  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26905  *		by sending the SCSI-3 PRIN commands to the device.
26906  *		Process the read persistent reservations command response by
26907  *		copying the reservation information into the user provided
26908  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26909  *
26910  *   Arguments: un   -  Pointer to soft state struct for the target.
26911  *		usrp -	user provided pointer to multihost Persistent In Read
26912  *			Reservations structure (mhioc_inresvs_t)
26913  *		flag -	this argument is a pass through to ddi_copyxxx()
26914  *			directly from the mode argument of ioctl().
26915  *
26916  * Return Code: 0   - Success
26917  *		EACCES
26918  *		ENOTSUP
26919  *		errno return code from sd_send_scsi_cmd()
26920  *
26921  *     Context: Can sleep. Does not return until command is completed.
26922  */
26923 
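/*
 * Illustrative userland sketch of the MHIOCGRP_INRESV path handled below
 * (fd and the list size are assumptions), analogous to the MHIOCGRP_INKEYS
 * example above:
 *
 *	mhioc_resv_desc_t	descs[4];
 *	mhioc_resv_desc_list_t	rl;
 *	mhioc_inresvs_t		r;
 *
 *	rl.listsize = 4;
 *	rl.list = descs;
 *	r.li = &rl;
 *	(void) ioctl(fd, MHIOCGRP_INRESV, &r);
 *
 * On success the first min(rl.listsize, rl.listlen) entries of descs[]
 * hold the key, scope, type, and scope specific address of each
 * outstanding reservation.
 */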
26924 static int
26925 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
26926     mhioc_inresvs_t *usrp, int flag)
26927 {
26928 #ifdef _MULTI_DATAMODEL
26929 	struct mhioc_resv_desc_list32 resvlist32;
26930 #endif
26931 	sd_prin_readresv_t	*in;
26932 	mhioc_inresvs_t		*ptr;
26933 	sd_readresv_desc_t	*readresv_ptr;
26934 	mhioc_resv_desc_list_t	resvlist;
26935 	mhioc_resv_desc_t 	resvdesc;
26936 	uchar_t			*data_bufp = NULL;
26937 	int			data_len = 0;
26938 	int			rval;
26939 	int			i;
26940 	size_t			copysz;
26941 	mhioc_resv_desc_t	*bufp;
26942 
26943 	if ((ptr = usrp) == NULL) {
26944 		return (EINVAL);
26945 	}
26946 
26947 	/*
26948 	 * Get the listsize from user
26949 	 */
26950 #ifdef _MULTI_DATAMODEL
26951 	switch (ddi_model_convert_from(flag & FMODELS)) {
26952 	case DDI_MODEL_ILP32:
26953 		copysz = sizeof (struct mhioc_resv_desc_list32);
26954 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
26955 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26956 			    "sd_persistent_reservation_in_read_resv: "
26957 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26958 			rval = EFAULT;
26959 			goto done;
26960 		}
26961 		resvlist.listsize = resvlist32.listsize;
26962 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
26963 		break;
26964 
26965 	case DDI_MODEL_NONE:
26966 		copysz = sizeof (mhioc_resv_desc_list_t);
26967 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26968 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26969 			    "sd_persistent_reservation_in_read_resv: "
26970 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26971 			rval = EFAULT;
26972 			goto done;
26973 		}
26974 		break;
26975 	}
26976 #else /* ! _MULTI_DATAMODEL */
26977 	copysz = sizeof (mhioc_resv_desc_list_t);
26978 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26979 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26980 		    "sd_persistent_reservation_in_read_resv: "
26981 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26982 		rval = EFAULT;
26983 		goto done;
26984 	}
26985 #endif /* ! _MULTI_DATAMODEL */
26986 
26987 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
26988 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
26989 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26990 
26991 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
26992 	    data_len, data_bufp)) != 0) {
26993 		goto done;
26994 	}
26995 	in = (sd_prin_readresv_t *)data_bufp;
26996 	ptr->generation = BE_32(in->generation);
26997 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
26998 
26999 	/*
27000 	 * Return the min(listsize, listlen) reservation descriptors
27001 	 */
27002 #ifdef _MULTI_DATAMODEL
27003 
27004 	switch (ddi_model_convert_from(flag & FMODELS)) {
27005 	case DDI_MODEL_ILP32:
27006 		resvlist32.listlen = resvlist.listlen;
27007 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27008 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27009 			    "sd_persistent_reservation_in_read_resv: "
27010 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27011 			rval = EFAULT;
27012 			goto done;
27013 		}
27014 		break;
27015 
27016 	case DDI_MODEL_NONE:
27017 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27018 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27019 			    "sd_persistent_reservation_in_read_resv: "
27020 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27021 			rval = EFAULT;
27022 			goto done;
27023 		}
27024 		break;
27025 	}
27026 
27027 #else /* ! _MULTI_DATAMODEL */
27028 
27029 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27030 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27031 		    "sd_persistent_reservation_in_read_resv: "
27032 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27033 		rval = EFAULT;
27034 		goto done;
27035 	}
27036 
27037 #endif /* ! _MULTI_DATAMODEL */
27038 
27039 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27040 	bufp = resvlist.list;
27041 	copysz = sizeof (mhioc_resv_desc_t);
27042 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27043 	    i++, readresv_ptr++, bufp++) {
27044 
27045 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27046 		    MHIOC_RESV_KEY_SIZE);
27047 		resvdesc.type  = readresv_ptr->type;
27048 		resvdesc.scope = readresv_ptr->scope;
27049 		resvdesc.scope_specific_addr =
27050 		    BE_32(readresv_ptr->scope_specific_addr);
27051 
27052 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27053 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27054 			    "sd_persistent_reservation_in_read_resv: "
27055 			    "failed ddi_copyout: resvlist\n");
27056 			rval = EFAULT;
27057 			goto done;
27058 		}
27059 	}
27060 done:
27061 	if (data_bufp != NULL) {
		kmem_free(data_bufp, data_len);
	}
27062 	return (rval);
27063 }
27064 
27065 
27066 /*
27067  *    Function: sr_change_blkmode()
27068  *
27069  * Description: This routine is the driver entry point for handling CD-ROM
27070  *		block mode ioctl requests. Support for returning and changing
27071  *		the current block size in use by the device is implemented. The
27072  *		LBA size is changed via a MODE SELECT Block Descriptor.
27073  *
27074  *		This routine issues a mode sense with an allocation length of
27075  *		12 bytes for the mode page header and a single block descriptor.
27076  *
27077  *   Arguments: dev - the device 'dev_t'
27078  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27079  *		      CDROMSBLKMODE (set)
27080  *		data - current block size or requested block size
27081  *		flag - this argument is a pass through to ddi_copyxxx() directly
27082  *		       from the mode argument of ioctl().
27083  *
27084  * Return Code: the code returned by sd_send_scsi_cmd()
27085  *		EINVAL if invalid arguments are provided
27086  *		EFAULT if ddi_copyxxx() fails
27087  *		ENXIO if fail ddi_get_soft_state
27088  *		EIO if invalid mode sense block descriptor length
27089  *
27090  */
27091 
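/*
 * Illustrative userland sketch (fd is an assumption).  Note the asymmetry
 * visible in the code below: CDROMSBLKMODE passes the new size by value,
 * while CDROMGBLKMODE passes a pointer for the result:
 *
 *	int	bsize;
 *
 *	(void) ioctl(fd, CDROMSBLKMODE, CDROM_BLK_2048);
 *	(void) ioctl(fd, CDROMGBLKMODE, &bsize);
 */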
27092 static int
27093 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
27094 {
27095 	struct sd_lun			*un = NULL;
27096 	struct mode_header		*sense_mhp, *select_mhp;
27097 	struct block_descriptor		*sense_desc, *select_desc;
27098 	int				current_bsize;
27099 	int				rval = EINVAL;
27100 	uchar_t				*sense = NULL;
27101 	uchar_t				*select = NULL;
27102 
27103 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
27104 
27105 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27106 		return (ENXIO);
27107 	}
27108 
27109 	/*
27110 	 * The block length is changed via the Mode Select block descriptor; the
27111 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
27112 	 * required as part of this routine. Therefore the mode sense allocation
27113 	 * length is specified to be the length of a mode page header and a
27114 	 * block descriptor.
27115 	 */
27116 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27117 
27118 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27119 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
27120 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27121 		    "sr_change_blkmode: Mode Sense Failed\n");
27122 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27123 		return (rval);
27124 	}
27125 
27126 	/* Check the block descriptor len to handle only 1 block descriptor */
27127 	sense_mhp = (struct mode_header *)sense;
27128 	if ((sense_mhp->bdesc_length == 0) ||
27129 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
27130 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27131 		    "sr_change_blkmode: Mode Sense returned invalid block"
27132 		    " descriptor length\n");
27133 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27134 		return (EIO);
27135 	}
27136 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
27137 	current_bsize = ((sense_desc->blksize_hi << 16) |
27138 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
27139 
27140 	/* Process command */
27141 	switch (cmd) {
27142 	case CDROMGBLKMODE:
27143 		/* Return the block size obtained during the mode sense */
27144 		if (ddi_copyout(&current_bsize, (void *)data,
27145 		    sizeof (int), flag) != 0)
27146 			rval = EFAULT;
27147 		break;
27148 	case CDROMSBLKMODE:
27149 		/* Validate the requested block size */
27150 		switch (data) {
27151 		case CDROM_BLK_512:
27152 		case CDROM_BLK_1024:
27153 		case CDROM_BLK_2048:
27154 		case CDROM_BLK_2056:
27155 		case CDROM_BLK_2336:
27156 		case CDROM_BLK_2340:
27157 		case CDROM_BLK_2352:
27158 		case CDROM_BLK_2368:
27159 		case CDROM_BLK_2448:
27160 		case CDROM_BLK_2646:
27161 		case CDROM_BLK_2647:
27162 			break;
27163 		default:
27164 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27165 			    "sr_change_blkmode: "
27166 			    "Block Size '%ld' Not Supported\n", data);
27167 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27168 			return (EINVAL);
27169 		}
27170 
27171 		/*
27172 		 * The current block size matches the requested block size so
27173 		 * there is no need to send the mode select to change the size
27174 		 */
27175 		if (current_bsize == data) {
27176 			break;
27177 		}
27178 
27179 		/* Build the select data for the requested block size */
27180 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27181 		select_mhp = (struct mode_header *)select;
27182 		select_desc =
27183 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
27184 		/*
27185 		 * The LBA size is changed via the block descriptor, so the
27186 		 * descriptor is built according to the user data
27187 		 */
27188 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
27189 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
27190 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
27191 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
27192 
27193 		/* Send the mode select for the requested block size */
27194 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27195 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27196 		    SD_PATH_STANDARD)) != 0) {
27197 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27198 			    "sr_change_blkmode: Mode Select Failed\n");
27199 			/*
27200 			 * The mode select failed for the requested block size,
27201 			 * so reset the data for the original block size and
27202 			 * send it to the target. The error is indicated by the
27203 			 * return value for the failed mode select.
27204 			 */
27205 			select_desc->blksize_hi  = sense_desc->blksize_hi;
27206 			select_desc->blksize_mid = sense_desc->blksize_mid;
27207 			select_desc->blksize_lo  = sense_desc->blksize_lo;
27208 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27209 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27210 			    SD_PATH_STANDARD);
27211 		} else {
27212 			ASSERT(!mutex_owned(SD_MUTEX(un)));
27213 			mutex_enter(SD_MUTEX(un));
27214 			sd_update_block_info(un, (uint32_t)data, 0);
27215 
27216 			mutex_exit(SD_MUTEX(un));
27217 		}
27218 		break;
27219 	default:
27220 		/* should not reach here, but check anyway */
27221 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27222 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
27223 		rval = EINVAL;
27224 		break;
27225 	}
27226 
27227 	if (select) {
27228 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
27229 	}
27230 	if (sense) {
27231 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27232 	}
27233 	return (rval);
27234 }
27235 
27236 
27237 /*
27238  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27239  * implement driver support for getting and setting the CD speed. The command
27240  * set used will be based on the device type. If the device has not been
27241  * identified as MMC the Toshiba vendor specific mode page will be used. If
27242  * the device is MMC but does not support the Real Time Streaming feature
27243  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27244  * be used to read the speed.
27245  */
27246 
27247 /*
27248  *    Function: sr_change_speed()
27249  *
27250  * Description: This routine is the driver entry point for handling CD-ROM
27251  *		drive speed ioctl requests for devices supporting the Toshiba
27252  *		vendor specific drive speed mode page. Support for returning
27253  *		and changing the current drive speed in use by the device is
27254  *		implemented.
27255  *
27256  *   Arguments: dev - the device 'dev_t'
27257  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27258  *		      CDROMSDRVSPEED (set)
27259  *		data - current drive speed or requested drive speed
27260  *		flag - this argument is a pass through to ddi_copyxxx() directly
27261  *		       from the mode argument of ioctl().
27262  *
27263  * Return Code: the code returned by sd_send_scsi_cmd()
27264  *		EINVAL if invalid arguments are provided
27265  *		EFAULT if ddi_copyxxx() fails
27266  *		ENXIO if fail ddi_get_soft_state
27267  *		EIO if invalid mode sense block descriptor length
27268  */
27269 
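/*
 * Illustrative userland sketch (fd is an assumption).  As with the block
 * mode ioctls, CDROMSDRVSPEED takes the speed code by value while
 * CDROMGDRVSPEED takes a pointer for the result:
 *
 *	int	speed;
 *
 *	(void) ioctl(fd, CDROMSDRVSPEED, CDROM_DOUBLE_SPEED);
 *	(void) ioctl(fd, CDROMGDRVSPEED, &speed);
 */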
27270 static int
27271 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27272 {
27273 	struct sd_lun			*un = NULL;
27274 	struct mode_header		*sense_mhp, *select_mhp;
27275 	struct mode_speed		*sense_page, *select_page;
27276 	int				current_speed;
27277 	int				rval = EINVAL;
27278 	int				bd_len;
27279 	uchar_t				*sense = NULL;
27280 	uchar_t				*select = NULL;
27281 
27282 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27283 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27284 		return (ENXIO);
27285 	}
27286 
27287 	/*
27288 	 * Note: The drive speed is being modified here according to a Toshiba
27289 	 * vendor specific mode page (0x31).
27290 	 */
27291 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27292 
27293 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27294 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27295 	    SD_PATH_STANDARD)) != 0) {
27296 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27297 		    "sr_change_speed: Mode Sense Failed\n");
27298 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27299 		return (rval);
27300 	}
27301 	sense_mhp  = (struct mode_header *)sense;
27302 
27303 	/* Check the block descriptor len to handle only 1 block descriptor */
27304 	bd_len = sense_mhp->bdesc_length;
27305 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27306 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27307 		    "sr_change_speed: Mode Sense returned invalid block "
27308 		    "descriptor length\n");
27309 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27310 		return (EIO);
27311 	}
27312 
27313 	sense_page = (struct mode_speed *)
27314 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27315 	current_speed = sense_page->speed;
27316 
27317 	/* Process command */
27318 	switch (cmd) {
27319 	case CDROMGDRVSPEED:
27320 		/* Return the drive speed obtained during the mode sense */
27321 		if (current_speed == 0x2) {
27322 			current_speed = CDROM_TWELVE_SPEED;
27323 		}
27324 		if (ddi_copyout(&current_speed, (void *)data,
27325 		    sizeof (int), flag) != 0) {
27326 			rval = EFAULT;
27327 		}
27328 		break;
27329 	case CDROMSDRVSPEED:
27330 		/* Validate the requested drive speed */
27331 		switch ((uchar_t)data) {
27332 		case CDROM_TWELVE_SPEED:
27333 			data = 0x2;
27334 			/*FALLTHROUGH*/
27335 		case CDROM_NORMAL_SPEED:
27336 		case CDROM_DOUBLE_SPEED:
27337 		case CDROM_QUAD_SPEED:
27338 		case CDROM_MAXIMUM_SPEED:
27339 			break;
27340 		default:
27341 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27342 			    "sr_change_speed: "
27343 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27344 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27345 			return (EINVAL);
27346 		}
27347 
27348 		/*
27349 		 * The current drive speed matches the requested drive speed so
27350 		 * there is no need to send the mode select to change the speed
27351 		 */
27352 		if (current_speed == data) {
27353 			break;
27354 		}
27355 
27356 		/* Build the select data for the requested drive speed */
27357 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27358 		select_mhp = (struct mode_header *)select;
27359 		select_mhp->bdesc_length = 0;
27360 		select_page =
27361 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27364 		select_page->mode_page.code = CDROM_MODE_SPEED;
27365 		select_page->mode_page.length = 2;
27366 		select_page->speed = (uchar_t)data;
27367 
27368 		/* Send the mode select for the requested drive speed */
27369 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27370 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27371 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27372 			/*
27373 			 * The mode select failed for the requested drive speed,
27374 			 * so reset the data for the original drive speed and
27375 			 * send it to the target. The error is indicated by the
27376 			 * return value for the failed mode select.
27377 			 */
27378 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27379 			    "sr_change_speed: Mode Select Failed\n");
27380 			select_page->speed = sense_page->speed;
27381 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27382 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27383 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27384 		}
27385 		break;
27386 	default:
27387 		/* should not reach here, but check anyway */
27388 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27389 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27390 		rval = EINVAL;
27391 		break;
27392 	}
27393 
27394 	if (select) {
27395 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27396 	}
27397 	if (sense) {
27398 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27399 	}
27400 
27401 	return (rval);
27402 }
27403 
27404 
27405 /*
27406  *    Function: sr_atapi_change_speed()
27407  *
27408  * Description: This routine is the driver entry point for handling CD-ROM
27409  *		drive speed ioctl requests for MMC devices that do not support
27410  *		the Real Time Streaming feature (0x107).
27411  *
27412  *		Note: This routine will use the SET SPEED command which may not
27413  *		be supported by all devices.
27414  *
27415  *   Arguments: dev- the device 'dev_t'
27416  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27417  *		     CDROMSDRVSPEED (set)
27418  *		data- current drive speed or requested drive speed
27419  *		flag- this argument is a pass through to ddi_copyxxx() directly
27420  *		      from the mode argument of ioctl().
27421  *
27422  * Return Code: the code returned by sd_send_scsi_cmd()
27423  *		EINVAL if invalid arguments are provided
27424  *		EFAULT if ddi_copyxxx() fails
27425  *		ENXIO if fail ddi_get_soft_state
27426  *		EIO if invalid mode sense block descriptor length
27427  */
27428 
27429 static int
27430 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27431 {
27432 	struct sd_lun			*un;
27433 	struct uscsi_cmd		*com = NULL;
27434 	struct mode_header_grp2		*sense_mhp;
27435 	uchar_t				*sense_page;
27436 	uchar_t				*sense = NULL;
27437 	char				cdb[CDB_GROUP5];
27438 	int				bd_len;
27439 	int				current_speed = 0;
27440 	int				max_speed = 0;
27441 	int				rval;
27442 
27443 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27444 
27445 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27446 		return (ENXIO);
27447 	}
27448 
27449 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27450 
27451 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
27452 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27453 	    SD_PATH_STANDARD)) != 0) {
27454 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27455 		    "sr_atapi_change_speed: Mode Sense Failed\n");
27456 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27457 		return (rval);
27458 	}
27459 
27460 	/* Check the block descriptor len to handle only 1 block descriptor */
27461 	sense_mhp = (struct mode_header_grp2 *)sense;
27462 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27463 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27464 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27465 		    "sr_atapi_change_speed: Mode Sense returned invalid "
27466 		    "block descriptor length\n");
27467 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27468 		return (EIO);
27469 	}
27470 
27471 	/* Calculate the current and maximum drive speeds */
27472 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27473 	current_speed = (sense_page[14] << 8) | sense_page[15];
27474 	max_speed = (sense_page[8] << 8) | sense_page[9];
27475 
27476 	/* Process the command */
27477 	switch (cmd) {
27478 	case CDROMGDRVSPEED:
27479 		current_speed /= SD_SPEED_1X;
27480 		if (ddi_copyout(&current_speed, (void *)data,
27481 		    sizeof (int), flag) != 0)
27482 			rval = EFAULT;
27483 		break;
27484 	case CDROMSDRVSPEED:
27485 		/* Convert the speed code to KB/sec */
27486 		switch ((uchar_t)data) {
27487 		case CDROM_NORMAL_SPEED:
27488 			current_speed = SD_SPEED_1X;
27489 			break;
27490 		case CDROM_DOUBLE_SPEED:
27491 			current_speed = 2 * SD_SPEED_1X;
27492 			break;
27493 		case CDROM_QUAD_SPEED:
27494 			current_speed = 4 * SD_SPEED_1X;
27495 			break;
27496 		case CDROM_TWELVE_SPEED:
27497 			current_speed = 12 * SD_SPEED_1X;
27498 			break;
27499 		case CDROM_MAXIMUM_SPEED:
27500 			current_speed = 0xffff;
27501 			break;
27502 		default:
27503 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27504 			    "sr_atapi_change_speed: invalid drive speed %d\n",
27505 			    (uchar_t)data);
27506 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27507 			return (EINVAL);
27508 		}
27509 
27510 		/* Check the request against the drive's max speed. */
27511 		if (current_speed != 0xffff) {
27512 			if (current_speed > max_speed) {
27513 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27514 				return (EINVAL);
27515 			}
27516 		}
27517 
27518 		/*
27519 		 * Build and send the SET SPEED command
27520 		 *
27521 		 * Note: The SET SPEED (0xBB) command used in this routine is
27522 		 * obsolete per the SCSI MMC spec but still supported in the
27523 		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI,
27524 		 * therefore the command is still implemented in this routine.
27525 		 */
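		/*
		 * Worked example, assuming SD_SPEED_1X is 176 (KB/sec, the
		 * nominal 1x CD-ROM transfer rate): CDROM_QUAD_SPEED maps
		 * to current_speed = 4 * 176 = 704 = 0x02C0, so the CDB is
		 * built with cdb[2] = 0x02 and cdb[3] = 0xC0.
		 */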
27526 		bzero(cdb, sizeof (cdb));
27527 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
27528 		cdb[2] = (uchar_t)(current_speed >> 8);
27529 		cdb[3] = (uchar_t)current_speed;
27530 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27531 		com->uscsi_cdb	   = (caddr_t)cdb;
27532 		com->uscsi_cdblen  = CDB_GROUP5;
27533 		com->uscsi_bufaddr = NULL;
27534 		com->uscsi_buflen  = 0;
27535 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
27536 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
27537 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27538 		break;
27539 	default:
27540 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27541 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27542 		rval = EINVAL;
27543 	}
27544 
27545 	if (sense) {
27546 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27547 	}
27548 	if (com) {
27549 		kmem_free(com, sizeof (*com));
27550 	}
27551 	return (rval);
27552 }
27553 
27554 
27555 /*
27556  *    Function: sr_pause_resume()
27557  *
27558  * Description: This routine is the driver entry point for handling CD-ROM
27559  *		pause/resume ioctl requests. This only affects the audio play
27560  *		operation.
27561  *
27562  *   Arguments: dev - the device 'dev_t'
27563  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27564  *		      for setting the resume bit of the cdb.
27565  *
27566  * Return Code: the code returned by sd_send_scsi_cmd()
27567  *		EINVAL if invalid mode specified
27568  *
27569  */
27570 
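/*
 * Illustrative userland sketch (fd is an assumption); CDROMPAUSE halts
 * audio playback and CDROMRESUME continues from the paused address:
 *
 *	(void) ioctl(fd, CDROMPAUSE);
 *	(void) ioctl(fd, CDROMRESUME);
 */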
27571 static int
27572 sr_pause_resume(dev_t dev, int cmd)
27573 {
27574 	struct sd_lun		*un;
27575 	struct uscsi_cmd	*com;
27576 	char			cdb[CDB_GROUP1];
27577 	int			rval;
27578 
27579 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27580 		return (ENXIO);
27581 	}
27582 
27583 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27584 	bzero(cdb, CDB_GROUP1);
27585 	cdb[0] = SCMD_PAUSE_RESUME;
27586 	switch (cmd) {
27587 	case CDROMRESUME:
27588 		cdb[8] = 1;
27589 		break;
27590 	case CDROMPAUSE:
27591 		cdb[8] = 0;
27592 		break;
27593 	default:
27594 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27595 		    " Command '%x' Not Supported\n", cmd);
27596 		rval = EINVAL;
27597 		goto done;
27598 	}
27599 
27600 	com->uscsi_cdb    = cdb;
27601 	com->uscsi_cdblen = CDB_GROUP1;
27602 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27603 
27604 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27605 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27606 
27607 done:
27608 	kmem_free(com, sizeof (*com));
27609 	return (rval);
27610 }
27611 
27612 
27613 /*
27614  *    Function: sr_play_msf()
27615  *
27616  * Description: This routine is the driver entry point for handling CD-ROM
27617  *		ioctl requests to output the audio signals at the specified
27618  *		starting address and continue the audio play until the specified
27619  *		ending address (CDROMPLAYMSF). The address is in Minute Second
27620  *		Frame (MSF) format.
27621  *
27622  *   Arguments: dev	- the device 'dev_t'
27623  *		data	- pointer to user provided audio msf structure,
27624  *		          specifying start/end addresses.
27625  *		flag	- this argument is a pass through to ddi_copyxxx()
27626  *		          directly from the mode argument of ioctl().
27627  *
27628  * Return Code: the code returned by sd_send_scsi_cmd()
27629  *		EFAULT if ddi_copyxxx() fails
27630  *		ENXIO if fail ddi_get_soft_state
27631  *		EINVAL if data pointer is NULL
27632  */
27633 
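/*
 * Illustrative userland sketch of CDROMPLAYMSF (fd and the addresses are
 * assumptions); this plays from 2:00.00 through 5:30.00:
 *
 *	struct cdrom_msf	msf;
 *
 *	msf.cdmsf_min0 = 2;	msf.cdmsf_sec0 = 0;	msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 5;	msf.cdmsf_sec1 = 30;	msf.cdmsf_frame1 = 0;
 *	(void) ioctl(fd, CDROMPLAYMSF, &msf);
 */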
27634 static int
27635 sr_play_msf(dev_t dev, caddr_t data, int flag)
27636 {
27637 	struct sd_lun		*un;
27638 	struct uscsi_cmd	*com;
27639 	struct cdrom_msf	msf_struct;
27640 	struct cdrom_msf	*msf = &msf_struct;
27641 	char			cdb[CDB_GROUP1];
27642 	int			rval;
27643 
27644 	if (data == NULL) {
27645 		return (EINVAL);
27646 	}
27647 
27648 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27649 		return (ENXIO);
27650 	}
27651 
27652 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27653 		return (EFAULT);
27654 	}
27655 
27656 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27657 	bzero(cdb, CDB_GROUP1);
27658 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27659 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27660 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27661 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27662 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27663 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27664 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27665 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27666 	} else {
27667 		cdb[3] = msf->cdmsf_min0;
27668 		cdb[4] = msf->cdmsf_sec0;
27669 		cdb[5] = msf->cdmsf_frame0;
27670 		cdb[6] = msf->cdmsf_min1;
27671 		cdb[7] = msf->cdmsf_sec1;
27672 		cdb[8] = msf->cdmsf_frame1;
27673 	}
27674 	com->uscsi_cdb    = cdb;
27675 	com->uscsi_cdblen = CDB_GROUP1;
27676 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27677 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27678 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27679 	kmem_free(com, sizeof (*com));
27680 	return (rval);
27681 }
27682 
27683 
27684 /*
27685  *    Function: sr_play_trkind()
27686  *
27687  * Description: This routine is the driver entry point for handling CD-ROM
27688  *		ioctl requests to output the audio signals at the specified
27689  *		starting address and continue the audio play until the specified
27690  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27691  *		format.
27692  *
27693  *   Arguments: dev	- the device 'dev_t'
27694  *		data	- pointer to user provided audio track/index structure,
27695  *		          specifying start/end addresses.
27696  *		flag	- this argument is a pass through to ddi_copyxxx()
27697  *		          directly from the mode argument of ioctl().
27698  *
27699  * Return Code: the code returned by sd_send_scsi_cmd()
27700  *		EFAULT if ddi_copyxxx() fails
27701  *		ENXIO if fail ddi_get_soft_state
27702  *		EINVAL if data pointer is NULL
27703  */
27704 
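/*
 * Illustrative userland sketch of CDROMPLAYTRKIND (fd and the track
 * numbers are assumptions); this plays from track 2 index 1 through
 * track 4 index 1:
 *
 *	struct cdrom_ti	ti;
 *
 *	ti.cdti_trk0 = 2;	ti.cdti_ind0 = 1;
 *	ti.cdti_trk1 = 4;	ti.cdti_ind1 = 1;
 *	(void) ioctl(fd, CDROMPLAYTRKIND, &ti);
 */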
27705 static int
27706 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27707 {
27708 	struct cdrom_ti		ti_struct;
27709 	struct cdrom_ti		*ti = &ti_struct;
27710 	struct uscsi_cmd	*com = NULL;
27711 	char			cdb[CDB_GROUP1];
27712 	int			rval;
27713 
27714 	if (data == NULL) {
27715 		return (EINVAL);
27716 	}
27717 
27718 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27719 		return (EFAULT);
27720 	}
27721 
27722 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27723 	bzero(cdb, CDB_GROUP1);
27724 	cdb[0] = SCMD_PLAYAUDIO_TI;
27725 	cdb[4] = ti->cdti_trk0;
27726 	cdb[5] = ti->cdti_ind0;
27727 	cdb[7] = ti->cdti_trk1;
27728 	cdb[8] = ti->cdti_ind1;
27729 	com->uscsi_cdb    = cdb;
27730 	com->uscsi_cdblen = CDB_GROUP1;
27731 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27732 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27733 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27734 	kmem_free(com, sizeof (*com));
27735 	return (rval);
27736 }
27737 
27738 
27739 /*
27740  *    Function: sr_read_all_subcodes()
27741  *
27742  * Description: This routine is the driver entry point for handling CD-ROM
27743  *		ioctl requests to return raw subcode data while the target is
27744  *		playing audio (CDROMSUBCODE).
27745  *
27746  *   Arguments: dev	- the device 'dev_t'
27747  *		data	- pointer to user provided cdrom subcode structure,
27748  *		          specifying the transfer length and address.
27749  *		flag	- this argument is a pass through to ddi_copyxxx()
27750  *		          directly from the mode argument of ioctl().
27751  *
27752  * Return Code: the code returned by sd_send_scsi_cmd()
27753  *		EFAULT if ddi_copyxxx() fails
27754  *		ENXIO if fail ddi_get_soft_state
27755  *		EINVAL if data pointer is NULL
27756  */
27757 
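/*
 * Worked example of the transfer length arithmetic below, assuming
 * CDROM_BLK_SUBCODE is 96 (the raw subcode bytes per block): a user
 * cdsc_length of 16 blocks yields buflen = 16 * 96 = 1536 bytes
 * transferred to cdsc_addr.
 */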
27758 static int
27759 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27760 {
27761 	struct sd_lun		*un = NULL;
27762 	struct uscsi_cmd	*com = NULL;
27763 	struct cdrom_subcode	*subcode = NULL;
27764 	int			rval;
27765 	size_t			buflen;
27766 	char			cdb[CDB_GROUP5];
27767 
27768 #ifdef _MULTI_DATAMODEL
27769 	/* To support ILP32 applications in an LP64 world */
27770 	struct cdrom_subcode32		cdrom_subcode32;
27771 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27772 #endif
27773 	if (data == NULL) {
27774 		return (EINVAL);
27775 	}
27776 
27777 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27778 		return (ENXIO);
27779 	}
27780 
27781 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27782 
27783 #ifdef _MULTI_DATAMODEL
27784 	switch (ddi_model_convert_from(flag & FMODELS)) {
27785 	case DDI_MODEL_ILP32:
27786 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27787 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27788 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27789 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27790 			return (EFAULT);
27791 		}
27792 		/* Convert the ILP32 uscsi data from the application to LP64 */
27793 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27794 		break;
27795 	case DDI_MODEL_NONE:
27796 		if (ddi_copyin(data, subcode,
27797 		    sizeof (struct cdrom_subcode), flag)) {
27798 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27799 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27800 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27801 			return (EFAULT);
27802 		}
27803 		break;
27804 	}
27805 #else /* ! _MULTI_DATAMODEL */
27806 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27807 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27808 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27809 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27810 		return (EFAULT);
27811 	}
27812 #endif /* _MULTI_DATAMODEL */
27813 
27814 	/*
27815 	 * Since MMC-2 expects max 3 bytes for length, check if the
27816 	 * length input is greater than 3 bytes
27817 	 */
27818 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27819 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27820 		    "sr_read_all_subcodes: "
27821 		    "cdrom transfer length too large: %d (limit %d)\n",
27822 		    subcode->cdsc_length, 0xFFFFFF);
27823 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27824 		return (EINVAL);
27825 	}
27826 
27827 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
27828 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27829 	bzero(cdb, CDB_GROUP5);
27830 
27831 	if (un->un_f_mmc_cap == TRUE) {
27832 		cdb[0] = (char)SCMD_READ_CD;
27833 		cdb[2] = (char)0xff;
27834 		cdb[3] = (char)0xff;
27835 		cdb[4] = (char)0xff;
27836 		cdb[5] = (char)0xff;
27837 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27838 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27839 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
27840 		cdb[10] = 1;
27841 	} else {
27842 		/*
27843 		 * Note: A vendor specific command (0xDF) is being used here to
27844 		 * request a read of all subcodes.
27845 		 */
27846 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
27847 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
27848 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27849 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27850 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
27851 	}
27852 	com->uscsi_cdb	   = cdb;
27853 	com->uscsi_cdblen  = CDB_GROUP5;
27854 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
27855 	com->uscsi_buflen  = buflen;
27856 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27857 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
27858 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27859 	kmem_free(subcode, sizeof (struct cdrom_subcode));
27860 	kmem_free(com, sizeof (*com));
27861 	return (rval);
27862 }
27863 
27864 
27865 /*
27866  *    Function: sr_read_subchannel()
27867  *
27868  * Description: This routine is the driver entry point for handling CD-ROM
27869  *		ioctl requests to return the Q sub-channel data of the CD
27870  *		current position block (CDROMSUBCHNL). The data includes the
27871  *		track number, index number, absolute CD-ROM address (LBA or MSF
27872  *		format per the user), track relative CD-ROM address (LBA or MSF
27873  *		format per the user), control data and audio status.
27874  *
27875  *   Arguments: dev	- the device 'dev_t'
27876  *		data	- pointer to user provided cdrom sub-channel structure
27877  *		flag	- this argument is a pass through to ddi_copyxxx()
27878  *		          directly from the mode argument of ioctl().
27879  *
27880  * Return Code: the code returned by sd_send_scsi_cmd()
27881  *		EFAULT if ddi_copyxxx() fails
27882  *		ENXIO if fail ddi_get_soft_state
27883  *		EINVAL if data pointer is NULL
27884  */
27885 
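/*
 * Illustrative userland sketch of CDROMSUBCHNL (fd is an assumption).
 * The caller selects the address format up front, since the code below
 * reads cdsc_format when building the CDB:
 *
 *	struct cdrom_subchnl	sc;
 *
 *	sc.cdsc_format = CDROM_MSF;
 *	(void) ioctl(fd, CDROMSUBCHNL, &sc);
 *
 * On success sc.cdsc_trk and sc.cdsc_ind give the current track and
 * index, and sc.cdsc_absaddr.msf gives the absolute position.
 */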
27886 static int
27887 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
27888 {
27889 	struct sd_lun		*un;
27890 	struct uscsi_cmd	*com;
27891 	struct cdrom_subchnl	subchannel;
27892 	struct cdrom_subchnl	*subchnl = &subchannel;
27893 	char			cdb[CDB_GROUP1];
27894 	caddr_t			buffer;
27895 	int			rval;
27896 
27897 	if (data == NULL) {
27898 		return (EINVAL);
27899 	}
27900 
27901 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27902 	    (un->un_state == SD_STATE_OFFLINE)) {
27903 		return (ENXIO);
27904 	}
27905 
27906 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
27907 		return (EFAULT);
27908 	}
27909 
27910 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
27911 	bzero(cdb, CDB_GROUP1);
27912 	cdb[0] = SCMD_READ_SUBCHANNEL;
27913 	/* Set the MSF bit based on the user requested address format */
27914 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
27915 	/*
27916 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
27917 	 * returned
27918 	 */
27919 	cdb[2] = 0x40;
27920 	/*
27921 	 * Set byte 3 to specify the return data format. A value of 0x01
27922 	 * indicates that the CD-ROM current position should be returned.
27923 	 */
27924 	cdb[3] = 0x01;
27925 	cdb[8] = 0x10;
27926 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27927 	com->uscsi_cdb	   = cdb;
27928 	com->uscsi_cdblen  = CDB_GROUP1;
27929 	com->uscsi_bufaddr = buffer;
27930 	com->uscsi_buflen  = 16;
27931 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27932 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27933 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27934 	if (rval != 0) {
27935 		kmem_free(buffer, 16);
27936 		kmem_free(com, sizeof (*com));
27937 		return (rval);
27938 	}
27939 
27940 	/* Process the returned Q sub-channel data */
27941 	subchnl->cdsc_audiostatus = buffer[1];
27942 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
27943 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
27944 	subchnl->cdsc_trk	= buffer[6];
27945 	subchnl->cdsc_ind	= buffer[7];
27946 	if (subchnl->cdsc_format & CDROM_LBA) {
27947 		subchnl->cdsc_absaddr.lba =
27948 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27949 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27950 		subchnl->cdsc_reladdr.lba =
27951 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
27952 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
27953 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
27954 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
27955 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
27956 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
27957 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
27958 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
27959 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
27960 	} else {
27961 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
27962 		subchnl->cdsc_absaddr.msf.second = buffer[10];
27963 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
27964 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
27965 		subchnl->cdsc_reladdr.msf.second = buffer[14];
27966 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
27967 	}
27968 	kmem_free(buffer, 16);
27969 	kmem_free(com, sizeof (*com));
27970 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
27971 	    != 0) {
27972 		return (EFAULT);
27973 	}
27974 	return (rval);
27975 }
27976 
27977 
27978 /*
27979  *    Function: sr_read_tocentry()
27980  *
27981  * Description: This routine is the driver entry point for handling CD-ROM
27982  *		ioctl requests to read from the Table of Contents (TOC)
27983  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27984  *		fields, the starting address (LBA or MSF format per the user)
27985  *		and the data mode if the user specified track is a data track.
27986  *
27987  *		Note: The READ HEADER (0x44) command used in this routine is
27988  *		obsolete per the SCSI MMC spec but still supported in the
27989  *		MT FUJI vendor spec. Most equipment adheres to MT FUJI,
27990  *		therefore the command is still implemented in this routine.
27991  *
27992  *   Arguments: dev	- the device 'dev_t'
27993  *		data	- pointer to user provided toc entry structure,
27994  *			  specifying the track # and the address format
27995  *			  (LBA or MSF).
27996  *		flag	- this argument is a pass through to ddi_copyxxx()
27997  *		          directly from the mode argument of ioctl().
27998  *
27999  * Return Code: the code returned by sd_send_scsi_cmd()
28000  *		EFAULT if ddi_copyxxx() fails
28001  *		ENXIO if fail ddi_get_soft_state
28002  *		EINVAL if data pointer is NULL
28003  */
28004 
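/*
 * Illustrative userland sketch of CDROMREADTOCENTRY (fd and the track
 * number are assumptions):
 *
 *	struct cdrom_tocentry	te;
 *
 *	te.cdte_track = 1;
 *	te.cdte_format = CDROM_MSF;
 *	(void) ioctl(fd, CDROMREADTOCENTRY, &te);
 *
 * On success te.cdte_addr.msf holds the track start address; for a data
 * track te.cdte_datamode holds the mode from READ HEADER, or (uchar_t)-1
 * if that command failed.
 */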
28005 static int
28006 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28007 {
28008 	struct sd_lun		*un = NULL;
28009 	struct uscsi_cmd	*com;
28010 	struct cdrom_tocentry	toc_entry;
28011 	struct cdrom_tocentry	*entry = &toc_entry;
28012 	caddr_t			buffer;
28013 	int			rval;
28014 	char			cdb[CDB_GROUP1];
28015 
28016 	if (data == NULL) {
28017 		return (EINVAL);
28018 	}
28019 
28020 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28021 	    (un->un_state == SD_STATE_OFFLINE)) {
28022 		return (ENXIO);
28023 	}
28024 
28025 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28026 		return (EFAULT);
28027 	}
28028 
28029 	/* Validate the requested track and address format */
28030 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28031 		return (EINVAL);
28032 	}
28033 
28034 	if (entry->cdte_track == 0) {
28035 		return (EINVAL);
28036 	}
28037 
28038 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28039 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28040 	bzero(cdb, CDB_GROUP1);
28041 
28042 	cdb[0] = SCMD_READ_TOC;
28043 	/* Set the MSF bit based on the user requested address format  */
28044 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28045 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28046 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28047 	} else {
28048 		cdb[6] = entry->cdte_track;
28049 	}
28050 
28051 	/*
28052 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28053 	 * (4 byte TOC response header + 8 byte track descriptor)
28054 	 */
28055 	cdb[8] = 12;
28056 	com->uscsi_cdb	   = cdb;
28057 	com->uscsi_cdblen  = CDB_GROUP1;
28058 	com->uscsi_bufaddr = buffer;
28059 	com->uscsi_buflen  = 0x0C;
28060 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28061 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28062 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28063 	if (rval != 0) {
28064 		kmem_free(buffer, 12);
28065 		kmem_free(com, sizeof (*com));
28066 		return (rval);
28067 	}
28068 
28069 	/* Process the toc entry */
28070 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28071 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28072 	if (entry->cdte_format & CDROM_LBA) {
28073 		entry->cdte_addr.lba =
28074 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28075 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28076 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28077 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28078 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28079 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28080 		/*
28081 		 * Send a READ TOC command using the LBA address format to get
28082 		 * the LBA for the track requested so it can be used in the
28083 		 * READ HEADER request
28084 		 *
28085 		 * Note: The MSF bit of the READ HEADER command specifies the
28086 		 * output format. The block address specified in that command
28087 		 * must be in LBA format.
28088 		 */
28089 		cdb[1] = 0;
28090 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28091 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28092 		if (rval != 0) {
28093 			kmem_free(buffer, 12);
28094 			kmem_free(com, sizeof (*com));
28095 			return (rval);
28096 		}
28097 	} else {
28098 		entry->cdte_addr.msf.minute	= buffer[9];
28099 		entry->cdte_addr.msf.second	= buffer[10];
28100 		entry->cdte_addr.msf.frame	= buffer[11];
28101 		/*
28102 		 * Send a READ TOC command using the LBA address format to get
28103 		 * the LBA for the track requested so it can be used in the
28104 		 * READ HEADER request
28105 		 *
28106 		 * Note: The MSF bit of the READ HEADER command specifies the
28107 		 * output format. The block address specified in that command
28108 		 * must be in LBA format.
28109 		 */
28110 		cdb[1] = 0;
28111 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28112 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28113 		if (rval != 0) {
28114 			kmem_free(buffer, 12);
28115 			kmem_free(com, sizeof (*com));
28116 			return (rval);
28117 		}
28118 	}
28119 
28120 	/*
28121 	 * Build and send the READ HEADER command to determine the data mode of
28122 	 * the user specified track.
28123 	 */
28124 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28125 	    (entry->cdte_track != CDROM_LEADOUT)) {
28126 		bzero(cdb, CDB_GROUP1);
28127 		cdb[0] = SCMD_READ_HEADER;
28128 		cdb[2] = buffer[8];
28129 		cdb[3] = buffer[9];
28130 		cdb[4] = buffer[10];
28131 		cdb[5] = buffer[11];
28132 		cdb[8] = 0x08;
28133 		com->uscsi_buflen = 0x08;
28134 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28135 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28136 		if (rval == 0) {
28137 			entry->cdte_datamode = buffer[0];
28138 		} else {
28139 			/*
28140 			 * The READ HEADER command failed. Since it is
28141 			 * obsolete in one spec, it is better to return
28142 			 * -1 for an invalid track so that we can still
28143 			 * receive the rest of the TOC data.
28144 			 */
28145 			entry->cdte_datamode = (uchar_t)-1;
28146 		}
28147 	} else {
28148 		entry->cdte_datamode = (uchar_t)-1;
28149 	}
28150 
28151 	kmem_free(buffer, 12);
28152 	kmem_free(com, sizeof (*com));
28153 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28154 		return (EFAULT);
28155 
28156 	return (rval);
28157 }
28158 
28159 
28160 /*
28161  *    Function: sr_read_tochdr()
28162  *
28163  * Description: This routine is the driver entry point for handling CD-ROM
28164  *		ioctl requests to read the Table of Contents (TOC) header
28165  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28166  *		and ending track numbers.
28167  *
28168  *   Arguments: dev	- the device 'dev_t'
28169  *		data	- pointer to user provided toc header structure,
28170  *			  specifying the starting and ending track numbers.
28171  *		flag	- this argument is a pass through to ddi_copyxxx()
28172  *			  directly from the mode argument of ioctl().
28173  *
28174  * Return Code: the code returned by sd_send_scsi_cmd()
28175  *		EFAULT if ddi_copyxxx() fails
28176  *		ENXIO if fail ddi_get_soft_state
28177  *		EINVAL if data pointer is NULL
28178  */
28179 
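/*
 * Illustrative userland sketch of CDROMREADTOCHDR (fd is an assumption):
 *
 *	struct cdrom_tochdr	th;
 *
 *	(void) ioctl(fd, CDROMREADTOCHDR, &th);
 *
 * On success th.cdth_trk0 and th.cdth_trk1 hold the first and last track
 * numbers on the disk.
 */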
28180 static int
28181 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28182 {
28183 	struct sd_lun		*un;
28184 	struct uscsi_cmd	*com;
28185 	struct cdrom_tochdr	toc_header;
28186 	struct cdrom_tochdr	*hdr = &toc_header;
28187 	char			cdb[CDB_GROUP1];
28188 	int			rval;
28189 	caddr_t			buffer;
28190 
28191 	if (data == NULL) {
28192 		return (EINVAL);
28193 	}
28194 
28195 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28196 	    (un->un_state == SD_STATE_OFFLINE)) {
28197 		return (ENXIO);
28198 	}
28199 
28200 	buffer = kmem_zalloc(4, KM_SLEEP);
28201 	bzero(cdb, CDB_GROUP1);
28202 	cdb[0] = SCMD_READ_TOC;
28203 	/*
28204 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28205 	 * that the TOC header should be returned
28206 	 */
28207 	cdb[6] = 0x00;
28208 	/*
28209 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28210 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28211 	 */
28212 	cdb[8] = 0x04;
28213 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28214 	com->uscsi_cdb	   = cdb;
28215 	com->uscsi_cdblen  = CDB_GROUP1;
28216 	com->uscsi_bufaddr = buffer;
28217 	com->uscsi_buflen  = 0x04;
28218 	com->uscsi_timeout = 300;
28219 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28220 
28221 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28222 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28223 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28224 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28225 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28226 	} else {
28227 		hdr->cdth_trk0 = buffer[2];
28228 		hdr->cdth_trk1 = buffer[3];
28229 	}
28230 	kmem_free(buffer, 4);
28231 	kmem_free(com, sizeof (*com));
28232 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28233 		return (EFAULT);
28234 	}
28235 	return (rval);
28236 }
28237 
28238 
28239 /*
28240  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28241  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28242  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28243  * digital audio and extended architecture digital audio. These modes are
28244  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28245  * MMC specs.
28246  *
28247  * In addition to support for the various data formats these routines also
28248  * include support for devices that implement only the direct access READ
28249  * commands (0x08, 0x28), devices that implement the READ_CD commands
28250  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28251  * READ CDXA commands (0xD8, 0xDB)
28252  */
28253 
28254 /*
28255  *    Function: sr_read_mode1()
28256  *
28257  * Description: This routine is the driver entry point for handling CD-ROM
28258  *		ioctl read mode1 requests (CDROMREADMODE1).
28259  *
28260  *   Arguments: dev	- the device 'dev_t'
28261  *		data	- pointer to user provided cd read structure specifying
28262  *			  the lba buffer address and length.
28263  *		flag	- this argument is a pass through to ddi_copyxxx()
28264  *			  directly from the mode argument of ioctl().
28265  *
28266  * Return Code: the code returned by sd_send_scsi_cmd()
28267  *		EFAULT if ddi_copyxxx() fails
28268  *		ENXIO if fail ddi_get_soft_state
28269  *		EINVAL if data pointer is NULL
28270  */
28271 
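/*
 * Illustrative userland sketch of CDROMREADMODE1 (fd, the LBA, and the
 * buffer size are assumptions); mode 1 blocks carry 2048 bytes of user
 * data each:
 *
 *	struct cdrom_read	cr;
 *	char			buf[2048];
 *
 *	cr.cdread_lba = 16;
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = sizeof (buf);
 *	(void) ioctl(fd, CDROMREADMODE1, &cr);
 */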
28272 static int
28273 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28274 {
28275 	struct sd_lun		*un;
28276 	struct cdrom_read	mode1_struct;
28277 	struct cdrom_read	*mode1 = &mode1_struct;
28278 	int			rval;
28279 #ifdef _MULTI_DATAMODEL
28280 	/* To support ILP32 applications in an LP64 world */
28281 	struct cdrom_read32	cdrom_read32;
28282 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28283 #endif /* _MULTI_DATAMODEL */
28284 
28285 	if (data == NULL) {
28286 		return (EINVAL);
28287 	}
28288 
28289 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28290 	    (un->un_state == SD_STATE_OFFLINE)) {
28291 		return (ENXIO);
28292 	}
28293 
28294 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28295 	    "sr_read_mode1: entry: un:0x%p\n", un);
28296 
28297 #ifdef _MULTI_DATAMODEL
28298 	switch (ddi_model_convert_from(flag & FMODELS)) {
28299 	case DDI_MODEL_ILP32:
28300 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28301 			return (EFAULT);
28302 		}
28303 		/* Convert the ILP32 uscsi data from the application to LP64 */
28304 		cdrom_read32tocdrom_read(cdrd32, mode1);
28305 		break;
28306 	case DDI_MODEL_NONE:
28307 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28308 			return (EFAULT);
28309 		}
28310 	}
28311 #else /* ! _MULTI_DATAMODEL */
28312 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28313 		return (EFAULT);
28314 	}
28315 #endif /* _MULTI_DATAMODEL */
28316 
28317 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28318 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28319 
28320 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28321 	    "sr_read_mode1: exit: un:0x%p\n", un);
28322 
28323 	return (rval);
28324 }
28325 
28326 
28327 /*
28328  *    Function: sr_read_cd_mode2()
28329  *
28330  * Description: This routine is the driver entry point for handling CD-ROM
28331  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28332  *		support the READ CD (0xBE) command or the 1st generation
28333  *		READ CD (0xD4) command.
28334  *
28335  *   Arguments: dev	- the device 'dev_t'
28336  *		data	- pointer to user provided cd read structure specifying
28337  *			  the lba, buffer address and length.
28338  *		flag	- this argument is a pass through to ddi_copyxxx()
28339  *			  directly from the mode argument of ioctl().
28340  *
28341  * Return Code: the code returned by sd_send_scsi_cmd()
28342  *		EFAULT if ddi_copyxxx() fails
28343  *		ENXIO if ddi_get_soft_state() fails
28344  *		EINVAL if data pointer is NULL
28345  */
28346 
28347 static int
28348 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28349 {
28350 	struct sd_lun		*un;
28351 	struct uscsi_cmd	*com;
28352 	struct cdrom_read	mode2_struct;
28353 	struct cdrom_read	*mode2 = &mode2_struct;
28354 	uchar_t			cdb[CDB_GROUP5];
28355 	int			nblocks;
28356 	int			rval;
28357 #ifdef _MULTI_DATAMODEL
28358 	/*  To support ILP32 applications in an LP64 world */
28359 	struct cdrom_read32	cdrom_read32;
28360 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28361 #endif /* _MULTI_DATAMODEL */
28362 
28363 	if (data == NULL) {
28364 		return (EINVAL);
28365 	}
28366 
28367 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28368 	    (un->un_state == SD_STATE_OFFLINE)) {
28369 		return (ENXIO);
28370 	}
28371 
28372 #ifdef _MULTI_DATAMODEL
28373 	switch (ddi_model_convert_from(flag & FMODELS)) {
28374 	case DDI_MODEL_ILP32:
28375 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28376 			return (EFAULT);
28377 		}
28378 		/* Convert the ILP32 uscsi data from the application to LP64 */
28379 		cdrom_read32tocdrom_read(cdrd32, mode2);
28380 		break;
28381 	case DDI_MODEL_NONE:
28382 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28383 			return (EFAULT);
28384 		}
28385 		break;
28386 	}
28387 
28388 #else /* ! _MULTI_DATAMODEL */
28389 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28390 		return (EFAULT);
28391 	}
28392 #endif /* _MULTI_DATAMODEL */
28393 
28394 	bzero(cdb, sizeof (cdb));
28395 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28396 		/* Read command supported by 1st generation atapi drives */
28397 		cdb[0] = SCMD_READ_CDD4;
28398 	} else {
28399 		/* Universal CD Access Command */
28400 		cdb[0] = SCMD_READ_CD;
28401 	}
28402 
28403 	/*
28404 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28405 	 */
28406 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28407 
28408 	/* set the start address */
28409 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0xFF);
28410 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0xFF);
28411 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28412 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28413 
28414 	/* set the transfer length */
28415 	nblocks = mode2->cdread_buflen / 2336;
28416 	cdb[6] = (uchar_t)(nblocks >> 16);
28417 	cdb[7] = (uchar_t)(nblocks >> 8);
28418 	cdb[8] = (uchar_t)nblocks;
28419 
28420 	/* set the filter bits */
28421 	cdb[9] = CDROM_READ_CD_USERDATA;
28422 
28423 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28424 	com->uscsi_cdb = (caddr_t)cdb;
28425 	com->uscsi_cdblen = sizeof (cdb);
28426 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28427 	com->uscsi_buflen = mode2->cdread_buflen;
28428 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28429 
28430 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28431 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28432 	kmem_free(com, sizeof (*com));
28433 	return (rval);
28434 }
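
/*
 * The CDB fields above are packed most significant byte first.  A
 * hypothetical helper (a sketch; the driver packs the bytes inline as
 * shown above) performing the same 4 byte big-endian store would be:
 *
 *	static void
 *	sd_put_be32(uchar_t *p, uint32_t val)
 *	{
 *		p[0] = (uchar_t)((val >> 24) & 0xFF);
 *		p[1] = (uchar_t)((val >> 16) & 0xFF);
 *		p[2] = (uchar_t)((val >> 8) & 0xFF);
 *		p[3] = (uchar_t)(val & 0xFF);
 *	}
 *
 * so the start address setup above is equivalent to
 * sd_put_be32(&cdb[2], mode2->cdread_lba).
 */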
28435 
28436 
28437 /*
28438  *    Function: sr_read_mode2()
28439  *
28440  * Description: This routine is the driver entry point for handling CD-ROM
28441  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28442  *		do not support the READ CD (0xBE) command.
28443  *
28444  *   Arguments: dev	- the device 'dev_t'
28445  *		data	- pointer to user provided cd read structure specifying
28446  *			  the lba, buffer address and length.
28447  *		flag	- this argument is a pass through to ddi_copyxxx()
28448  *			  directly from the mode argument of ioctl().
28449  *
28450  * Return Code: the code returned by sd_send_scsi_cmd()
28451  *		EFAULT if ddi_copyxxx() fails
28452  *		ENXIO if ddi_get_soft_state() fails
28453  *		EINVAL if data pointer is NULL
28454  *		EIO if the block size cannot be reset
28455  *		EAGAIN if commands are in progress in the driver
28456  */
28457 
28458 static int
28459 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28460 {
28461 	struct sd_lun		*un;
28462 	struct cdrom_read	mode2_struct;
28463 	struct cdrom_read	*mode2 = &mode2_struct;
28464 	int			rval;
28465 	uint32_t		restore_blksize;
28466 	struct uscsi_cmd	*com;
28467 	uchar_t			cdb[CDB_GROUP0];
28468 	int			nblocks;
28469 
28470 #ifdef _MULTI_DATAMODEL
28471 	/* To support ILP32 applications in an LP64 world */
28472 	struct cdrom_read32	cdrom_read32;
28473 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28474 #endif /* _MULTI_DATAMODEL */
28475 
28476 	if (data == NULL) {
28477 		return (EINVAL);
28478 	}
28479 
28480 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28481 	    (un->un_state == SD_STATE_OFFLINE)) {
28482 		return (ENXIO);
28483 	}
28484 
28485 	/*
28486 	 * Because this routine will update the device and driver block size
28487 	 * being used, we want to make sure there are no commands in progress.
28488 	 * If commands are in progress the user will have to try again.
28489 	 *
28490 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28491 	 * in sdioctl to protect commands from sdioctl through to the top of
28492 	 * sd_uscsi_strategy. See sdioctl for details.
28493 	 */
28494 	mutex_enter(SD_MUTEX(un));
28495 	if (un->un_ncmds_in_driver != 1) {
28496 		mutex_exit(SD_MUTEX(un));
28497 		return (EAGAIN);
28498 	}
28499 	mutex_exit(SD_MUTEX(un));
28500 
28501 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28502 	    "sr_read_mode2: entry: un:0x%p\n", un);
28503 
28504 #ifdef _MULTI_DATAMODEL
28505 	switch (ddi_model_convert_from(flag & FMODELS)) {
28506 	case DDI_MODEL_ILP32:
28507 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28508 			return (EFAULT);
28509 		}
28510 		/* Convert the ILP32 uscsi data from the application to LP64 */
28511 		cdrom_read32tocdrom_read(cdrd32, mode2);
28512 		break;
28513 	case DDI_MODEL_NONE:
28514 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28515 			return (EFAULT);
28516 		}
28517 		break;
28518 	}
28519 #else /* ! _MULTI_DATAMODEL */
28520 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28521 		return (EFAULT);
28522 	}
28523 #endif /* _MULTI_DATAMODEL */
28524 
28525 	/* Store the current target block size for restoration later */
28526 	restore_blksize = un->un_tgt_blocksize;
28527 
28528 	/* Change the device and soft state target block size to 2336 */
28529 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28530 		rval = EIO;
28531 		goto done;
28532 	}
28533 
28534 
28535 	bzero(cdb, sizeof (cdb));
28536 
28537 	/* set READ operation */
28538 	cdb[0] = SCMD_READ;
28539 
28540 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28541 	mode2->cdread_lba >>= 2;
28542 
28543 	/* set the start address */
28544 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0x1F);
28545 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28546 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28547 
28548 	/* set the transfer length */
28549 	nblocks = mode2->cdread_buflen / 2336;
28550 	cdb[4] = (uchar_t)nblocks & 0xFF;
28551 
28552 	/* build command */
28553 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28554 	com->uscsi_cdb = (caddr_t)cdb;
28555 	com->uscsi_cdblen = sizeof (cdb);
28556 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28557 	com->uscsi_buflen = mode2->cdread_buflen;
28558 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28559 
28560 	/*
28561 	 * Issue SCSI command with user space address for read buffer.
28562 	 *
28563 	 * This sends the command through main channel in the driver.
28564 	 *
28565 	 * Since this is accessed via an IOCTL call, we go through the
28566 	 * standard path, so that if the device was powered down, then
28567 	 * it would be 'awakened' to handle the command.
28568 	 */
28569 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28570 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28571 
28572 	kmem_free(com, sizeof (*com));
28573 
28574 	/* Restore the device and soft state target block size */
28575 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28576 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28577 		    "can't do switch back to mode 1\n");
28578 		/*
28579 		 * If sd_send_scsi_READ succeeded we still need to report
28580 		 * an error because we failed to reset the block size
28581 		 */
28582 		if (rval == 0) {
28583 			rval = EIO;
28584 		}
28585 	}
28586 
28587 done:
28588 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28589 	    "sr_read_mode2: exit: un:0x%p\n", un);
28590 
28591 	return (rval);
28592 }
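
/*
 * Illustrative usage (a sketch, not part of the driver): CDROMREADMODE2
 * returns 2336 byte mode 2 sectors, so the user buffer should be a
 * multiple of 2336 bytes.  Since sr_read_mode2() returns EAGAIN while
 * other commands are in progress, a caller may need to retry.  The
 * device path and LBA are hypothetical.
 *
 *	struct cdrom_read cr;
 *	char buf[2336];
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *
 *	cr.cdread_lba = 16;
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = sizeof (buf);
 *	while (ioctl(fd, CDROMREADMODE2, &cr) < 0 && errno == EAGAIN)
 *		;
 */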
28593 
28594 
28595 /*
28596  *    Function: sr_sector_mode()
28597  *
28598  * Description: This utility function is used by sr_read_mode2 to set the
28599  *		target block size based on the user specified size. This is
28600  *		a legacy implementation based upon a vendor specific mode
28601  *		page.
28602  *
28603  *   Arguments: dev	- the device 'dev_t'
28604  *		blksize	- the block size to set, either 2336
28605  *			  (SD_MODE2_BLKSIZE) or 512.
28606  *
28607  * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
28608  *		sd_send_scsi_MODE_SELECT()
28609  *		ENXIO if ddi_get_soft_state() fails or the device is offline
28610  */
28611 
28612 static int
28613 sr_sector_mode(dev_t dev, uint32_t blksize)
28614 {
28615 	struct sd_lun	*un;
28616 	uchar_t		*sense;
28617 	uchar_t		*select;
28618 	int		rval;
28619 
28620 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28621 	    (un->un_state == SD_STATE_OFFLINE)) {
28622 		return (ENXIO);
28623 	}
28624 
28625 	sense = kmem_zalloc(20, KM_SLEEP);
28626 
28627 	/* Note: This is a vendor specific mode page (0x81) */
28628 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28629 	    SD_PATH_STANDARD)) != 0) {
28630 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28631 		    "sr_sector_mode: Mode Sense failed\n");
28632 		kmem_free(sense, 20);
28633 		return (rval);
28634 	}
28635 	select = kmem_zalloc(20, KM_SLEEP);
28636 	select[3] = 0x08;
28637 	select[10] = ((blksize >> 8) & 0xff);
28638 	select[11] = (blksize & 0xff);
28639 	select[12] = 0x01;
28640 	select[13] = 0x06;
28641 	select[14] = sense[14];
28642 	select[15] = sense[15];
28643 	if (blksize == SD_MODE2_BLKSIZE) {
28644 		select[14] |= 0x01;
28645 	}
28646 
28647 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28648 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28649 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28650 		    "sr_sector_mode: Mode Select failed\n");
28651 	} else {
28652 		/*
28653 		 * Only update the softstate block size if we successfully
28654 		 * changed the device block mode.
28655 		 */
28656 		mutex_enter(SD_MUTEX(un));
28657 		sd_update_block_info(un, blksize, 0);
28658 		mutex_exit(SD_MUTEX(un));
28659 	}
28660 	kmem_free(sense, 20);
28661 	kmem_free(select, 20);
28662 	return (rval);
28663 }
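
/*
 * Worked example of the mode select data built above (a sketch): for
 * blksize == SD_MODE2_BLKSIZE (2336, i.e. 0x920) the vendor specific
 * page bytes are set to
 *
 *	select[10] = (2336 >> 8) & 0xff = 0x09
 *	select[11] =  2336 & 0xff       = 0x20
 *	select[14] |= 0x01	(bit 0 set only for the 2336 byte mode)
 *
 * while for blksize == 512 (0x200) bytes 10 and 11 become 0x02 and
 * 0x00, and byte 14 is carried over from the mode sense data unchanged.
 */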
28664 
28665 
28666 /*
28667  *    Function: sr_read_cdda()
28668  *
28669  * Description: This routine is the driver entry point for handling CD-ROM
28670  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
28671  *		the target supports CDDA these requests are handled via a vendor
28672  *		specific command (0xD8). If the target does not support CDDA
28673  *		these requests are handled via the READ CD command (0xBE).
28674  *
28675  *   Arguments: dev	- the device 'dev_t'
28676  *		data	- pointer to user provided CD-DA structure specifying
28677  *			  the track starting address, transfer length, and
28678  *			  subcode options.
28679  *		flag	- this argument is a pass through to ddi_copyxxx()
28680  *			  directly from the mode argument of ioctl().
28681  *
28682  * Return Code: the code returned by sd_send_scsi_cmd()
28683  *		EFAULT if ddi_copyxxx() fails
28684  *		ENXIO if ddi_get_soft_state() fails
28685  *		EINVAL if invalid arguments are provided
28686  *		ENOTTY
28687  */
28688 
28689 static int
28690 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28691 {
28692 	struct sd_lun			*un;
28693 	struct uscsi_cmd		*com;
28694 	struct cdrom_cdda		*cdda;
28695 	int				rval;
28696 	size_t				buflen;
28697 	char				cdb[CDB_GROUP5];
28698 
28699 #ifdef _MULTI_DATAMODEL
28700 	/* To support ILP32 applications in an LP64 world */
28701 	struct cdrom_cdda32	cdrom_cdda32;
28702 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28703 #endif /* _MULTI_DATAMODEL */
28704 
28705 	if (data == NULL) {
28706 		return (EINVAL);
28707 	}
28708 
28709 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28710 		return (ENXIO);
28711 	}
28712 
28713 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28714 
28715 #ifdef _MULTI_DATAMODEL
28716 	switch (ddi_model_convert_from(flag & FMODELS)) {
28717 	case DDI_MODEL_ILP32:
28718 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28719 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28720 			    "sr_read_cdda: ddi_copyin Failed\n");
28721 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28722 			return (EFAULT);
28723 		}
28724 		/* Convert the ILP32 uscsi data from the application to LP64 */
28725 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28726 		break;
28727 	case DDI_MODEL_NONE:
28728 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28729 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28730 			    "sr_read_cdda: ddi_copyin Failed\n");
28731 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28732 			return (EFAULT);
28733 		}
28734 		break;
28735 	}
28736 #else /* ! _MULTI_DATAMODEL */
28737 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28738 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28739 		    "sr_read_cdda: ddi_copyin Failed\n");
28740 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28741 		return (EFAULT);
28742 	}
28743 #endif /* _MULTI_DATAMODEL */
28744 
28745 	/*
28746 	 * Since MMC-2 expects at most 3 bytes for the length, reject any
28747 	 * length input that does not fit in 3 bytes.
28748 	 */
28749 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28750 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28751 		    "cdrom transfer length too large: %d (limit %d)\n",
28752 		    cdda->cdda_length, 0xFFFFFF);
28753 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28754 		return (EINVAL);
28755 	}
28756 
28757 	switch (cdda->cdda_subcode) {
28758 	case CDROM_DA_NO_SUBCODE:
28759 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28760 		break;
28761 	case CDROM_DA_SUBQ:
28762 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28763 		break;
28764 	case CDROM_DA_ALL_SUBCODE:
28765 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28766 		break;
28767 	case CDROM_DA_SUBCODE_ONLY:
28768 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28769 		break;
28770 	default:
28771 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28772 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28773 		    cdda->cdda_subcode);
28774 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28775 		return (EINVAL);
28776 	}
28777 
28778 	/* Build and send the command */
28779 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28780 	bzero(cdb, CDB_GROUP5);
28781 
28782 	if (un->un_f_cfg_cdda == TRUE) {
28783 		cdb[0] = (char)SCMD_READ_CD;
28784 		cdb[1] = 0x04;
28785 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28786 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28787 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28788 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28789 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28790 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28791 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28792 		cdb[9] = 0x10;
28793 		switch (cdda->cdda_subcode) {
28794 		case CDROM_DA_NO_SUBCODE :
28795 			cdb[10] = 0x0;
28796 			break;
28797 		case CDROM_DA_SUBQ :
28798 			cdb[10] = 0x2;
28799 			break;
28800 		case CDROM_DA_ALL_SUBCODE :
28801 			cdb[10] = 0x1;
28802 			break;
28803 		case CDROM_DA_SUBCODE_ONLY :
28804 			/* FALLTHROUGH */
28805 		default :
28806 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28807 			kmem_free(com, sizeof (*com));
28808 			return (ENOTTY);
28809 		}
28810 	} else {
28811 		cdb[0] = (char)SCMD_READ_CDDA;
28812 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28813 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28814 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28815 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28816 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28817 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28818 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28819 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28820 		cdb[10] = cdda->cdda_subcode;
28821 	}
28822 
28823 	com->uscsi_cdb = cdb;
28824 	com->uscsi_cdblen = CDB_GROUP5;
28825 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28826 	com->uscsi_buflen = buflen;
28827 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28828 
28829 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28830 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28831 
28832 	kmem_free(cdda, sizeof (struct cdrom_cdda));
28833 	kmem_free(com, sizeof (*com));
28834 	return (rval);
28835 }
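
/*
 * Illustrative usage (a sketch, not part of the driver): an application
 * reaches sr_read_cdda() via the CDROMCDDA ioctl.  The starting address
 * and length are hypothetical; with CDROM_DA_NO_SUBCODE each block is
 * CDROM_BLK_2352 (2352) bytes of audio data.
 *
 *	struct cdrom_cdda cdda;
 *	uchar_t buf[2352 * 8];
 *
 *	cdda.cdda_addr = 0;
 *	cdda.cdda_length = 8;
 *	cdda.cdda_data = (caddr_t)buf;
 *	cdda.cdda_subcode = CDROM_DA_NO_SUBCODE;
 *	if (ioctl(fd, CDROMCDDA, &cdda) < 0)
 *		perror("CDROMCDDA");
 */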
28836 
28837 
28838 /*
28839  *    Function: sr_read_cdxa()
28840  *
28841  * Description: This routine is the driver entry point for handling CD-ROM
28842  *		ioctl requests to return CD-XA (Extended Architecture) data.
28843  *		(CDROMCDXA).
28844  *
28845  *   Arguments: dev	- the device 'dev_t'
28846  *		data	- pointer to user provided CD-XA structure specifying
28847  *			  the data starting address, transfer length, and format
28848  *		flag	- this argument is a pass through to ddi_copyxxx()
28849  *			  directly from the mode argument of ioctl().
28850  *
28851  * Return Code: the code returned by sd_send_scsi_cmd()
28852  *		EFAULT if ddi_copyxxx() fails
28853  *		ENXIO if ddi_get_soft_state() fails
28854  *		EINVAL if data pointer is NULL
28855  */
28856 
28857 static int
28858 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
28859 {
28860 	struct sd_lun		*un;
28861 	struct uscsi_cmd	*com;
28862 	struct cdrom_cdxa	*cdxa;
28863 	int			rval;
28864 	size_t			buflen;
28865 	char			cdb[CDB_GROUP5];
28866 	uchar_t			read_flags;
28867 
28868 #ifdef _MULTI_DATAMODEL
28869 	/* To support ILP32 applications in an LP64 world */
28870 	struct cdrom_cdxa32		cdrom_cdxa32;
28871 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
28872 #endif /* _MULTI_DATAMODEL */
28873 
28874 	if (data == NULL) {
28875 		return (EINVAL);
28876 	}
28877 
28878 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28879 		return (ENXIO);
28880 	}
28881 
28882 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
28883 
28884 #ifdef _MULTI_DATAMODEL
28885 	switch (ddi_model_convert_from(flag & FMODELS)) {
28886 	case DDI_MODEL_ILP32:
28887 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
28888 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28889 			return (EFAULT);
28890 		}
28891 		/*
28892 		 * Convert the ILP32 uscsi data from the
28893 		 * application to LP64 for internal use.
28894 		 */
28895 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
28896 		break;
28897 	case DDI_MODEL_NONE:
28898 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28899 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28900 			return (EFAULT);
28901 		}
28902 		break;
28903 	}
28904 #else /* ! _MULTI_DATAMODEL */
28905 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28906 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28907 		return (EFAULT);
28908 	}
28909 #endif /* _MULTI_DATAMODEL */
28910 
28911 	/*
28912 	 * Since MMC-2 expects at most 3 bytes for the length, reject any
28913 	 * length input that does not fit in 3 bytes.
28914 	 */
28915 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
28916 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
28917 		    "cdrom transfer length too large: %d (limit %d)\n",
28918 		    cdxa->cdxa_length, 0xFFFFFF);
28919 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28920 		return (EINVAL);
28921 	}
28922 
28923 	switch (cdxa->cdxa_format) {
28924 	case CDROM_XA_DATA:
28925 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
28926 		read_flags = 0x10;
28927 		break;
28928 	case CDROM_XA_SECTOR_DATA:
28929 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
28930 		read_flags = 0xf8;
28931 		break;
28932 	case CDROM_XA_DATA_W_ERROR:
28933 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
28934 		read_flags = 0xfc;
28935 		break;
28936 	default:
28937 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28938 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
28939 		    cdxa->cdxa_format);
28940 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28941 		return (EINVAL);
28942 	}
28943 
28944 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28945 	bzero(cdb, CDB_GROUP5);
28946 	if (un->un_f_mmc_cap == TRUE) {
28947 		cdb[0] = (char)SCMD_READ_CD;
28948 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28949 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28950 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28951 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28952 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28953 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28954 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
28955 		cdb[9] = (char)read_flags;
28956 	} else {
28957 		/*
28958 		 * Note: A vendor specific command (0xDB) is being used here to
28959 		 * request a read of all subcodes.
28960 		 */
28961 		cdb[0] = (char)SCMD_READ_CDXA;
28962 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28963 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28964 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28965 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28966 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
28967 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28968 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28969 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
28970 		cdb[10] = cdxa->cdxa_format;
28971 	}
28972 	com->uscsi_cdb	   = cdb;
28973 	com->uscsi_cdblen  = CDB_GROUP5;
28974 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
28975 	com->uscsi_buflen  = buflen;
28976 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28977 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28978 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28979 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28980 	kmem_free(com, sizeof (*com));
28981 	return (rval);
28982 }
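
/*
 * Illustrative usage (a sketch, not part of the driver): an application
 * reaches sr_read_cdxa() via the CDROMCDXA ioctl.  The values are
 * hypothetical; with CDROM_XA_DATA each block returns CDROM_BLK_2048
 * (2048) bytes of user data.
 *
 *	struct cdrom_cdxa cdxa;
 *	uchar_t buf[2048 * 4];
 *
 *	cdxa.cdxa_addr = 0;
 *	cdxa.cdxa_length = 4;
 *	cdxa.cdxa_data = (caddr_t)buf;
 *	cdxa.cdxa_format = CDROM_XA_DATA;
 *	if (ioctl(fd, CDROMCDXA, &cdxa) < 0)
 *		perror("CDROMCDXA");
 */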
28983 
28984 
28985 /*
28986  *    Function: sr_eject()
28987  *
28988  * Description: This routine is the driver entry point for handling CD-ROM
28989  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28990  *
28991  *   Arguments: dev	- the device 'dev_t'
28992  *
28993  * Return Code: the code returned by sd_send_scsi_cmd()
28994  */
28995 
28996 static int
28997 sr_eject(dev_t dev)
28998 {
28999 	struct sd_lun	*un;
29000 	int		rval;
29001 
29002 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29003 	    (un->un_state == SD_STATE_OFFLINE)) {
29004 		return (ENXIO);
29005 	}
29006 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
29007 	    SD_PATH_STANDARD)) != 0) {
29008 		return (rval);
29009 	}
29010 
29011 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
29012 	    SD_PATH_STANDARD);
29013 
29014 	if (rval == 0) {
29015 		mutex_enter(SD_MUTEX(un));
29016 		sr_ejected(un);
29017 		un->un_mediastate = DKIO_EJECTED;
29018 		cv_broadcast(&un->un_state_cv);
29019 		mutex_exit(SD_MUTEX(un));
29020 	}
29021 	return (rval);
29022 }
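
/*
 * Illustrative usage (a sketch, not part of the driver): the eject path
 * above is reached from user level with, e.g.,
 *
 *	if (ioctl(fd, CDROMEJECT, 0) < 0)
 *		perror("CDROMEJECT");
 *
 * where fd is an open descriptor for the device.
 */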
29023 
29024 
29025 /*
29026  *    Function: sr_ejected()
29027  *
29028  * Description: This routine updates the soft state structure to invalidate the
29029  *		geometry information after the media has been ejected or a
29030  *		media eject has been detected.
29031  *
29032  *   Arguments: un - driver soft state (unit) structure
29033  */
29034 
29035 static void
29036 sr_ejected(struct sd_lun *un)
29037 {
29038 	struct sd_errstats *stp;
29039 
29040 	ASSERT(un != NULL);
29041 	ASSERT(mutex_owned(SD_MUTEX(un)));
29042 
29043 	un->un_f_blockcount_is_valid	= FALSE;
29044 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29045 	un->un_f_geometry_is_valid	= FALSE;
29046 
29047 	if (un->un_errstats != NULL) {
29048 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29049 		stp->sd_capacity.value.ui64 = 0;
29050 	}
29051 }
29052 
29053 
29054 /*
29055  *    Function: sr_check_wp()
29056  *
29057  * Description: This routine checks the write protection of removable
29058  *      media disks and hotpluggable devices via the write protect bit of
29059  *      the Mode Page Header device specific field. Some devices choke
29060  *      on an unsupported mode page. To work around this issue,
29061  *      this routine has been implemented to use the 0x3f mode page
29062  *      (request for all pages) for all device types.
29063  *
29064  *   Arguments: dev		- the device 'dev_t'
29065  *
29066  * Return Code: int indicating if the device is write protected (1) or not (0)
29067  *
29068  *     Context: Kernel thread.
29069  *
29070  */
29071 
29072 static int
29073 sr_check_wp(dev_t dev)
29074 {
29075 	struct sd_lun	*un;
29076 	uchar_t		device_specific;
29077 	uchar_t		*sense;
29078 	int		hdrlen;
29079 	int		rval = FALSE;
29080 
29081 	/*
29082 	 * Note: The return codes for this routine should be reworked to
29083 	 * properly handle the case of a NULL softstate.
29084 	 */
29085 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29086 		return (FALSE);
29087 	}
29088 
29089 	if (un->un_f_cfg_is_atapi == TRUE) {
29090 		/*
29091 		 * The mode page contents are not required; set the allocation
29092 		 * length for the mode page header only
29093 		 */
29094 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29095 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29096 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
29097 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29098 			goto err_exit;
29099 		device_specific =
29100 		    ((struct mode_header_grp2 *)sense)->device_specific;
29101 	} else {
29102 		hdrlen = MODE_HEADER_LENGTH;
29103 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29104 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
29105 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29106 			goto err_exit;
29107 		device_specific =
29108 		    ((struct mode_header *)sense)->device_specific;
29109 	}
29110 
29111 	/*
29112 	 * If the mode sense above failed we return FALSE; not all devices
29113 	 * understand this query, so assume they are not write protected.
29114 	 * Otherwise report the state of the write protect bit.
29115 	 */
29116 	if (device_specific & WRITE_PROTECT) {
29117 		rval = TRUE;
29118 	}
29119 
29120 err_exit:
29121 	kmem_free(sense, hdrlen);
29122 	return (rval);
29123 }
29124 
29125 /*
29126  *    Function: sr_volume_ctrl()
29127  *
29128  * Description: This routine is the driver entry point for handling CD-ROM
29129  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29130  *
29131  *   Arguments: dev	- the device 'dev_t'
29132  *		data	- pointer to user audio volume control structure
29133  *		flag	- this argument is a pass through to ddi_copyxxx()
29134  *			  directly from the mode argument of ioctl().
29135  *
29136  * Return Code: the code returned by sd_send_scsi_cmd()
29137  *		EFAULT if ddi_copyxxx() fails
29138  *		ENXIO if ddi_get_soft_state() fails
29139  *		EINVAL if data pointer is NULL
29140  *
29141  */
29142 
29143 static int
29144 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
29145 {
29146 	struct sd_lun		*un;
29147 	struct cdrom_volctrl    volume;
29148 	struct cdrom_volctrl    *vol = &volume;
29149 	uchar_t			*sense_page;
29150 	uchar_t			*select_page;
29151 	uchar_t			*sense;
29152 	uchar_t			*select;
29153 	int			sense_buflen;
29154 	int			select_buflen;
29155 	int			rval;
29156 
29157 	if (data == NULL) {
29158 		return (EINVAL);
29159 	}
29160 
29161 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29162 	    (un->un_state == SD_STATE_OFFLINE)) {
29163 		return (ENXIO);
29164 	}
29165 
29166 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
29167 		return (EFAULT);
29168 	}
29169 
29170 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29171 		struct mode_header_grp2		*sense_mhp;
29172 		struct mode_header_grp2		*select_mhp;
29173 		int				bd_len;
29174 
29175 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
29176 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
29177 		    MODEPAGE_AUDIO_CTRL_LEN;
29178 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29179 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29180 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
29181 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29182 		    SD_PATH_STANDARD)) != 0) {
29183 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29184 			    "sr_volume_ctrl: Mode Sense Failed\n");
29185 			kmem_free(sense, sense_buflen);
29186 			kmem_free(select, select_buflen);
29187 			return (rval);
29188 		}
29189 		sense_mhp = (struct mode_header_grp2 *)sense;
29190 		select_mhp = (struct mode_header_grp2 *)select;
29191 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
29192 		    sense_mhp->bdesc_length_lo;
29193 		if (bd_len > MODE_BLK_DESC_LENGTH) {
29194 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29195 			    "sr_volume_ctrl: Mode Sense returned invalid "
29196 			    "block descriptor length\n");
29197 			kmem_free(sense, sense_buflen);
29198 			kmem_free(select, select_buflen);
29199 			return (EIO);
29200 		}
29201 		sense_page = (uchar_t *)
29202 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
29203 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
29204 		select_mhp->length_msb = 0;
29205 		select_mhp->length_lsb = 0;
29206 		select_mhp->bdesc_length_hi = 0;
29207 		select_mhp->bdesc_length_lo = 0;
29208 	} else {
29209 		struct mode_header		*sense_mhp, *select_mhp;
29210 
29211 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29212 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29213 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29214 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29215 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
29216 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29217 		    SD_PATH_STANDARD)) != 0) {
29218 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29219 			    "sr_volume_ctrl: Mode Sense Failed\n");
29220 			kmem_free(sense, sense_buflen);
29221 			kmem_free(select, select_buflen);
29222 			return (rval);
29223 		}
29224 		sense_mhp  = (struct mode_header *)sense;
29225 		select_mhp = (struct mode_header *)select;
29226 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29227 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29228 			    "sr_volume_ctrl: Mode Sense returned invalid "
29229 			    "block descriptor length\n");
29230 			kmem_free(sense, sense_buflen);
29231 			kmem_free(select, select_buflen);
29232 			return (EIO);
29233 		}
29234 		sense_page = (uchar_t *)
29235 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29236 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29237 		select_mhp->length = 0;
29238 		select_mhp->bdesc_length = 0;
29239 	}
29240 	/*
29241 	 * Note: An audio control data structure could be created and overlaid
29242 	 * on the following in place of the array indexing method implemented.
29243 	 */
29244 
29245 	/* Build the select data for the user volume data */
29246 	select_page[0] = MODEPAGE_AUDIO_CTRL;
29247 	select_page[1] = 0xE;
29248 	/* Set the immediate bit */
29249 	select_page[2] = 0x04;
29250 	/* Zero out reserved fields */
29251 	select_page[3] = 0x00;
29252 	select_page[4] = 0x00;
29253 	/* Return sense data for fields not to be modified */
29254 	select_page[5] = sense_page[5];
29255 	select_page[6] = sense_page[6];
29256 	select_page[7] = sense_page[7];
29257 	/* Set the user specified volume levels for channel 0 and 1 */
29258 	select_page[8] = 0x01;
29259 	select_page[9] = vol->channel0;
29260 	select_page[10] = 0x02;
29261 	select_page[11] = vol->channel1;
29262 	/* Channels 2 and 3 are currently unsupported so return the sense data */
29263 	select_page[12] = sense_page[12];
29264 	select_page[13] = sense_page[13];
29265 	select_page[14] = sense_page[14];
29266 	select_page[15] = sense_page[15];
29267 
29268 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29269 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
29270 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29271 	} else {
29272 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
29273 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29274 	}
29275 
29276 	kmem_free(sense, sense_buflen);
29277 	kmem_free(select, select_buflen);
29278 	return (rval);
29279 }
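
/*
 * Illustrative usage (a sketch, not part of the driver): the volume of
 * channels 0 and 1 is set from user level via CDROMVOLCTRL.  The values
 * are hypothetical; 0 typically mutes a channel and 0xff is full volume.
 *
 *	struct cdrom_volctrl vc;
 *
 *	vc.channel0 = 0xff;
 *	vc.channel1 = 0xff;
 *	if (ioctl(fd, CDROMVOLCTRL, &vc) < 0)
 *		perror("CDROMVOLCTRL");
 */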
29280 
29281 
29282 /*
29283  *    Function: sr_read_sony_session_offset()
29284  *
29285  * Description: This routine is the driver entry point for handling CD-ROM
29286  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29287  *		ioctl requests for session offset information (CDROMREADOFFSET).
29288  *		The address of the first track in the last session of a
29289  *		multi-session CD-ROM is returned.
29290  *		Note: This routine uses a vendor specific key value in the
29291  *		command control field without implementing any vendor check here
29292  *		or in the ioctl routine.
29293  *
29294  *   Arguments: dev	- the device 'dev_t'
29295  *		data	- pointer to an int to hold the requested address
29296  *		flag	- this argument is a pass through to ddi_copyxxx()
29297  *			  directly from the mode argument of ioctl().
29298  *
29299  * Return Code: the code returned by sd_send_scsi_cmd()
29300  *		EFAULT if ddi_copyxxx() fails
29301  *		ENXIO if ddi_get_soft_state() fails
29302  *		EINVAL if data pointer is NULL
29303  */
29304 
29305 static int
29306 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29307 {
29308 	struct sd_lun		*un;
29309 	struct uscsi_cmd	*com;
29310 	caddr_t			buffer;
29311 	char			cdb[CDB_GROUP1];
29312 	int			session_offset = 0;
29313 	int			rval;
29314 
29315 	if (data == NULL) {
29316 		return (EINVAL);
29317 	}
29318 
29319 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29320 	    (un->un_state == SD_STATE_OFFLINE)) {
29321 		return (ENXIO);
29322 	}
29323 
29324 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29325 	bzero(cdb, CDB_GROUP1);
29326 	cdb[0] = SCMD_READ_TOC;
29327 	/*
29328 	 * Bytes 7 & 8 hold the 12 byte allocation length for a single entry
29329 	 * (4 byte TOC response header + 8 bytes of response data).
29330 	 */
29331 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29332 	/* Byte 9 is the control byte. A vendor specific value is used */
29333 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29334 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29335 	com->uscsi_cdb = cdb;
29336 	com->uscsi_cdblen = CDB_GROUP1;
29337 	com->uscsi_bufaddr = buffer;
29338 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29339 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29340 
29341 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29342 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29343 	if (rval != 0) {
29344 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29345 		kmem_free(com, sizeof (*com));
29346 		return (rval);
29347 	}
29348 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29349 		session_offset =
29350 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29351 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29352 		/*
29353 		 * The offset is returned in units of the current lbasize
29354 		 * blocks. Convert it to 2k blocks before returning to the user.
29355 		 */
29356 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29357 			session_offset >>= 2;
29358 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29359 			session_offset >>= 1;
29360 		}
29361 	}
29362 
29363 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29364 		rval = EFAULT;
29365 	}
29366 
29367 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29368 	kmem_free(com, sizeof (*com));
29369 	return (rval);
29370 }
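
/*
 * Illustrative usage (a sketch, not part of the driver): the session
 * offset is read into an int from user level; per the conversion above
 * it is expressed in 2k blocks.
 *
 *	int offset;
 *
 *	if (ioctl(fd, CDROMREADOFFSET, &offset) < 0)
 *		perror("CDROMREADOFFSET");
 */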
29371 
29372 
29373 /*
29374  *    Function: sd_wm_cache_constructor()
29375  *
29376  * Description: Cache Constructor for the wmap cache for the read/modify/write
29377  * 		devices.
29378  *
29379  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29380  *		un	- sd_lun structure for the device.
29381  *		flags	- the km flags passed to the constructor
29382  *
29383  * Return Code: 0 on success.
29384  *		-1 on failure.
29385  */
29386 
29387 /*ARGSUSED*/
29388 static int
29389 sd_wm_cache_constructor(void *wm, void *un, int flags)
29390 {
29391 	bzero(wm, sizeof (struct sd_w_map));
29392 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29393 	return (0);
29394 }
29395 
29396 
29397 /*
29398  *    Function: sd_wm_cache_destructor()
29399  *
29400  * Description: Cache destructor for the wmap cache for the read/modify/write
29401  * 		devices.
29402  *
29403  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
29404  *		un	- sd_lun structure for the device.
29405  */
29406 /*ARGSUSED*/
29407 static void
29408 sd_wm_cache_destructor(void *wm, void *un)
29409 {
29410 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29411 }
29412 
29413 
29414 /*
29415  *    Function: sd_range_lock()
29416  *
29417  * Description: Lock the specified range of blocks to ensure that a
29418  *		read-modify-write sequence is atomic and no other I/O writes
29419  *		to the same location. The range is specified in terms
29420  *		of start and end blocks. Block numbers are the actual
29421  *		media block numbers, not system block numbers.
29422  *
29423  *   Arguments: un	- sd_lun structure for the device.
29424  *		startb - The starting block number
29425  *		endb - The end block number
29426  *		typ - type of i/o - simple/read_modify_write
29427  *
29428  * Return Code: wm  - pointer to the wmap structure.
29429  *
29430  *     Context: This routine can sleep.
29431  */
29432 
29433 static struct sd_w_map *
29434 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29435 {
29436 	struct sd_w_map *wmp = NULL;
29437 	struct sd_w_map *sl_wmp = NULL;
29438 	struct sd_w_map *tmp_wmp;
29439 	wm_state state = SD_WM_CHK_LIST;
29440 
29441 
29442 	ASSERT(un != NULL);
29443 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29444 
29445 	mutex_enter(SD_MUTEX(un));
29446 
29447 	while (state != SD_WM_DONE) {
29448 
29449 		switch (state) {
29450 		case SD_WM_CHK_LIST:
29451 			/*
29452 			 * This is the starting state. Check the wmap list
29453 			 * to see if the range is currently available.
29454 			 */
29455 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29456 				/*
29457 				 * If this is a simple write and no rmw
29458 				 * i/o is pending then try to lock the
29459 				 * range as the range should be available.
29460 				 */
29461 				state = SD_WM_LOCK_RANGE;
29462 			} else {
29463 				tmp_wmp = sd_get_range(un, startb, endb);
29464 				if (tmp_wmp != NULL) {
29465 					if ((wmp != NULL) && ONLIST(un, wmp)) {
29466 						/*
29467 						 * Should not keep onlist wmps
29468 						 * while waiting; this macro
29469 						 * also sets wmp = NULL.
29470 						 */
29471 						FREE_ONLIST_WMAP(un, wmp);
29472 					}
29473 					/*
29474 					 * sl_wmp is the wmap on which the
29475 					 * wait is done. Since tmp_wmp points
29476 					 * to the in-use wmap, set sl_wmp to
29477 					 * tmp_wmp and change the state to wait.
29478 					 */
29479 					sl_wmp = tmp_wmp;
29480 					state = SD_WM_WAIT_MAP;
29481 				} else {
29482 					state = SD_WM_LOCK_RANGE;
29483 				}
29484 
29485 			}
29486 			break;
29487 
29488 		case SD_WM_LOCK_RANGE:
29489 			ASSERT(un->un_wm_cache);
29490 			/*
29491 			 * The range needs to be locked, so try to get a wmap.
29492 			 * First attempt it with KM_NOSLEEP; we want to avoid
29493 			 * sleeping if possible, as we would have to release the
29494 			 * sd mutex in order to sleep.
29495 			 */
29496 			if (wmp == NULL)
29497 				wmp = kmem_cache_alloc(un->un_wm_cache,
29498 				    KM_NOSLEEP);
29499 			if (wmp == NULL) {
29500 				mutex_exit(SD_MUTEX(un));
29501 				_NOTE(DATA_READABLE_WITHOUT_LOCK
29502 				    (sd_lun::un_wm_cache))
29503 				wmp = kmem_cache_alloc(un->un_wm_cache,
29504 				    KM_SLEEP);
29505 				mutex_enter(SD_MUTEX(un));
29506 				/*
29507 				 * We released the mutex, so recheck by going
29508 				 * back to the check-list state.
29509 				 */
29510 				state = SD_WM_CHK_LIST;
29511 			} else {
29512 				/*
29513 				 * We exit the state machine since we
29514 				 * have the wmap. Do the housekeeping first:
29515 				 * place the wmap on the wmap list if it is not
29516 				 * on it already, then set the state to done.
29517 				 */
29518 				wmp->wm_start = startb;
29519 				wmp->wm_end = endb;
29520 				wmp->wm_flags = typ | SD_WM_BUSY;
29521 				if (typ & SD_WTYPE_RMW) {
29522 					un->un_rmw_count++;
29523 				}
29524 				/*
29525 				 * If not already on the list then link it in.
29526 				 */
29527 				if (!ONLIST(un, wmp)) {
29528 					wmp->wm_next = un->un_wm;
29529 					wmp->wm_prev = NULL;
29530 					if (wmp->wm_next)
29531 						wmp->wm_next->wm_prev = wmp;
29532 					un->un_wm = wmp;
29533 				}
29534 				state = SD_WM_DONE;
29535 			}
29536 			break;
29537 
29538 		case SD_WM_WAIT_MAP:
29539 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29540 			/*
29541 			 * Wait is done on sl_wmp, which is set in the
29542 			 * check_list state.
29543 			 */
29544 			sl_wmp->wm_wanted_count++;
29545 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29546 			sl_wmp->wm_wanted_count--;
29547 			/*
29548 			 * We can reuse the memory from the completed sl_wmp
29549 			 * lock range for our new lock, but only if no one is
29550 			 * waiting for it.
29551 			 */
29552 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
29553 			if (sl_wmp->wm_wanted_count == 0) {
29554 				if (wmp != NULL)
29555 					CHK_N_FREEWMP(un, wmp);
29556 				wmp = sl_wmp;
29557 			}
29558 			sl_wmp = NULL;
29559 			/*
29560 			 * After waking up, need to recheck for availability of
29561 			 * range.
29562 			 */
29563 			state = SD_WM_CHK_LIST;
29564 			break;
29565 
29566 		default:
29567 			panic("sd_range_lock: "
29568 			    "Unknown state %d in sd_range_lock", state);
29569 			/*NOTREACHED*/
29570 		} /* switch(state) */
29571 
29572 	} /* while(state != SD_WM_DONE) */
29573 
29574 	mutex_exit(SD_MUTEX(un));
29575 
29576 	ASSERT(wmp != NULL);
29577 
29578 	return (wmp);
29579 }
29580 
29581 
29582 /*
29583  *    Function: sd_get_range()
29584  *
29585  * Description: Determine whether any outstanding I/O overlaps this one.
29586  *		Returns the write-map of the 1st such I/O, NULL otherwise.
29587  *
29588  *   Arguments: un	- sd_lun structure for the device.
29589  *		startb - The starting block number
29590  *		endb - The end block number
29591  *
29592  * Return Code: wm  - pointer to the wmap structure.
29593  */
29594 
29595 static struct sd_w_map *
29596 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29597 {
29598 	struct sd_w_map *wmp;
29599 
29600 	ASSERT(un != NULL);
29601 
29602 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29603 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29604 			continue;
29605 		}
29606 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29607 			break;
29608 		}
29609 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29610 			break;
29611 		}
29612 	}
29613 
29614 	return (wmp);
29615 }
29616 
29617 
29618 /*
29619  *    Function: sd_free_inlist_wmap()
29620  *
29621  * Description: Unlink and free a write map struct.
29622  *
29623  *   Arguments: un      - sd_lun structure for the device.
29624  *		wmp	- sd_w_map which needs to be unlinked.
29625  */
29626 
29627 static void
29628 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29629 {
29630 	ASSERT(un != NULL);
29631 
29632 	if (un->un_wm == wmp) {
29633 		un->un_wm = wmp->wm_next;
29634 	} else {
29635 		wmp->wm_prev->wm_next = wmp->wm_next;
29636 	}
29637 
29638 	if (wmp->wm_next) {
29639 		wmp->wm_next->wm_prev = wmp->wm_prev;
29640 	}
29641 
29642 	wmp->wm_next = wmp->wm_prev = NULL;
29643 
29644 	kmem_cache_free(un->un_wm_cache, wmp);
29645 }
29646 
29647 
29648 /*
29649  *    Function: sd_range_unlock()
29650  *
29651  * Description: Unlock the range locked by wm.
29652  *		Free write map if nobody else is waiting on it.
29653  *
29654  *   Arguments: un      - sd_lun structure for the device.
29655  *              wm      - sd_w_map for the range to be unlocked.
29656  */
29657 
29658 static void
29659 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29660 {
29661 	ASSERT(un != NULL);
29662 	ASSERT(wm != NULL);
29663 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29664 
29665 	mutex_enter(SD_MUTEX(un));
29666 
29667 	if (wm->wm_flags & SD_WTYPE_RMW) {
29668 		un->un_rmw_count--;
29669 	}
29670 
29671 	if (wm->wm_wanted_count) {
29672 		wm->wm_flags = 0;
29673 		/*
29674 		 * Broadcast that the wmap is available now.
29675 		 */
29676 		cv_broadcast(&wm->wm_avail);
29677 	} else {
29678 		/*
29679 		 * If no one is waiting on the map, it should be free'ed.
29680 		 * If no one is waiting on the map, it should be freed.
29681 		sd_free_inlist_wmap(un, wm);
29682 	}
29683 
29684 	mutex_exit(SD_MUTEX(un));
29685 }
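
/*
 * Sketch of how the range lock primitives above pair up for a
 * read-modify-write sequence (simplified; the actual caller is the
 * mapblocksize layer of this driver):
 *
 *	struct sd_w_map *wm;
 *
 *	wm = sd_range_lock(un, first_blk, last_blk, SD_WTYPE_RMW);
 *	... read the target blocks, modify them, write them back ...
 *	sd_range_unlock(un, wm);
 *
 * sd_range_lock() can sleep, so this sequence must not be run from
 * interrupt context.
 */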
29686 
29687 
29688 /*
29689  *    Function: sd_read_modify_write_task
29690  *
29691  * Description: Called from a taskq thread to initiate the write phase of
29692  *		a read-modify-write request.  This is used for targets where
29693  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29694  *
29695  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29696  *
29697  *     Context: Called under taskq thread context.
29698  */
29699 
29700 static void
29701 sd_read_modify_write_task(void *arg)
29702 {
29703 	struct sd_mapblocksize_info	*bsp;
29704 	struct buf	*bp;
29705 	struct sd_xbuf	*xp;
29706 	struct sd_lun	*un;
29707 
29708 	bp = arg;	/* The bp is given in arg */
29709 	ASSERT(bp != NULL);
29710 
29711 	/* Get the pointer to the layer-private data struct */
29712 	xp = SD_GET_XBUF(bp);
29713 	ASSERT(xp != NULL);
29714 	bsp = xp->xb_private;
29715 	ASSERT(bsp != NULL);
29716 
29717 	un = SD_GET_UN(bp);
29718 	ASSERT(un != NULL);
29719 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29720 
29721 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29722 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29723 
29724 	/*
29725 	 * This is the write phase of a read-modify-write request, called
29726 	 * under the context of a taskq thread in response to the read
29727 	 * portion of the rmw request completing under interrupt
29728 	 * context. The write request must be sent from here down the iostart
29729 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29730 	 * we use the layer index saved in the layer-private data area.
29731 	 */
29732 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29733 
29734 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29735 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29736 }
29737 
29738 
29739 /*
29740  *    Function: sddump_do_read_of_rmw()
29741  *
29742  * Description: This routine will be called from sddump. If sddump is called
29743  *		with an I/O which is not aligned on a device blocksize
29744  *		boundary then the write has to be converted to a
29745  *		read-modify-write. Do the read part here in order to keep
29746  *		sddump simple. Note that the sd_mutex is held across the
29747  *		call to this routine.
29748  *
29749  *   Arguments: un	- sd_lun
29750  *		blkno	- block number in terms of media block size.
29751  *		nblk	- number of blocks.
29752  *		bpp	- pointer to pointer to the buf structure. On return
29753  *			from this function, *bpp points to the valid buffer
29754  *			to which the write has to be done.
29755  *
29756  * Return Code: 0 for success or errno-type return code
29757  */
29758 
29759 static int
29760 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29761 	struct buf **bpp)
29762 {
29763 	int err;
29764 	int i;
29765 	int rval;
29766 	struct buf *bp;
29767 	struct scsi_pkt *pkt = NULL;
29768 	uint32_t target_blocksize;
29769 
29770 	ASSERT(un != NULL);
29771 	ASSERT(mutex_owned(SD_MUTEX(un)));
29772 
29773 	target_blocksize = un->un_tgt_blocksize;
29774 
29775 	mutex_exit(SD_MUTEX(un));
29776 
29777 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29778 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29779 	if (bp == NULL) {
29780 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29781 		    "no resources for dumping; giving up");
29782 		err = ENOMEM;
29783 		goto done;
29784 	}
29785 
29786 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29787 	    blkno, nblk);
29788 	if (rval != 0) {
29789 		scsi_free_consistent_buf(bp);
29790 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29791 		    "no resources for dumping; giving up");
29792 		err = ENOMEM;
29793 		goto done;
29794 	}
29795 
29796 	pkt->pkt_flags |= FLAG_NOINTR;
29797 
29798 	err = EIO;
29799 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29800 
29801 		/*
29802 		 * Scsi_poll returns 0 (success) if the command completes and
29803 		 * scsi_poll() returns 0 (success) if the command completes and
29804 		 * errors if this condition is not true.  Even then we should
29805 		 * send our own request sense packet only if we have a check
29806 		 * condition and auto request sense has not been performed by
29807 		 * the hba.
29808 		 */
29809 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29810 
29811 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29812 			err = 0;
29813 			break;
29814 		}
29815 
29816 		/*
29817 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
29818 		 * no need to read RQS data.
29819 		 */
29820 		if (pkt->pkt_reason == CMD_DEV_GONE) {
29821 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29822 			    "Device is gone\n");
29823 			break;
29824 		}
29825 
29826 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29827 			SD_INFO(SD_LOG_DUMP, un,
29828 			    "sddump: read failed with CHECK, try # %d\n", i);
29829 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29830 				(void) sd_send_polled_RQS(un);
29831 			}
29832 
29833 			continue;
29834 		}
29835 
29836 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
29837 			int reset_retval = 0;
29838 
29839 			SD_INFO(SD_LOG_DUMP, un,
29840 			    "sddump: read failed with BUSY, try # %d\n", i);
29841 
29842 			if (un->un_f_lun_reset_enabled == TRUE) {
29843 				reset_retval = scsi_reset(SD_ADDRESS(un),
29844 				    RESET_LUN);
29845 			}
29846 			if (reset_retval == 0) {
29847 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
29848 			}
29849 			(void) sd_send_polled_RQS(un);
29850 
29851 		} else {
29852 			SD_INFO(SD_LOG_DUMP, un,
29853 			    "sddump: read failed with 0x%x, try # %d\n",
29854 			    SD_GET_PKT_STATUS(pkt), i);
29855 			mutex_enter(SD_MUTEX(un));
29856 			sd_reset_target(un, pkt);
29857 			mutex_exit(SD_MUTEX(un));
29858 		}
29859 
29860 		/*
29861 		 * If we are not getting anywhere with lun/target resets,
29862 		 * let's reset the bus.
29863 		 */
29864 		if (i > SD_NDUMP_RETRIES/2) {
29865 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
29866 			(void) sd_send_polled_RQS(un);
29867 		}
29868 
29869 	}
29870 	scsi_destroy_pkt(pkt);
29871 
29872 	if (err != 0) {
29873 		scsi_free_consistent_buf(bp);
29874 		*bpp = NULL;
29875 	} else {
29876 		*bpp = bp;
29877 	}
29878 
29879 done:
29880 	mutex_enter(SD_MUTEX(un));
29881 	return (err);
29882 }
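
/*
 * Sketch of the intended calling pattern (simplified; sddump itself is
 * implemented elsewhere in this file): the caller reads the enclosing
 * aligned range, overlays the unaligned dump data into the returned
 * buffer, and then writes the whole buffer back out.
 *
 *	if (sddump_do_read_of_rmw(un, blkno, nblk, &bp) == 0) {
 *		... copy the dump data into bp->b_un.b_addr ...
 *		... write blkno through blkno + nblk - 1 from bp ...
 *	}
 */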
29883 
29884 
29885 /*
29886  *    Function: sd_failfast_flushq
29887  *
29888  * Description: Take all bp's on the wait queue that have B_FAILFAST set
29889  *		in b_flags and move them onto the failfast queue, then kick
29890  *		off a thread to return all bp's on the failfast queue to
29891  *		their owners with an error set.
29892  *
29893  *   Arguments: un - pointer to the soft state struct for the instance.
29894  *
29895  *     Context: may execute in interrupt context.
29896  */
29897 
29898 static void
29899 sd_failfast_flushq(struct sd_lun *un)
29900 {
29901 	struct buf *bp;
29902 	struct buf *next_waitq_bp;
29903 	struct buf *prev_waitq_bp = NULL;
29904 
29905 	ASSERT(un != NULL);
29906 	ASSERT(mutex_owned(SD_MUTEX(un)));
29907 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
29908 	ASSERT(un->un_failfast_bp == NULL);
29909 
29910 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29911 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
29912 
29913 	/*
29914 	 * Check if we should flush all bufs when entering failfast state, or
29915 	 * just those with B_FAILFAST set.
29916 	 */
29917 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
29918 		/*
29919 		 * Move *all* bp's on the wait queue to the failfast flush
29920 		 * queue, including those that do NOT have B_FAILFAST set.
29921 		 */
29922 		if (un->un_failfast_headp == NULL) {
29923 			ASSERT(un->un_failfast_tailp == NULL);
29924 			un->un_failfast_headp = un->un_waitq_headp;
29925 		} else {
29926 			ASSERT(un->un_failfast_tailp != NULL);
29927 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
29928 		}
29929 
29930 		un->un_failfast_tailp = un->un_waitq_tailp;
29931 
29932 		/* update kstat for each bp moved out of the waitq */
29933 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
29934 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29935 		}
29936 
29937 		/* empty the waitq */
29938 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
29939 
29940 	} else {
29941 		/*
29942 		 * Go through the wait queue, pick off all entries with
29943 		 * B_FAILFAST set, and move these onto the failfast queue.
29944 		 */
29945 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
29946 			/*
29947 			 * Save the pointer to the next bp on the wait queue,
29948 			 * so we get to it on the next iteration of this loop.
29949 			 */
29950 			next_waitq_bp = bp->av_forw;
29951 
29952 			/*
29953 			 * If this bp from the wait queue does NOT have
29954 			 * B_FAILFAST set, just move on to the next element
29955 			 * in the wait queue. Note, this is the only place
29956 			 * where it is correct to set prev_waitq_bp.
29957 			 */
29958 			if ((bp->b_flags & B_FAILFAST) == 0) {
29959 				prev_waitq_bp = bp;
29960 				continue;
29961 			}
29962 
29963 			/*
29964 			 * Remove the bp from the wait queue.
29965 			 */
29966 			if (bp == un->un_waitq_headp) {
29967 				/* The bp is the first element of the waitq. */
29968 				un->un_waitq_headp = next_waitq_bp;
29969 				if (un->un_waitq_headp == NULL) {
29970 					/* The wait queue is now empty */
29971 					un->un_waitq_tailp = NULL;
29972 				}
29973 			} else {
29974 				/*
29975 				 * The bp is either somewhere in the middle
29976 				 * or at the end of the wait queue.
29977 				 */
29978 				ASSERT(un->un_waitq_headp != NULL);
29979 				ASSERT(prev_waitq_bp != NULL);
29980 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
29981 				    == 0);
29982 				if (bp == un->un_waitq_tailp) {
29983 					/* bp is the last entry on the waitq. */
29984 					ASSERT(next_waitq_bp == NULL);
29985 					un->un_waitq_tailp = prev_waitq_bp;
29986 				}
29987 				prev_waitq_bp->av_forw = next_waitq_bp;
29988 			}
29989 			bp->av_forw = NULL;
29990 
29991 			/*
29992 			 * update kstat since the bp is moved out of
29993 			 * the waitq
29994 			 */
29995 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29996 
29997 			/*
29998 			 * Now put the bp onto the failfast queue.
29999 			 */
30000 			if (un->un_failfast_headp == NULL) {
30001 				/* failfast queue is currently empty */
30002 				ASSERT(un->un_failfast_tailp == NULL);
30003 				un->un_failfast_headp =
30004 				    un->un_failfast_tailp = bp;
30005 			} else {
30006 				/* Add the bp to the end of the failfast q */
30007 				ASSERT(un->un_failfast_tailp != NULL);
30008 				ASSERT(un->un_failfast_tailp->b_flags &
30009 				    B_FAILFAST);
30010 				un->un_failfast_tailp->av_forw = bp;
30011 				un->un_failfast_tailp = bp;
30012 			}
30013 		}
30014 	}
30015 
30016 	/*
30017 	 * Now return all bp's on the failfast queue to their owners.
30018 	 */
30019 	while ((bp = un->un_failfast_headp) != NULL) {
30020 
30021 		un->un_failfast_headp = bp->av_forw;
30022 		if (un->un_failfast_headp == NULL) {
30023 			un->un_failfast_tailp = NULL;
30024 		}
30025 
30026 		/*
30027 		 * We want to return the bp with a failure error code, but
30028 		 * we do not want a call to sd_start_cmds() to occur here,
30029 		 * so use sd_return_failed_command_no_restart() instead of
30030 		 * sd_return_failed_command().
30031 		 */
30032 		sd_return_failed_command_no_restart(un, bp, EIO);
30033 	}
30034 
30035 	/* Flush the xbuf queues if required. */
30036 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
30037 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
30038 	}
30039 
30040 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30041 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
30042 }
30043 
30044 
30045 /*
30046  *    Function: sd_failfast_flushq_callback
30047  *
30048  * Description: Return TRUE if the given bp meets the criteria for failfast
30049  *		flushing. Used with ddi_xbuf_flushq(9F).
30050  *
30051  *   Arguments: bp - ptr to buf struct to be examined.
30052  *
30053  *     Context: Any
30054  */
30055 
30056 static int
30057 sd_failfast_flushq_callback(struct buf *bp)
30058 {
30059 	/*
30060 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30061 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30062 	 */
30063 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30064 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30065 }
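
/*
 * Illustration (not part of the driver proper): the flush policy above is
 * governed by the sd_failfast_flushctl tunable. A hedged sketch of how an
 * administrator might select the most aggressive policy from /etc/system,
 * assuming SD_FAILFAST_FLUSH_ALL_QUEUES and SD_FAILFAST_FLUSH_ALL_BUFS are
 * the bit values 0x1 and 0x2:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * With both bits set, sd_failfast_flushq() also drains the xbuf queues, and
 * the callback above returns TRUE for every buf rather than only for those
 * with B_FAILFAST set.
 */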
30066 
30067 
30068 
30069 #if defined(__i386) || defined(__amd64)
30070 /*
30071  * Function: sd_setup_next_xfer
30072  *
30073  * Description: Prepare next I/O operation using DMA_PARTIAL
30074  *
30075  */
30076 
30077 static int
30078 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30079     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30080 {
30081 	ssize_t	num_blks_not_xfered;
30082 	daddr_t	strt_blk_num;
30083 	ssize_t	bytes_not_xfered;
30084 	int	rval;
30085 
30086 	ASSERT(pkt->pkt_resid == 0);
30087 
30088 	/*
30089 	 * Calculate next block number and amount to be transferred.
30090 	 *
30091 	 * How much data has NOT been transferred to the HBA yet.
30092 	 */
30093 	bytes_not_xfered = xp->xb_dma_resid;
30094 
30095 	/*
30096 	 * Figure out how many blocks have NOT been transferred to the HBA yet.
30097 	 */
30098 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30099 
30100 	/*
30101 	 * Set the starting block number to the end of what WAS transferred.
30102 	 */
30103 	strt_blk_num = xp->xb_blkno +
30104 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30105 
30106 	/*
30107 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30108 	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
30109 	 * the disk mutex here.
30110 	 */
30111 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30112 	    strt_blk_num, num_blks_not_xfered);
30113 
30114 	if (rval == 0) {
30115 
30116 		/*
30117 		 * Success.
30118 		 *
30119 		 * Adjust things if there are still more blocks to be
30120 		 * transferred.
30121 		 */
30122 		xp->xb_dma_resid = pkt->pkt_resid;
30123 		pkt->pkt_resid = 0;
30124 
30125 		return (1);
30126 	}
30127 
30128 	/*
30129 	 * The only other possible return value from sd_setup_next_rw_pkt
30130 	 * is SD_PKT_ALLOC_FAILURE, which occurs when scsi_init_pkt
30131 	 * returns NULL.
30132 	 */
30133 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30134 
30135 	bp->b_resid = bp->b_bcount;
30136 	bp->b_flags |= B_ERROR;
30137 
30138 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30139 	    "Error setting up next portion of DMA transfer\n");
30140 
30141 	return (0);
30142 }
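
/*
 * Worked example for the arithmetic above (hypothetical numbers, assuming
 * 512-byte target blocks): for a 1MB transfer (b_bcount = 0x100000) where
 * the HBA accepted only the first 256KB, xp->xb_dma_resid is 0xC0000, so
 * num_blks_not_xfered = 0xC0000 / 512 = 1536 and
 * strt_blk_num = xb_blkno + (0x100000 - 0xC0000) / 512 = xb_blkno + 512.
 */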
30143 #endif
30144 
30145 /*
30146  *    Function: sd_panic_for_res_conflict
30147  *
30148  * Description: Call panic with a string formatted with "Reservation Conflict"
30149  *		and a human-readable identifier indicating the SD instance
30150  *		that experienced the reservation conflict.
30151  *
30152  *   Arguments: un - pointer to the soft state struct for the instance.
30153  *
30154  *     Context: may execute in interrupt context.
30155  */
30156 
30157 #define	SD_RESV_CONFLICT_FMT_LEN 40
30158 void
30159 sd_panic_for_res_conflict(struct sd_lun *un)
30160 {
30161 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30162 	char path_str[MAXPATHLEN];
30163 
30164 	(void) snprintf(panic_str, sizeof (panic_str),
30165 	    "Reservation Conflict\nDisk: %s",
30166 	    ddi_pathname(SD_DEVINFO(un), path_str));
30167 
30168 	panic(panic_str);
30169 }
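
/*
 * For illustration, with a hypothetical device path the resulting panic
 * string would read:
 *
 *	Reservation Conflict
 *	Disk: /pci@1f,4000/scsi@3/sd@1,0
 */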
30170 
30171 /*
30172  * Note: The following sd_faultinjection_ioctl() routines implement
30173  * driver support for fault injection, used for error analysis by
30174  * inducing faults in multiple layers of the driver.
30175  *
30176  */
30177 
30178 #ifdef SD_FAULT_INJECTION
30179 static uint_t   sd_fault_injection_on = 0;
30180 
30181 /*
30182  *    Function: sd_faultinjection_ioctl()
30183  *
30184  * Description: This routine is the driver entry point for handling
30185  *              fault-injection ioctls that inject errors into the
30186  *              driver's layer model.
30187  *
30188  *   Arguments: cmd	- the ioctl cmd received
30189  *		arg	- the ioctl argument; also used to return data
30190  */
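
/*
 * For illustration only: a hedged sketch of the userland call sequence a
 * test harness might use (assuming the driver is built with
 * SD_FAULT_INJECTION and that the SDIOC* commands and sd_fi_* structures
 * from sddef.h are visible to the harness; the device path and error
 * handling are hypothetical):
 *
 *	struct sd_fi_pkt fi_pkt;	// describes the pkt fault to inject
 *	uint_t count = 1;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *	(void) ioctl(fd, SDIOCSTART, NULL);		// reset session state
 *	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt);	// stage a pkt fault
 *	(void) ioctl(fd, SDIOCPUSH, &count);		// push staged faults
 *	(void) ioctl(fd, SDIOCRUN, NULL);		// arm injection
 *	// ... issue I/O to the device, then collect the injection log:
 *	char log[SD_FI_MAX_BUF];
 *	(void) ioctl(fd, SDIOCRETRIEVE, log);
 */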
30191 
30192 static void
30193 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
30194 {
30195 	uint_t i;
30196 	uint_t rval;
30197 
30198 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30199 
30200 	mutex_enter(SD_MUTEX(un));
30201 
30202 	switch (cmd) {
30203 	case SDIOCRUN:
30204 		/* Allow pushed faults to be injected */
30205 		SD_INFO(SD_LOG_SDTEST, un,
30206 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30207 
30208 		sd_fault_injection_on = 1;
30209 
30210 		SD_INFO(SD_LOG_IOERR, un,
30211 		    "sd_faultinjection_ioctl: run finished\n");
30212 		break;
30213 
30214 	case SDIOCSTART:
30215 		/* Start Injection Session */
30216 		SD_INFO(SD_LOG_SDTEST, un,
30217 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30218 
30219 		sd_fault_injection_on = 0;
30220 		un->sd_injection_mask = 0xFFFFFFFF;
30221 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30222 			un->sd_fi_fifo_pkt[i] = NULL;
30223 			un->sd_fi_fifo_xb[i] = NULL;
30224 			un->sd_fi_fifo_un[i] = NULL;
30225 			un->sd_fi_fifo_arq[i] = NULL;
30226 		}
30227 		un->sd_fi_fifo_start = 0;
30228 		un->sd_fi_fifo_end = 0;
30229 
30230 		mutex_enter(&(un->un_fi_mutex));
30231 		un->sd_fi_log[0] = '\0';
30232 		un->sd_fi_buf_len = 0;
30233 		mutex_exit(&(un->un_fi_mutex));
30234 
30235 		SD_INFO(SD_LOG_IOERR, un,
30236 		    "sd_faultinjection_ioctl: start finished\n");
30237 		break;
30238 
30239 	case SDIOCSTOP:
30240 		/* Stop Injection Session */
30241 		SD_INFO(SD_LOG_SDTEST, un,
30242 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30243 		sd_fault_injection_on = 0;
30244 		un->sd_injection_mask = 0x0;
30245 
30246 		/* Free any stray or unused structs from the fifo */
30247 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30248 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30249 				kmem_free(un->sd_fi_fifo_pkt[i],
30250 				    sizeof (struct sd_fi_pkt));
30251 			}
30252 			if (un->sd_fi_fifo_xb[i] != NULL) {
30253 				kmem_free(un->sd_fi_fifo_xb[i],
30254 				    sizeof (struct sd_fi_xb));
30255 			}
30256 			if (un->sd_fi_fifo_un[i] != NULL) {
30257 				kmem_free(un->sd_fi_fifo_un[i],
30258 				    sizeof (struct sd_fi_un));
30259 			}
30260 			if (un->sd_fi_fifo_arq[i] != NULL) {
30261 				kmem_free(un->sd_fi_fifo_arq[i],
30262 				    sizeof (struct sd_fi_arq));
30263 			}
30264 			un->sd_fi_fifo_pkt[i] = NULL;
30265 			un->sd_fi_fifo_un[i] = NULL;
30266 			un->sd_fi_fifo_xb[i] = NULL;
30267 			un->sd_fi_fifo_arq[i] = NULL;
30268 		}
30269 		un->sd_fi_fifo_start = 0;
30270 		un->sd_fi_fifo_end = 0;
30271 
30272 		SD_INFO(SD_LOG_IOERR, un,
30273 		    "sd_faultinjection_ioctl: stop finished\n");
30274 		break;
30275 
30276 	case SDIOCINSERTPKT:
30277 		/* Store a packet struct to be pushed onto the fifo */
30278 		SD_INFO(SD_LOG_SDTEST, un,
30279 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30280 
30281 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30282 
30283 		sd_fault_injection_on = 0;
30284 
30285 		/* No more than SD_FI_MAX_ERROR entries allowed in the queue */
30286 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30287 			kmem_free(un->sd_fi_fifo_pkt[i],
30288 			    sizeof (struct sd_fi_pkt));
30289 		}
30290 		if (arg != NULL) {
30291 			un->sd_fi_fifo_pkt[i] =
30292 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30293 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30294 				/* Alloc failed; don't store anything */
30295 				break;
30296 			}
30297 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30298 			    sizeof (struct sd_fi_pkt), 0);
30299 			if (rval == -1) {
30300 				kmem_free(un->sd_fi_fifo_pkt[i],
30301 				    sizeof (struct sd_fi_pkt));
30302 				un->sd_fi_fifo_pkt[i] = NULL;
30303 			}
30304 		} else {
30305 			SD_INFO(SD_LOG_IOERR, un,
30306 			    "sd_faultinjection_ioctl: pkt null\n");
30307 		}
30308 		break;
30309 
30310 	case SDIOCINSERTXB:
30311 		/* Store an xb struct to be pushed onto the fifo */
30312 		SD_INFO(SD_LOG_SDTEST, un,
30313 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30314 
30315 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30316 
30317 		sd_fault_injection_on = 0;
30318 
30319 		if (un->sd_fi_fifo_xb[i] != NULL) {
30320 			kmem_free(un->sd_fi_fifo_xb[i],
30321 			    sizeof (struct sd_fi_xb));
30322 			un->sd_fi_fifo_xb[i] = NULL;
30323 		}
30324 		if (arg != NULL) {
30325 			un->sd_fi_fifo_xb[i] =
30326 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30327 			if (un->sd_fi_fifo_xb[i] == NULL) {
30328 				/* Alloc failed; don't store anything */
30329 				break;
30330 			}
30331 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30332 			    sizeof (struct sd_fi_xb), 0);
30333 
30334 			if (rval == -1) {
30335 				kmem_free(un->sd_fi_fifo_xb[i],
30336 				    sizeof (struct sd_fi_xb));
30337 				un->sd_fi_fifo_xb[i] = NULL;
30338 			}
30339 		} else {
30340 			SD_INFO(SD_LOG_IOERR, un,
30341 			    "sd_faultinjection_ioctl: xb null\n");
30342 		}
30343 		break;
30344 
30345 	case SDIOCINSERTUN:
30346 		/* Store a un struct to be pushed onto the fifo */
30347 		SD_INFO(SD_LOG_SDTEST, un,
30348 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30349 
30350 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30351 
30352 		sd_fault_injection_on = 0;
30353 
30354 		if (un->sd_fi_fifo_un[i] != NULL) {
30355 			kmem_free(un->sd_fi_fifo_un[i],
30356 			    sizeof (struct sd_fi_un));
30357 			un->sd_fi_fifo_un[i] = NULL;
30358 		}
30359 		if (arg != NULL) {
30360 			un->sd_fi_fifo_un[i] =
30361 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30362 			if (un->sd_fi_fifo_un[i] == NULL) {
30363 				/* Alloc failed; don't store anything */
30364 				break;
30365 			}
30366 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30367 			    sizeof (struct sd_fi_un), 0);
30368 			if (rval == -1) {
30369 				kmem_free(un->sd_fi_fifo_un[i],
30370 				    sizeof (struct sd_fi_un));
30371 				un->sd_fi_fifo_un[i] = NULL;
30372 			}
30373 
30374 		} else {
30375 			SD_INFO(SD_LOG_IOERR, un,
30376 			    "sd_faultinjection_ioctl: un null\n");
30377 		}
30378 
30379 		break;
30380 
30381 	case SDIOCINSERTARQ:
30382 		/* Store an arq struct to be pushed onto the fifo */
30383 		SD_INFO(SD_LOG_SDTEST, un,
30384 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30385 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30386 
30387 		sd_fault_injection_on = 0;
30388 
30389 		if (un->sd_fi_fifo_arq[i] != NULL) {
30390 			kmem_free(un->sd_fi_fifo_arq[i],
30391 			    sizeof (struct sd_fi_arq));
30392 			un->sd_fi_fifo_arq[i] = NULL;
30393 		}
30394 		if (arg != NULL) {
30395 			un->sd_fi_fifo_arq[i] =
30396 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30397 			if (un->sd_fi_fifo_arq[i] == NULL) {
30398 				/* Alloc failed; don't store anything */
30399 				break;
30400 			}
30401 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30402 			    sizeof (struct sd_fi_arq), 0);
30403 			if (rval == -1) {
30404 				kmem_free(un->sd_fi_fifo_arq[i],
30405 				    sizeof (struct sd_fi_arq));
30406 				un->sd_fi_fifo_arq[i] = NULL;
30407 			}
30408 
30409 		} else {
30410 			SD_INFO(SD_LOG_IOERR, un,
30411 			    "sd_faultinjection_ioctl: arq null\n");
30412 		}
30413 
30414 		break;
30415 
30416 	case SDIOCPUSH:
30417 		/* Push stored xb, pkt, un, and arq onto fifo */
30418 		sd_fault_injection_on = 0;
30419 
30420 		if (arg != NULL) {
30421 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30422 			if (rval != -1 &&
30423 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30424 				un->sd_fi_fifo_end += i;
30425 			}
30426 		} else {
30427 			SD_INFO(SD_LOG_IOERR, un,
30428 			    "sd_faultinjection_ioctl: push arg null\n");
30429 			/* No count supplied; push a single entry */
30429 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
30430 				un->sd_fi_fifo_end++;
30431 			}
30432 		}
30433 		SD_INFO(SD_LOG_IOERR, un,
30434 		    "sd_faultinjection_ioctl: push to end=%d\n",
30435 		    un->sd_fi_fifo_end);
30436 		break;
30437 
30438 	case SDIOCRETRIEVE:
30439 		/* Return buffer of log from Injection session */
30440 		SD_INFO(SD_LOG_SDTEST, un,
30441 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
30442 
30443 		sd_fault_injection_on = 0;
30444 
30445 		mutex_enter(&(un->un_fi_mutex));
30446 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30447 		    un->sd_fi_buf_len+1, 0);
30448 		mutex_exit(&(un->un_fi_mutex));
30449 
30450 		if (rval == -1) {
30451 			/*
30452 			 * arg is possibly invalid; set
30453 			 * it to NULL for the return
30454 			 */
30455 			arg = NULL;
30456 		}
30457 		break;
30458 	}
30459 
30460 	mutex_exit(SD_MUTEX(un));
30461 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl:"
30462 			    " exit\n");
30463 }
30464 
30465 
30466 /*
30467  *    Function: sd_injection_log()
30468  *
30469  * Description: This routine appends buf to the existing injection log
30470  *              for later retrieval via sd_faultinjection_ioctl(), for use
30471  *              in fault detection and recovery
30472  *
30473  *   Arguments: buf - the string to add to the log
30474  */
30475 
30476 static void
30477 sd_injection_log(char *buf, struct sd_lun *un)
30478 {
30479 	uint_t len;
30480 
30481 	ASSERT(un != NULL);
30482 	ASSERT(buf != NULL);
30483 
30484 	mutex_enter(&(un->un_fi_mutex));
30485 
30486 	len = min(strlen(buf), 255);
30487 	/* Add logged value to Injection log to be returned later */
30488 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30489 		uint_t	offset = strlen((char *)un->sd_fi_log);
30490 		char *destp = (char *)un->sd_fi_log + offset;
30491 		int i;
30492 		for (i = 0; i < len; i++) {
30493 			*destp++ = *buf++;
30494 		}
30495 		un->sd_fi_buf_len += len;
30496 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30497 	}
30498 
30499 	mutex_exit(&(un->un_fi_mutex));
30500 }
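
/*
 * Illustrative (hypothetical call site): an injection point in the driver
 * could record an event with
 *
 *	sd_injection_log("sd_faultinjection: pkt_reason set\n", un);
 *
 * The entry is appended to un->sd_fi_log (capped at SD_FI_MAX_BUF bytes)
 * and is returned to userland by the SDIOCRETRIEVE ioctl.
 */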
30501 
30502 
30503 /*
30504  *    Function: sd_faultinjection()
30505  *
30506  * Description: This routine takes the pkt and changes its
30507  *		content based on the error-injection scenario.
30508  *
30509  *   Arguments: pktp	- packet to be changed
30510  */
30511 
30512 static void
30513 sd_faultinjection(struct scsi_pkt *pktp)
30514 {
30515 	uint_t i;
30516 	struct sd_fi_pkt *fi_pkt;
30517 	struct sd_fi_xb *fi_xb;
30518 	struct sd_fi_un *fi_un;
30519 	struct sd_fi_arq *fi_arq;
30520 	struct buf *bp;
30521 	struct sd_xbuf *xb;
30522 	struct sd_lun *un;
30523 
30524 	ASSERT(pktp != NULL);
30525 
30526 	/* pull bp, xb, and un from pktp */
30527 	bp = (struct buf *)pktp->pkt_private;
30528 	xb = SD_GET_XBUF(bp);
30529 	un = SD_GET_UN(bp);
30530 
30531 	ASSERT(un != NULL);
30532 
30533 	mutex_enter(SD_MUTEX(un));
30534 
30535 	SD_TRACE(SD_LOG_SDTEST, un,
30536 	    "sd_faultinjection: entry Injection from sdintr\n");
30537 
30538 	/* if injection is off, return */
30539 	if (sd_fault_injection_on == 0 ||
30540 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30541 		mutex_exit(SD_MUTEX(un));
30542 		return;
30543 	}
30544 
30545 
30546 	/* take next set off fifo */
30547 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30548 
30549 	fi_pkt = un->sd_fi_fifo_pkt[i];
30550 	fi_xb = un->sd_fi_fifo_xb[i];
30551 	fi_un = un->sd_fi_fifo_un[i];
30552 	fi_arq = un->sd_fi_fifo_arq[i];
30553 
30554 
30555 	/* set variables accordingly */
30556 	/* set pkt if it was on fifo */
30557 	if (fi_pkt != NULL) {
30558 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30559 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30560 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30561 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30562 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30563 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30564 
30565 	}
30566 
30567 	/* set xb if it was on fifo */
30568 	if (fi_xb != NULL) {
30569 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30570 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30571 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30572 		SD_CONDSET(xb, xb, xb_victim_retry_count,
30573 		    "xb_victim_retry_count");
30574 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30575 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30576 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30577 
30578 		/* copy in the sense data block */
30579 		if (fi_xb->xb_sense_data[0] != -1) {
30580 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30581 			    SENSE_LENGTH);
30582 		}
30583 
30584 		/* copy in extended sense codes */
30585 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30586 		    xb, es_code, "es_code");
30587 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30588 		    xb, es_key, "es_key");
30589 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30590 		    xb, es_add_code, "es_add_code");
30591 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30592 		    xb, es_qual_code, "es_qual_code");
30593 	}
30594 
30595 	/* set un if it was on fifo */
30596 	if (fi_un != NULL) {
30597 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30598 		SD_CONDSET(un, un, un_ctype, "un_ctype");
30599 		SD_CONDSET(un, un, un_reset_retry_count,
30600 		    "un_reset_retry_count");
30601 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30602 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30603 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30604 		SD_CONDSET(un, un, un_f_geometry_is_valid,
30605 		    "un_f_geometry_is_valid");
30606 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30607 		    "un_f_allow_bus_device_reset");
30608 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30609 
30610 	}
30611 
30612 	/* copy in auto request sense if it was on fifo */
30613 	if (fi_arq != NULL) {
30614 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30615 	}
30616 
30617 	/* free structs */
30618 	if (un->sd_fi_fifo_pkt[i] != NULL) {
30619 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30620 	}
30621 	if (un->sd_fi_fifo_xb[i] != NULL) {
30622 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30623 	}
30624 	if (un->sd_fi_fifo_un[i] != NULL) {
30625 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30626 	}
30627 	if (un->sd_fi_fifo_arq[i] != NULL) {
30628 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30629 	}
30630 
30631 	/*
30632 	 * kmem_free does not guarantee that the pointer is set to
30633 	 * NULL. Since we use these pointers to determine whether
30634 	 * values were set, make sure they are always NULL after
30635 	 * the free.
30636 	 */
30637 	un->sd_fi_fifo_pkt[i] = NULL;
30638 	un->sd_fi_fifo_un[i] = NULL;
30639 	un->sd_fi_fifo_xb[i] = NULL;
30640 	un->sd_fi_fifo_arq[i] = NULL;
30641 
30642 	un->sd_fi_fifo_start++;
30643 
30644 	mutex_exit(SD_MUTEX(un));
30645 
30646 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30647 }
30648 
30649 #endif /* SD_FAULT_INJECTION */
30650 
30651 /*
30652  * This routine is invoked in sd_unit_attach(). Before it is called, the
30653  * properties in the conf file should already have been processed,
30654  * including the "hotpluggable" property.
30655  *
30656  * The sd driver distinguishes three types of devices: removable media,
30657  * non-removable media, and hotpluggable. The differences are defined below:
30658  *
30659  * 1. Device ID
30660  *
30661  *     The device ID of a device is used to identify this device. Refer to
30662  *     ddi_devid_register(9F).
30663  *
30664  *     For a non-removable media disk device which can provide 0x80 or 0x83
30665  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30666  *     device ID is created to identify this device. For other non-removable
30667  *     media devices, a default device ID is created only if this device has
30668  * at least 2 alternate cylinders. Otherwise, this device has no devid.
30669  *
30670  *     -------------------------------------------------------
30671  *     removable media   hotpluggable  | Can Have Device ID
30672  *     -------------------------------------------------------
30673  *         false             false     |     Yes
30674  *         false             true      |     Yes
30675  *         true                x       |     No
30676  *     ------------------------------------------------------
30677  *
30678  *
30679  * 2. SCSI group 4 commands
30680  *
30681  *     In the SCSI specs, only some commands in the group 4 command set can
30682  *     use 8-byte addresses, which are needed to access storage beyond 2TB.
30683  *     Other commands have no such capability. Without group 4 support, it
30684  *     is impossible to make full use of the storage space of a disk with
30685  *     a capacity larger than 2TB.
30686  *
30687  *     -----------------------------------------------
30688  *     removable media   hotpluggable   LP64  |  Group
30689  *     -----------------------------------------------
30690  *           false          false       false |   1
30691  *           false          false       true  |   4
30692  *           false          true        false |   1
30693  *           false          true        true  |   4
30694  *           true             x           x   |   5
30695  *     -----------------------------------------------
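 *
 *     For example (standard SCSI arithmetic, not specific to this driver):
 *     a group 1 READ(10) CDB carries a 32-bit LBA, so with 512-byte blocks
 *     it can address at most 2^32 * 512 bytes = 2TB, whereas the group 4
 *     READ(16) CDB carries a 64-bit LBA.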
30696  *
30697  *
30698  * 3. Check for VTOC Label
30699  *
30700  *     If a direct-access disk has no EFI label, sd will check if it has a
30701  *     valid VTOC label. Now, sd also does that check for removable media
30702  *     and hotpluggable devices.
30703  *
30704  *     --------------------------------------------------------------
30705  *     Direct-Access   removable media    hotpluggable |  Check Label
30706  *     -------------------------------------------------------------
30707  *         false          false           false        |   No
30708  *         false          false           true         |   No
30709  *         false          true            false        |   Yes
30710  *         false          true            true         |   Yes
30711  *         true            x                x          |   Yes
30712  *     --------------------------------------------------------------
30713  *
30714  *
30715  * 4. Building default VTOC label
30716  *
30717  *     As section 3 says, sd checks whether certain kinds of devices have a
30718  *     VTOC label. If those devices have no valid VTOC label, sd(7d) will
30719  *     attempt to create a default VTOC for them. Currently sd creates a
30720  *     default VTOC label for all devices on the x86 platform (VTOC_16), but
30721  *     only for removable media and hotpluggable devices on SPARC (VTOC_8).
30722  *
30723  *     -----------------------------------------------------------
30724  *       removable media hotpluggable platform   |   Default Label
30725  *     -----------------------------------------------------------
30726  *             false          false    sparc     |     No
30727  *             false            x       x86      |     Yes
30728  *             false          true     sparc     |     Yes
30729  *             true             x        x       |     Yes
30730  *     ----------------------------------------------------------
30731  *
30732  *
30733  * 5. Supported blocksizes of target devices
30734  *
30735  *     Sd supports non-512-byte blocksizes for removable media devices only.
30736  *     For other devices, only a 512-byte blocksize is supported. This may
30737  *     change in the near future because some RAID devices require a
30738  *     non-512-byte blocksize.
30739  *
30740  *     -----------------------------------------------------------
30741  *     removable media    hotpluggable    | non-512-byte blocksize
30742  *     -----------------------------------------------------------
30743  *           false          false         |   No
30744  *           false          true          |   No
30745  *           true             x           |   Yes
30746  *     -----------------------------------------------------------
30747  *
30748  *
30749  * 6. Automatic mount & unmount (i.e. vold)
30750  *
30751  *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used to
30752  *     query whether a device is a removable media device. It returns 1 for
30753  *     removable media devices, and 0 for others.
30754  *
30755  *     Vold treats a device as a removable one only if DKIOCREMOVABLE returns 1.
30756  *     And it does automounting only for removable media devices. In order to
30757  *     preserve users' experience and let vold continue to do automounting for
30758  *     USB disk devices, DKIOCREMOVABLE ioctl still returns 1 for USB/1394 disk
30759  *     devices.
30760  *
30761  *      ------------------------------------------------------
30762  *       removable media    hotpluggable   |  automatic mount
30763  *      ------------------------------------------------------
30764  *             false          false        |   No
30765  *             false          true         |   Yes
30766  *             true             x          |   Yes
30767  *      ------------------------------------------------------
30768  *
30769  *
30770  * 7. fdisk partition management
30771  *
30772  *     Fdisk is the traditional partitioning method on the x86 platform. The
30773  *     sd(7d) driver supports fdisk partitions only on x86; on SPARC, sd
30774  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
30775  *     fdisk partitions on both the x86 and SPARC platforms.
30776  *
30777  *     -----------------------------------------------------------
30778  *       platform   removable media  USB/1394  |  fdisk supported
30779  *     -----------------------------------------------------------
30780  *        x86         X               X        |       true
30781  *     ------------------------------------------------------------
30782  *        sparc       X               X        |       false
30783  *     ------------------------------------------------------------
30784  *
30785  *
30786  * 8. MBOOT/MBR
30787  *
30788  *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
30789  *     support mboot read/write for removable media and USB/1394 devices there.
30790  *
30791  *     -----------------------------------------------------------
30792  *       platform   removable media  USB/1394  |  mboot supported
30793  *     -----------------------------------------------------------
30794  *        x86         X               X        |       true
30795  *     ------------------------------------------------------------
30796  *        sparc      false           false     |       false
30797  *        sparc      false           true      |       true
30798  *        sparc      true            false     |       true
30799  *        sparc      true            true      |       true
30800  *     ------------------------------------------------------------
30801  *
30802  *
30803  * 9. Error handling when opening a device
30804  *
30805  *     If a disk device fails to open, an errno is returned. For some kinds
30806  *     of errors, a different errno is returned depending on whether the
30807  *     device is removable. This brings USB/1394 hard disks in line with
30808  *     expected hard disk behavior. It is not expected that this breaks any
30809  *     application.
30810  *
30811  *     ------------------------------------------------------
30812  *       removable media    hotpluggable   |  errno
30813  *     ------------------------------------------------------
30814  *             false          false        |   EIO
30815  *             false          true         |   EIO
30816  *             true             x          |   ENXIO
30817  *     ------------------------------------------------------
30818  *
30819  *
30820  * 10. off-by-1 workaround (bug 1175930, and 4996920) (x86 only)
30821  *
30822  *     [ this is a bit of very ugly history, soon to be removed ]
30823  *
30824  *     The SCSI READ_CAPACITY command returns the last valid logical block
30825  *     number, which starts from 0, so the real capacity is larger than the
30826  *     returned value by 1. However, because scdk.c (which was EOL'ed)
30827  *     directly used the logical block number as the capacity of disk
30828  *     devices, an off-by-1 workaround was applied. This workaround causes
30829  *     fixed SCSI disks to lose a sector on the x86 platform, and precludes
30830  *     exchanging fixed hard disks between sparc and x86.
30831  *
30832  *     ------------------------------------------------------
30833  *       removable media    hotpluggable   |   Off-by-1 workaround
30834  *     -------------------------------------------------------
30835  *             false          false        |     Yes
30836  *             false          true         |     No
30837  *             true           false        |     No
30838  *             true           true         |     No
30839  *     ------------------------------------------------------
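 *
 *     Worked example (illustrative numbers): READ_CAPACITY on a device with
 *     0x10000 blocks returns a last LBA of 0xFFFF, so the true capacity is
 *     0xFFFF + 1 = 0x10000 blocks. The workaround instead treated 0xFFFF as
 *     the capacity, silently dropping the final sector.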
30840  *
30841  *
30842  * 11. ioctls: DKIOCEJECT, CDROMEJECT
30843  *
30844  *     These IOCTLs are applicable only to removable media devices.
30845  *
30846  *     -----------------------------------------------------------
30847  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30848  *     -----------------------------------------------------------
30849  *             false          false        |     No
30850  *             false          true         |     No
30851  *             true            x           |     Yes
30852  *     -----------------------------------------------------------
30853  *
30854  *
30855  * 12. Kstats for partitions
30856  *
30857  *     sd creates partition kstats for non-removable media devices. USB and
30858  *     Firewire hard disks now have partition kstats.
30859  *
30860  *      ------------------------------------------------------
30861  *       removable media    hotpluggable   |   kstat
30862  *      ------------------------------------------------------
30863  *             false          false        |    Yes
30864  *             false          true         |    Yes
30865  *             true             x          |    No
30866  *       ------------------------------------------------------
30867  *
30868  *
30869  * 13. Removable media & hotpluggable properties
30870  *
30871  *     Sd driver creates a "removable-media" property for removable media
30872  *     devices. Parent nexus drivers create a "hotpluggable" property if
30873  *     it supports hotplugging.
30874  *
30875  *     ---------------------------------------------------------------------
30876  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
30877  *     ---------------------------------------------------------------------
30878  *       false            false       |    No                   No
30879  *       false            true        |    No                   Yes
30880  *       true             false       |    Yes                  No
30881  *       true             true        |    Yes                  Yes
30882  *     ---------------------------------------------------------------------
30883  *
30884  *
30885  * 14. Power Management
30886  *
30887  *     sd only power manages removable media devices, or devices that support
30888  *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
30889  *
30890  *     A parent nexus that supports hotplugging can also set "pm-capable"
30891  *     if the disk can be power managed.
30892  *
30893  *     ------------------------------------------------------------
30894  *       removable media hotpluggable pm-capable  |   power manage
30895  *     ------------------------------------------------------------
30896  *             false          false     false     |     No
30897  *             false          false     true      |     Yes
30898  *             false          true      false     |     No
30899  *             false          true      true      |     Yes
30900  *             true             x        x        |     Yes
30901  *     ------------------------------------------------------------
30902  *
30903  *      USB and firewire hard disks can now be power managed independently
30904  *      of the framebuffer.
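 *
 *      A hedged sketch (hypothetical HBA driver.conf fragment): a parent
 *      nexus that knows its disks can be power managed might ship
 *
 *          pm-capable=1;
 *
 *      in its driver.conf, which causes sd to skip the start/stop cycle
 *      count LOG_SENSE check and power manage the device directly.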
30905  *
30906  *
30907  * 15. Support for USB disks with capacity larger than 1TB
30908  *
30909  *     Currently, sd doesn't permit a fixed disk device with a capacity
30910  *     larger than 1TB to be used in a 32-bit operating system environment.
30911  *     However, sd doesn't apply that restriction to removable media devices.
30912  *     Instead, it assumes that removable media devices cannot have a capacity
30913  *     larger than 1TB. Therefore, using such devices on a 32-bit system is
30914  *     only partially supported, which can cause unexpected results.
30915  *
30916  *     ---------------------------------------------------------------------
30917  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
30918  *     ---------------------------------------------------------------------
30919  *             false          false  |   true         |     No
30920  *             false          true   |   true         |     No
30921  *             true           false  |   true         |     Yes
30922  *             true           true   |   true         |     Yes
30923  *     ---------------------------------------------------------------------
30924  *
30925  *
30926  * 16. Check write-protection at open time
30927  *
30928  *     When a removable media device is opened for writing without the NDELAY
30929  *     flag, sd will check whether the device is writable. An attempt to open
30930  *     a write-protected device for writing without the NDELAY flag will fail.
30931  *
30932  *     ------------------------------------------------------------
30933  *       removable media    USB/1394   |   WP Check
30934  *     ------------------------------------------------------------
30935  *             false          false    |     No
30936  *             false          true     |     No
30937  *             true           false    |     Yes
30938  *             true           true     |     Yes
30939  *     ------------------------------------------------------------
30940  *
30941  *
30942  * 17. syslog when corrupted VTOC is encountered
30943  *
30944  *      Currently, if an invalid VTOC is encountered, sd only prints a syslog
30945  *      message for fixed SCSI disks.
30946  *     ------------------------------------------------------------
30947  *       removable media    USB/1394   |   print syslog
30948  *     ------------------------------------------------------------
30949  *             false          false    |     Yes
30950  *             false          true     |     No
30951  *             true           false    |     No
30952  *             true           true     |     No
30953  *     ------------------------------------------------------------
30954  */
30955 static void
30956 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
30957 {
30958 	int	pm_capable_prop;
30959 
30960 	ASSERT(un->un_sd);
30961 	ASSERT(un->un_sd->sd_inq);
30962 
30963 #if defined(_SUNOS_VTOC_16)
30964 	/*
30965 	 * For VTOC_16 devices, the default label will be created for all
30966 	 * devices. (see sd_build_default_label)
30967 	 */
30968 	un->un_f_default_vtoc_supported = TRUE;
30969 #endif
30970 
30971 	if (un->un_sd->sd_inq->inq_rmb) {
30972 		/*
30973 		 * The media of this device is removable, and for this kind
30974 		 * of device it is possible to change the medium after opening
30975 		 * the device. Thus we should support this operation.
30976 		 */
30977 		un->un_f_has_removable_media = TRUE;
30978 
30979 #if defined(_SUNOS_VTOC_8)
30980 		/*
30981 		 * Note: currently, for VTOC_8 devices, default label is
30982 		 * created for removable and hotpluggable devices only.
30983 		 */
30984 		un->un_f_default_vtoc_supported = TRUE;
30985 #endif
30986 		/*
30987 		 * support non-512-byte blocksize of removable media devices
30988 		 */
30989 		un->un_f_non_devbsize_supported = TRUE;
30990 
30991 		/*
30992 		 * Assume that all removable media devices support DOOR_LOCK
30993 		 */
30994 		un->un_f_doorlock_supported = TRUE;
30995 
30996 		/*
30997 		 * A removable media device may be opened with the NDELAY flag
30998 		 * when there is no media in the drive; in this case we don't
30999 		 * care whether the device is writable. But without the NDELAY
31000 		 * flag, we need to check whether the media is write-protected.
31001 		 */
31002 		un->un_f_chk_wp_open = TRUE;
31003 
31004 		/*
31005 		 * Need to start a SCSI watch thread to monitor the media state;
31006 		 * when media is inserted or ejected, notify syseventd.
31007 		 */
31008 		un->un_f_monitor_media_state = TRUE;
31009 
31010 		/*
31011 		 * Some devices don't support START_STOP_UNIT command.
31012 		 * Therefore, we'd better check if a device supports it
31013 		 * before sending it.
31014 		 */
31015 		un->un_f_check_start_stop = TRUE;
31016 
31017 		/*
31018 		 * support eject media ioctl:
31019 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31020 		 */
31021 		un->un_f_eject_media_supported = TRUE;
31022 
31023 		/*
31024 		 * Because many removable-media devices don't support
31025 		 * LOG_SENSE, we can't use this command to check whether
31026 		 * a removable media device supports power management.
31027 		 * We assume that they support power management via the
31028 		 * START_STOP_UNIT command and can be spun up and down
31029 		 * without limitations.
31030 		 */
31031 		un->un_f_pm_supported = TRUE;
31032 
31033 		/*
31034 		 * Need to create a zero-length (Boolean) property
31035 		 * "removable-media" for the removable media devices.
31036 		 * Note that the return value of the property creation is
31037 		 * not checked: if the property cannot be created, we do not
31038 		 * want the attach to fail altogether. This is consistent
31039 		 * with other property creation in attach.
31040 		 */
31041 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31042 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31043 
31044 	} else {
31045 		/*
31046 		 * create device ID for device
31047 		 */
31048 		un->un_f_devid_supported = TRUE;
31049 
31050 		/*
31051 		 * Spin up non-removable-media devices once it is attached
31052 		 */
31053 		un->un_f_attach_spinup = TRUE;
31054 
31055 		/*
31056 		 * According to the SCSI specification, sense data has two
31057 		 * formats: fixed format and descriptor format. At present, we
31058 		 * don't support descriptor-format sense data for removable
31059 		 * media.
31060 		 */
31061 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31062 			un->un_f_descr_format_supported = TRUE;
31063 		}
31064 
31065 		/*
31066 		 * kstats are created only for non-removable media devices.
31067 		 *
31068 		 * Set this in sd.conf to 0 in order to disable kstats.  The
31069 		 * default is 1, so they are enabled by default.
31070 		 */
31071 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31072 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31073 			"enable-partition-kstats", 1));
31074 
31075 		/*
31076 		 * Check if HBA has set the "pm-capable" property.
31077 		 * If "pm-capable" exists and is non-zero then we can
31078 		 * power manage the device without checking the start/stop
31079 		 * cycle count log sense page.
31080 		 *
31081 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
31082 		 * then we should not power manage the device.
31083 		 *
31084 		 * If "pm-capable" doesn't exist then pm_capable_prop will
31085 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31086 		 * sd will check the start/stop cycle count log sense page
31087 		 * and power manage the device if the cycle count limit has
31088 		 * not been exceeded.
31089 		 */
31090 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31091 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31092 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
31093 			un->un_f_log_sense_supported = TRUE;
31094 		} else {
31095 			/*
31096 			 * pm-capable property exists.
31097 			 *
31098 			 * Convert "TRUE" values for pm_capable_prop to
31099 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
31100 			 * later. "TRUE" values are any values except
31101 			 * SD_PM_CAPABLE_FALSE (0) and
31102 			 * SD_PM_CAPABLE_UNDEFINED (-1)
31103 			 */
31104 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
31105 				un->un_f_log_sense_supported = FALSE;
31106 			} else {
31107 				un->un_f_pm_supported = TRUE;
31108 			}
31109 
31110 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31111 			    "sd_unit_attach: un:0x%p pm-capable "
31112 			    "property set to %d.\n", un, un->un_f_pm_supported);
31113 		}
31114 	}
31115 
31116 	if (un->un_f_is_hotpluggable) {
31117 #if defined(_SUNOS_VTOC_8)
31118 		/*
31119 		 * Note: currently, for VTOC_8 devices, default label is
31120 		 * created for removable and hotpluggable devices only.
31121 		 */
31122 		un->un_f_default_vtoc_supported = TRUE;
31123 #endif
31124 
31125 		/*
31126 		 * Temporarily, let hotpluggable devices pretend to be
31127 		 * removable-media devices for vold.
31128 		 */
31129 		un->un_f_monitor_media_state = TRUE;
31130 
31131 		un->un_f_check_start_stop = TRUE;
31132 
31133 	}
31134 
31135 	/*
31136 	 * By default, only DIRECT ACCESS devices and CDs will have Sun
31137 	 * labels.
31138 	 */
31139 	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
31140 	    (un->un_sd->sd_inq->inq_rmb)) {
31141 		/*
31142 		 * Direct access devices have disk label
31143 		 */
31144 		un->un_f_vtoc_label_supported = TRUE;
31145 	}
31146 
31147 	/*
31148 	 * Fdisk partitions are supported for all direct-access devices on the
31149 	 * x86 platform, and only for removable media and hotpluggable
31150 	 * devices on the SPARC platform. Below, we set this flag back to
31151 	 * FALSE if the current device is neither removable media nor
31152 	 * hotpluggable and sd is running on the SPARC platform.
31153 	 */
31154 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31155 		un->un_f_mboot_supported = TRUE;
31156 	}
31157 
31158 	if (!un->un_f_is_hotpluggable &&
31159 	    !un->un_sd->sd_inq->inq_rmb) {
31160 
31161 #if defined(_SUNOS_VTOC_8)
31162 		/*
31163 		 * Don't support fdisk on fixed disk
31164 		 */
31165 		un->un_f_mboot_supported = FALSE;
31166 #endif
31167 
31168 		/*
31169 		 * Fixed disks support SYNC CACHE
31170 		 */
31171 		un->un_f_sync_cache_supported = TRUE;
31172 
31173 		/*
31174 		 * For a fixed disk, if its VTOC is not valid, we will write
31175 		 * an error entry into the system log
31176 		 */
31177 		if (un->un_f_vtoc_label_supported)
31178 			un->un_f_vtoc_errlog_supported = TRUE;
31179 	}
31180 }
31181