xref: /illumos-gate/usr/src/uts/common/io/scsi/targets/sd.c (revision 14080d4d675ae6a038f6446d92aeb8079f4017ae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2011 Bayard G. Bell.  All rights reserved.
27  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
28  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
29  * Copyright 2019 Joyent, Inc.
30  * Copyright 2019 Racktop Systems
31  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
32  * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
33  * Copyright 2022 Garrett D'Amore
34  */
35 /*
36  * Copyright 2011 cyril.galibern@opensvc.com
37  */
38 
39 /*
40  * SCSI disk target driver.
41  */
42 #include <sys/scsi/scsi.h>
43 #include <sys/dkbad.h>
44 #include <sys/dklabel.h>
45 #include <sys/dkio.h>
46 #include <sys/fdio.h>
47 #include <sys/cdio.h>
48 #include <sys/mhd.h>
49 #include <sys/vtoc.h>
50 #include <sys/dktp/fdisk.h>
51 #include <sys/kstat.h>
52 #include <sys/vtrace.h>
53 #include <sys/note.h>
54 #include <sys/thread.h>
55 #include <sys/proc.h>
56 #include <sys/efi_partition.h>
57 #include <sys/var.h>
58 #include <sys/aio_req.h>
59 #include <sys/dkioc_free_util.h>
60 
61 #include <sys/taskq.h>
62 #include <sys/uuid.h>
63 #include <sys/byteorder.h>
64 #include <sys/sdt.h>
65 
66 #include "sd_xbuf.h"
67 
68 #include <sys/scsi/targets/sddef.h>
69 #include <sys/cmlb.h>
70 #include <sys/sysevent/eventdefs.h>
71 #include <sys/sysevent/dev.h>
72 
73 #include <sys/fm/protocol.h>
74 
75 /*
76  * Loadable module info.
77  */
78 #define	SD_MODULE_NAME	"SCSI Disk Driver"
79 
80 /*
81  * Define the interconnect type, to allow the driver to distinguish
82  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
83  *
84  * This is really for backward compatibility. In the future, the driver
85  * should actually check the "interconnect-type" property as reported by
86  * the HBA; however at present this property is not defined by all HBAs,
87  * so we will use this #define (1) to permit the driver to run in
88  * backward-compatibility mode; and (2) to print a notification message
89  * if an FC HBA does not support the "interconnect-type" property.  The
90  * behavior of the driver will be to assume parallel SCSI behaviors unless
91  * the "interconnect-type" property is defined by the HBA **AND** has a
92  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
93  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
94  * Channel behaviors (as per the old ssd).  (Note that the
95  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
96  * will result in the driver assuming parallel SCSI behaviors.)
97  *
98  * (see common/sys/scsi/impl/services.h)
99  */
100 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
101 
/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 */
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";

/*
 * Driver global variables
 */

#ifdef	SDDEBUG
/* Debug knob: force the driver to act as if PM is supported. */
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;	/* soft-state anchor */
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

/* Retry (rather than fail) commands that hit a reservation conflict. */
int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

/* Optical (DTYPE_OPTICAL) binding knob; -1 presumably means "unset". */
static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
/* When non-NULL, presumably limits debug output to this unit — confirm. */
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

/* Shared reservation-reclaim request state (fields per sddef.h). */
static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 * (3000000 presumably microseconds, i.e. 3 seconds — confirm against
 * the scsi_watch usage.)
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;
178 
/*
 * Structs and globals for recording attached lun information.
 * This maintains a chain. Each node in the chain represents a SCSI controller.
 * The structure records the number of luns attached to each target connected
 * with the controller.
 * For parallel scsi device only.
 */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;	/* next controller node */
	dev_info_t			*pdip;	/* controller devinfo node */
	int				nlun[NTARGETS_WIDE]; /* luns/target */
};

/*
 * Flag to indicate the lun is attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

/* Protects the chain below; see the _NOTE annotations. */
static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))
206 
/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel scsi and non-self-identify device only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;	/* next controller node */
	dev_info_t	*pdip;			/* controller devinfo node */
	int		cache[NTARGETS_WIDE];	/* cached probe result/target */
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
235 
/*
 * Power attribute table
 *
 * The first row of each initializer is the pm-components string array
 * naming the power levels exposed to the PM framework; the meaning of
 * the three numeric rows comes from the sd_power_attr_ss/sd_power_attr_pc
 * definitions in sddef.h (not visible here — confirm there).
 */
static sd_power_attr_ss sd_pwr_ss = {
	{ "NAME=spindle-motor", "0=off", "1=on", NULL },
	{0, 100},
	{30, 0},
	{20000, 0}
};

/* Power-condition variant: four levels (stopped/standby/idle/active). */
static sd_power_attr_pc sd_pwr_pc = {
	{ "NAME=spindle-motor", "0=stopped", "1=standby", "2=idle",
		"3=active", NULL },
	{0, 0, 0, 100},
	{90, 90, 20, 0},
	{15000, 15000, 1000, 0}
};

/*
 * Power level to power condition
 * (index = PM power level, value = SD_TARGET_* power condition)
 */
static int sd_pl2pc[] = {
	SD_TARGET_START_VALID,
	SD_TARGET_STANDBY,
	SD_TARGET_IDLE,
	SD_TARGET_ACTIVE
};
263 
/*
 * Vendor specific data name property declarations
 *
 * The sd_tunables initializers below are positional.  Judging from the
 * macro names used in each slot across these tables, the slot order is:
 *   1) throttle            2) ctype               3) not-ready retries
 *   4) busy retries        5) reset retries       6) reserve-release time
 *   7) min throttle        8) disksort disabled   9) lun-reset enabled
 * The authoritative field order is the sd_tunables definition in sddef.h;
 * confirm there before reordering anything.
 */

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0,
	1	/* 10th slot — presumably a flag field in sd_tunables; confirm in sddef.h */
};



#if (defined(SD_PROP_TST))

/* Test-only property values, compiled in only with SD_PROP_TST. */
#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif
437 
/*
 * This is similar to the ANSI toupper implementation.
 * NOTE: the argument is evaluated more than once, so callers must not
 * pass an expression with side effects (e.g. *p++).
 */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
440 
/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or, perhaps
 * something else as defined by the flags at a future time.)  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of  "NEC       CDROM".
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	/* LSI-OEM'ed arrays (IBM/DELL/LSI/ENGENIO/SGI/STK/SUN badging). */
	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1818",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000i",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     STK6580_6780", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SUN     SUN_6180", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	/* Legacy drives that require a fabricated device id. */
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
	/* Blank-delimited CD-ROM entries: see the blank-matching note above. */
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

/* Number of entries in sd_disk_table. */
static const int sd_disk_table_size =
	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
638 
/*
 * Emulation mode disk drive VID/PID table
 * (fixed-width VID/PID strings, presumably matched against inquiry data
 * by sd_check_emulation_mode — confirm there)
 */
static char sd_flash_dev_table[][25] = {
	"ATA     MARVELL SD88SA02",
	"MARVELL SD88SA02",
	"TOSHIBA THNSNV05",
};

/* Number of entries in sd_flash_dev_table. */
static const int sd_flash_dev_table_size =
	sizeof (sd_flash_dev_table) / sizeof (sd_flash_dev_table[0]);
650 
/*
 * Interconnect type codes stored in un_interconnect_type (see also
 * SD_DEFAULT_INTERCONNECT_TYPE above).
 */
#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_INTERCONNECT_SAS		5

/* True when the unit sits on a parallel SCSI interconnect. */
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
/* True for the serial interconnects (SATA or SAS). */
#define	SD_IS_SERIAL(un)		\
	(((un)->un_interconnect_type == SD_INTERCONNECT_SATA) ||\
	((un)->un_interconnect_type == SD_INTERCONNECT_SAS))

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */

/* Serializes sense-data handling; presumably mutex_init'ed in _init. */
static kmutex_t sd_sense_mutex = {0};
672 
673 /*
674  * Macros for updates of the driver state
675  */
676 #define	New_state(un, s)        \
677 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
678 #define	Restore_state(un)	\
679 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
680 
681 static struct sd_cdbinfo sd_cdbtab[] = {
682 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
683 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
684 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
685 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
686 };
687 
688 /*
689  * Specifies the number of seconds that must have elapsed since the last
690  * cmd. has completed for a device to be declared idle to the PM framework.
691  */
692 static int sd_pm_idletime = 1;
693 
694 /*
695  * Internal function prototypes
696  */
697 
/*
 * Layout of the UNMAP parameter list header sent to the device.
 * NOTE(review): multi-byte fields are presumably stored big-endian by the
 * code that builds the list — confirm at the call sites.
 */
typedef struct unmap_param_hdr_s {
	uint16_t	uph_data_len;		/* unmap data length */
	uint16_t	uph_descr_data_len;	/* block descriptor data len */
	uint32_t	uph_reserved;
} unmap_param_hdr_t;

/* One UNMAP block descriptor: a (starting LBA, block count) extent. */
typedef struct unmap_blk_descr_s {
	uint64_t	ubd_lba;	/* starting logical block address */
	uint32_t	ubd_lba_cnt;	/* number of blocks */
	uint32_t	ubd_reserved;
} unmap_blk_descr_t;

/* Max number of block descriptors in UNMAP command */
/* == (UINT16_MAX - 8) / 16 == 4095 with the expected unpadded struct sizes */
#define	SD_UNMAP_MAX_DESCR \
	((UINT16_MAX - sizeof (unmap_param_hdr_t)) / sizeof (unmap_blk_descr_t))
/* Max size of the UNMAP parameter list in bytes */
#define	SD_UNMAP_PARAM_LIST_MAXSZ	(sizeof (unmap_param_hdr_t) + \
	SD_UNMAP_MAX_DESCR * sizeof (unmap_blk_descr_t))
716 
717 int _init(void);
718 int _fini(void);
719 int _info(struct modinfo *modinfop);
720 
721 /*PRINTFLIKE3*/
722 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
723 /*PRINTFLIKE3*/
724 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
725 /*PRINTFLIKE3*/
726 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
727 
728 static int sdprobe(dev_info_t *devi);
729 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
730     void **result);
731 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
732     int mod_flags, char *name, caddr_t valuep, int *lengthp);
733 
734 /*
735  * Smart probe for parallel scsi
736  */
737 static void sd_scsi_probe_cache_init(void);
738 static void sd_scsi_probe_cache_fini(void);
739 static void sd_scsi_clear_probe_cache(void);
740 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
741 
742 /*
743  * Attached luns on target for parallel scsi
744  */
745 static void sd_scsi_target_lun_init(void);
746 static void sd_scsi_target_lun_fini(void);
747 static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
748 static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);
749 
750 static int sd_spin_up_unit(sd_ssc_t *ssc);
751 
/*
 * Use sd_ssc_init to allocate and initialize an sd_ssc_t struct,
 * sd_ssc_send to issue a uscsi internal command, and
 * sd_ssc_fini to free the sd_ssc_t struct.
 */
757 static sd_ssc_t *sd_ssc_init(struct sd_lun *un);
758 static int sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd,
759     int flag, enum uio_seg dataspace, int path_flag);
760 static void sd_ssc_fini(sd_ssc_t *ssc);
761 
/*
 * Use sd_ssc_assessment to set the correct type of assessment, and
 * sd_ssc_post to post an ereport and a system log message:
 *       sd_ssc_post calls sd_ssc_print to emit the system log entry
 *       sd_ssc_post calls sd_ssc_ereport_post to post the ereport
 */
768 static void sd_ssc_assessment(sd_ssc_t *ssc,
769     enum sd_type_assessment tp_assess);
770 
771 static void sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess);
772 static void sd_ssc_print(sd_ssc_t *ssc, int sd_severity);
773 static void sd_ssc_ereport_post(sd_ssc_t *ssc,
774     enum sd_driver_assessment drv_assess);
775 
/*
 * Use sd_ssc_set_info to mark an error whose data cannot be decoded, and
 * sd_ssc_extract_info to transfer information from internal data
 * structures to the sd_ssc_t.
 */
781 static void sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp,
782     const char *fmt, ...);
783 static void sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un,
784     struct scsi_pkt *pktp, struct buf *bp, struct sd_xbuf *xp);
785 
786 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
787     enum uio_seg dataspace, int path_flag);
788 
789 static void	sd_enable_descr_sense(sd_ssc_t *ssc);
790 static void	sd_reenable_dsense_task(void *arg);
791 
792 static void	sd_set_mmc_caps(sd_ssc_t *ssc);
793 
794 static void sd_read_unit_properties(struct sd_lun *un);
795 static int  sd_process_sdconf_file(struct sd_lun *un);
796 static void sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str);
797 static void sd_set_properties(struct sd_lun *un, char *name, char *value);
798 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
799     int *data_list, sd_tunables *values);
800 static void sd_process_sdconf_table(struct sd_lun *un);
801 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
802 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
803 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
804     int list_len, char *dataname_ptr);
805 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
806     sd_tunables *prop_list);
807 
808 static void sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi,
809     int reservation_flag);
810 static int  sd_get_devid(sd_ssc_t *ssc);
811 static ddi_devid_t sd_create_devid(sd_ssc_t *ssc);
812 static int  sd_write_deviceid(sd_ssc_t *ssc);
813 static int  sd_check_vpd_page_support(sd_ssc_t *ssc);
814 
815 static void sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi);
816 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
817 
818 static int  sd_ddi_suspend(dev_info_t *devi);
819 static int  sd_ddi_resume(dev_info_t *devi);
820 static int  sd_pm_state_change(struct sd_lun *un, int level, int flag);
821 static int  sdpower(dev_info_t *devi, int component, int level);
822 
823 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
824 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
825 static int  sd_unit_attach(dev_info_t *devi);
826 static int  sd_unit_detach(dev_info_t *devi);
827 
828 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
829 static void sd_create_errstats(struct sd_lun *un, int instance);
830 static void sd_set_errstats(struct sd_lun *un);
831 static void sd_set_pstats(struct sd_lun *un);
832 
833 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
834 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
835 static int  sd_send_polled_RQS(struct sd_lun *un);
836 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
837 
838 /*
839  * Defines for sd_cache_control
840  */
841 
842 #define	SD_CACHE_ENABLE		1
843 #define	SD_CACHE_DISABLE	0
844 #define	SD_CACHE_NOCHANGE	-1
845 
846 static int   sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag);
847 static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);
848 static void  sd_get_write_cache_changeable(sd_ssc_t *ssc, int *is_changeable);
849 static void  sd_get_nv_sup(sd_ssc_t *ssc);
850 static dev_t sd_make_device(dev_info_t *devi);
851 static void  sd_check_bdc_vpd(sd_ssc_t *ssc);
852 static void  sd_check_emulation_mode(sd_ssc_t *ssc);
853 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
854     uint64_t capacity);
855 
856 /*
857  * Driver entry point functions.
858  */
859 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
860 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
861 static int  sd_ready_and_valid(sd_ssc_t *ssc, int part);
862 
863 static void sdmin(struct buf *bp);
864 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
865 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
866 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
867 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
868 
869 static int sdstrategy(struct buf *bp);
870 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
871 
872 /*
873  * Function prototypes for layering functions in the iostart chain.
874  */
875 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
876     struct buf *bp);
877 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
878     struct buf *bp);
879 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
880 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
881     struct buf *bp);
882 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
883 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
884 
885 /*
886  * Function prototypes for layering functions in the iodone chain.
887  */
888 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
889 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
890 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
891     struct buf *bp);
892 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
893     struct buf *bp);
894 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
895 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
896     struct buf *bp);
897 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
898 
899 /*
900  * Prototypes for functions to support buf(9S) based IO.
901  */
902 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
903 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
904 static void sd_destroypkt_for_buf(struct buf *);
905 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
906     struct buf *bp, int flags,
907     int (*callback)(caddr_t), caddr_t callback_arg,
908     diskaddr_t lba, uint32_t blockcount);
909 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
910     struct buf *bp, diskaddr_t lba, uint32_t blockcount);
911 
912 /*
913  * Prototypes for functions to support USCSI IO.
914  */
915 static int sd_uscsi_strategy(struct buf *bp);
916 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
917 static void sd_destroypkt_for_uscsi(struct buf *);
918 
919 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
920     uchar_t chain_type, void *pktinfop);
921 
922 static int  sd_pm_entry(struct sd_lun *un);
923 static void sd_pm_exit(struct sd_lun *un);
924 
925 static void sd_pm_idletimeout_handler(void *arg);
926 
927 /*
928  * sd_core internal functions (used at the sd_core_io layer).
929  */
930 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
931 static void sdintr(struct scsi_pkt *pktp);
932 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
933 
934 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
935     enum uio_seg dataspace, int path_flag);
936 
937 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
938     daddr_t blkno, int (*func)(struct buf *));
939 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
940     uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
941 static void sd_bioclone_free(struct buf *bp);
942 static void sd_shadow_buf_free(struct buf *bp);
943 
944 static void sd_print_transport_rejected_message(struct sd_lun *un,
945     struct sd_xbuf *xp, int code);
946 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
947     void *arg, int code);
948 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
949     void *arg, int code);
950 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
951     void *arg, int code);
952 
953 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
954     int retry_check_flag,
955     void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int c),
956     void *user_arg, int failure_code,  clock_t retry_delay,
957     void (*statp)(kstat_io_t *));
958 
959 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
960     clock_t retry_delay, void (*statp)(kstat_io_t *));
961 
962 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
963     struct scsi_pkt *pktp);
964 static void sd_start_retry_command(void *arg);
965 static void sd_start_direct_priority_command(void *arg);
966 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
967     int errcode);
968 static void sd_return_failed_command_no_restart(struct sd_lun *un,
969     struct buf *bp, int errcode);
970 static void sd_return_command(struct sd_lun *un, struct buf *bp);
971 static void sd_sync_with_callback(struct sd_lun *un);
972 static int sdrunout(caddr_t arg);
973 
974 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
975 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
976 
977 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
978 static void sd_restore_throttle(void *arg);
979 
980 static void sd_init_cdb_limits(struct sd_lun *un);
981 
982 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
983     struct sd_xbuf *xp, struct scsi_pkt *pktp);
984 
985 /*
986  * Error handling functions
987  */
988 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
989     struct sd_xbuf *xp, struct scsi_pkt *pktp);
990 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
991     struct sd_xbuf *xp, struct scsi_pkt *pktp);
992 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
993     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
994 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
995     struct sd_xbuf *xp, struct scsi_pkt *pktp);
996 
997 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
998     struct sd_xbuf *xp, struct scsi_pkt *pktp);
999 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1000     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1001 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1002     struct sd_xbuf *xp, size_t actual_len);
1003 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1004     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1005 
1006 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1007     void *arg, int code);
1008 
1009 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1010     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1011 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1012     uint8_t *sense_datap,
1013     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1014 static void sd_sense_key_not_ready(struct sd_lun *un,
1015     uint8_t *sense_datap,
1016     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1017 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1018     uint8_t *sense_datap,
1019     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1020 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1021     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1022 static void sd_sense_key_unit_attention(struct sd_lun *un,
1023     uint8_t *sense_datap,
1024     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1025 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1026     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1027 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1028     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1029 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1030     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1031 static void sd_sense_key_default(struct sd_lun *un,
1032     uint8_t *sense_datap,
1033     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1034 
1035 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1036     void *arg, int flag);
1037 
1038 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1039     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1040 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1041     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1042 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1043     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1044 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1045     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1046 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1047     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1048 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1049     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1050 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1051     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1052 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1053     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1054 
1055 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1056 
1057 static void sd_start_stop_unit_callback(void *arg);
1058 static void sd_start_stop_unit_task(void *arg);
1059 
1060 static void sd_taskq_create(void);
1061 static void sd_taskq_delete(void);
1062 static void sd_target_change_task(void *arg);
1063 static void sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag);
1064 static void sd_log_lun_expansion_event(struct sd_lun *un, int km_flag);
1065 static void sd_log_eject_request_event(struct sd_lun *un, int km_flag);
1066 static void sd_media_change_task(void *arg);
1067 
1068 static int sd_handle_mchange(struct sd_lun *un);
1069 static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
1070 static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
1071     uint32_t *lbap, int path_flag);
1072 static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
1073     uint32_t *lbap, uint32_t *psp, int path_flag);
1074 static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag,
1075     int flag, int path_flag);
1076 static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
1077     size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1078 static int sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag);
1079 static int sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc,
1080     uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1081 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc,
1082     uchar_t usr_cmd, uchar_t *usr_bufp);
1083 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1084     struct dk_callback *dkc);
1085 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1086 static int sd_send_scsi_UNMAP(dev_t dev, sd_ssc_t *ssc, dkioc_free_list_t *dfl,
1087     int flag);
1088 static int sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc,
1089     struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1090     uchar_t *bufaddr, uint_t buflen, int path_flag);
1091 static int sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
1092     struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1093     uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1094 static int sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize,
1095     uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1096 static int sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize,
1097     uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1098 static int sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
1099     size_t buflen, daddr_t start_block, int path_flag);
1100 #define	sd_send_scsi_READ(ssc, bufaddr, buflen, start_block, path_flag)	\
1101     sd_send_scsi_RDWR(ssc, SCMD_READ, bufaddr, buflen, start_block, \
1102     path_flag)
1103 #define	sd_send_scsi_WRITE(ssc, bufaddr, buflen, start_block, path_flag)\
1104     sd_send_scsi_RDWR(ssc, SCMD_WRITE, bufaddr, buflen, start_block,\
1105     path_flag)
1106 
1107 static int sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr,
1108     uint16_t buflen, uchar_t page_code, uchar_t page_control,
1109     uint16_t param_ptr, int path_flag);
1110 static int sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc,
1111     uchar_t *bufaddr, size_t buflen, uchar_t class_req);
1112 static boolean_t sd_gesn_media_data_valid(uchar_t *data);
1113 
1114 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1115 static void sd_free_rqs(struct sd_lun *un);
1116 
1117 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1118     uchar_t *data, int len, int fmt);
1119 static void sd_panic_for_res_conflict(struct sd_lun *un);
1120 
1121 /*
1122  * Disk Ioctl Function Prototypes
1123  */
1124 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1125 static int sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag);
1126 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1127 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1128 
1129 /*
1130  * Multi-host Ioctl Prototypes
1131  */
1132 static int sd_check_mhd(dev_t dev, int interval);
1133 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1134 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1135 static char *sd_sname(uchar_t status);
1136 static void sd_mhd_resvd_recover(void *arg);
1137 static void sd_resv_reclaim_thread();
1138 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1139 static int sd_reserve_release(dev_t dev, int cmd);
1140 static void sd_rmv_resv_reclaim_req(dev_t dev);
1141 static void sd_mhd_reset_notify_cb(caddr_t arg);
1142 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1143     mhioc_inkeys_t *usrp, int flag);
1144 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1145     mhioc_inresvs_t *usrp, int flag);
1146 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1147 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1148 static int sd_mhdioc_release(dev_t dev);
1149 static int sd_mhdioc_register_devid(dev_t dev);
1150 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1151 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1152 
1153 /*
1154  * SCSI removable prototypes
1155  */
1156 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1157 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1158 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1159 static int sr_pause_resume(dev_t dev, int mode);
1160 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1161 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1162 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1163 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1164 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1165 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1166 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1167 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1168 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1169 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1170 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1171 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1172 static int sr_eject(dev_t dev);
1173 static void sr_ejected(register struct sd_lun *un);
1174 static int sr_check_wp(dev_t dev);
1175 static opaque_t sd_watch_request_submit(struct sd_lun *un);
1176 static int sd_check_media(dev_t dev, enum dkio_state state);
1177 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1178 static void sd_delayed_cv_broadcast(void *arg);
1179 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1180 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1181 
1182 static int sd_log_page_supported(sd_ssc_t *ssc, int log_page);
1183 
1184 /*
1185  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1186  */
1187 static void sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag);
1188 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1189 static void sd_wm_cache_destructor(void *wm, void *un);
1190 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1191     daddr_t endb, ushort_t typ);
1192 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1193     daddr_t endb);
1194 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1195 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1196 static void sd_read_modify_write_task(void * arg);
1197 static int
1198 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1199     struct buf **bpp);
1200 
1201 
1202 /*
1203  * Function prototypes for failfast support.
1204  */
1205 static void sd_failfast_flushq(struct sd_lun *un);
1206 static int sd_failfast_flushq_callback(struct buf *bp);
1207 
1208 /*
1209  * Function prototypes for partial DMA support
1210  */
1211 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1212 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1213 
1214 
1215 /* Function prototypes for cmlb */
1216 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1217     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1218 
1219 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1220 
1221 /*
1222  * For printing RMW warning message timely
1223  */
1224 static void sd_rmw_msg_print_handler(void *arg);
1225 
1226 /*
1227  * Constants for failfast support:
1228  *
1229  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1230  * failfast processing being performed.
1231  *
1232  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1233  * failfast processing on all bufs with B_FAILFAST set.
1234  */
1235 
1236 #define	SD_FAILFAST_INACTIVE		0
1237 #define	SD_FAILFAST_ACTIVE		1
1238 
1239 /*
1240  * Bitmask to control behavior of buf(9S) flushes when a transition to
1241  * the failfast state occurs. Optional bits include:
1242  *
1243  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1244  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1245  * be flushed.
1246  *
1247  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1248  * driver, in addition to the regular wait queue. This includes the xbuf
1249  * queues. When clear, only the driver's wait queue will be flushed.
1250  */
1251 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1252 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1253 
1254 /*
1255  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1256  * to flush all queues within the driver.
1257  */
/* Flush-behavior bitmask; see SD_FAILFAST_FLUSH_* flag comments above. */
static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1259 
1260 
1261 /*
1262  * SD Testing Fault Injection
1263  */
1264 #ifdef SD_FAULT_INJECTION
1265 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1266 static void sd_faultinjection(struct scsi_pkt *pktp);
1267 static void sd_injection_log(char *buf, struct sd_lun *un);
1268 #endif
1269 
1270 /*
1271  * Device driver ops vector
1272  */
static struct cb_ops sd_cb_ops = {
	sdopen,			/* open */
	sdclose,		/* close */
	sdstrategy,		/* strategy */
	nodev,			/* print: unsupported */
	sddump,			/* dump */
	sdread,			/* read */
	sdwrite,		/* write */
	sdioctl,		/* ioctl */
	nodev,			/* devmap: unsupported */
	nodev,			/* mmap: unsupported */
	nodev,			/* segmap: unsupported */
	nochpoll,		/* poll */
	sd_prop_op,		/* cb_prop_op */
	0,			/* streamtab: not a STREAMS driver */
	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
	CB_REV,			/* cb_rev */
	sdaread,		/* async I/O read entry point */
	sdawrite		/* async I/O write entry point */
};
1293 
struct dev_ops sd_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	sdinfo,			/* info */
	nulldev,		/* identify */
	sdprobe,		/* probe */
	sdattach,		/* attach */
	sddetach,		/* detach */
	nodev,			/* reset: unsupported */
	&sd_cb_ops,		/* driver operations */
	NULL,			/* bus operations: none (target/leaf driver) */
	sdpower,		/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
1308 
1309 /*
1310  * This is the loadable module wrapper.
1311  */
1312 #include <sys/modctl.h>
1313 
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	SD_MODULE_NAME,		/* Module name. */
	&sd_ops			/* driver ops: dev_ops vector defined above */
};
1319 
/* Module linkage: a single driver linkage, NULL-terminated. */
static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};
1323 
/*
 * Target-disk ops vector handed to cmlb (disk label support) so it can
 * perform media I/O and geometry queries through this driver; see the
 * sd_tg_rdwr()/sd_tg_getinfo() prototypes above.
 */
static cmlb_tg_ops_t sd_tgops = {
	TG_DK_OPS_VERSION_1,
	sd_tg_rdwr,
	sd_tg_getinfo
};
1329 
/*
 * Additional sense code (ASC) / qualifier (ASCQ) to message-string
 * mappings for vendor/CD-specific sense data, used when logging sense
 * messages.  The table is terminated by the 0xffff sentinel entry.
 */
static struct scsi_asq_key_strings sd_additional_codes[] = {
	0x81, 0, "Logical Unit is Reserved",
	0x85, 0, "Audio Address Not Valid",
	0xb6, 0, "Media Load Mechanism Failed",
	0xB9, 0, "Audio Play Operation Aborted",
	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
	0x53, 2, "Medium removal prevented",
	0x6f, 0, "Authentication failed during key exchange",
	0x6f, 1, "Key not present",
	0x6f, 2, "Key not established",
	0x6f, 3, "Read without proper authentication",
	0x6f, 4, "Mismatched region to this logical unit",
	0x6f, 5, "Region reset count error",
	0xffff, 0x0, NULL
};
1345 
1346 
1347 /*
1348  * Struct for passing printing information for sense data messages
1349  */
struct sd_sense_info {
	int	ssi_severity;	/* severity level for the printed message */
	int	ssi_pfa_flag;	/* presumably marks predictive-failure */
				/* (PFA) events -- confirm against users */
};
1354 
1355 /*
1356  * Table of function pointers for iostart-side routines. Separate "chains"
1357  * of layered function calls are formed by placing the function pointers
1358  * sequentially in the desired order. Functions are called according to an
1359  * incrementing table index ordering. The last function in each chain must
1360  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1361  * in the sd_iodone_chain[] array.
1362  *
1363  * Note: It may seem more natural to organize both the iostart and iodone
1364  * functions together, into an array of structures (or some similar
1365  * organization) with a common index, rather than two separate arrays which
1366  * must be maintained in synchronization. The purpose of this division is
1367  * to achieve improved performance: individual arrays allows for more
1368  * effective cache line utilization on certain platforms.
1369  */
1370 
1371 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1372 
1373 
/*
 * NB: keep entries in lockstep with sd_iodone_chain[] below and with the
 * SD_CHAIN_*_IOSTART index macros that follow this table.
 */
static sd_chain_t sd_iostart_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 0 */
	sd_pm_iostart,			/* Index: 1 */
	sd_core_iostart,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 3 */
	sd_core_iostart,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets with RMW needed (PM enabled)
	 */
	sd_mapblockaddr_iostart,	/* Index: 5 */
	sd_mapblocksize_iostart,	/* Index: 6 */
	sd_pm_iostart,			/* Index: 7 */
	sd_core_iostart,		/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets with RMW needed (PM disabled)
	 */
	sd_mapblockaddr_iostart,	/* Index: 9 */
	sd_mapblocksize_iostart,	/* Index: 10 */
	sd_core_iostart,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 12 */
	sd_checksum_iostart,		/* Index: 13 */
	sd_pm_iostart,			/* Index: 14 */
	sd_core_iostart,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 16 */
	sd_checksum_iostart,		/* Index: 17 */
	sd_core_iostart,		/* Index: 18 */

	/* Chain for USCSI commands (all targets) */
	sd_pm_iostart,			/* Index: 19 */
	sd_core_iostart,		/* Index: 20 */

	/* Chain for checksumming USCSI commands (all targets) */
	sd_checksum_uscsi_iostart,	/* Index: 21 */
	sd_pm_iostart,			/* Index: 22 */
	sd_core_iostart,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with RMW needed with checksumming (PM enabled)
	 */
	sd_mapblockaddr_iostart,	/* Index: 26 */
	sd_mapblocksize_iostart,	/* Index: 27 */
	sd_checksum_iostart,		/* Index: 28 */
	sd_pm_iostart,			/* Index: 29 */
	sd_core_iostart,		/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with RMW needed with checksumming (PM disabled)
	 */
	sd_mapblockaddr_iostart,	/* Index: 31 */
	sd_mapblocksize_iostart,	/* Index: 32 */
	sd_checksum_iostart,		/* Index: 33 */
	sd_core_iostart,		/* Index: 34 */

};
1448 
1449 /*
1450  * Macros to locate the first function of each iostart chain in the
1451  * sd_iostart_chain[] array. These are located by the index in the array.
1452  */
1453 #define	SD_CHAIN_DISK_IOSTART			0
1454 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1455 #define	SD_CHAIN_MSS_DISK_IOSTART		5
1456 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1457 #define	SD_CHAIN_MSS_DISK_IOSTART_NO_PM		9
1458 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1459 #define	SD_CHAIN_CHKSUM_IOSTART			12
1460 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1461 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1462 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1463 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1464 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1465 #define	SD_CHAIN_MSS_CHKSUM_IOSTART		26
1466 #define	SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM	31
1467 
1468 
1469 /*
1470  * Table of function pointers for the iodone-side routines for the driver-
1471  * internal layering mechanism.  The calling sequence for iodone routines
1472  * uses a decrementing table index, so the last routine called in a chain
1473  * must be at the lowest array index location for that chain.  The last
1474  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1475  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1476  * of the functions in an iodone side chain must correspond to the ordering
1477  * of the iostart routines for that chain.  Note that there is no iodone
1478  * side routine that corresponds to sd_core_iostart(), so there is no
1479  * entry in the table for this.
1480  */
1481 
static sd_chain_t sd_iodone_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_buf_iodone,			/* Index: 0 */
	sd_mapblockaddr_iodone,		/* Index: 1 */
	sd_pm_iodone,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_buf_iodone,			/* Index: 3 */
	sd_mapblockaddr_iodone,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets with RMW needed (PM enabled)
	 */
	sd_buf_iodone,			/* Index: 5 */
	sd_mapblockaddr_iodone,		/* Index: 6 */
	sd_mapblocksize_iodone,		/* Index: 7 */
	sd_pm_iodone,			/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets with RMW needed (PM disabled)
	 */
	sd_buf_iodone,			/* Index: 9 */
	sd_mapblockaddr_iodone,		/* Index: 10 */
	sd_mapblocksize_iodone,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_buf_iodone,			/* Index: 12 */
	sd_mapblockaddr_iodone,		/* Index: 13 */
	sd_checksum_iodone,		/* Index: 14 */
	sd_pm_iodone,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_buf_iodone,			/* Index: 16 */
	sd_mapblockaddr_iodone,		/* Index: 17 */
	sd_checksum_iodone,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_uscsi_iodone,		/* Index: 19 */
	sd_pm_iodone,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_uscsi_iodone,		/* Index: 21 */
	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	sd_buf_iodone,			/* Index: 26 */
	sd_mapblockaddr_iodone,		/* Index: 27 */
	sd_mapblocksize_iodone,		/* Index: 28 */
	sd_checksum_iodone,		/* Index: 29 */
	sd_pm_iodone,			/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	sd_buf_iodone,			/* Index: 31 */
	sd_mapblockaddr_iodone,		/* Index: 32 */
	sd_mapblocksize_iodone,		/* Index: 33 */
	sd_checksum_iodone,		/* Index: 34 */
};
1555 
1556 
1557 /*
1558  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1559  * each iodone-side chain. These are located by the array index, but as the
1560  * iodone side functions are called in a decrementing-index order, the
1561  * highest index number in each chain must be specified (as these correspond
1562  * to the first function in the iodone chain that will be called by the core
1563  * at IO completion time).
1564  */
1565 
1566 #define	SD_CHAIN_DISK_IODONE			2
1567 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1568 #define	SD_CHAIN_RMMEDIA_IODONE			8
1569 #define	SD_CHAIN_MSS_DISK_IODONE		8
1570 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1571 #define	SD_CHAIN_MSS_DISK_IODONE_NO_PM		11
1572 #define	SD_CHAIN_CHKSUM_IODONE			15
1573 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1574 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1575 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1576 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1577 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1578 #define	SD_CHAIN_MSS_CHKSUM_IODONE		30
1579 #define	SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM	34
1580 
1581 
1582 
1583 /*
1584  * Array to map a layering chain index to the appropriate initpkt routine.
1585  * The redundant entries are present so that the index used for accessing
1586  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1587  * with this table as well.
1588  */
1589 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1590 
static sd_initpkt_t	sd_initpkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 0 */
	sd_initpkt_for_buf,		/* Index: 1 */
	sd_initpkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 3 */
	sd_initpkt_for_buf,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	sd_initpkt_for_buf,		/* Index: 5 */
	sd_initpkt_for_buf,		/* Index: 6 */
	sd_initpkt_for_buf,		/* Index: 7 */
	sd_initpkt_for_buf,		/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	sd_initpkt_for_buf,		/* Index: 9 */
	sd_initpkt_for_buf,		/* Index: 10 */
	sd_initpkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 12 */
	sd_initpkt_for_buf,		/* Index: 13 */
	sd_initpkt_for_buf,		/* Index: 14 */
	sd_initpkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 16 */
	sd_initpkt_for_buf,		/* Index: 17 */
	sd_initpkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 19 */
	sd_initpkt_for_uscsi,		/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 21 */
	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	sd_initpkt_for_buf,		/* Index: 26 */
	sd_initpkt_for_buf,		/* Index: 27 */
	sd_initpkt_for_buf,		/* Index: 28 */
	sd_initpkt_for_buf,		/* Index: 29 */
	sd_initpkt_for_buf,		/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	sd_initpkt_for_buf,		/* Index: 31 */
	sd_initpkt_for_buf,		/* Index: 32 */
	sd_initpkt_for_buf,		/* Index: 33 */
	sd_initpkt_for_buf,		/* Index: 34 */
};
1664 
1665 
/*
 * Array to map a layering chain index to the appropriate destroypkt routine.
 * The redundant entries are present so that the index used for accessing
 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
 * with this table as well.
 */
typedef void (*sd_destroypkt_t)(struct buf *);

static sd_destroypkt_t	sd_destroypkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 0 */
	sd_destroypkt_for_buf,		/* Index: 1 */
	sd_destroypkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 3 */
	sd_destroypkt_for_buf,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 5 */
	sd_destroypkt_for_buf,		/* Index: 6 */
	sd_destroypkt_for_buf,		/* Index: 7 */
	sd_destroypkt_for_buf,		/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 9 */
	sd_destroypkt_for_buf,		/* Index: 10 */
	sd_destroypkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 12 */
	sd_destroypkt_for_buf,		/* Index: 13 */
	sd_destroypkt_for_buf,		/* Index: 14 */
	sd_destroypkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 16 */
	sd_destroypkt_for_buf,		/* Index: 17 */
	sd_destroypkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 19 */
	sd_destroypkt_for_uscsi,	/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 21 */
	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 26 */
	sd_destroypkt_for_buf,		/* Index: 27 */
	sd_destroypkt_for_buf,		/* Index: 28 */
	sd_destroypkt_for_buf,		/* Index: 29 */
	sd_destroypkt_for_buf,		/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 31 */
	sd_destroypkt_for_buf,		/* Index: 32 */
	sd_destroypkt_for_buf,		/* Index: 33 */
	sd_destroypkt_for_buf,		/* Index: 34 */
};
1747 
1748 
1749 
/*
 * Array to map a layering chain index to the appropriate chain "type".
 * The chain type indicates a specific property/usage of the chain.
 * The redundant entries are present so that the index used for accessing
 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
 * with this table as well.
 */

#define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
#define	SD_CHAIN_BUFIO			1	/* regular buf IO */
#define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
#define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
#define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
						/* (for error recovery) */

/* Indexed by xb_chain_iostart; see the SD_IS_BUFIO macro below. */
static int sd_chain_type_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 0 */
	SD_CHAIN_BUFIO,			/* Index: 1 */
	SD_CHAIN_BUFIO,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 3 */
	SD_CHAIN_BUFIO,			/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 5 */
	SD_CHAIN_BUFIO,			/* Index: 6 */
	SD_CHAIN_BUFIO,			/* Index: 7 */
	SD_CHAIN_BUFIO,			/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 9 */
	SD_CHAIN_BUFIO,			/* Index: 10 */
	SD_CHAIN_BUFIO,			/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 12 */
	SD_CHAIN_BUFIO,			/* Index: 13 */
	SD_CHAIN_BUFIO,			/* Index: 14 */
	SD_CHAIN_BUFIO,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 16 */
	SD_CHAIN_BUFIO,			/* Index: 17 */
	SD_CHAIN_BUFIO,			/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 19 */
	SD_CHAIN_USCSI,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 21 */
	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	SD_CHAIN_DIRECT,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 26 */
	SD_CHAIN_BUFIO,			/* Index: 27 */
	SD_CHAIN_BUFIO,			/* Index: 28 */
	SD_CHAIN_BUFIO,			/* Index: 29 */
	SD_CHAIN_BUFIO,			/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 31 */
	SD_CHAIN_BUFIO,			/* Index: 32 */
	SD_CHAIN_BUFIO,			/* Index: 33 */
	SD_CHAIN_BUFIO,			/* Index: 34 */
};
1838 
1839 
1840 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
1841 #define	SD_IS_BUFIO(xp)			\
1842 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
1843 
1844 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
1845 #define	SD_IS_DIRECT_PRIORITY(xp)	\
1846 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
1847 
1848 
1849 
1850 /*
1851  * Struct, array, and macros to map a specific chain to the appropriate
1852  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
1853  *
1854  * The sd_chain_index_map[] array is used at attach time to set the various
1855  * un_xxx_chain type members of the sd_lun softstate to the specific layering
1856  * chain to be used with the instance. This allows different instances to use
1857  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
1858  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
1859  * values at sd_xbuf init time, this allows (1) layering chains may be changed
1860  * dynamically & without the use of locking; and (2) a layer may update the
1861  * xb_chain_io[start|done] member in a given xbuf with its current index value,
1862  * to allow for deferred processing of an IO within the same chain from a
1863  * different execution context.
1864  */
1865 
struct sd_chain_index {
	int	sci_iostart_index;	/* starting index, sd_iostart_chain */
	int	sci_iodone_index;	/* starting index, sd_iodone_chain */
};

/* Indexed by the SD_CHAIN_INFO_* values defined below. */
static struct sd_chain_index	sd_chain_index_map[] = {
	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
	{ SD_CHAIN_MSS_CHKSUM_IOSTART,		SD_CHAIN_MSS_CHKSUM_IODONE },
	{ SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM, SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM },

};
1886 
1887 
/*
 * The following are indexes into the sd_chain_index_map[] array.
 *
 * Note that several of these constants intentionally share a value:
 * removable-media and multi-sector-size (MSS) disks use the same
 * chains, and USCSI-with-PM-disabled shares the DIRECT chain.
 */

/* un->un_buf_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_DISK		0
#define	SD_CHAIN_INFO_DISK_NO_PM	1
#define	SD_CHAIN_INFO_RMMEDIA		2
#define	SD_CHAIN_INFO_MSS_DISK		2	/* same chain as RMMEDIA */
#define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
#define	SD_CHAIN_INFO_MSS_DSK_NO_PM	3	/* same chain as RMMEDIA_NO_PM */
#define	SD_CHAIN_INFO_CHKSUM		4
#define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
#define	SD_CHAIN_INFO_MSS_DISK_CHKSUM	10
#define	SD_CHAIN_INFO_MSS_DISK_CHKSUM_NO_PM	11

/* un->un_uscsi_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_USCSI_CMD		6
/* USCSI with PM disabled is the same as DIRECT */
#define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
#define	SD_CHAIN_INFO_USCSI_CHKSUM	7

/* un->un_direct_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_DIRECT_CMD	8

/* un->un_priority_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_PRIORITY_CMD	9

/* size for devid inquiries */
#define	MAX_INQUIRY_SIZE		0xF0
1918 
/*
 * Macros used by functions to pass a given buf(9S) struct along to the
 * next function in the layering chain for further processing.
 *
 * In the following macros, passing more than three arguments to the called
 * routines causes the optimizer for the SPARC compiler to stop doing tail
 * call elimination which results in significant performance degradation.
 */
/* Enter an iostart chain at the given index. */
#define	SD_BEGIN_IOSTART(index, un, bp)	\
	((*(sd_iostart_chain[index]))(index, un, bp))

/* Enter an iodone chain at the given index. */
#define	SD_BEGIN_IODONE(index, un, bp)	\
	((*(sd_iodone_chain[index]))(index, un, bp))

/* iostart chains advance toward higher indexes ... */
#define	SD_NEXT_IOSTART(index, un, bp)				\
	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))

/* ... while iodone chains unwind toward lower indexes. */
#define	SD_NEXT_IODONE(index, un, bp)				\
	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
1938 
1939 /*
1940  *    Function: _init
1941  *
1942  * Description: This is the driver _init(9E) entry point.
1943  *
1944  * Return Code: Returns the value from mod_install(9F) or
1945  *		ddi_soft_state_init(9F) as appropriate.
1946  *
1947  *     Context: Called when driver module loaded.
1948  */
1949 
1950 int
_init(void)1951 _init(void)
1952 {
1953 	int	err;
1954 
1955 	/* establish driver name from module name */
1956 	sd_label = (char *)mod_modname(&modlinkage);
1957 
1958 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
1959 	    SD_MAXUNIT);
1960 	if (err != 0) {
1961 		return (err);
1962 	}
1963 
1964 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
1965 
1966 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
1967 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
1968 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
1969 
1970 	/*
1971 	 * it's ok to init here even for fibre device
1972 	 */
1973 	sd_scsi_probe_cache_init();
1974 
1975 	sd_scsi_target_lun_init();
1976 
1977 	/*
1978 	 * Creating taskq before mod_install ensures that all callers (threads)
1979 	 * that enter the module after a successful mod_install encounter
1980 	 * a valid taskq.
1981 	 */
1982 	sd_taskq_create();
1983 
1984 	err = mod_install(&modlinkage);
1985 	if (err != 0) {
1986 		/* delete taskq if install fails */
1987 		sd_taskq_delete();
1988 
1989 		mutex_destroy(&sd_log_mutex);
1990 
1991 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
1992 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
1993 		cv_destroy(&sd_tr.srq_inprocess_cv);
1994 
1995 		sd_scsi_probe_cache_fini();
1996 
1997 		sd_scsi_target_lun_fini();
1998 
1999 		ddi_soft_state_fini(&sd_state);
2000 
2001 		return (err);
2002 	}
2003 
2004 	return (err);
2005 }
2006 
2007 
2008 /*
2009  *    Function: _fini
2010  *
2011  * Description: This is the driver _fini(9E) entry point.
2012  *
2013  * Return Code: Returns the value from mod_remove(9F)
2014  *
2015  *     Context: Called when driver module is unloaded.
2016  */
2017 
int
_fini(void)
{
	int err;

	/*
	 * mod_remove(9F) fails while the driver is in use; in that case
	 * leave all module-global state intact and report the error.
	 */
	if ((err = mod_remove(&modlinkage)) != 0) {
		return (err);
	}

	/* Tear down in roughly the reverse order of _init(). */
	sd_taskq_delete();

	mutex_destroy(&sd_log_mutex);
	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);

	sd_scsi_probe_cache_fini();

	sd_scsi_target_lun_fini();

	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
	cv_destroy(&sd_tr.srq_inprocess_cv);

	ddi_soft_state_fini(&sd_state);

	return (err);
}
2043 
2044 
2045 /*
2046  *    Function: _info
2047  *
2048  * Description: This is the driver _info(9E) entry point.
2049  *
2050  *   Arguments: modinfop - pointer to the driver modinfo structure
2051  *
2052  * Return Code: Returns the value from mod_info(9F).
2053  *
2054  *     Context: Kernel thread context
2055  */
2056 
2057 int
_info(struct modinfo * modinfop)2058 _info(struct modinfo *modinfop)
2059 {
2060 	return (mod_info(&modlinkage, modinfop));
2061 }
2062 
2063 
2064 /*
2065  * The following routines implement the driver message logging facility.
2066  * They provide component- and level- based debug output filtering.
2067  * Output may also be restricted to messages for a single instance by
2068  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2069  * to NULL, then messages for all instances are printed.
2070  *
2071  * These routines have been cloned from each other due to the language
2072  * constraints of macros and variable argument list processing.
2073  */
2074 
2075 
2076 /*
2077  *    Function: sd_log_err
2078  *
2079  * Description: This routine is called by the SD_ERROR macro for debug
2080  *		logging of error conditions.
2081  *
2082  *   Arguments: comp - driver component being logged
2083  *		dev  - pointer to driver info structure
2084  *		fmt  - error string and format to be logged
2085  */
2086 
static void
sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf does not bound the write; callers
		 * are presumably trusted to stay within sd_log_buf — confirm
		 * the buffer size against the largest format used.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	/* Mirror the message into the fault-injection log when enabled. */
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	if (un->sd_injection_mask & comp) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2123 
2124 
2125 /*
2126  *    Function: sd_log_info
2127  *
2128  * Description: This routine is called by the SD_INFO macro for debug
2129  *		logging of general purpose informational conditions.
2130  *
2131  *   Arguments: comp - driver component being logged
2132  *		dev  - pointer to driver info structure
2133  *		fmt  - info string and format to be logged
2134  */
2135 
static void
sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_INFO) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	/* Mirror the message into the fault-injection log when enabled. */
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2173 
2174 
2175 /*
2176  *    Function: sd_log_trace
2177  *
2178  * Description: This routine is called by the SD_TRACE macro for debug
2179  *		logging of trace conditions (i.e. function entry/exit).
2180  *
2181  *   Arguments: comp - driver component being logged
2182  *		dev  - pointer to driver info structure
2183  *		fmt  - trace string and format to be logged
2184  */
2185 
static void
sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_TRACE) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	/* Mirror the message into the fault-injection log when enabled. */
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2223 
2224 
2225 /*
2226  *    Function: sdprobe
2227  *
2228  * Description: This is the driver probe(9e) entry point function.
2229  *
2230  *   Arguments: devi - opaque device info handle
2231  *
2232  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2233  *              DDI_PROBE_FAILURE: If the probe failed.
2234  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2235  *				   but may be present in the future.
2236  */
2237 
static int
sdprobe(dev_info_t *devi)
{
	struct scsi_device	*devp;
	int			rval;
	int			instance = ddi_get_instance(devi);

	/* Self-identifying devices need no probe. */
	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
		return (DDI_PROBE_DONTCARE);
	}

	devp = ddi_get_driver_private(devi);

	if (devp == NULL) {
		/* Ooops... nexus driver is mis-configured... */
		return (DDI_PROBE_FAILURE);
	}

	/* An existing soft state means this instance is already claimed. */
	if (ddi_get_soft_state(sd_state, instance) != NULL) {
		return (DDI_PROBE_PARTIAL);
	}

	/*
	 * Call the SCSA utility probe routine to see if we actually
	 * have a target at this SCSI nexus.
	 */
	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
	case SCSIPROBE_EXISTS:
		switch (devp->sd_inq->inq_dtype) {
		case DTYPE_DIRECT:
			rval = DDI_PROBE_SUCCESS;
			break;
		case DTYPE_RODIRECT:
			/* CDs etc. Can be removable media */
			rval = DDI_PROBE_SUCCESS;
			break;
		case DTYPE_OPTICAL:
			/*
			 * Rewritable optical driver HP115AA
			 * Can also be removable media
			 */

			/*
			 * Do not attempt to bind to  DTYPE_OPTICAL if
			 * pre solaris 9 sparc sd behavior is required
			 *
			 * If first time through and sd_dtype_optical_bind
			 * has not been set in /etc/system check properties
			 */

			if (sd_dtype_optical_bind  < 0) {
				sd_dtype_optical_bind = ddi_prop_get_int
				    (DDI_DEV_T_ANY, devi, 0,
				    "optical-device-bind", 1);
			}

			if (sd_dtype_optical_bind == 0) {
				rval = DDI_PROBE_FAILURE;
			} else {
				rval = DDI_PROBE_SUCCESS;
			}
			break;

		case DTYPE_NOTPRESENT:
		default:
			rval = DDI_PROBE_FAILURE;
			break;
		}
		break;
	default:
		/* No response from the target; it may appear later. */
		rval = DDI_PROBE_PARTIAL;
		break;
	}

	/*
	 * This routine checks for resource allocation prior to freeing,
	 * so it will take care of the "smart probing" case where a
	 * scsi_probe() may or may not have been issued and will *not*
	 * free previously-freed resources.
	 */
	scsi_unprobe(devp);
	return (rval);
}
2321 
2322 
2323 /*
2324  *    Function: sdinfo
2325  *
2326  * Description: This is the driver getinfo(9e) entry point function.
2327  *		Given the device number, return the devinfo pointer from
2328  *		the scsi_device structure or the instance number
2329  *		associated with the dev_t.
2330  *
2331  *   Arguments: dip     - pointer to device info structure
2332  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2333  *			  DDI_INFO_DEVT2INSTANCE)
2334  *		arg     - driver dev_t
2335  *		resultp - user buffer for request response
2336  *
2337  * Return Code: DDI_SUCCESS
2338  *              DDI_FAILURE
2339  */
2340 /* ARGSUSED */
2341 static int
sdinfo(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)2342 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2343 {
2344 	struct sd_lun	*un;
2345 	dev_t		dev;
2346 	int		instance;
2347 	int		error;
2348 
2349 	switch (infocmd) {
2350 	case DDI_INFO_DEVT2DEVINFO:
2351 		dev = (dev_t)arg;
2352 		instance = SDUNIT(dev);
2353 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2354 			return (DDI_FAILURE);
2355 		}
2356 		*result = (void *) SD_DEVINFO(un);
2357 		error = DDI_SUCCESS;
2358 		break;
2359 	case DDI_INFO_DEVT2INSTANCE:
2360 		dev = (dev_t)arg;
2361 		instance = SDUNIT(dev);
2362 		*result = (void *)(uintptr_t)instance;
2363 		error = DDI_SUCCESS;
2364 		break;
2365 	default:
2366 		error = DDI_FAILURE;
2367 	}
2368 	return (error);
2369 }
2370 
2371 /*
2372  *    Function: sd_prop_op
2373  *
2374  * Description: This is the driver prop_op(9e) entry point function.
2375  *		Return the number of blocks for the partition in question
2376  *		or forward the request to the property facilities.
2377  *
2378  *   Arguments: dev       - device number
2379  *		dip       - pointer to device info structure
2380  *		prop_op   - property operator
2381  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2382  *		name      - pointer to property name
2383  *		valuep    - pointer or address of the user buffer
2384  *		lengthp   - property length
2385  *
2386  * Return Code: DDI_PROP_SUCCESS
2387  *              DDI_PROP_NOT_FOUND
2388  *              DDI_PROP_UNDEFINED
2389  *              DDI_PROP_NO_MEMORY
2390  *              DDI_PROP_BUF_TOO_SMALL
2391  */
2392 
2393 static int
sd_prop_op(dev_t dev,dev_info_t * dip,ddi_prop_op_t prop_op,int mod_flags,char * name,caddr_t valuep,int * lengthp)2394 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2395     char *name, caddr_t valuep, int *lengthp)
2396 {
2397 	struct sd_lun	*un;
2398 
2399 	if ((un = ddi_get_soft_state(sd_state, ddi_get_instance(dip))) == NULL)
2400 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2401 		    name, valuep, lengthp));
2402 
2403 	return (cmlb_prop_op(un->un_cmlbhandle,
2404 	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
2405 	    SDPART(dev), (void *)SD_PATH_DIRECT));
2406 }
2407 
2408 /*
2409  * The following functions are for smart probing:
2410  * sd_scsi_probe_cache_init()
2411  * sd_scsi_probe_cache_fini()
2412  * sd_scsi_clear_probe_cache()
2413  * sd_scsi_probe_with_cache()
2414  */
2415 
2416 /*
2417  *    Function: sd_scsi_probe_cache_init
2418  *
2419  * Description: Initializes the probe response cache mutex and head pointer.
2420  *
2421  *     Context: Kernel thread context
2422  */
2423 
static void
sd_scsi_probe_cache_init(void)
{
	/* Set up the (initially empty) smart-probe response cache. */
	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
	sd_scsi_probe_cache_head = NULL;
}
2430 
2431 
2432 /*
2433  *    Function: sd_scsi_probe_cache_fini
2434  *
2435  * Description: Frees all resources associated with the probe response cache.
2436  *
2437  *     Context: Kernel thread context
2438  */
2439 
2440 static void
sd_scsi_probe_cache_fini(void)2441 sd_scsi_probe_cache_fini(void)
2442 {
2443 	struct sd_scsi_probe_cache *cp;
2444 	struct sd_scsi_probe_cache *ncp;
2445 
2446 	/* Clean up our smart probing linked list */
2447 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2448 		ncp = cp->next;
2449 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2450 	}
2451 	sd_scsi_probe_cache_head = NULL;
2452 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2453 }
2454 
2455 
2456 /*
2457  *    Function: sd_scsi_clear_probe_cache
2458  *
2459  * Description: This routine clears the probe response cache. This is
2460  *		done when open() returns ENXIO so that when deferred
2461  *		attach is attempted (possibly after a device has been
2462  *		turned on) we will retry the probe. Since we don't know
2463  *		which target we failed to open, we just clear the
2464  *		entire cache.
2465  *
2466  *     Context: Kernel thread context
2467  */
2468 
2469 static void
sd_scsi_clear_probe_cache(void)2470 sd_scsi_clear_probe_cache(void)
2471 {
2472 	struct sd_scsi_probe_cache	*cp;
2473 	int				i;
2474 
2475 	mutex_enter(&sd_scsi_probe_cache_mutex);
2476 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2477 		/*
2478 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2479 		 * force probing to be performed the next time
2480 		 * sd_scsi_probe_with_cache is called.
2481 		 */
2482 		for (i = 0; i < NTARGETS_WIDE; i++) {
2483 			cp->cache[i] = SCSIPROBE_EXISTS;
2484 		}
2485 	}
2486 	mutex_exit(&sd_scsi_probe_cache_mutex);
2487 }
2488 
2489 
2490 /*
2491  *    Function: sd_scsi_probe_with_cache
2492  *
2493  * Description: This routine implements support for a scsi device probe
2494  *		with cache. The driver maintains a cache of the target
2495  *		responses to scsi probes. If we get no response from a
2496  *		target during a probe inquiry, we remember that, and we
2497  *		avoid additional calls to scsi_probe on non-zero LUNs
2498  *		on the same target until the cache is cleared. By doing
2499  *		so we avoid the 1/4 sec selection timeout for nonzero
2500  *		LUNs. lun0 of a target is always probed.
2501  *
2502  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2503  *              waitfunc - indicates what the allocator routines should
2504  *			   do when resources are not available. This value
2505  *			   is passed on to scsi_probe() when that routine
2506  *			   is called.
2507  *
2508  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2509  *		otherwise the value returned by scsi_probe(9F).
2510  *
2511  *     Context: Kernel thread context
2512  */
2513 
static int
sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
{
	struct sd_scsi_probe_cache	*cp;
	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
	int		lun, tgt;

	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_LUN, 0);
	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_TARGET, -1);

	/* Make sure caching enabled and target in range */
	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
		/* do it the old way (no cache) */
		return (scsi_probe(devp, waitfn));
	}

	mutex_enter(&sd_scsi_probe_cache_mutex);

	/* Find the cache for this scsi bus instance */
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			break;
		}
	}

	/* If we can't find a cache for this pdip, create one */
	if (cp == NULL) {
		int i;

		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
		    KM_SLEEP);
		cp->pdip = pdip;
		cp->next = sd_scsi_probe_cache_head;
		sd_scsi_probe_cache_head = cp;
		for (i = 0; i < NTARGETS_WIDE; i++) {
			cp->cache[i] = SCSIPROBE_EXISTS;
		}
	}

	/*
	 * NOTE(review): cp->cache[tgt] is read and written below after the
	 * mutex is dropped; presumably concurrent probes of the same target
	 * are benign (worst case a redundant scsi_probe) — confirm.
	 */
	mutex_exit(&sd_scsi_probe_cache_mutex);

	/* Recompute the cache for this target if LUN zero */
	if (lun == 0) {
		cp->cache[tgt] = SCSIPROBE_EXISTS;
	}

	/* Don't probe if cache remembers a NORESP from a previous LUN. */
	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
		return (SCSIPROBE_NORESP);
	}

	/* Do the actual probe; save & return the result */
	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
}
2570 
2571 
2572 /*
2573  *    Function: sd_scsi_target_lun_init
2574  *
2575  * Description: Initializes the attached lun chain mutex and head pointer.
2576  *
2577  *     Context: Kernel thread context
2578  */
2579 
static void
sd_scsi_target_lun_init(void)
{
	/* Set up the (initially empty) attached-lun list. */
	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
	sd_scsi_target_lun_head = NULL;
}
2586 
2587 
2588 /*
2589  *    Function: sd_scsi_target_lun_fini
2590  *
2591  * Description: Frees all resources associated with the attached lun
2592  *              chain
2593  *
2594  *     Context: Kernel thread context
2595  */
2596 
2597 static void
sd_scsi_target_lun_fini(void)2598 sd_scsi_target_lun_fini(void)
2599 {
2600 	struct sd_scsi_hba_tgt_lun	*cp;
2601 	struct sd_scsi_hba_tgt_lun	*ncp;
2602 
2603 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2604 		ncp = cp->next;
2605 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2606 	}
2607 	sd_scsi_target_lun_head = NULL;
2608 	mutex_destroy(&sd_scsi_target_lun_mutex);
2609 }
2610 
2611 
2612 /*
2613  *    Function: sd_scsi_get_target_lun_count
2614  *
2615  * Description: This routine will check in the attached lun chain to see
2616  *		how many luns are attached on the required SCSI controller
2617  *		and target. Currently, some capabilities like tagged queue
2618  *		are supported per target based by HBA. So all luns in a
2619  *		target have the same capabilities. Based on this assumption,
2620  *		sd should only set these capabilities once per target. This
2621  *		function is called when sd needs to decide how many luns
2622  *		already attached on a target.
2623  *
2624  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2625  *			  controller device.
2626  *              target	- The target ID on the controller's SCSI bus.
2627  *
2628  * Return Code: The number of luns attached on the required target and
2629  *		controller.
2630  *		-1 if target ID is not in parallel SCSI scope or the given
2631  *		dip is not in the chain.
2632  *
2633  *     Context: Kernel thread context
2634  */
2635 
2636 static int
sd_scsi_get_target_lun_count(dev_info_t * dip,int target)2637 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2638 {
2639 	struct sd_scsi_hba_tgt_lun	*cp;
2640 
2641 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2642 		return (-1);
2643 	}
2644 
2645 	mutex_enter(&sd_scsi_target_lun_mutex);
2646 
2647 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2648 		if (cp->pdip == dip) {
2649 			break;
2650 		}
2651 	}
2652 
2653 	mutex_exit(&sd_scsi_target_lun_mutex);
2654 
2655 	if (cp == NULL) {
2656 		return (-1);
2657 	}
2658 
2659 	return (cp->nlun[target]);
2660 }
2661 
2662 
2663 /*
2664  *    Function: sd_scsi_update_lun_on_target
2665  *
2666  * Description: This routine is used to update the attached lun chain when a
2667  *		lun is attached or detached on a target.
2668  *
2669  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2670  *                        controller device.
2671  *              target  - The target ID on the controller's SCSI bus.
2672  *		flag	- Indicate the lun is attached or detached.
2673  *
2674  *     Context: Kernel thread context
2675  */
2676 
2677 static void
sd_scsi_update_lun_on_target(dev_info_t * dip,int target,int flag)2678 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2679 {
2680 	struct sd_scsi_hba_tgt_lun	*cp;
2681 
2682 	mutex_enter(&sd_scsi_target_lun_mutex);
2683 
2684 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2685 		if (cp->pdip == dip) {
2686 			break;
2687 		}
2688 	}
2689 
2690 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2691 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2692 		    KM_SLEEP);
2693 		cp->pdip = dip;
2694 		cp->next = sd_scsi_target_lun_head;
2695 		sd_scsi_target_lun_head = cp;
2696 	}
2697 
2698 	mutex_exit(&sd_scsi_target_lun_mutex);
2699 
2700 	if (cp != NULL) {
2701 		if (flag == SD_SCSI_LUN_ATTACH) {
2702 			cp->nlun[target] ++;
2703 		} else {
2704 			cp->nlun[target] --;
2705 		}
2706 	}
2707 }
2708 
2709 
2710 /*
2711  *    Function: sd_spin_up_unit
2712  *
2713  * Description: Issues the following commands to spin-up the device:
2714  *		START STOP UNIT, and INQUIRY.
2715  *
2716  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
2717  *                      structure for this target.
2718  *
2719  * Return Code: 0 - success
2720  *		EIO - failure
2721  *		EACCES - reservation conflict
2722  *
2723  *     Context: Kernel thread context
2724  */
2725 
2726 static int
sd_spin_up_unit(sd_ssc_t * ssc)2727 sd_spin_up_unit(sd_ssc_t *ssc)
2728 {
2729 	size_t	resid		= 0;
2730 	int	has_conflict	= FALSE;
2731 	uchar_t *bufaddr;
2732 	int	status;
2733 	struct sd_lun	*un;
2734 
2735 	ASSERT(ssc != NULL);
2736 	un = ssc->ssc_un;
2737 	ASSERT(un != NULL);
2738 
2739 	/*
2740 	 * Send a throwaway START UNIT command.
2741 	 *
2742 	 * If we fail on this, we don't care presently what precisely
2743 	 * is wrong.  EMC's arrays will also fail this with a check
2744 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2745 	 * we don't want to fail the attach because it may become
2746 	 * "active" later.
2747 	 * We don't know if power condition is supported or not at
2748 	 * this stage, use START STOP bit.
2749 	 */
2750 	status = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
2751 	    SD_TARGET_START, SD_PATH_DIRECT);
2752 
2753 	if (status != 0) {
2754 		if (status == EACCES)
2755 			has_conflict = TRUE;
2756 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
2757 	}
2758 
2759 	/*
2760 	 * Send another INQUIRY command to the target. This is necessary for
2761 	 * non-removable media direct access devices because their INQUIRY data
2762 	 * may not be fully qualified until they are spun up (perhaps via the
2763 	 * START command above).  Note: This seems to be needed for some
2764 	 * legacy devices only.) The INQUIRY command should succeed even if a
2765 	 * Reservation Conflict is present.
2766 	 */
2767 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2768 
2769 	if (sd_send_scsi_INQUIRY(ssc, bufaddr, SUN_INQSIZE, 0, 0, &resid)
2770 	    != 0) {
2771 		kmem_free(bufaddr, SUN_INQSIZE);
2772 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
2773 		return (EIO);
2774 	}
2775 
2776 	/*
2777 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2778 	 * Note that this routine does not return a failure here even if the
2779 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2780 	 */
2781 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2782 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2783 	}
2784 
2785 	kmem_free(bufaddr, SUN_INQSIZE);
2786 
2787 	/* If we hit a reservation conflict above, tell the caller. */
2788 	if (has_conflict == TRUE) {
2789 		return (EACCES);
2790 	}
2791 
2792 	return (0);
2793 }
2794 
2795 /*
2796  *    Function: sd_enable_descr_sense
2797  *
2798  * Description: This routine attempts to select descriptor sense format
2799  *		using the Control mode page.  Devices that support 64 bit
2800  *		LBAs (for >2TB luns) should also implement descriptor
2801  *		sense data so we will call this function whenever we see
2802  *		a lun larger than 2TB.  If for some reason the device
2803  *		supports 64 bit LBAs but doesn't support descriptor sense
2804  *		presumably the mode select will fail.  Everything will
2805  *		continue to work normally except that we will not get
2806  *		complete sense data for commands that fail with an LBA
2807  *		larger than 32 bits.
2808  *
2809  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
2810  *                      structure for this target.
2811  *
2812  *     Context: Kernel thread context only
2813  */
2814 
static void
sd_enable_descr_sense(sd_ssc_t *ssc)
{
	uchar_t			*header;
	struct mode_control_scsi3 *ctrl_bufp;
	size_t			buflen;
	size_t			bd_len;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * Read MODE SENSE page 0xA, Control Mode Page.
	 * Allocate room for the header, one block descriptor, and the
	 * control mode page itself.
	 */
	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
	    sizeof (struct mode_control_scsi3);
	header = kmem_zalloc(buflen, KM_SLEEP);

	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT);

	if (status != 0) {
		/* Cannot read the control page; give up quietly. */
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
		goto eds_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	bd_len  = ((struct mode_header *)header)->bdesc_length;

	/* Clear the mode data length field for MODE SELECT */
	((struct mode_header *)header)->length = 0;

	/* The control page follows the header and block descriptors. */
	ctrl_bufp = (struct mode_control_scsi3 *)
	    (header + MODE_HEADER_LENGTH + bd_len);

	/*
	 * If the page length is smaller than the expected value,
	 * the target device doesn't support D_SENSE. Bail out here.
	 */
	if (ctrl_bufp->mode_page.length <
	    sizeof (struct mode_control_scsi3) - 2) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: enable D_SENSE failed\n");
		goto eds_exit;
	}

	/*
	 * Clear PS bit for MODE SELECT
	 */
	ctrl_bufp->mode_page.ps = 0;

	/*
	 * Set D_SENSE to enable descriptor sense format.
	 */
	ctrl_bufp->d_sense = 1;

	/* Discard any FMA assessment from the MODE SENSE above. */
	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	/*
	 * Use MODE SELECT to commit the change to the D_SENSE bit
	 */
	status = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT);

	if (status != 0) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode select ctrl page failed\n");
	} else {
		/* Success: D_SENSE is now enabled on the device. */
		kmem_free(header, buflen);
		return;
	}

	/* Failure path: ignore the assessment and free the buffer. */
eds_exit:
	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	kmem_free(header, buflen);
}
2899 
2900 /*
2901  *    Function: sd_reenable_dsense_task
2902  *
2903  * Description: Re-enable descriptor sense after device or bus reset
2904  *
2905  *     Context: Executes in a taskq() thread context
2906  */
2907 static void
sd_reenable_dsense_task(void * arg)2908 sd_reenable_dsense_task(void *arg)
2909 {
2910 	struct	sd_lun	*un = arg;
2911 	sd_ssc_t	*ssc;
2912 
2913 	ASSERT(un != NULL);
2914 
2915 	ssc = sd_ssc_init(un);
2916 	sd_enable_descr_sense(ssc);
2917 	sd_ssc_fini(ssc);
2918 }
2919 
2920 /*
2921  *    Function: sd_set_mmc_caps
2922  *
2923  * Description: This routine determines if the device is MMC compliant and if
2924  *		the device supports CDDA via a mode sense of the CDVD
2925  *		capabilities mode page. Also checks if the device is a
2926  *		dvdram writable device.
2927  *
2928  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
2929  *                      structure for this target.
2930  *
2931  *     Context: Kernel thread context only
2932  */
2933 
static void
sd_set_mmc_caps(sd_ssc_t *ssc)
{
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct uscsi_cmd		com;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	uchar_t				*out_data_gesn;
	int				gesn_len;
	struct sd_lun			*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * The flags which will be set in this function are - mmc compliant,
	 * dvdram writable device, cdda support. Initialize them to FALSE
	 * and if a capability is detected - it will be set to TRUE.
	 */
	un->un_f_mmc_cap = FALSE;
	un->un_f_dvdram_writable_device = FALSE;
	un->un_f_cfg_cdda = FALSE;

	/* Request the CDROM CAPABILITIES mode page (0x2A). */
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	/*
	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds the device is assumed to be MMC.
	 */
	un->un_f_mmc_cap = TRUE;

	/* See if GET STATUS EVENT NOTIFICATION is supported */
	if (un->un_f_mmc_gesn_polling) {
		gesn_len = SD_GESN_HEADER_LEN + SD_GESN_MEDIA_DATA_LEN;
		out_data_gesn = kmem_zalloc(gesn_len, KM_SLEEP);

		rtn = sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(ssc,
		    out_data_gesn, gesn_len, 1 << SD_GESN_MEDIA_CLASS);

		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		/* Disable GESN polling if the reply is absent or invalid. */
		if ((rtn != 0) || !sd_gesn_media_data_valid(out_data_gesn)) {
			un->un_f_mmc_gesn_polling = FALSE;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_mmc_caps: gesn not supported "
			    "%d %x %x %x %x\n", rtn,
			    out_data_gesn[0], out_data_gesn[1],
			    out_data_gesn[2], out_data_gesn[3]);
		}

		kmem_free(out_data_gesn, gesn_len);
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) |
	    sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor
		 * length so we cannot determine if the device supports
		 * CDDA. However, we still indicate the device is MMC
		 * according to the successful response to the page
		 * 0x2A mode sense request.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_set_mmc_caps: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* See if read CDDA is supported */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
	    bd_len);
	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;

	/* See if writing DVD RAM is supported. */
	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
	if (un->un_f_dvdram_writable_device == TRUE) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD will not have
	 * these capabilities.
	 */
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  A RRD
	 * device is identified by the features RANDOM_WRITABLE and
	 * HARDWARE_DEFECT_MANAGEMENT.
	 */
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE, SD_PATH_STANDARD);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (rtn != 0) {
		/* Cannot query RANDOM_WRITABLE; not an RRD. */
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
			un->un_f_dvdram_writable_device = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3090 
3091 /*
3092  *    Function: sd_check_for_writable_cd
3093  *
3094  * Description: This routine determines if the media in the device is
3095  *		writable or not. It uses the get configuration command (0x46)
3096  *		to determine if the media is writable
3097  *
3098  *   Arguments: un - driver soft state (unit) structure
3099  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3100  *                           chain and the normal command waitq, or
3101  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3102  *                           "direct" chain and bypass the normal command
3103  *                           waitq.
3104  *
3105  *     Context: Never called at interrupt context.
3106  */
3107 
static void
sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag)
{
	struct uscsi_cmd		com;
	uchar_t				*out_data;
	uchar_t				*rqbuf;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct sd_lun			*un;

	/* Caller must hold SD_MUTEX; it is dropped around SCSI commands. */
	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Initialize the writable media to false, if configuration info.
	 * tells us otherwise then only we will set it.
	 */
	un->un_f_mmc_writable_media = FALSE;
	mutex_exit(SD_MUTEX(un));

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	/* Ask the device for its current profile (GET CONFIGURATION). */
	rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, SENSE_LENGTH,
	    out_data, SD_PROFILE_HEADER_LEN, path_flag);

	if (rtn != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for writable DVD.
		 */
		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
			un->un_f_mmc_writable_media = TRUE;
			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
			kmem_free(rqbuf, SENSE_LENGTH);
			return;
		}
	}

	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);

	/*
	 * Determine if this is a RRD type device.
	 */
	mutex_exit(SD_MUTEX(un));
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	mutex_enter(SD_MUTEX(un));
	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor length so
		 * we cannot check the mode page.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_check_for_writable_cd: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD device will not have
	 * these capabilities.
	 */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  A RRD device is identified
	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
	 */
	mutex_exit(SD_MUTEX(un));
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	if (rtn != 0) {
		/* Re-acquire SD_MUTEX before returning, per our contract. */
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		mutex_enter(SD_MUTEX(un));
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features as current.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_rw[10] & 0x1) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
		    (out_data_hd[10] & 0x1)) {
			un->un_f_mmc_writable_media = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3253 
3254 /*
3255  *    Function: sd_read_unit_properties
3256  *
3257  * Description: The following implements a property lookup mechanism.
3258  *		Properties for particular disks (keyed on vendor, model
3259  *		and rev numbers) are sought in the sd.conf file via
3260  *		sd_process_sdconf_file(), and if not found there, are
3261  *		looked for in a list hardcoded in this driver via
3262  *		sd_process_sdconf_table() Once located the properties
3263  *		are used to update the driver unit structure.
3264  *
3265  *   Arguments: un - driver soft state (unit) structure
3266  */
3267 
3268 static void
sd_read_unit_properties(struct sd_lun * un)3269 sd_read_unit_properties(struct sd_lun *un)
3270 {
3271 	/*
3272 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3273 	 * the "sd-config-list" property (from the sd.conf file) or if
3274 	 * there was not a match for the inquiry vid/pid. If this event
3275 	 * occurs the static driver configuration table is searched for
3276 	 * a match.
3277 	 */
3278 	ASSERT(un != NULL);
3279 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3280 		sd_process_sdconf_table(un);
3281 	}
3282 }
3283 
3284 
3285 /*
3286  *    Function: sd_process_sdconf_file
3287  *
3288  * Description: Use ddi_prop_lookup(9F) to obtain the properties from the
3289  *		driver's config file (ie, sd.conf) and update the driver
3290  *		soft state structure accordingly.
3291  *
3292  *   Arguments: un - driver soft state (unit) structure
3293  *
3294  * Return Code: SD_SUCCESS - The properties were successfully set according
3295  *			     to the driver configuration file.
3296  *		SD_FAILURE - The driver config list was not obtained or
3297  *			     there was no vid/pid match. This indicates that
3298  *			     the static config table should be used.
3299  *
3300  * The config file has a property, "sd-config-list". Currently we support
3301  * two kinds of formats. For both formats, the value of this property
3302  * is a list of duplets:
3303  *
3304  *  sd-config-list=
3305  *	<duplet>,
3306  *	[,<duplet>]*;
3307  *
3308  * For the improved format, where
3309  *
3310  *     <duplet>:= "<vid+pid>","<tunable-list>"
3311  *
3312  * and
3313  *
3314  *     <tunable-list>:=   <tunable> [, <tunable> ]*;
3315  *     <tunable> =        <name> : <value>
3316  *
3317  * The <vid+pid> is the string that is returned by the target device on a
3318  * SCSI inquiry command, the <tunable-list> contains one or more tunables
3319  * to apply to all target devices with the specified <vid+pid>.
3320  *
3321  * Each <tunable> is a "<name> : <value>" pair.
3322  *
3323  * For the old format, the structure of each duplet is as follows:
3324  *
3325  *  <duplet>:= "<vid+pid>","<data-property-name_list>"
3326  *
3327  * The first entry of the duplet is the device ID string (the concatenated
3328  * vid & pid; not to be confused with a device_id).  This is defined in
3329  * the same way as in the sd_disk_table.
3330  *
3331  * The second part of the duplet is a string that identifies a
3332  * data-property-name-list. The data-property-name-list is defined as
3333  * follows:
3334  *
3335  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3336  *
3337  * The syntax of <data-property-name> depends on the <version> field.
3338  *
3339  * If version = SD_CONF_VERSION_1 we have the following syntax:
3340  *
3341  *	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3342  *
3343  * where the prop0 value will be used to set prop0 if bit0 set in the
3344  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3345  *
3346  */
3347 
static int
sd_process_sdconf_file(struct sd_lun *un)
{
	char	**config_list = NULL;
	uint_t	nelements;
	char	*vidptr;
	int	vidlen;
	char	*dnlist_ptr;
	char	*dataname_ptr;
	char	*dataname_lasts;
	int	*data_list = NULL;
	uint_t	data_list_len;
	int	rval = SD_FAILURE;
	int	i;

	ASSERT(un != NULL);

	/* Obtain the configuration list associated with the .conf file */
	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, SD_DEVINFO(un),
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, sd_config_list,
	    &config_list, &nelements) != DDI_PROP_SUCCESS) {
		return (SD_FAILURE);
	}

	/*
	 * Compare vids in each duplet to the inquiry vid - if a match is
	 * made, get the data value and update the soft state structure
	 * accordingly.
	 *
	 * Each duplet should show as a pair of strings, return SD_FAILURE
	 * otherwise.
	 */
	if (nelements & 1) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd-config-list should show as pairs of strings.\n");
		if (config_list)
			ddi_prop_free(config_list);
		return (SD_FAILURE);
	}

	/* Walk the list two strings at a time: <vid+pid>, <value>. */
	for (i = 0; i < nelements; i += 2) {
		/*
		 * Note: The assumption here is that each vid entry is on
		 * a unique line from its associated duplet.
		 */
		vidptr = config_list[i];
		vidlen = (int)strlen(vidptr);
		if (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS) {
			continue;
		}

		/*
		 * dnlist contains 1 or more blank separated
		 * data-property-name entries
		 */
		dnlist_ptr = config_list[i + 1];

		if (strchr(dnlist_ptr, ':') != NULL) {
			/*
			 * Decode the improved format sd-config-list.
			 * A ':' marks the "<name> : <value>" tunable style.
			 */
			sd_nvpair_str_decode(un, dnlist_ptr);
		} else {
			/*
			 * The old format sd-config-list, loop through all
			 * data-property-name entries in the
			 * data-property-name-list
			 * setting the properties for each.
			 */
			for (dataname_ptr = strtok_r(dnlist_ptr, " \t",
			    &dataname_lasts); dataname_ptr != NULL;
			    dataname_ptr = strtok_r(NULL, " \t",
			    &dataname_lasts)) {
				int version;

				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_process_sdconf_file: disk:%s, "
				    "data:%s\n", vidptr, dataname_ptr);

				/* Get the data list */
				if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY,
				    SD_DEVINFO(un), 0, dataname_ptr, &data_list,
				    &data_list_len) != DDI_PROP_SUCCESS) {
					SD_INFO(SD_LOG_ATTACH_DETACH, un,
					    "sd_process_sdconf_file: data "
					    "property (%s) has no value\n",
					    dataname_ptr);
					continue;
				}

				/* data_list[0] carries the format version. */
				version = data_list[0];

				if (version == SD_CONF_VERSION_1) {
					sd_tunables values;

					/* Set the properties */
					if (sd_chk_vers1_data(un, data_list[1],
					    &data_list[2], data_list_len,
					    dataname_ptr) == SD_SUCCESS) {
						sd_get_tunables_from_conf(un,
						    data_list[1], &data_list[2],
						    &values);
						sd_set_vers1_properties(un,
						    data_list[1], &values);
						rval = SD_SUCCESS;
					} else {
						rval = SD_FAILURE;
					}
				} else {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "data property %s version "
					    "0x%x is invalid.",
					    dataname_ptr, version);
					rval = SD_FAILURE;
				}
				/* Release the int array from the lookup. */
				ddi_prop_free(data_list);
			}
		}
	}

	/* free up the memory allocated by ddi_prop_lookup_string_array(). */
	if (config_list) {
		ddi_prop_free(config_list);
	}

	return (rval);
}
3475 
3476 /*
3477  *    Function: sd_nvpair_str_decode()
3478  *
3479  * Description: Parse the improved format sd-config-list to get
3480  *    each entry of tunable, which includes a name-value pair.
3481  *    Then call sd_set_properties() to set the property.
3482  *
3483  *   Arguments: un - driver soft state (unit) structure
3484  *    nvpair_str - the tunable list
3485  */
3486 static void
sd_nvpair_str_decode(struct sd_lun * un,char * nvpair_str)3487 sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str)
3488 {
3489 	char	*nv, *name, *value, *token;
3490 	char	*nv_lasts, *v_lasts, *x_lasts;
3491 
3492 	for (nv = strtok_r(nvpair_str, ",", &nv_lasts); nv != NULL;
3493 	    nv = strtok_r(NULL, ",", &nv_lasts)) {
3494 		token = strtok_r(nv, ":", &v_lasts);
3495 		name  = strtok_r(token, " \t", &x_lasts);
3496 		token = strtok_r(NULL, ":", &v_lasts);
3497 		value = strtok_r(token, " \t", &x_lasts);
3498 		if (name == NULL || value == NULL) {
3499 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3500 			    "sd_nvpair_str_decode: "
3501 			    "name or value is not valid!\n");
3502 		} else {
3503 			sd_set_properties(un, name, value);
3504 		}
3505 	}
3506 }
3507 
3508 /*
3509  *    Function: sd_set_properties()
3510  *
3511  * Description: Set device properties based on the improved
3512  *    format sd-config-list.
3513  *
3514  *   Arguments: un - driver soft state (unit) structure
3515  *    name  - supported tunable name
3516  *    value - tunable value
3517  */
3518 static void
sd_set_properties(struct sd_lun * un,char * name,char * value)3519 sd_set_properties(struct sd_lun *un, char *name, char *value)
3520 {
3521 	char	*endptr = NULL;
3522 	long	val = 0;
3523 
3524 	if (strcasecmp(name, "cache-nonvolatile") == 0) {
3525 		if (strcasecmp(value, "true") == 0) {
3526 			un->un_f_suppress_cache_flush = TRUE;
3527 		} else if (strcasecmp(value, "false") == 0) {
3528 			un->un_f_suppress_cache_flush = FALSE;
3529 		} else {
3530 			goto value_invalid;
3531 		}
3532 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3533 		    "suppress_cache_flush flag set to %d\n",
3534 		    un->un_f_suppress_cache_flush);
3535 		return;
3536 	}
3537 
3538 	if (strcasecmp(name, "controller-type") == 0) {
3539 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3540 			un->un_ctype = val;
3541 		} else {
3542 			goto value_invalid;
3543 		}
3544 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3545 		    "ctype set to %d\n", un->un_ctype);
3546 		return;
3547 	}
3548 
3549 	if (strcasecmp(name, "delay-busy") == 0) {
3550 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3551 			un->un_busy_timeout = drv_usectohz(val / 1000);
3552 		} else {
3553 			goto value_invalid;
3554 		}
3555 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3556 		    "busy_timeout set to %d\n", un->un_busy_timeout);
3557 		return;
3558 	}
3559 
3560 	if (strcasecmp(name, "disksort") == 0) {
3561 		if (strcasecmp(value, "true") == 0) {
3562 			un->un_f_disksort_disabled = FALSE;
3563 		} else if (strcasecmp(value, "false") == 0) {
3564 			un->un_f_disksort_disabled = TRUE;
3565 		} else {
3566 			goto value_invalid;
3567 		}
3568 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3569 		    "disksort disabled flag set to %d\n",
3570 		    un->un_f_disksort_disabled);
3571 		return;
3572 	}
3573 
3574 	if (strcasecmp(name, "power-condition") == 0) {
3575 		if (strcasecmp(value, "true") == 0) {
3576 			un->un_f_power_condition_disabled = FALSE;
3577 		} else if (strcasecmp(value, "false") == 0) {
3578 			un->un_f_power_condition_disabled = TRUE;
3579 		} else {
3580 			goto value_invalid;
3581 		}
3582 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3583 		    "power condition disabled flag set to %d\n",
3584 		    un->un_f_power_condition_disabled);
3585 		return;
3586 	}
3587 
3588 	if (strcasecmp(name, "timeout-releasereservation") == 0) {
3589 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3590 			un->un_reserve_release_time = val;
3591 		} else {
3592 			goto value_invalid;
3593 		}
3594 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3595 		    "reservation release timeout set to %d\n",
3596 		    un->un_reserve_release_time);
3597 		return;
3598 	}
3599 
3600 	if (strcasecmp(name, "reset-lun") == 0) {
3601 		if (strcasecmp(value, "true") == 0) {
3602 			un->un_f_lun_reset_enabled = TRUE;
3603 		} else if (strcasecmp(value, "false") == 0) {
3604 			un->un_f_lun_reset_enabled = FALSE;
3605 		} else {
3606 			goto value_invalid;
3607 		}
3608 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3609 		    "lun reset enabled flag set to %d\n",
3610 		    un->un_f_lun_reset_enabled);
3611 		return;
3612 	}
3613 
3614 	if (strcasecmp(name, "retries-busy") == 0) {
3615 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3616 			un->un_busy_retry_count = val;
3617 		} else {
3618 			goto value_invalid;
3619 		}
3620 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3621 		    "busy retry count set to %d\n", un->un_busy_retry_count);
3622 		return;
3623 	}
3624 
3625 	if (strcasecmp(name, "retries-timeout") == 0) {
3626 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3627 			un->un_retry_count = val;
3628 		} else {
3629 			goto value_invalid;
3630 		}
3631 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3632 		    "timeout retry count set to %d\n", un->un_retry_count);
3633 		return;
3634 	}
3635 
3636 	if (strcasecmp(name, "retries-notready") == 0) {
3637 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3638 			un->un_notready_retry_count = val;
3639 		} else {
3640 			goto value_invalid;
3641 		}
3642 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3643 		    "notready retry count set to %d\n",
3644 		    un->un_notready_retry_count);
3645 		return;
3646 	}
3647 
3648 	if (strcasecmp(name, "retries-reset") == 0) {
3649 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3650 			un->un_reset_retry_count = val;
3651 		} else {
3652 			goto value_invalid;
3653 		}
3654 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3655 		    "reset retry count set to %d\n",
3656 		    un->un_reset_retry_count);
3657 		return;
3658 	}
3659 
3660 	if (strcasecmp(name, "throttle-max") == 0) {
3661 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3662 			un->un_saved_throttle = un->un_throttle = val;
3663 		} else {
3664 			goto value_invalid;
3665 		}
3666 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3667 		    "throttle set to %d\n", un->un_throttle);
3668 	}
3669 
3670 	if (strcasecmp(name, "throttle-min") == 0) {
3671 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3672 			un->un_min_throttle = val;
3673 		} else {
3674 			goto value_invalid;
3675 		}
3676 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3677 		    "min throttle set to %d\n", un->un_min_throttle);
3678 	}
3679 
3680 	if (strcasecmp(name, "rmw-type") == 0) {
3681 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3682 			un->un_f_rmw_type = val;
3683 		} else {
3684 			goto value_invalid;
3685 		}
3686 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3687 		    "RMW type set to %d\n", un->un_f_rmw_type);
3688 	}
3689 
3690 	if (strcasecmp(name, "physical-block-size") == 0) {
3691 		if (ddi_strtol(value, &endptr, 0, &val) == 0 &&
3692 		    ISP2(val) && val >= un->un_tgt_blocksize &&
3693 		    val >= un->un_sys_blocksize) {
3694 			un->un_phy_blocksize = val;
3695 		} else {
3696 			goto value_invalid;
3697 		}
3698 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3699 		    "physical block size set to %d\n", un->un_phy_blocksize);
3700 	}
3701 
3702 	if (strcasecmp(name, "retries-victim") == 0) {
3703 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3704 			un->un_victim_retry_count = val;
3705 		} else {
3706 			goto value_invalid;
3707 		}
3708 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3709 		    "victim retry count set to %d\n",
3710 		    un->un_victim_retry_count);
3711 		return;
3712 	}
3713 
3714 	/*
3715 	 * Validate the throttle values.
3716 	 * If any of the numbers are invalid, set everything to defaults.
3717 	 */
3718 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
3719 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
3720 	    (un->un_min_throttle > un->un_throttle)) {
3721 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
3722 		un->un_min_throttle = sd_min_throttle;
3723 	}
3724 
3725 	if (strcasecmp(name, "mmc-gesn-polling") == 0) {
3726 		if (strcasecmp(value, "true") == 0) {
3727 			un->un_f_mmc_gesn_polling = TRUE;
3728 		} else if (strcasecmp(value, "false") == 0) {
3729 			un->un_f_mmc_gesn_polling = FALSE;
3730 		} else {
3731 			goto value_invalid;
3732 		}
3733 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3734 		    "mmc-gesn-polling set to %d\n",
3735 		    un->un_f_mmc_gesn_polling);
3736 	}
3737 
3738 	return;
3739 
3740 value_invalid:
3741 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3742 	    "value of prop %s is invalid\n", name);
3743 }
3744 
3745 /*
3746  *    Function: sd_get_tunables_from_conf()
3747  *
3748  *
3749  *    This function reads the data list from the sd.conf file and pulls
3750  *    the values that can have numeric values as arguments and places
3751  *    the values in the appropriate sd_tunables member.
3752  *    Since the order of the data list members varies across platforms
3753  *    This function reads them from the data list in a platform specific
3754  *    order and places them into the correct sd_tunable member that is
3755  *    consistent across all platforms.
3756  */
3757 static void
sd_get_tunables_from_conf(struct sd_lun * un,int flags,int * data_list,sd_tunables * values)3758 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3759     sd_tunables *values)
3760 {
3761 	int i;
3762 	int mask;
3763 
3764 	bzero(values, sizeof (sd_tunables));
3765 
3766 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3767 
3768 		mask = 1 << i;
3769 		if (mask > flags) {
3770 			break;
3771 		}
3772 
3773 		switch (mask & flags) {
3774 		case 0:	/* This mask bit not set in flags */
3775 			continue;
3776 		case SD_CONF_BSET_THROTTLE:
3777 			values->sdt_throttle = data_list[i];
3778 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3779 			    "sd_get_tunables_from_conf: throttle = %d\n",
3780 			    values->sdt_throttle);
3781 			break;
3782 		case SD_CONF_BSET_CTYPE:
3783 			values->sdt_ctype = data_list[i];
3784 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3785 			    "sd_get_tunables_from_conf: ctype = %d\n",
3786 			    values->sdt_ctype);
3787 			break;
3788 		case SD_CONF_BSET_NRR_COUNT:
3789 			values->sdt_not_rdy_retries = data_list[i];
3790 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3791 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3792 			    values->sdt_not_rdy_retries);
3793 			break;
3794 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3795 			values->sdt_busy_retries = data_list[i];
3796 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3797 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3798 			    values->sdt_busy_retries);
3799 			break;
3800 		case SD_CONF_BSET_RST_RETRIES:
3801 			values->sdt_reset_retries = data_list[i];
3802 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3803 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3804 			    values->sdt_reset_retries);
3805 			break;
3806 		case SD_CONF_BSET_RSV_REL_TIME:
3807 			values->sdt_reserv_rel_time = data_list[i];
3808 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3809 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3810 			    values->sdt_reserv_rel_time);
3811 			break;
3812 		case SD_CONF_BSET_MIN_THROTTLE:
3813 			values->sdt_min_throttle = data_list[i];
3814 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3815 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3816 			    values->sdt_min_throttle);
3817 			break;
3818 		case SD_CONF_BSET_DISKSORT_DISABLED:
3819 			values->sdt_disk_sort_dis = data_list[i];
3820 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3821 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3822 			    values->sdt_disk_sort_dis);
3823 			break;
3824 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3825 			values->sdt_lun_reset_enable = data_list[i];
3826 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3827 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3828 			    "\n", values->sdt_lun_reset_enable);
3829 			break;
3830 		case SD_CONF_BSET_CACHE_IS_NV:
3831 			values->sdt_suppress_cache_flush = data_list[i];
3832 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3833 			    "sd_get_tunables_from_conf: \
3834 			    suppress_cache_flush = %d"
3835 			    "\n", values->sdt_suppress_cache_flush);
3836 			break;
3837 		case SD_CONF_BSET_PC_DISABLED:
3838 			values->sdt_disk_sort_dis = data_list[i];
3839 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3840 			    "sd_get_tunables_from_conf: power_condition_dis = "
3841 			    "%d\n", values->sdt_power_condition_dis);
3842 			break;
3843 		}
3844 	}
3845 }
3846 
3847 /*
3848  *    Function: sd_process_sdconf_table
3849  *
3850  * Description: Search the static configuration table for a match on the
3851  *		inquiry vid/pid and update the driver soft state structure
3852  *		according to the table property values for the device.
3853  *
3854  *		The form of a configuration table entry is:
3855  *		  <vid+pid>,<flags>,<property-data>
3856  *		  "SEAGATE ST42400N",1,0x40000,
3857  *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
3858  *
3859  *   Arguments: un - driver soft state (unit) structure
3860  */
3861 
3862 static void
sd_process_sdconf_table(struct sd_lun * un)3863 sd_process_sdconf_table(struct sd_lun *un)
3864 {
3865 	char	*id = NULL;
3866 	int	table_index;
3867 	int	idlen;
3868 
3869 	ASSERT(un != NULL);
3870 	for (table_index = 0; table_index < sd_disk_table_size;
3871 	    table_index++) {
3872 		id = sd_disk_table[table_index].device_id;
3873 		idlen = strlen(id);
3874 
3875 		/*
3876 		 * The static configuration table currently does not
3877 		 * implement version 10 properties. Additionally,
3878 		 * multiple data-property-name entries are not
3879 		 * implemented in the static configuration table.
3880 		 */
3881 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3882 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3883 			    "sd_process_sdconf_table: disk %s\n", id);
3884 			sd_set_vers1_properties(un,
3885 			    sd_disk_table[table_index].flags,
3886 			    sd_disk_table[table_index].properties);
3887 			break;
3888 		}
3889 	}
3890 }
3891 
3892 
3893 /*
3894  *    Function: sd_sdconf_id_match
3895  *
3896  * Description: This local function implements a case sensitive vid/pid
3897  *		comparison as well as the boundary cases of wild card and
3898  *		multiple blanks.
3899  *
3900  *		Note: An implicit assumption made here is that the scsi
3901  *		inquiry structure will always keep the vid, pid and
3902  *		revision strings in consecutive sequence, so they can be
3903  *		read as a single string. If this assumption is not the
3904  *		case, a separate string, to be used for the check, needs
3905  *		to be built with these strings concatenated.
3906  *
3907  *   Arguments: un - driver soft state (unit) structure
3908  *		id - table or config file vid/pid
3909  *		idlen  - length of the vid/pid (bytes)
3910  *
3911  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3912  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3913  */
3914 
3915 static int
sd_sdconf_id_match(struct sd_lun * un,char * id,int idlen)3916 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3917 {
3918 	struct scsi_inquiry	*sd_inq;
3919 	int			rval = SD_SUCCESS;
3920 
3921 	ASSERT(un != NULL);
3922 	sd_inq = un->un_sd->sd_inq;
3923 	ASSERT(id != NULL);
3924 
3925 	/*
3926 	 * We use the inq_vid as a pointer to a buffer containing the
3927 	 * vid and pid and use the entire vid/pid length of the table
3928 	 * entry for the comparison. This works because the inq_pid
3929 	 * data member follows inq_vid in the scsi_inquiry structure.
3930 	 */
3931 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3932 		/*
3933 		 * The user id string is compared to the inquiry vid/pid
3934 		 * using a case insensitive comparison and ignoring
3935 		 * multiple spaces.
3936 		 */
3937 		rval = sd_blank_cmp(un, id, idlen);
3938 		if (rval != SD_SUCCESS) {
3939 			/*
3940 			 * User id strings that start and end with a "*"
3941 			 * are a special case. These do not have a
3942 			 * specific vendor, and the product string can
3943 			 * appear anywhere in the 16 byte PID portion of
3944 			 * the inquiry data. This is a simple strstr()
3945 			 * type search for the user id in the inquiry data.
3946 			 */
3947 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3948 				char	*pidptr = &id[1];
3949 				int	i;
3950 				int	j;
3951 				int	pidstrlen = idlen - 2;
3952 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3953 				    pidstrlen;
3954 
3955 				if (j < 0) {
3956 					return (SD_FAILURE);
3957 				}
3958 				for (i = 0; i < j; i++) {
3959 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3960 					    pidptr, pidstrlen) == 0) {
3961 						rval = SD_SUCCESS;
3962 						break;
3963 					}
3964 				}
3965 			}
3966 		}
3967 	}
3968 	return (rval);
3969 }
3970 
3971 
3972 /*
3973  *    Function: sd_blank_cmp
3974  *
3975  * Description: If the id string starts and ends with a space, treat
3976  *		multiple consecutive spaces as equivalent to a single
3977  *		space. For example, this causes a sd_disk_table entry
3978  *		of " NEC CDROM " to match a device's id string of
3979  *		"NEC       CDROM".
3980  *
3981  *		Note: The success exit condition for this routine is if
3982  *		the pointer to the table entry is '\0' and the cnt of
3983  *		the inquiry length is zero. This will happen if the inquiry
3984  *		string returned by the device is padded with spaces to be
3985  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3986  *		SCSI spec states that the inquiry string is to be padded with
3987  *		spaces.
3988  *
3989  *   Arguments: un - driver soft state (unit) structure
3990  *		id - table or config file vid/pid
3991  *		idlen  - length of the vid/pid (bytes)
3992  *
3993  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3994  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3995  */
3996 
3997 static int
sd_blank_cmp(struct sd_lun * un,char * id,int idlen)3998 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3999 {
4000 	char		*p1;
4001 	char		*p2;
4002 	int		cnt;
4003 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
4004 	    sizeof (SD_INQUIRY(un)->inq_pid);
4005 
4006 	ASSERT(un != NULL);
4007 	p2 = un->un_sd->sd_inq->inq_vid;
4008 	ASSERT(id != NULL);
4009 	p1 = id;
4010 
4011 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
4012 		/*
4013 		 * Note: string p1 is terminated by a NUL but string p2
4014 		 * isn't.  The end of p2 is determined by cnt.
4015 		 */
4016 		for (;;) {
4017 			/* skip over any extra blanks in both strings */
4018 			while ((*p1 != '\0') && (*p1 == ' ')) {
4019 				p1++;
4020 			}
4021 			while ((cnt != 0) && (*p2 == ' ')) {
4022 				p2++;
4023 				cnt--;
4024 			}
4025 
4026 			/* compare the two strings */
4027 			if ((cnt == 0) ||
4028 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
4029 				break;
4030 			}
4031 			while ((cnt > 0) &&
4032 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
4033 				p1++;
4034 				p2++;
4035 				cnt--;
4036 			}
4037 		}
4038 	}
4039 
4040 	/* return SD_SUCCESS if both strings match */
4041 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
4042 }
4043 
4044 
4045 /*
4046  *    Function: sd_chk_vers1_data
4047  *
4048  * Description: Verify the version 1 device properties provided by the
4049  *		user via the configuration file
4050  *
4051  *   Arguments: un	     - driver soft state (unit) structure
4052  *		flags	     - integer mask indicating properties to be set
4053  *		prop_list    - integer list of property values
4054  *		list_len     - number of the elements
4055  *
4056  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
4057  *		SD_FAILURE - Indicates the user provided data is invalid
4058  */
4059 
4060 static int
sd_chk_vers1_data(struct sd_lun * un,int flags,int * prop_list,int list_len,char * dataname_ptr)4061 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
4062     int list_len, char *dataname_ptr)
4063 {
4064 	int i;
4065 	int mask = 1;
4066 	int index = 0;
4067 
4068 	ASSERT(un != NULL);
4069 
4070 	/* Check for a NULL property name and list */
4071 	if (dataname_ptr == NULL) {
4072 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4073 		    "sd_chk_vers1_data: NULL data property name.");
4074 		return (SD_FAILURE);
4075 	}
4076 	if (prop_list == NULL) {
4077 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4078 		    "sd_chk_vers1_data: %s NULL data property list.",
4079 		    dataname_ptr);
4080 		return (SD_FAILURE);
4081 	}
4082 
4083 	/* Display a warning if undefined bits are set in the flags */
4084 	if (flags & ~SD_CONF_BIT_MASK) {
4085 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4086 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
4087 		    "Properties not set.",
4088 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
4089 		return (SD_FAILURE);
4090 	}
4091 
4092 	/*
4093 	 * Verify the length of the list by identifying the highest bit set
4094 	 * in the flags and validating that the property list has a length
4095 	 * up to the index of this bit.
4096 	 */
4097 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4098 		if (flags & mask) {
4099 			index++;
4100 		}
4101 		mask = 1 << i;
4102 	}
4103 	if (list_len < (index + 2)) {
4104 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4105 		    "sd_chk_vers1_data: "
4106 		    "Data property list %s size is incorrect. "
4107 		    "Properties not set.", dataname_ptr);
4108 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
4109 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
4110 		return (SD_FAILURE);
4111 	}
4112 	return (SD_SUCCESS);
4113 }
4114 
4115 
4116 /*
4117  *    Function: sd_set_vers1_properties
4118  *
4119  * Description: Set version 1 device properties based on a property list
4120  *		retrieved from the driver configuration file or static
4121  *		configuration table. Version 1 properties have the format:
4122  *
4123  *	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
4124  *
4125  *		where the prop0 value will be used to set prop0 if bit0
4126  *		is set in the flags
4127  *
4128  *   Arguments: un	     - driver soft state (unit) structure
4129  *		flags	     - integer mask indicating properties to be set
4130  *		prop_list    - integer list of property values
4131  */
4132 
4133 static void
sd_set_vers1_properties(struct sd_lun * un,int flags,sd_tunables * prop_list)4134 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4135 {
4136 	ASSERT(un != NULL);
4137 
4138 	/*
4139 	 * Set the flag to indicate cache is to be disabled. An attempt
4140 	 * to disable the cache via sd_cache_control() will be made
4141 	 * later during attach once the basic initialization is complete.
4142 	 */
4143 	if (flags & SD_CONF_BSET_NOCACHE) {
4144 		un->un_f_opt_disable_cache = TRUE;
4145 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4146 		    "sd_set_vers1_properties: caching disabled flag set\n");
4147 	}
4148 
4149 	/* CD-specific configuration parameters */
4150 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4151 		un->un_f_cfg_playmsf_bcd = TRUE;
4152 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4153 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4154 	}
4155 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4156 		un->un_f_cfg_readsub_bcd = TRUE;
4157 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4158 		    "sd_set_vers1_properties: readsub_bcd set\n");
4159 	}
4160 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4161 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4162 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4163 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4164 	}
4165 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4166 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4167 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4168 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4169 	}
4170 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4171 		un->un_f_cfg_no_read_header = TRUE;
4172 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4173 		    "sd_set_vers1_properties: no_read_header set\n");
4174 	}
4175 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4176 		un->un_f_cfg_read_cd_xd4 = TRUE;
4177 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4178 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4179 	}
4180 
4181 	/* Support for devices which do not have valid/unique serial numbers */
4182 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4183 		un->un_f_opt_fab_devid = TRUE;
4184 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4185 		    "sd_set_vers1_properties: fab_devid bit set\n");
4186 	}
4187 
4188 	/* Support for user throttle configuration */
4189 	if (flags & SD_CONF_BSET_THROTTLE) {
4190 		ASSERT(prop_list != NULL);
4191 		un->un_saved_throttle = un->un_throttle =
4192 		    prop_list->sdt_throttle;
4193 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4194 		    "sd_set_vers1_properties: throttle set to %d\n",
4195 		    prop_list->sdt_throttle);
4196 	}
4197 
4198 	/* Set the per disk retry count according to the conf file or table. */
4199 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4200 		ASSERT(prop_list != NULL);
4201 		if (prop_list->sdt_not_rdy_retries) {
4202 			un->un_notready_retry_count =
4203 			    prop_list->sdt_not_rdy_retries;
4204 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4205 			    "sd_set_vers1_properties: not ready retry count"
4206 			    " set to %d\n", un->un_notready_retry_count);
4207 		}
4208 	}
4209 
4210 	/* The controller type is reported for generic disk driver ioctls */
4211 	if (flags & SD_CONF_BSET_CTYPE) {
4212 		ASSERT(prop_list != NULL);
4213 		switch (prop_list->sdt_ctype) {
4214 		case CTYPE_CDROM:
4215 			un->un_ctype = prop_list->sdt_ctype;
4216 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4217 			    "sd_set_vers1_properties: ctype set to "
4218 			    "CTYPE_CDROM\n");
4219 			break;
4220 		case CTYPE_CCS:
4221 			un->un_ctype = prop_list->sdt_ctype;
4222 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4223 			    "sd_set_vers1_properties: ctype set to "
4224 			    "CTYPE_CCS\n");
4225 			break;
4226 		case CTYPE_ROD:		/* RW optical */
4227 			un->un_ctype = prop_list->sdt_ctype;
4228 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4229 			    "sd_set_vers1_properties: ctype set to "
4230 			    "CTYPE_ROD\n");
4231 			break;
4232 		default:
4233 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4234 			    "sd_set_vers1_properties: Could not set "
4235 			    "invalid ctype value (%d)",
4236 			    prop_list->sdt_ctype);
4237 		}
4238 	}
4239 
4240 	/* Purple failover timeout */
4241 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4242 		ASSERT(prop_list != NULL);
4243 		un->un_busy_retry_count =
4244 		    prop_list->sdt_busy_retries;
4245 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4246 		    "sd_set_vers1_properties: "
4247 		    "busy retry count set to %d\n",
4248 		    un->un_busy_retry_count);
4249 	}
4250 
4251 	/* Purple reset retry count */
4252 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4253 		ASSERT(prop_list != NULL);
4254 		un->un_reset_retry_count =
4255 		    prop_list->sdt_reset_retries;
4256 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4257 		    "sd_set_vers1_properties: "
4258 		    "reset retry count set to %d\n",
4259 		    un->un_reset_retry_count);
4260 	}
4261 
4262 	/* Purple reservation release timeout */
4263 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4264 		ASSERT(prop_list != NULL);
4265 		un->un_reserve_release_time =
4266 		    prop_list->sdt_reserv_rel_time;
4267 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4268 		    "sd_set_vers1_properties: "
4269 		    "reservation release timeout set to %d\n",
4270 		    un->un_reserve_release_time);
4271 	}
4272 
4273 	/*
4274 	 * Driver flag telling the driver to verify that no commands are pending
4275 	 * for a device before issuing a Test Unit Ready. This is a workaround
4276 	 * for a firmware bug in some Seagate eliteI drives.
4277 	 */
4278 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4279 		un->un_f_cfg_tur_check = TRUE;
4280 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4281 		    "sd_set_vers1_properties: tur queue check set\n");
4282 	}
4283 
4284 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4285 		un->un_min_throttle = prop_list->sdt_min_throttle;
4286 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4287 		    "sd_set_vers1_properties: min throttle set to %d\n",
4288 		    un->un_min_throttle);
4289 	}
4290 
4291 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4292 		un->un_f_disksort_disabled =
4293 		    (prop_list->sdt_disk_sort_dis != 0) ?
4294 		    TRUE : FALSE;
4295 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4296 		    "sd_set_vers1_properties: disksort disabled "
4297 		    "flag set to %d\n",
4298 		    prop_list->sdt_disk_sort_dis);
4299 	}
4300 
4301 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4302 		un->un_f_lun_reset_enabled =
4303 		    (prop_list->sdt_lun_reset_enable != 0) ?
4304 		    TRUE : FALSE;
4305 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4306 		    "sd_set_vers1_properties: lun reset enabled "
4307 		    "flag set to %d\n",
4308 		    prop_list->sdt_lun_reset_enable);
4309 	}
4310 
4311 	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4312 		un->un_f_suppress_cache_flush =
4313 		    (prop_list->sdt_suppress_cache_flush != 0) ?
4314 		    TRUE : FALSE;
4315 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4316 		    "sd_set_vers1_properties: suppress_cache_flush "
4317 		    "flag set to %d\n",
4318 		    prop_list->sdt_suppress_cache_flush);
4319 	}
4320 
4321 	if (flags & SD_CONF_BSET_PC_DISABLED) {
4322 		un->un_f_power_condition_disabled =
4323 		    (prop_list->sdt_power_condition_dis != 0) ?
4324 		    TRUE : FALSE;
4325 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4326 		    "sd_set_vers1_properties: power_condition_disabled "
4327 		    "flag set to %d\n",
4328 		    prop_list->sdt_power_condition_dis);
4329 	}
4330 
4331 	/*
4332 	 * Validate the throttle values.
4333 	 * If any of the numbers are invalid, set everything to defaults.
4334 	 */
4335 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4336 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4337 	    (un->un_min_throttle > un->un_throttle)) {
4338 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4339 		un->un_min_throttle = sd_min_throttle;
4340 	}
4341 }
4342 
4343 /*
4344  *    Function: sd_get_physical_geometry
4345  *
4346  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4347  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4348  *		target, and use this information to initialize the physical
4349  *		geometry cache specified by pgeom_p.
4350  *
4351  *		MODE SENSE is an optional command, so failure in this case
4352  *		does not necessarily denote an error. We want to use the
4353  *		MODE SENSE commands to derive the physical geometry of the
4354  *		device, but if either command fails, the logical geometry is
4355  *		used as the fallback for disk label geometry in cmlb.
4356  *
4357  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4358  *		have already been initialized for the current target and
4359  *		that the current values be passed as args so that we don't
4360  *		end up ever trying to use -1 as a valid value. This could
4361  *		happen if either value is reset while we're not holding
4362  *		the mutex.
4363  *
4364  *   Arguments: un - driver soft state (unit) structure
4365  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4366  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4367  *			to use the USCSI "direct" chain and bypass the normal
4368  *			command waitq.
4369  *
4370  *     Context: Kernel thread only (can sleep).
4371  */
4372 
4373 static int
sd_get_physical_geometry(struct sd_lun * un,cmlb_geom_t * pgeom_p,diskaddr_t capacity,int lbasize,int path_flag)4374 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4375     diskaddr_t capacity, int lbasize, int path_flag)
4376 {
4377 	struct	mode_format	*page3p;
4378 	struct	mode_geometry	*page4p;
4379 	struct	mode_header	*headerp;
4380 	int	sector_size;
4381 	int	nsect;
4382 	int	nhead;
4383 	int	ncyl;
4384 	int	intrlv;
4385 	int	spc;
4386 	diskaddr_t	modesense_capacity;
4387 	int	rpm;
4388 	int	bd_len;
4389 	int	mode_header_length;
4390 	uchar_t	*p3bufp;
4391 	uchar_t	*p4bufp;
4392 	int	cdbsize;
4393 	int	ret = EIO;
4394 	sd_ssc_t *ssc;
4395 	int	status;
4396 
4397 	ASSERT(un != NULL);
4398 
4399 	if (lbasize == 0) {
4400 		if (ISCD(un)) {
4401 			lbasize = 2048;
4402 		} else {
4403 			lbasize = un->un_sys_blocksize;
4404 		}
4405 	}
4406 	pgeom_p->g_secsize = (unsigned short)lbasize;
4407 
4408 	/*
4409 	 * If the unit is a cd/dvd drive MODE SENSE page three
4410 	 * and MODE SENSE page four are reserved (see SBC spec
4411 	 * and MMC spec). To prevent soft errors just return
4412 	 * using the default LBA size.
4413 	 *
4414 	 * Since SATA MODE SENSE function (sata_txlt_mode_sense()) does not
4415 	 * implement support for mode pages 3 and 4 return here to prevent
4416 	 * illegal requests on SATA drives.
4417 	 *
4418 	 * These pages are also reserved in SBC-2 and later.  We assume SBC-2
4419 	 * or later for a direct-attached block device if the SCSI version is
4420 	 * at least SPC-3.
4421 	 */
4422 
4423 	if (ISCD(un) ||
4424 	    un->un_interconnect_type == SD_INTERCONNECT_SATA ||
4425 	    (un->un_ctype == CTYPE_CCS && SD_INQUIRY(un)->inq_ansi >= 5))
4426 		return (ret);
4427 
4428 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4429 
4430 	/*
4431 	 * Retrieve MODE SENSE page 3 - Format Device Page
4432 	 */
4433 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4434 	ssc = sd_ssc_init(un);
4435 	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p3bufp,
4436 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag);
4437 	if (status != 0) {
4438 		SD_ERROR(SD_LOG_COMMON, un,
4439 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4440 		goto page3_exit;
4441 	}
4442 
4443 	/*
4444 	 * Determine size of Block Descriptors in order to locate the mode
4445 	 * page data.  ATAPI devices return 0, SCSI devices should return
4446 	 * MODE_BLK_DESC_LENGTH.
4447 	 */
4448 	headerp = (struct mode_header *)p3bufp;
4449 	if (un->un_f_cfg_is_atapi == TRUE) {
4450 		struct mode_header_grp2 *mhp =
4451 		    (struct mode_header_grp2 *)headerp;
4452 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4453 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4454 	} else {
4455 		mode_header_length = MODE_HEADER_LENGTH;
4456 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4457 	}
4458 
4459 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4460 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4461 		    "sd_get_physical_geometry: received unexpected bd_len "
4462 		    "of %d, page3\n", bd_len);
4463 		status = EIO;
4464 		goto page3_exit;
4465 	}
4466 
4467 	page3p = (struct mode_format *)
4468 	    ((caddr_t)headerp + mode_header_length + bd_len);
4469 
4470 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4471 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4472 		    "sd_get_physical_geometry: mode sense pg3 code mismatch "
4473 		    "%d\n", page3p->mode_page.code);
4474 		status = EIO;
4475 		goto page3_exit;
4476 	}
4477 
4478 	/*
4479 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4480 	 * complete successfully; otherwise, revert to the logical geometry.
4481 	 * So, we need to save everything in temporary variables.
4482 	 */
4483 	sector_size = BE_16(page3p->data_bytes_sect);
4484 
4485 	/*
4486 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4487 	 */
4488 	if (sector_size == 0) {
4489 		sector_size = un->un_sys_blocksize;
4490 	} else {
4491 		sector_size &= ~(un->un_sys_blocksize - 1);
4492 	}
4493 
4494 	nsect  = BE_16(page3p->sect_track);
4495 	intrlv = BE_16(page3p->interleave);
4496 
4497 	SD_INFO(SD_LOG_COMMON, un,
4498 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4499 	SD_INFO(SD_LOG_COMMON, un,
4500 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4501 	    page3p->mode_page.code, nsect, sector_size);
4502 	SD_INFO(SD_LOG_COMMON, un,
4503 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4504 	    BE_16(page3p->track_skew),
4505 	    BE_16(page3p->cylinder_skew));
4506 
4507 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
4508 
4509 	/*
4510 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4511 	 */
4512 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4513 	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p4bufp,
4514 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag);
4515 	if (status != 0) {
4516 		SD_ERROR(SD_LOG_COMMON, un,
4517 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4518 		goto page4_exit;
4519 	}
4520 
4521 	/*
4522 	 * Determine size of Block Descriptors in order to locate the mode
4523 	 * page data.  ATAPI devices return 0, SCSI devices should return
4524 	 * MODE_BLK_DESC_LENGTH.
4525 	 */
4526 	headerp = (struct mode_header *)p4bufp;
4527 	if (un->un_f_cfg_is_atapi == TRUE) {
4528 		struct mode_header_grp2 *mhp =
4529 		    (struct mode_header_grp2 *)headerp;
4530 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4531 	} else {
4532 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4533 	}
4534 
4535 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4536 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4537 		    "sd_get_physical_geometry: received unexpected bd_len of "
4538 		    "%d, page4\n", bd_len);
4539 		status = EIO;
4540 		goto page4_exit;
4541 	}
4542 
4543 	page4p = (struct mode_geometry *)
4544 	    ((caddr_t)headerp + mode_header_length + bd_len);
4545 
4546 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4547 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4548 		    "sd_get_physical_geometry: mode sense pg4 code mismatch "
4549 		    "%d\n", page4p->mode_page.code);
4550 		status = EIO;
4551 		goto page4_exit;
4552 	}
4553 
4554 	/*
4555 	 * Stash the data now, after we know that both commands completed.
4556 	 */
4557 
4558 
4559 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4560 	spc   = nhead * nsect;
4561 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4562 	rpm   = BE_16(page4p->rpm);
4563 
4564 	modesense_capacity = spc * ncyl;
4565 
4566 	SD_INFO(SD_LOG_COMMON, un,
4567 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4568 	SD_INFO(SD_LOG_COMMON, un,
4569 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4570 	SD_INFO(SD_LOG_COMMON, un,
4571 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4572 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4573 	    (void *)pgeom_p, capacity);
4574 
4575 	/*
4576 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4577 	 * the product of C * H * S returned by MODE SENSE >= that returned
4578 	 * by read capacity. This is an idiosyncrasy of the original x86
4579 	 * disk subsystem.
4580 	 */
4581 	if (modesense_capacity >= capacity) {
4582 		SD_INFO(SD_LOG_COMMON, un,
4583 		    "sd_get_physical_geometry: adjusting acyl; "
4584 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4585 		    (modesense_capacity - capacity + spc - 1) / spc);
4586 		if (sector_size != 0) {
4587 			/* 1243403: NEC D38x7 drives don't support sec size */
4588 			pgeom_p->g_secsize = (unsigned short)sector_size;
4589 		}
4590 		pgeom_p->g_nsect    = (unsigned short)nsect;
4591 		pgeom_p->g_nhead    = (unsigned short)nhead;
4592 		pgeom_p->g_capacity = capacity;
4593 		pgeom_p->g_acyl	    =
4594 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4595 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4596 	}
4597 
4598 	pgeom_p->g_rpm    = (unsigned short)rpm;
4599 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4600 	ret = 0;
4601 
4602 	SD_INFO(SD_LOG_COMMON, un,
4603 	    "sd_get_physical_geometry: mode sense geometry:\n");
4604 	SD_INFO(SD_LOG_COMMON, un,
4605 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4606 	    nsect, sector_size, intrlv);
4607 	SD_INFO(SD_LOG_COMMON, un,
4608 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4609 	    nhead, ncyl, rpm, modesense_capacity);
4610 	SD_INFO(SD_LOG_COMMON, un,
4611 	    "sd_get_physical_geometry: (cached)\n");
4612 	SD_INFO(SD_LOG_COMMON, un,
4613 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4614 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4615 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4616 	SD_INFO(SD_LOG_COMMON, un,
4617 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4618 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4619 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4620 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
4621 
4622 page4_exit:
4623 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4624 
4625 page3_exit:
4626 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4627 
4628 	if (status != 0) {
4629 		if (status == EIO) {
4630 			/*
4631 			 * Some disks do not support mode sense(6), we
4632 			 * should ignore this kind of error(sense key is
4633 			 * 0x5 - illegal request).
4634 			 */
4635 			uint8_t *sensep;
4636 			int senlen;
4637 
4638 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
4639 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
4640 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
4641 
4642 			if (senlen > 0 &&
4643 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
4644 				sd_ssc_assessment(ssc,
4645 				    SD_FMT_IGNORE_COMPROMISE);
4646 			} else {
4647 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
4648 			}
4649 		} else {
4650 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
4651 		}
4652 	}
4653 	sd_ssc_fini(ssc);
4654 	return (ret);
4655 }
4656 
4657 /*
4658  *    Function: sd_get_virtual_geometry
4659  *
4660  * Description: Ask the controller to tell us about the target device.
4661  *
4662  *   Arguments: un - pointer to softstate
4663  *		capacity - disk capacity in #blocks
4664  *		lbasize - disk block size in bytes
4665  *
4666  *     Context: Kernel thread only
4667  */
4668 
4669 static int
sd_get_virtual_geometry(struct sd_lun * un,cmlb_geom_t * lgeom_p,diskaddr_t capacity,int lbasize)4670 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4671     diskaddr_t capacity, int lbasize)
4672 {
4673 	uint_t	geombuf;
4674 	int	spc;
4675 
4676 	ASSERT(un != NULL);
4677 
4678 	/* Set sector size, and total number of sectors */
4679 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4680 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4681 
4682 	/* Let the HBA tell us its geometry */
4683 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4684 
4685 	/* A value of -1 indicates an undefined "geometry" property */
4686 	if (geombuf == (-1)) {
4687 		return (EINVAL);
4688 	}
4689 
4690 	/* Initialize the logical geometry cache. */
4691 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4692 	lgeom_p->g_nsect   = geombuf & 0xffff;
4693 	lgeom_p->g_secsize = un->un_sys_blocksize;
4694 
4695 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4696 
4697 	/*
4698 	 * Note: The driver originally converted the capacity value from
4699 	 * target blocks to system blocks. However, the capacity value passed
4700 	 * to this routine is already in terms of system blocks (this scaling
4701 	 * is done when the READ CAPACITY command is issued and processed).
4702 	 * This 'error' may have gone undetected because the usage of g_ncyl
4703 	 * (which is based upon g_capacity) is very limited within the driver
4704 	 */
4705 	lgeom_p->g_capacity = capacity;
4706 
4707 	/*
4708 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
4709 	 * hba may return zero values if the device has been removed.
4710 	 */
4711 	if (spc == 0) {
4712 		lgeom_p->g_ncyl = 0;
4713 	} else {
4714 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4715 	}
4716 	lgeom_p->g_acyl = 0;
4717 
4718 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4719 	return (0);
4720 
4721 }
4722 /*
4723  *    Function: sd_update_block_info
4724  *
4725  * Description: Calculate a byte count to sector count bitshift value
4726  *		from sector size.
4727  *
4728  *   Arguments: un: unit struct.
4729  *		lbasize: new target sector size
4730  *		capacity: new target capacity, ie. block count
4731  *
4732  *     Context: Kernel thread context
4733  */
4734 
4735 static void
sd_update_block_info(struct sd_lun * un,uint32_t lbasize,uint64_t capacity)4736 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4737 {
4738 	if (lbasize != 0) {
4739 		un->un_tgt_blocksize = lbasize;
4740 		un->un_f_tgt_blocksize_is_valid = TRUE;
4741 		if (!un->un_f_has_removable_media) {
4742 			un->un_sys_blocksize = lbasize;
4743 		}
4744 	}
4745 
4746 	if (capacity != 0) {
4747 		un->un_blockcount		= capacity;
4748 		un->un_f_blockcount_is_valid	= TRUE;
4749 
4750 		/*
4751 		 * The capacity has changed so update the errstats.
4752 		 */
4753 		if (un->un_errstats != NULL) {
4754 			struct sd_errstats *stp;
4755 
4756 			capacity *= un->un_sys_blocksize;
4757 			stp = (struct sd_errstats *)un->un_errstats->ks_data;
4758 			if (stp->sd_capacity.value.ui64 < capacity)
4759 				stp->sd_capacity.value.ui64 = capacity;
4760 		}
4761 	}
4762 }
4763 
4764 /*
4765  * Parses the SCSI Block Limits VPD page (0xB0). It's legal to pass NULL for
4766  * vpd_pg, in which case all the block limits will be reset to the defaults.
4767  */
4768 static void
sd_parse_blk_limits_vpd(struct sd_lun * un,uchar_t * vpd_pg)4769 sd_parse_blk_limits_vpd(struct sd_lun *un, uchar_t *vpd_pg)
4770 {
4771 	sd_blk_limits_t *lim = &un->un_blk_lim;
4772 	unsigned pg_len;
4773 
4774 	if (vpd_pg != NULL)
4775 		pg_len = BE_IN16(&vpd_pg[2]);
4776 	else
4777 		pg_len = 0;
4778 
4779 	/* Block Limits VPD can be 16 bytes or 64 bytes long - support both */
4780 	if (pg_len >= 0x10) {
4781 		lim->lim_opt_xfer_len_gran = BE_IN16(&vpd_pg[6]);
4782 		lim->lim_max_xfer_len = BE_IN32(&vpd_pg[8]);
4783 		lim->lim_opt_xfer_len = BE_IN32(&vpd_pg[12]);
4784 
4785 		/* Zero means not reported, so use "unlimited" */
4786 		if (lim->lim_max_xfer_len == 0)
4787 			lim->lim_max_xfer_len = UINT32_MAX;
4788 		if (lim->lim_opt_xfer_len == 0)
4789 			lim->lim_opt_xfer_len = UINT32_MAX;
4790 	} else {
4791 		lim->lim_opt_xfer_len_gran = 0;
4792 		lim->lim_max_xfer_len = UINT32_MAX;
4793 		lim->lim_opt_xfer_len = UINT32_MAX;
4794 	}
4795 	if (pg_len >= 0x3c) {
4796 		lim->lim_max_pfetch_len = BE_IN32(&vpd_pg[16]);
4797 		/*
4798 		 * A zero in either of the following two fields indicates lack
4799 		 * of UNMAP support.
4800 		 */
4801 		lim->lim_max_unmap_lba_cnt = BE_IN32(&vpd_pg[20]);
4802 		lim->lim_max_unmap_descr_cnt = BE_IN32(&vpd_pg[24]);
4803 		lim->lim_opt_unmap_gran = BE_IN32(&vpd_pg[28]);
4804 		if ((vpd_pg[32] >> 7) == 1) {
4805 			lim->lim_unmap_gran_align =
4806 			    ((vpd_pg[32] & 0x7f) << 24) | (vpd_pg[33] << 16) |
4807 			    (vpd_pg[34] << 8) | vpd_pg[35];
4808 		} else {
4809 			lim->lim_unmap_gran_align = 0;
4810 		}
4811 		lim->lim_max_write_same_len = BE_IN64(&vpd_pg[36]);
4812 	} else {
4813 		lim->lim_max_pfetch_len = UINT32_MAX;
4814 		lim->lim_max_unmap_lba_cnt = UINT32_MAX;
4815 		lim->lim_max_unmap_descr_cnt = SD_UNMAP_MAX_DESCR;
4816 		lim->lim_opt_unmap_gran = 0;
4817 		lim->lim_unmap_gran_align = 0;
4818 		lim->lim_max_write_same_len = UINT64_MAX;
4819 	}
4820 }
4821 
4822 /*
4823  * Collects VPD page B0 data if available (block limits). If the data is
4824  * not available or querying the device failed, we revert to the defaults.
4825  */
4826 static void
sd_setup_blk_limits(sd_ssc_t * ssc)4827 sd_setup_blk_limits(sd_ssc_t *ssc)
4828 {
4829 	struct sd_lun	*un		= ssc->ssc_un;
4830 	uchar_t		*inqB0		= NULL;
4831 	size_t		inqB0_resid	= 0;
4832 	int		rval;
4833 
4834 	if (un->un_vpd_page_mask & SD_VPD_BLK_LIMITS_PG) {
4835 		inqB0 = kmem_zalloc(MAX_INQUIRY_SIZE, KM_SLEEP);
4836 		rval = sd_send_scsi_INQUIRY(ssc, inqB0, MAX_INQUIRY_SIZE, 0x01,
4837 		    0xB0, &inqB0_resid);
4838 		if (rval != 0) {
4839 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
4840 			kmem_free(inqB0, MAX_INQUIRY_SIZE);
4841 			inqB0 = NULL;
4842 		}
4843 	}
4844 	/* passing NULL inqB0 will reset to defaults */
4845 	sd_parse_blk_limits_vpd(ssc->ssc_un, inqB0);
4846 	if (inqB0)
4847 		kmem_free(inqB0, MAX_INQUIRY_SIZE);
4848 }
4849 
4850 /*
4851  *    Function: sd_register_devid
4852  *
4853  * Description: This routine will obtain the device id information from the
4854  *		target, obtain the serial number, and register the device
4855  *		id with the ddi framework.
4856  *
4857  *   Arguments: devi - the system's dev_info_t for the device.
4858  *		un - driver soft state (unit) structure
4859  *		reservation_flag - indicates if a reservation conflict
4860  *		occurred during attach
4861  *
4862  *     Context: Kernel Thread
4863  */
static void
sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi, int reservation_flag)
{
	int		rval		= 0;
	uchar_t		*inq80		= NULL;
	size_t		inq80_len	= MAX_INQUIRY_SIZE;
	size_t		inq80_resid	= 0;
	uchar_t		*inq83		= NULL;
	size_t		inq83_len	= MAX_INQUIRY_SIZE;
	size_t		inq83_resid	= 0;
	int		dlen, len;
	char		*sn;
	struct sd_lun	*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT((SD_DEVINFO(un)) == devi);


	/*
	 * We check the availability of the World Wide Name (0x83) and Unit
	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
	 * 0x83 is available, that is the best choice.  Our next choice is
	 * 0x80.  If neither are available, we munge the devid from the device
	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
	 * to fabricate a devid for non-Sun qualified disks.
	 */
	if (sd_check_vpd_page_support(ssc) == 0) {
		/* collect page 80 data if available */
		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {

			/*
			 * Drop SD_MUTEX across the KM_SLEEP allocation and
			 * the INQUIRY command, both of which may block.
			 */
			mutex_exit(SD_MUTEX(un));
			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);

			rval = sd_send_scsi_INQUIRY(ssc, inq80, inq80_len,
			    0x01, 0x80, &inq80_resid);

			if (rval != 0) {
				/* Page 80 unavailable; proceed without it. */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				kmem_free(inq80, inq80_len);
				inq80 = NULL;
				inq80_len = 0;
			} else if (ddi_prop_exists(
			    DDI_DEV_T_NONE, SD_DEVINFO(un),
			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
			    INQUIRY_SERIAL_NO) == 0) {
				/*
				 * If we don't already have a serial number
				 * property, do quick verify of data returned
				 * and define property.
				 */
				dlen = inq80_len - inq80_resid;
				len = (size_t)inq80[3];
				if ((dlen >= 4) && ((len + 4) <= dlen)) {
					/*
					 * Ensure sn termination, skip leading
					 * blanks, and create property
					 * 'inquiry-serial-no'.
					 */
					sn = (char *)&inq80[4];
					sn[len] = 0;
					while (*sn && (*sn == ' '))
						sn++;
					/* Only non-blank serials are kept. */
					if (*sn) {
						(void) ddi_prop_update_string(
						    DDI_DEV_T_NONE,
						    SD_DEVINFO(un),
						    INQUIRY_SERIAL_NO, sn);
					}
				}
			}
			mutex_enter(SD_MUTEX(un));
		}

		/* collect page 83 data if available */
		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
			/* Again drop SD_MUTEX around the blocking INQUIRY. */
			mutex_exit(SD_MUTEX(un));
			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);

			rval = sd_send_scsi_INQUIRY(ssc, inq83, inq83_len,
			    0x01, 0x83, &inq83_resid);

			if (rval != 0) {
				/* Page 83 unavailable; proceed without it. */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				kmem_free(inq83, inq83_len);
				inq83 = NULL;
				inq83_len = 0;
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * If transport has already registered a devid for this target
	 * then that takes precedence over the driver's determination
	 * of the devid.
	 *
	 * NOTE: The reason this check is done here instead of at the beginning
	 * of the function is to allow the code above to create the
	 * 'inquiry-serial-no' property.
	 */
	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
		ASSERT(un->un_devid);
		un->un_f_devid_transport_defined = TRUE;
		goto cleanup; /* use devid registered by the transport */
	}

	/*
	 * This is the case of antiquated Sun disk drives that have the
	 * FAB_DEVID property set in the disk_table.  These drives
	 * manage the devid's by storing them in last 2 available sectors
	 * on the drive and have them fabricated by the ddi layer by calling
	 * ddi_devid_init and passing the DEVID_FAB flag.
	 */
	if (un->un_f_opt_fab_devid == TRUE) {
		/*
		 * Depending on EINVAL isn't reliable, since a reserved disk
		 * may result in invalid geometry, so check to make sure a
		 * reservation conflict did not occur during attach.
		 */
		if ((sd_get_devid(ssc) == EINVAL) &&
		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
			/*
			 * The devid is invalid AND there is no reservation
			 * conflict.  Fabricate a new devid.
			 */
			(void) sd_create_devid(ssc);
		}

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: Devid Fabricated\n");
		}
		goto cleanup;
	}

	/* encode best devid possible based on data available */
	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
	    (char *)ddi_driver_name(SD_DEVINFO(un)),
	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {

		/* devid successfully encoded, register devid */
		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);

	} else {
		/*
		 * Unable to encode a devid based on data available.
		 * This is not a Sun qualified disk.  Older Sun disk
		 * drives that have the SD_FAB_DEVID property
		 * set in the disk_table and non Sun qualified
		 * disks are treated in the same manner.  These
		 * drives manage the devid's by storing them in
		 * last 2 available sectors on the drive and
		 * have them fabricated by the ddi layer by
		 * calling ddi_devid_init and passing the
		 * DEVID_FAB flag.
		 * Create a fabricate devid only if there's no
		 * fabricate devid existed.
		 */
		if (sd_get_devid(ssc) == EINVAL) {
			(void) sd_create_devid(ssc);
		}
		un->un_f_opt_fab_devid = TRUE;

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: devid fabricated using "
			    "ddi framework\n");
		}
	}

cleanup:
	/* clean up resources */
	if (inq80 != NULL) {
		kmem_free(inq80, inq80_len);
	}
	if (inq83 != NULL) {
		kmem_free(inq83, inq83_len);
	}
}
5055 
5056 
5057 
5058 /*
5059  *    Function: sd_get_devid
5060  *
5061  * Description: This routine will return 0 if a valid device id has been
5062  *		obtained from the target and stored in the soft state. If a
5063  *		valid device id has not been previously read and stored, a
5064  *		read attempt will be made.
5065  *
5066  *   Arguments: un - driver soft state (unit) structure
5067  *
5068  * Return Code: 0 if we successfully get the device id
5069  *
5070  *     Context: Kernel Thread
5071  */
5072 
static int
sd_get_devid(sd_ssc_t *ssc)
{
	struct dk_devid		*dkdevid;
	ddi_devid_t		tmpid;
	uint_t			*ip;
	size_t			sz;
	diskaddr_t		blk;
	int			status;
	int			chksum;
	int			i;
	size_t			buffer_size;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
	    un);

	/* A devid previously read and cached in the soft state is reused. */
	if (un->un_devid != NULL) {
		return (0);
	}

	/*
	 * Drop SD_MUTEX while asking cmlb for the devid block location;
	 * a failure here means no on-disk devid can be located.
	 */
	mutex_exit(SD_MUTEX(un));
	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
	    (void *)SD_PATH_DIRECT) != 0) {
		mutex_enter(SD_MUTEX(un));
		return (EINVAL);
	}

	/*
	 * Read and verify device id, stored in the reserved cylinders at the
	 * end of the disk. Backup label is on the odd sectors of the last
	 * track of the last cylinder. Device id will be on track of the next
	 * to last cylinder.
	 */
	/* Re-take the mutex briefly: the size conversion reads un state. */
	mutex_enter(SD_MUTEX(un));
	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
	mutex_exit(SD_MUTEX(un));
	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
	status = sd_send_scsi_READ(ssc, dkdevid, buffer_size, blk,
	    SD_PATH_DIRECT);

	if (status != 0) {
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto error;
	}

	/* Validate the revision */
	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
		status = EINVAL;
		goto error;
	}

	/* Calculate the checksum */
	/* XOR of all words in the sector except the trailing checksum word. */
	chksum = 0;
	ip = (uint_t *)dkdevid;
	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
	    i++) {
		chksum ^= ip[i];
	}

	/* Compare the checksums */
	if (DKD_GETCHKSUM(dkdevid) != chksum) {
		status = EINVAL;
		goto error;
	}

	/* Validate the device id */
	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
		status = EINVAL;
		goto error;
	}

	/*
	 * Store the device id in the driver soft state
	 */
	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
	tmpid = kmem_alloc(sz, KM_SLEEP);

	/* Re-acquire SD_MUTEX before touching un_devid, and hold it on exit. */
	mutex_enter(SD_MUTEX(un));

	un->un_devid = tmpid;
	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);

	kmem_free(dkdevid, buffer_size);

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);

	return (status);
error:
	/* Return with SD_MUTEX held, matching the entry condition. */
	mutex_enter(SD_MUTEX(un));
	kmem_free(dkdevid, buffer_size);
	return (status);
}
5172 
5173 
5174 /*
5175  *    Function: sd_create_devid
5176  *
5177  * Description: This routine will fabricate the device id and write it
5178  *		to the disk.
5179  *
5180  *   Arguments: un - driver soft state (unit) structure
5181  *
5182  * Return Code: value of the fabricated device id
5183  *
5184  *     Context: Kernel Thread
5185  */
5186 
5187 static ddi_devid_t
sd_create_devid(sd_ssc_t * ssc)5188 sd_create_devid(sd_ssc_t *ssc)
5189 {
5190 	struct sd_lun	*un;
5191 
5192 	ASSERT(ssc != NULL);
5193 	un = ssc->ssc_un;
5194 	ASSERT(un != NULL);
5195 
5196 	/* Fabricate the devid */
5197 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
5198 	    == DDI_FAILURE) {
5199 		return (NULL);
5200 	}
5201 
5202 	/* Write the devid to disk */
5203 	if (sd_write_deviceid(ssc) != 0) {
5204 		ddi_devid_free(un->un_devid);
5205 		un->un_devid = NULL;
5206 	}
5207 
5208 	return (un->un_devid);
5209 }
5210 
5211 
5212 /*
5213  *    Function: sd_write_deviceid
5214  *
5215  * Description: This routine will write the device id to the disk
5216  *		reserved sector.
5217  *
5218  *   Arguments: un - driver soft state (unit) structure
5219  *
5220  * Return Code: EINVAL
5221  *		value returned by sd_send_scsi_cmd
5222  *
5223  *     Context: Kernel Thread
5224  */
5225 
static int
sd_write_deviceid(sd_ssc_t *ssc)
{
	struct dk_devid		*dkdevid;
	uchar_t			*buf;
	diskaddr_t		blk;
	uint_t			*ip, chksum;
	int			status;
	int			i;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Drop SD_MUTEX while asking cmlb where the devid sector lives. */
	mutex_exit(SD_MUTEX(un));
	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
	    (void *)SD_PATH_DIRECT) != 0) {
		mutex_enter(SD_MUTEX(un));
		return (-1);
	}


	/* Allocate the buffer */
	buf = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
	dkdevid = (struct dk_devid *)buf;

	/* Fill in the revision */
	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;

	/* Copy in the device id */
	/* Hold SD_MUTEX across the copy since un_devid lives in un state. */
	mutex_enter(SD_MUTEX(un));
	bcopy(un->un_devid, &dkdevid->dkd_devid,
	    ddi_devid_sizeof(un->un_devid));
	mutex_exit(SD_MUTEX(un));

	/* Calculate the checksum */
	/* XOR of all words in the sector except the trailing checksum word. */
	chksum = 0;
	ip = (uint_t *)dkdevid;
	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
	    i++) {
		chksum ^= ip[i];
	}

	/* Fill-in checksum */
	DKD_FORMCHKSUM(chksum, dkdevid);

	/* Write the reserved sector */
	status = sd_send_scsi_WRITE(ssc, buf, un->un_sys_blocksize, blk,
	    SD_PATH_DIRECT);
	if (status != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	kmem_free(buf, un->un_sys_blocksize);

	/* Return with SD_MUTEX held, matching the entry condition. */
	mutex_enter(SD_MUTEX(un));
	return (status);
}
5286 
5287 
5288 /*
5289  *    Function: sd_check_vpd_page_support
5290  *
5291  * Description: This routine sends an inquiry command with the EVPD bit set and
5292  *		a page code of 0x00 to the device. It is used to determine which
5293  *		vital product pages are available to find the devid. We are
5294  *		looking for pages 0x83 0x80 or 0xB1.  If we return a negative 1,
5295  *		the device does not support that command.
5296  *
5297  *   Arguments: un  - driver soft state (unit) structure
5298  *
5299  * Return Code: 0 - success
5300  *		1 - check condition
5301  *
5302  *     Context: This routine can sleep.
5303  */
5304 
static int
sd_check_vpd_page_support(sd_ssc_t *ssc)
{
	uchar_t	*page_list	= NULL;
	uchar_t	page_length	= 0xff;	/* Use max possible length */
	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
	int	rval		= 0;
	int	counter;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Drop SD_MUTEX across the KM_SLEEP allocation and the INQUIRY. */
	mutex_exit(SD_MUTEX(un));

	/*
	 * We'll set the page length to the maximum to save figuring it out
	 * with an additional call.
	 */
	page_list =  kmem_zalloc(page_length, KM_SLEEP);

	rval = sd_send_scsi_INQUIRY(ssc, page_list, page_length, evpd,
	    page_code, NULL);

	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	mutex_enter(SD_MUTEX(un));

	/*
	 * Now we must validate that the device accepted the command, as some
	 * drives do not support it.  If the drive does support it, we will
	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
	 * not, we return -1.
	 */
	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
		/* Loop to find one of the 2 pages we need */
		counter = 4;  /* Supported pages start at byte 4, with 0x00 */

		/*
		 * Pages are returned in ascending order, and 0x83 is what we
		 * are hoping for.
		 */
		while ((page_list[counter] <= 0xB1) &&
		    (counter <= (page_list[VPD_PAGE_LENGTH] +
		    VPD_HEAD_OFFSET))) {
			/*
			 * Add 3 because page_list[3] is the number of
			 * pages minus 3
			 */

			/* Record each recognized page code as a mask bit. */
			switch (page_list[counter]) {
			case 0x00:
				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
				break;
			case 0x80:
				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
				break;
			case 0x81:
				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
				break;
			case 0x82:
				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
				break;
			case 0x83:
				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
				break;
			case 0x86:
				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
				break;
			case 0xB0:
				un->un_vpd_page_mask |= SD_VPD_BLK_LIMITS_PG;
				break;
			case 0xB1:
				un->un_vpd_page_mask |= SD_VPD_DEV_CHARACTER_PG;
				break;
			}
			counter++;
		}

	} else {
		rval = -1;

		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_check_vpd_page_support: This drive does not implement "
		    "VPD pages.\n");
	}

	kmem_free(page_list, page_length);

	return (rval);
}
5400 
5401 
5402 /*
5403  *    Function: sd_setup_pm
5404  *
5405  * Description: Initialize Power Management on the device
5406  *
5407  *     Context: Kernel Thread
5408  */
5409 
static void
sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi)
{
	uint_t		log_page_size;
	uchar_t		*log_page_data;
	int		rval = 0;
	struct sd_lun	*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * Since we are called from attach, holding a mutex for
	 * un is unnecessary. Because some of the routines called
	 * from here require SD_MUTEX to not be held, assert this
	 * right up front.
	 */
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	/*
	 * Since the sd device does not have the 'reg' property,
	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
	 * The following code is to tell cpr that this device
	 * DOES need to be suspended and resumed.
	 */
	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
	    "pm-hardware-state", "needs-suspend-resume");

	/*
	 * This complies with the new power management framework
	 * for certain desktop machines. Create the pm_components
	 * property as a string array property.
	 * If un_f_pm_supported is TRUE, that means the disk
	 * attached HBA has set the "pm-capable" property and
	 * the value of this property is bigger than 0.
	 */
	if (un->un_f_pm_supported) {
		/*
		 * not all devices have a motor, try it first.
		 * some devices may return ILLEGAL REQUEST, some
		 * will hang
		 * The following START_STOP_UNIT is used to check if target
		 * device has a motor.
		 */
		un->un_f_start_stop_supported = TRUE;

		/*
		 * Prefer the POWER CONDITION form of START STOP UNIT if
		 * the device advertised support for it; fall back to the
		 * plain START/STOP form when that command fails.
		 */
		if (un->un_f_power_condition_supported) {
			rval = sd_send_scsi_START_STOP_UNIT(ssc,
			    SD_POWER_CONDITION, SD_TARGET_ACTIVE,
			    SD_PATH_DIRECT);
			if (rval != 0) {
				un->un_f_power_condition_supported = FALSE;
			}
		}
		if (!un->un_f_power_condition_supported) {
			rval = sd_send_scsi_START_STOP_UNIT(ssc,
			    SD_START_STOP, SD_TARGET_START, SD_PATH_DIRECT);
		}
		if (rval != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
			un->un_f_start_stop_supported = FALSE;
		}

		/*
		 * create pm properties anyways otherwise the parent can't
		 * go to sleep
		 */
		un->un_f_pm_is_enabled = TRUE;
		(void) sd_create_pm_components(devi, un);

		/*
		 * If it claims that log sense is supported, check it out.
		 */
		if (un->un_f_log_sense_supported) {
			rval = sd_log_page_supported(ssc,
			    START_STOP_CYCLE_PAGE);
			if (rval == 1) {
				/* Page found, use it. */
				un->un_start_stop_cycle_page =
				    START_STOP_CYCLE_PAGE;
			} else {
				/*
				 * Page not found or log sense is not
				 * supported.
				 * Notice we do not check the old style
				 * START_STOP_CYCLE_VU_PAGE because this
				 * code path does not apply to old disks.
				 */
				un->un_f_log_sense_supported = FALSE;
				un->un_f_pm_log_sense_smart = FALSE;
			}
		}

		/* pm-capable HBA path is fully handled; done. */
		return;
	}

	/*
	 * For the disk whose attached HBA has not set the "pm-capable"
	 * property, check if it supports the power management.
	 */
	if (!un->un_f_log_sense_supported) {
		un->un_power_level = SD_SPINDLE_ON;
		un->un_f_pm_is_enabled = FALSE;
		return;
	}

	rval = sd_log_page_supported(ssc, START_STOP_CYCLE_PAGE);

#ifdef	SDDEBUG
	if (sd_force_pm_supported) {
		/* Force a successful result */
		rval = 1;
	}
#endif

	/*
	 * If the start-stop cycle counter log page is not supported
	 * or if the pm-capable property is set to be false (0),
	 * then we should not create the pm_components property.
	 */
	if (rval == -1) {
		/*
		 * Error.
		 * Reading log sense failed, most likely this is
		 * an older drive that does not support log sense.
		 * If this fails auto-pm is not supported.
		 */
		un->un_power_level = SD_SPINDLE_ON;
		un->un_f_pm_is_enabled = FALSE;

	} else if (rval == 0) {
		/*
		 * Page not found.
		 * The start stop cycle counter is implemented as page
		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
		 */
		if (sd_log_page_supported(ssc, START_STOP_CYCLE_VU_PAGE) == 1) {
			/*
			 * Page found, use this one.
			 */
			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
			un->un_f_pm_is_enabled = TRUE;
		} else {
			/*
			 * Error or page not found.
			 * auto-pm is not supported for this device.
			 */
			un->un_power_level = SD_SPINDLE_ON;
			un->un_f_pm_is_enabled = FALSE;
		}
	} else {
		/*
		 * Page found, use it.
		 */
		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
		un->un_f_pm_is_enabled = TRUE;
	}


	if (un->un_f_pm_is_enabled == TRUE) {
		/*
		 * Verify the counter page can actually be read before
		 * committing to PM; disable PM again if the LOG SENSE
		 * command fails.
		 */
		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);

		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
		    log_page_size, un->un_start_stop_cycle_page,
		    0x01, 0, SD_PATH_DIRECT);

		if (rval != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

#ifdef	SDDEBUG
		if (sd_force_pm_supported) {
			/* Force a successful result */
			rval = 0;
		}
#endif

		/*
		 * If the Log sense for Page( Start/stop cycle counter page)
		 * succeeds, then power management is supported and we can
		 * enable auto-pm.
		 */
		if (rval == 0)  {
			(void) sd_create_pm_components(devi, un);
		} else {
			un->un_power_level = SD_SPINDLE_ON;
			un->un_f_pm_is_enabled = FALSE;
		}

		kmem_free(log_page_data, log_page_size);
	}
}
5604 
5605 
5606 /*
5607  *    Function: sd_create_pm_components
5608  *
5609  * Description: Initialize PM property.
5610  *
5611  *     Context: Kernel thread context
5612  */
5613 
static void
sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
{
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Publish the "pm-components" property describing the power
	 * levels this device supports: 5 entries when the device honors
	 * the POWER CONDITION forms, 3 for plain START/STOP devices.
	 * On property-update failure, leave the device powered on and
	 * mark PM disabled.
	 */
	if (un->un_f_power_condition_supported) {
		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
		    "pm-components", sd_pwr_pc.pm_comp, 5)
		    != DDI_PROP_SUCCESS) {
			un->un_power_level = SD_SPINDLE_ACTIVE;
			un->un_f_pm_is_enabled = FALSE;
			return;
		}
	} else {
		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
		    "pm-components", sd_pwr_ss.pm_comp, 3)
		    != DDI_PROP_SUCCESS) {
			un->un_power_level = SD_SPINDLE_ON;
			un->un_f_pm_is_enabled = FALSE;
			return;
		}
	}
	/*
	 * When components are initially created they are idle,
	 * power up any non-removables.
	 * Note: the return value of pm_raise_power can't be used
	 * for determining if PM should be enabled for this device.
	 * Even if you check the return values and remove this
	 * property created above, the PM framework will not honor the
	 * change after the first call to pm_raise_power. Hence,
	 * removal of that property does not help if pm_raise_power
	 * fails. In the case of removable media, the start/stop
	 * will fail if the media is not present.
	 */
	if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
	    SD_PM_STATE_ACTIVE(un)) == DDI_SUCCESS)) {
		/* Lock order: SD_MUTEX first, then un_pm_mutex. */
		mutex_enter(SD_MUTEX(un));
		un->un_power_level = SD_PM_STATE_ACTIVE(un);
		mutex_enter(&un->un_pm_mutex);
		/* Set to on and not busy. */
		un->un_pm_count = 0;
	} else {
		mutex_enter(SD_MUTEX(un));
		un->un_power_level = SD_PM_STATE_STOPPED(un);
		mutex_enter(&un->un_pm_mutex);
		/* Set to off. */
		un->un_pm_count = -1;
	}
	mutex_exit(&un->un_pm_mutex);
	mutex_exit(SD_MUTEX(un));
}
5665 
5666 
5667 /*
5668  *    Function: sd_ddi_suspend
5669  *
5670  * Description: Performs system power-down operations. This includes
5671  *		setting the drive state to indicate its suspended so
5672  *		that no new commands will be accepted. Also, wait for
5673  *		all commands that are in transport or queued to a timer
5674  *		for retry to complete. All timeout threads are cancelled.
5675  *
5676  * Return Code: DDI_FAILURE or DDI_SUCCESS
5677  *
5678  *     Context: Kernel thread context
5679  */
5680 
static int
sd_ddi_suspend(dev_info_t *devi)
{
	struct	sd_lun	*un;
	clock_t		wait_cmds_complete;

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL) {
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");

	mutex_enter(SD_MUTEX(un));

	/* Return success if the device is already suspended. */
	if (un->un_state == SD_STATE_SUSPENDED) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device already suspended, exiting\n");
		return (DDI_SUCCESS);
	}

	/* Return failure if the device is being used by HA */
	if (un->un_resvd_status &
	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device in use by HA, exiting\n");
		return (DDI_FAILURE);
	}

	/*
	 * Return failure if the device is in a resource wait
	 * or power changing state.
	 */
	if ((un->un_state == SD_STATE_RWAIT) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device in resource wait state, exiting\n");
		return (DDI_FAILURE);
	}


	un->un_save_state = un->un_last_state;
	New_state(un, SD_STATE_SUSPENDED);

	/*
	 * Wait for all commands that are in transport or queued to a timer
	 * for retry to complete.
	 *
	 * While waiting, no new commands will be accepted or sent because of
	 * the new state we set above.
	 *
	 * Wait till current operation has completed. If we are in the resource
	 * wait state (with an intr outstanding) then we need to wait till the
	 * intr completes and starts the next cmd. We want to wait for
	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
	 */
	wait_cmds_complete = ddi_get_lbolt() +
	    (sd_wait_cmds_complete * drv_usectohz(1000000));

	while (un->un_ncmds_in_transport != 0) {
		/*
		 * Fail if commands do not finish in the specified time.
		 */
		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
		    wait_cmds_complete) == -1) {
			/*
			 * Undo the state changes made above. Everything
			 * must go back to its original value.
			 */
			Restore_state(un);
			un->un_last_state = un->un_save_state;
			/* Wake up any threads that might be waiting. */
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_ERROR(SD_LOG_IO_PM, un,
			    "sd_ddi_suspend: failed due to outstanding cmds\n");
			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
			return (DDI_FAILURE);
		}
	}

	/*
	 * Cancel SCSI watch thread and timeouts, if any are active.
	 * SD_MUTEX is dropped around each untimeout()/suspend call below
	 * because the corresponding handlers themselves acquire SD_MUTEX
	 * (see e.g. sd_pm_idletimeout_handler); holding it here could
	 * deadlock against a handler that is already running.
	 */

	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
		opaque_t temp_token = un->un_swr_token;
		mutex_exit(SD_MUTEX(un));
		scsi_watch_suspend(temp_token);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_reset_throttle_timeid != NULL) {
		timeout_id_t temp_id = un->un_reset_throttle_timeid;
		un->un_reset_throttle_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_dcvb_timeid != NULL) {
		timeout_id_t temp_id = un->un_dcvb_timeid;
		un->un_dcvb_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	mutex_enter(&un->un_pm_mutex);
	if (un->un_pm_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_timeid;
		un->un_pm_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	if (un->un_rmw_msg_timeid != NULL) {
		timeout_id_t temp_id = un->un_rmw_msg_timeid;
		un->un_rmw_msg_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_retry_timeid != NULL) {
		timeout_id_t temp_id = un->un_retry_timeid;
		un->un_retry_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));

		/*
		 * A pending retry buf is re-queued at the head of the
		 * wait queue so it is not lost across the suspend.
		 */
		if (un->un_retry_bp != NULL) {
			un->un_retry_bp->av_forw = un->un_waitq_headp;
			un->un_waitq_headp = un->un_retry_bp;
			if (un->un_waitq_tailp == NULL) {
				un->un_waitq_tailp = un->un_retry_bp;
			}
			un->un_retry_bp = NULL;
			un->un_retry_statp = NULL;
		}
	}

	if (un->un_direct_priority_timeid != NULL) {
		timeout_id_t temp_id = un->un_direct_priority_timeid;
		un->un_direct_priority_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_f_is_fibre == TRUE) {
		/*
		 * Remove callbacks for insert and remove events
		 */
		if (un->un_insert_event != NULL) {
			mutex_exit(SD_MUTEX(un));
			(void) ddi_remove_event_handler(un->un_insert_cb_id);
			mutex_enter(SD_MUTEX(un));
			un->un_insert_event = NULL;
		}

		if (un->un_remove_event != NULL) {
			mutex_exit(SD_MUTEX(un));
			(void) ddi_remove_event_handler(un->un_remove_cb_id);
			mutex_enter(SD_MUTEX(un));
			un->un_remove_event = NULL;
		}
	}

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");

	return (DDI_SUCCESS);
}
5864 
5865 
5866 /*
5867  *    Function: sd_ddi_resume
5868  *
 * Description: Performs system power-up operations.
5870  *
5871  * Return Code: DDI_SUCCESS
5872  *		DDI_FAILURE
5873  *
5874  *     Context: Kernel thread context
5875  */
5876 
static int
sd_ddi_resume(dev_info_t *devi)
{
	struct	sd_lun	*un;

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL) {
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");

	mutex_enter(SD_MUTEX(un));
	Restore_state(un);

	/*
	 * Restore the state which was saved to give the
	 * right state in un_last_state
	 */
	un->un_last_state = un->un_save_state;
	/*
	 * Note: throttle comes back at full.
	 * Also note: this MUST be done before calling pm_raise_power
	 * otherwise the system can get hung in biowait. The scenario where
	 * this'll happen is under cpr suspend. Writing of the system
	 * state goes through sddump, which writes 0 to un_throttle. If
	 * writing the system state then fails, example if the partition is
	 * too small, then cpr attempts a resume. If throttle isn't restored
	 * from the saved value until after calling pm_raise_power then
	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
	 * in biowait.
	 */
	un->un_throttle = un->un_saved_throttle;

	/*
	 * The chance of failure is very rare as the only command done in power
	 * entry point is START command when you transition from 0->1 or
	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
	 * which suspend was done. Ignore the return value as the resume should
	 * not be failed. In the case of removable media the media need not be
	 * inserted and hence there is a chance that raise power will fail with
	 * media not present.
	 */
	if (un->un_f_attach_spinup) {
		/* pm_raise_power must be called without SD_MUTEX held. */
		mutex_exit(SD_MUTEX(un));
		(void) pm_raise_power(SD_DEVINFO(un), 0,
		    SD_PM_STATE_ACTIVE(un));
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * Don't broadcast to the suspend cv and therefore possibly
	 * start I/O until after power has been restored.
	 */
	cv_broadcast(&un->un_suspend_cv);
	cv_broadcast(&un->un_state_cv);

	/* restart thread */
	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
		scsi_watch_resume(un->un_swr_token);
	}

	/*
	 * Transport any pending commands to the target.
	 *
	 * If this is a low-activity device commands in queue will have to wait
	 * until new commands come in, which may take awhile. Also, we
	 * specifically don't check un_ncmds_in_transport because we know that
	 * there really are no commands in progress after the unit was
	 * suspended and we could have reached the throttle level, been
	 * suspended, and have no new commands coming in for awhile. Highly
	 * unlikely, but so is the low-activity disk scenario.
	 */
	ddi_xbuf_dispatch(un->un_xbuf_attr);

	sd_start_cmds(un, NULL);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");

	return (DDI_SUCCESS);
}
5959 
5960 
5961 /*
5962  *    Function: sd_pm_state_change
5963  *
5964  * Description: Change the driver power state.
5965  *		Someone else is required to actually change the driver
5966  *		power level.
5967  *
5968  *   Arguments: un - driver soft state (unit) structure
5969  *              level - the power level that is changed to
5970  *              flag - to decide how to change the power state
5971  *
5972  * Return Code: DDI_SUCCESS
5973  *
5974  *     Context: Kernel thread context
5975  */
static int
sd_pm_state_change(struct sd_lun *un, int level, int flag)
{
	ASSERT(un != NULL);
	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: entry\n");

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	mutex_enter(SD_MUTEX(un));

	/*
	 * A rollback request, or a transition to an I/O-capable level,
	 * is recorded unconditionally; only a transition to a non-I/O
	 * level requires the busy checks in the else-branch below.
	 */
	if (flag == SD_PM_STATE_ROLLBACK || SD_PM_IS_IO_CAPABLE(un, level)) {
		un->un_power_level = level;
		ASSERT(!mutex_owned(&un->un_pm_mutex));
		mutex_enter(&un->un_pm_mutex);
		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * Leaving low power: un_pm_count goes from its
			 * low-power value (-1) back to 0 (on, not busy).
			 */
			un->un_pm_count++;
			ASSERT(un->un_pm_count == 0);
		}
		mutex_exit(&un->un_pm_mutex);
	} else {
		/*
		 * Exit if power management is not enabled for this device,
		 * or if the device is being used by HA.
		 */
		if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
		    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
			mutex_exit(SD_MUTEX(un));
			SD_TRACE(SD_LOG_POWER, un,
			    "sd_pm_state_change: exiting\n");
			return (DDI_FAILURE);
		}

		SD_INFO(SD_LOG_POWER, un, "sd_pm_state_change: "
		    "un_ncmds_in_driver=%ld\n", un->un_ncmds_in_driver);

		/*
		 * See if the device is not busy, ie.:
		 *    - we have no commands in the driver for this device
		 *    - not waiting for resources
		 */
		if ((un->un_ncmds_in_driver == 0) &&
		    (un->un_state != SD_STATE_RWAIT)) {
			/*
			 * The device is not busy, so it is OK to go to low
			 * power state. Indicate low power, but rely on someone
			 * else to actually change it.
			 */
			mutex_enter(&un->un_pm_mutex);
			un->un_pm_count = -1;
			mutex_exit(&un->un_pm_mutex);
			un->un_power_level = level;
		}
	}

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: exit\n");

	return (DDI_SUCCESS);
}
6035 
6036 
6037 /*
6038  *    Function: sd_pm_idletimeout_handler
6039  *
6040  * Description: A timer routine that's active only while a device is busy.
6041  *		The purpose is to extend slightly the pm framework's busy
6042  *		view of the device to prevent busy/idle thrashing for
6043  *		back-to-back commands. Do this by comparing the current time
6044  *		to the time at which the last command completed and when the
6045  *		difference is greater than sd_pm_idletime, call
6046  *		pm_idle_component. In addition to indicating idle to the pm
6047  *		framework, update the chain type to again use the internal pm
6048  *		layers of the driver.
6049  *
6050  *   Arguments: arg - driver soft state (unit) structure
6051  *
6052  *     Context: Executes in a timeout(9F) thread context
6053  */
6054 
static void
sd_pm_idletimeout_handler(void *arg)
{
	const hrtime_t idletime = sd_pm_idletime * NANOSEC;
	struct sd_lun *un = arg;

	/*
	 * Grab both mutexes, in the proper order, since we're accessing
	 * both PM and softstate variables.
	 */
	mutex_enter(SD_MUTEX(un));
	mutex_enter(&un->un_pm_mutex);
	/* if timeout id is NULL, we are being canceled via untimeout */
	if (un->un_pm_idle_timeid == NULL) {
		mutex_exit(&un->un_pm_mutex);
		mutex_exit(SD_MUTEX(un));
		return;
	}
	/*
	 * Declare the device idle only when it has been quiescent for at
	 * least sd_pm_idletime, has no commands in the driver, and has no
	 * outstanding PM busy count; otherwise re-arm ourselves.
	 */
	if (((gethrtime() - un->un_pm_idle_time) > idletime) &&
	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
		/*
		 * Update the chain types.
		 * This takes affect on the next new command received.
		 */
		if (un->un_f_non_devbsize_supported) {
			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
		} else {
			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
		}
		un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD;

		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_idletimeout_handler: idling device\n");
		(void) pm_idle_component(SD_DEVINFO(un), 0);
		un->un_pm_idle_timeid = NULL;
	} else {
		/* Not idle yet: check again in 300 ms. */
		un->un_pm_idle_timeid =
		    timeout(sd_pm_idletimeout_handler, un,
		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
	}
	mutex_exit(&un->un_pm_mutex);
	mutex_exit(SD_MUTEX(un));
}
6098 
6099 
6100 /*
6101  *    Function: sd_pm_timeout_handler
6102  *
6103  * Description: Callback to tell framework we are idle.
6104  *
6105  *     Context: timeout(9f) thread context.
6106  */
6107 
6108 static void
sd_pm_timeout_handler(void * arg)6109 sd_pm_timeout_handler(void *arg)
6110 {
6111 	struct sd_lun *un = arg;
6112 
6113 	(void) pm_idle_component(SD_DEVINFO(un), 0);
6114 	mutex_enter(&un->un_pm_mutex);
6115 	un->un_pm_timeid = NULL;
6116 	mutex_exit(&un->un_pm_mutex);
6117 }
6118 
6119 
6120 /*
6121  *    Function: sdpower
6122  *
6123  * Description: PM entry point.
6124  *
6125  * Return Code: DDI_SUCCESS
6126  *		DDI_FAILURE
6127  *
6128  *     Context: Kernel thread context
6129  */
6130 
6131 static int
sdpower(dev_info_t * devi,int component,int level)6132 sdpower(dev_info_t *devi, int component, int level)
6133 {
6134 	struct sd_lun	*un;
6135 	int		instance;
6136 	int		rval = DDI_SUCCESS;
6137 	uint_t		i, log_page_size, maxcycles, ncycles;
6138 	uchar_t		*log_page_data;
6139 	int		log_sense_page;
6140 	int		medium_present;
6141 	time_t		intvlp;
6142 	struct pm_trans_data	sd_pm_tran_data;
6143 	uchar_t		save_state = SD_STATE_NORMAL;
6144 	int		sval;
6145 	uchar_t		state_before_pm;
6146 	sd_ssc_t	*ssc;
6147 	int	last_power_level = SD_SPINDLE_UNINIT;
6148 
6149 	instance = ddi_get_instance(devi);
6150 
6151 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
6152 	    !SD_PM_IS_LEVEL_VALID(un, level) || component != 0) {
6153 		return (DDI_FAILURE);
6154 	}
6155 
6156 	ssc = sd_ssc_init(un);
6157 
6158 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
6159 
6160 	mutex_enter(SD_MUTEX(un));
6161 
6162 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
6163 	    un->un_ncmds_in_driver);
6164 
6165 	/*
6166 	 * If un_ncmds_in_driver is non-zero it indicates commands are
6167 	 * already being processed in the driver.
6168 	 * At the same time somebody is requesting to go to a lower power
6169 	 * that can't perform I/O, which can't happen, therefore we need to
6170 	 * return failure.
6171 	 */
6172 	if ((!SD_PM_IS_IO_CAPABLE(un, level)) &&
6173 	    (un->un_ncmds_in_driver != 0)) {
6174 		mutex_exit(SD_MUTEX(un));
6175 
6176 		SD_TRACE(SD_LOG_IO_PM, un,
6177 		    "sdpower: exit, device has queued cmds.\n");
6178 
6179 		goto sdpower_failed;
6180 	}
6181 
6182 	/*
6183 	 * if it is OFFLINE that means the disk is completely dead
6184 	 * in our case we have to put the disk in on or off by sending commands
6185 	 * Of course that will fail anyway so return back here.
6186 	 *
6187 	 * Power changes to a device that's OFFLINE or SUSPENDED
6188 	 * are not allowed.
6189 	 */
6190 	if ((un->un_state == SD_STATE_OFFLINE) ||
6191 	    (un->un_state == SD_STATE_SUSPENDED)) {
6192 		mutex_exit(SD_MUTEX(un));
6193 
6194 		SD_TRACE(SD_LOG_IO_PM, un,
6195 		    "sdpower: exit, device is off-line.\n");
6196 
6197 		goto sdpower_failed;
6198 	}
6199 
6200 	/*
6201 	 * Change the device's state to indicate it's power level
6202 	 * is being changed. Do this to prevent a power off in the
6203 	 * middle of commands, which is especially bad on devices
6204 	 * that are really powered off instead of just spun down.
6205 	 */
6206 	state_before_pm = un->un_state;
6207 	un->un_state = SD_STATE_PM_CHANGING;
6208 
6209 	mutex_exit(SD_MUTEX(un));
6210 
6211 	/*
6212 	 * If log sense command is not supported, bypass the
6213 	 * following checking, otherwise, check the log sense
6214 	 * information for this device.
6215 	 */
6216 	if (SD_PM_STOP_MOTOR_NEEDED(un, level) &&
6217 	    un->un_f_log_sense_supported) {
6218 		/*
		 * Get the log sense information to understand whether the
		 * powercycle counts have gone beyond the threshold.
6221 		 */
6222 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6223 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6224 
6225 		mutex_enter(SD_MUTEX(un));
6226 		log_sense_page = un->un_start_stop_cycle_page;
6227 		mutex_exit(SD_MUTEX(un));
6228 
6229 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6230 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
6231 
6232 		if (rval != 0) {
6233 			if (rval == EIO)
6234 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6235 			else
6236 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6237 		}
6238 
6239 #ifdef	SDDEBUG
6240 		if (sd_force_pm_supported) {
6241 			/* Force a successful result */
6242 			rval = 0;
6243 		}
6244 #endif
6245 		if (rval != 0) {
6246 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
6247 			    "Log Sense Failed\n");
6248 
6249 			kmem_free(log_page_data, log_page_size);
6250 			/* Cannot support power management on those drives */
6251 
6252 			/*
6253 			 * On exit put the state back to it's original value
6254 			 * and broadcast to anyone waiting for the power
6255 			 * change completion.
6256 			 */
6257 			mutex_enter(SD_MUTEX(un));
6258 			un->un_state = state_before_pm;
6259 			cv_broadcast(&un->un_suspend_cv);
6260 			mutex_exit(SD_MUTEX(un));
6261 			SD_TRACE(SD_LOG_IO_PM, un,
6262 			    "sdpower: exit, Log Sense Failed.\n");
6263 
6264 			goto sdpower_failed;
6265 		}
6266 
6267 		/*
6268 		 * From the page data - Convert the essential information to
6269 		 * pm_trans_data
6270 		 */
6271 		maxcycles =
6272 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
6273 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
6274 
6275 		ncycles =
6276 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
6277 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
6278 
6279 		if (un->un_f_pm_log_sense_smart) {
6280 			sd_pm_tran_data.un.smart_count.allowed = maxcycles;
6281 			sd_pm_tran_data.un.smart_count.consumed = ncycles;
6282 			sd_pm_tran_data.un.smart_count.flag = 0;
6283 			sd_pm_tran_data.format = DC_SMART_FORMAT;
6284 		} else {
6285 			sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
6286 			sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
6287 			for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
6288 				sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
6289 				    log_page_data[8+i];
6290 			}
6291 			sd_pm_tran_data.un.scsi_cycles.flag = 0;
6292 			sd_pm_tran_data.format = DC_SCSI_FORMAT;
6293 		}
6294 
6295 		kmem_free(log_page_data, log_page_size);
6296 
6297 		/*
6298 		 * Call pm_trans_check routine to get the Ok from
6299 		 * the global policy
6300 		 */
6301 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
6302 #ifdef	SDDEBUG
6303 		if (sd_force_pm_supported) {
6304 			/* Force a successful result */
6305 			rval = 1;
6306 		}
6307 #endif
6308 		switch (rval) {
6309 		case 0:
6310 			/*
6311 			 * Not Ok to Power cycle or error in parameters passed
6312 			 * Would have given the advised time to consider power
6313 			 * cycle. Based on the new intvlp parameter we are
6314 			 * supposed to pretend we are busy so that pm framework
6315 			 * will never call our power entry point. Because of
6316 			 * that install a timeout handler and wait for the
6317 			 * recommended time to elapse so that power management
6318 			 * can be effective again.
6319 			 *
6320 			 * To effect this behavior, call pm_busy_component to
6321 			 * indicate to the framework this device is busy.
6322 			 * By not adjusting un_pm_count the rest of PM in
6323 			 * the driver will function normally, and independent
6324 			 * of this but because the framework is told the device
6325 			 * is busy it won't attempt powering down until it gets
6326 			 * a matching idle. The timeout handler sends this.
6327 			 * Note: sd_pm_entry can't be called here to do this
6328 			 * because sdpower may have been called as a result
6329 			 * of a call to pm_raise_power from within sd_pm_entry.
6330 			 *
6331 			 * If a timeout handler is already active then
6332 			 * don't install another.
6333 			 */
6334 			mutex_enter(&un->un_pm_mutex);
6335 			if (un->un_pm_timeid == NULL) {
6336 				un->un_pm_timeid =
6337 				    timeout(sd_pm_timeout_handler,
6338 				    un, intvlp * drv_usectohz(1000000));
6339 				mutex_exit(&un->un_pm_mutex);
6340 				(void) pm_busy_component(SD_DEVINFO(un), 0);
6341 			} else {
6342 				mutex_exit(&un->un_pm_mutex);
6343 			}
6344 			/*
6345 			 * On exit put the state back to its original value
6346 			 * and broadcast to anyone waiting for the power
6347 			 * change completion.
6348 			 */
6349 			mutex_enter(SD_MUTEX(un));
6350 			un->un_state = state_before_pm;
6351 			cv_broadcast(&un->un_suspend_cv);
6352 			mutex_exit(SD_MUTEX(un));
6353 
6354 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
6355 			    "trans check Failed, not ok to power cycle.\n");
6356 
6357 			goto sdpower_failed;
6358 		case -1:
6359 			/*
6360 			 * On exit put the state back to its original value
6361 			 * and broadcast to anyone waiting for the power
6362 			 * change completion.
6363 			 */
6364 			mutex_enter(SD_MUTEX(un));
6365 			un->un_state = state_before_pm;
6366 			cv_broadcast(&un->un_suspend_cv);
6367 			mutex_exit(SD_MUTEX(un));
6368 			SD_TRACE(SD_LOG_IO_PM, un,
6369 			    "sdpower: exit, trans check command Failed.\n");
6370 
6371 			goto sdpower_failed;
6372 		}
6373 	}
6374 
6375 	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
6376 		/*
6377 		 * Save the last state... if the STOP FAILS we need it
6378 		 * for restoring
6379 		 */
6380 		mutex_enter(SD_MUTEX(un));
6381 		save_state = un->un_last_state;
6382 		last_power_level = un->un_power_level;
6383 		/*
6384 		 * There must not be any cmds. getting processed
6385 		 * in the driver when we get here. Power to the
6386 		 * device is potentially going off.
6387 		 */
6388 		ASSERT(un->un_ncmds_in_driver == 0);
6389 		mutex_exit(SD_MUTEX(un));
6390 
6391 		/*
6392 		 * For now PM suspend the device completely before spindle is
6393 		 * turned off
6394 		 */
6395 		if ((rval = sd_pm_state_change(un, level, SD_PM_STATE_CHANGE))
6396 		    == DDI_FAILURE) {
6397 			/*
6398 			 * On exit put the state back to its original value
6399 			 * and broadcast to anyone waiting for the power
6400 			 * change completion.
6401 			 */
6402 			mutex_enter(SD_MUTEX(un));
6403 			un->un_state = state_before_pm;
6404 			un->un_power_level = last_power_level;
6405 			cv_broadcast(&un->un_suspend_cv);
6406 			mutex_exit(SD_MUTEX(un));
6407 			SD_TRACE(SD_LOG_IO_PM, un,
6408 			    "sdpower: exit, PM suspend Failed.\n");
6409 
6410 			goto sdpower_failed;
6411 		}
6412 	}
6413 
6414 	/*
6415 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6416 	 * close, or strategy. Dump no long uses this routine, it uses it's
6417 	 * own code so it can be done in polled mode.
6418 	 */
6419 
6420 	medium_present = TRUE;
6421 
6422 	/*
6423 	 * When powering up, issue a TUR in case the device is at unit
6424 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6425 	 * a deadlock on un_pm_busy_cv will occur.
6426 	 */
6427 	if (SD_PM_IS_IO_CAPABLE(un, level)) {
6428 		sval = sd_send_scsi_TEST_UNIT_READY(ssc,
6429 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6430 		if (sval != 0)
6431 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6432 	}
6433 
6434 	if (un->un_f_power_condition_supported) {
6435 		char *pm_condition_name[] = {"STOPPED", "STANDBY",
6436 		    "IDLE", "ACTIVE"};
6437 		SD_TRACE(SD_LOG_IO_PM, un,
6438 		    "sdpower: sending \'%s\' power condition",
6439 		    pm_condition_name[level]);
6440 		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
6441 		    sd_pl2pc[level], SD_PATH_DIRECT);
6442 	} else {
6443 		SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6444 		    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6445 		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
6446 		    ((level == SD_SPINDLE_ON) ? SD_TARGET_START :
6447 		    SD_TARGET_STOP), SD_PATH_DIRECT);
6448 	}
6449 	if (sval != 0) {
6450 		if (sval == EIO)
6451 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6452 		else
6453 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6454 	}
6455 
6456 	/* Command failed, check for media present. */
6457 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6458 		medium_present = FALSE;
6459 	}
6460 
6461 	/*
6462 	 * The conditions of interest here are:
6463 	 *   if a spindle off with media present fails,
6464 	 *	then restore the state and return an error.
6465 	 *   else if a spindle on fails,
6466 	 *	then return an error (there's no state to restore).
6467 	 * In all other cases we setup for the new state
6468 	 * and return success.
6469 	 */
6470 	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
6471 		if ((medium_present == TRUE) && (sval != 0)) {
6472 			/* The stop command from above failed */
6473 			rval = DDI_FAILURE;
6474 			/*
6475 			 * The stop command failed, and we have media
6476 			 * present. Put the level back by calling the
6477 			 * sd_pm_resume() and set the state back to
6478 			 * it's previous value.
6479 			 */
6480 			(void) sd_pm_state_change(un, last_power_level,
6481 			    SD_PM_STATE_ROLLBACK);
6482 			mutex_enter(SD_MUTEX(un));
6483 			un->un_last_state = save_state;
6484 			mutex_exit(SD_MUTEX(un));
6485 		} else if (un->un_f_monitor_media_state) {
6486 			/*
6487 			 * The stop command from above succeeded.
6488 			 * Terminate watch thread in case of removable media
6489 			 * devices going into low power state. This is as per
6490 			 * the requirements of pm framework, otherwise commands
6491 			 * will be generated for the device (through watch
6492 			 * thread), even when the device is in low power state.
6493 			 */
6494 			mutex_enter(SD_MUTEX(un));
6495 			un->un_f_watcht_stopped = FALSE;
6496 			if (un->un_swr_token != NULL) {
6497 				opaque_t temp_token = un->un_swr_token;
6498 				un->un_f_watcht_stopped = TRUE;
6499 				un->un_swr_token = NULL;
6500 				mutex_exit(SD_MUTEX(un));
6501 				(void) scsi_watch_request_terminate(temp_token,
6502 				    SCSI_WATCH_TERMINATE_ALL_WAIT);
6503 			} else {
6504 				mutex_exit(SD_MUTEX(un));
6505 			}
6506 		}
6507 	} else {
6508 		/*
6509 		 * The level requested is I/O capable.
6510 		 * Legacy behavior: return success on a failed spinup
6511 		 * if there is no media in the drive.
6512 		 * Do this by looking at medium_present here.
6513 		 */
6514 		if ((sval != 0) && medium_present) {
6515 			/* The start command from above failed */
6516 			rval = DDI_FAILURE;
6517 		} else {
6518 			/*
6519 			 * The start command from above succeeded
6520 			 * PM resume the devices now that we have
6521 			 * started the disks
6522 			 */
6523 			(void) sd_pm_state_change(un, level,
6524 			    SD_PM_STATE_CHANGE);
6525 
6526 			/*
6527 			 * Resume the watch thread since it was suspended
6528 			 * when the device went into low power mode.
6529 			 */
6530 			if (un->un_f_monitor_media_state) {
6531 				mutex_enter(SD_MUTEX(un));
6532 				if (un->un_f_watcht_stopped == TRUE) {
6533 					opaque_t temp_token;
6534 
6535 					un->un_f_watcht_stopped = FALSE;
6536 					mutex_exit(SD_MUTEX(un));
6537 					temp_token =
6538 					    sd_watch_request_submit(un);
6539 					mutex_enter(SD_MUTEX(un));
6540 					un->un_swr_token = temp_token;
6541 				}
6542 				mutex_exit(SD_MUTEX(un));
6543 			}
6544 		}
6545 	}
6546 
6547 	/*
6548 	 * On exit put the state back to its original value
6549 	 * and broadcast to anyone waiting for the power
6550 	 * change completion.
6551 	 */
6552 	mutex_enter(SD_MUTEX(un));
6553 	un->un_state = state_before_pm;
6554 	cv_broadcast(&un->un_suspend_cv);
6555 	mutex_exit(SD_MUTEX(un));
6556 
6557 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6558 
6559 	sd_ssc_fini(ssc);
6560 	return (rval);
6561 
6562 sdpower_failed:
6563 
6564 	sd_ssc_fini(ssc);
6565 	return (DDI_FAILURE);
6566 }
6567 
6568 
6569 
6570 /*
6571  *    Function: sdattach
6572  *
6573  * Description: Driver's attach(9e) entry point function.
6574  *
6575  *   Arguments: devi - opaque device info handle
6576  *		cmd  - attach  type
6577  *
6578  * Return Code: DDI_SUCCESS
6579  *		DDI_FAILURE
6580  *
6581  *     Context: Kernel thread context
6582  */
6583 
6584 static int
sdattach(dev_info_t * devi,ddi_attach_cmd_t cmd)6585 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6586 {
6587 	switch (cmd) {
6588 	case DDI_ATTACH:
6589 		return (sd_unit_attach(devi));
6590 	case DDI_RESUME:
6591 		return (sd_ddi_resume(devi));
6592 	default:
6593 		break;
6594 	}
6595 	return (DDI_FAILURE);
6596 }
6597 
6598 
6599 /*
6600  *    Function: sddetach
6601  *
6602  * Description: Driver's detach(9E) entry point function.
6603  *
6604  *   Arguments: devi - opaque device info handle
6605  *		cmd  - detach  type
6606  *
6607  * Return Code: DDI_SUCCESS
6608  *		DDI_FAILURE
6609  *
6610  *     Context: Kernel thread context
6611  */
6612 
6613 static int
sddetach(dev_info_t * devi,ddi_detach_cmd_t cmd)6614 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6615 {
6616 	switch (cmd) {
6617 	case DDI_DETACH:
6618 		return (sd_unit_detach(devi));
6619 	case DDI_SUSPEND:
6620 		return (sd_ddi_suspend(devi));
6621 	default:
6622 		break;
6623 	}
6624 	return (DDI_FAILURE);
6625 }
6626 
6627 
6628 /*
6629  *     Function: sd_sync_with_callback
6630  *
6631  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6632  *		 state while the callback routine is active.
6633  *
6634  *    Arguments: un: softstate structure for the instance
6635  *
6636  *	Context: Kernel thread context
6637  */
6638 
6639 static void
sd_sync_with_callback(struct sd_lun * un)6640 sd_sync_with_callback(struct sd_lun *un)
6641 {
6642 	ASSERT(un != NULL);
6643 
6644 	mutex_enter(SD_MUTEX(un));
6645 
6646 	ASSERT(un->un_in_callback >= 0);
6647 
6648 	while (un->un_in_callback > 0) {
6649 		mutex_exit(SD_MUTEX(un));
6650 		delay(2);
6651 		mutex_enter(SD_MUTEX(un));
6652 	}
6653 
6654 	mutex_exit(SD_MUTEX(un));
6655 }
6656 
6657 /*
6658  *    Function: sd_unit_attach
6659  *
6660  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6661  *		the soft state structure for the device and performs
6662  *		all necessary structure and device initializations.
6663  *
6664  *   Arguments: devi: the system's dev_info_t for the device.
6665  *
6666  * Return Code: DDI_SUCCESS if attach is successful.
6667  *		DDI_FAILURE if any part of the attach fails.
6668  *
6669  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6670  *		Kernel thread context only.  Can sleep.
6671  */
6672 
6673 static int
sd_unit_attach(dev_info_t * devi)6674 sd_unit_attach(dev_info_t *devi)
6675 {
6676 	struct	scsi_device	*devp;
6677 	struct	sd_lun		*un;
6678 	char			*variantp;
6679 	char			name_str[48];
6680 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6681 	int	instance;
6682 	int	rval;
6683 	int	wc_enabled;
6684 	int	wc_changeable;
6685 	int	tgt;
6686 	uint64_t	capacity;
6687 	uint_t		lbasize = 0;
6688 	dev_info_t	*pdip = ddi_get_parent(devi);
6689 	int		offbyone = 0;
6690 	int		geom_label_valid = 0;
6691 	sd_ssc_t	*ssc;
6692 	int		status;
6693 	struct sd_fm_internal	*sfip = NULL;
6694 	int		max_xfer_size;
6695 
6696 	/*
6697 	 * Retrieve the target driver's private data area. This was set
6698 	 * up by the HBA.
6699 	 */
6700 	devp = ddi_get_driver_private(devi);
6701 
6702 	/*
6703 	 * Retrieve the target ID of the device.
6704 	 */
6705 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6706 	    SCSI_ADDR_PROP_TARGET, -1);
6707 
6708 	/*
6709 	 * Since we have no idea what state things were left in by the last
6710 	 * user of the device, set up some 'default' settings, ie. turn 'em
6711 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6712 	 * Do this before the scsi_probe, which sends an inquiry.
6713 	 * This is a fix for bug (4430280).
6714 	 * Of special importance is wide-xfer. The drive could have been left
6715 	 * in wide transfer mode by the last driver to communicate with it,
6716 	 * this includes us. If that's the case, and if the following is not
6717 	 * setup properly or we don't re-negotiate with the drive prior to
6718 	 * transferring data to/from the drive, it causes bus parity errors,
6719 	 * data overruns, and unexpected interrupts. This first occurred when
6720 	 * the fix for bug (4378686) was made.
6721 	 */
6722 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6723 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6724 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6725 
6726 	/*
6727 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6728 	 * on a target. Setting it per lun instance actually sets the
6729 	 * capability of this target, which affects those luns already
6730 	 * attached on the same target. So during attach, we can only disable
6731 	 * this capability only when no other lun has been attached on this
6732 	 * target. By doing this, we assume a target has the same tagged-qing
6733 	 * capability for every lun. The condition can be removed when HBA
6734 	 * is changed to support per lun based tagged-qing capability.
6735 	 */
6736 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6737 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6738 	}
6739 
6740 	/*
6741 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6742 	 * This call will allocate and fill in the scsi_inquiry structure
6743 	 * and point the sd_inq member of the scsi_device structure to it.
6744 	 * If the attach succeeds, then this memory will not be de-allocated
6745 	 * (via scsi_unprobe()) until the instance is detached.
6746 	 */
6747 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6748 		goto probe_failed;
6749 	}
6750 
6751 	/*
6752 	 * Check the device type as specified in the inquiry data and
6753 	 * claim it if it is of a type that we support.
6754 	 */
6755 	switch (devp->sd_inq->inq_dtype) {
6756 	case DTYPE_DIRECT:
6757 		break;
6758 	case DTYPE_RODIRECT:
6759 		break;
6760 	case DTYPE_OPTICAL:
6761 		break;
6762 	case DTYPE_NOTPRESENT:
6763 	default:
6764 		/* Unsupported device type; fail the attach. */
6765 		goto probe_failed;
6766 	}
6767 
6768 	/*
6769 	 * Allocate the soft state structure for this unit.
6770 	 *
6771 	 * We rely upon this memory being set to all zeroes by
6772 	 * ddi_soft_state_zalloc().  We assume that any member of the
6773 	 * soft state structure that is not explicitly initialized by
6774 	 * this routine will have a value of zero.
6775 	 */
6776 	instance = ddi_get_instance(devp->sd_dev);
6777 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6778 		goto probe_failed;
6779 	}
6780 
6781 	/*
6782 	 * Retrieve a pointer to the newly-allocated soft state.
6783 	 *
6784 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6785 	 * was successful, unless something has gone horribly wrong and the
6786 	 * ddi's soft state internals are corrupt (in which case it is
6787 	 * probably better to halt here than just fail the attach....)
6788 	 */
6789 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6790 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6791 		    instance);
6792 		/*NOTREACHED*/
6793 	}
6794 
6795 	/*
6796 	 * Link the back ptr of the driver soft state to the scsi_device
6797 	 * struct for this lun.
6798 	 * Save a pointer to the softstate in the driver-private area of
6799 	 * the scsi_device struct.
6800 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6801 	 * we first set un->un_sd below.
6802 	 */
6803 	un->un_sd = devp;
6804 	devp->sd_private = (opaque_t)un;
6805 
6806 	/*
6807 	 * The following must be after devp is stored in the soft state struct.
6808 	 */
6809 #ifdef SDDEBUG
6810 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6811 	    "%s_unit_attach: un:0x%p instance:%d\n",
6812 	    ddi_driver_name(devi), un, instance);
6813 #endif
6814 
6815 	/*
6816 	 * Set up the device type and node type (for the minor nodes).
6817 	 * By default we assume that the device can at least support the
6818 	 * Common Command Set. Call it a CD-ROM if it reports itself
6819 	 * as a RODIRECT device.
6820 	 */
6821 	switch (devp->sd_inq->inq_dtype) {
6822 	case DTYPE_RODIRECT:
6823 		un->un_node_type = DDI_NT_CD_CHAN;
6824 		un->un_ctype	 = CTYPE_CDROM;
6825 		break;
6826 	case DTYPE_OPTICAL:
6827 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6828 		un->un_ctype	 = CTYPE_ROD;
6829 		break;
6830 	default:
6831 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6832 		un->un_ctype	 = CTYPE_CCS;
6833 		break;
6834 	}
6835 
6836 	/*
6837 	 * Try to read the interconnect type from the HBA.
6838 	 *
6839 	 * Note: This driver is currently compiled as two binaries, a parallel
6840 	 * scsi version (sd) and a fibre channel version (ssd). All functional
6841 	 * differences are determined at compile time. In the future a single
6842 	 * binary will be provided and the interconnect type will be used to
6843 	 * differentiate between fibre and parallel scsi behaviors. At that time
6844 	 * it will be necessary for all fibre channel HBAs to support this
6845 	 * property.
6846 	 *
6847 	 * set un_f_is_fiber to TRUE ( default fiber )
6848 	 */
6849 	un->un_f_is_fibre = TRUE;
6850 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
6851 	case INTERCONNECT_SSA:
6852 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
6853 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6854 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
6855 		break;
6856 	case INTERCONNECT_PARALLEL:
6857 		un->un_f_is_fibre = FALSE;
6858 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6859 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6860 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
6861 		break;
6862 	case INTERCONNECT_SAS:
6863 		un->un_f_is_fibre = FALSE;
6864 		un->un_interconnect_type = SD_INTERCONNECT_SAS;
6865 		un->un_node_type = DDI_NT_BLOCK_SAS;
6866 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6867 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SAS\n", un);
6868 		break;
6869 	case INTERCONNECT_SATA:
6870 		un->un_f_is_fibre = FALSE;
6871 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
6872 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6873 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
6874 		break;
6875 	case INTERCONNECT_FIBRE:
6876 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
6877 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6878 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
6879 		break;
6880 	case INTERCONNECT_FABRIC:
6881 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
6882 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
6883 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6884 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
6885 		break;
6886 	default:
6887 		/*
6888 		 * The HBA does not support the "interconnect-type" property
6889 		 * (or did not provide a recognized type).
6890 		 *
6891 		 * Note: This will be obsoleted when a single fibre channel
6892 		 * and parallel scsi driver is delivered. In the meantime the
6893 		 * interconnect type will be set to the platform default.If that
6894 		 * type is not parallel SCSI, it means that we should be
6895 		 * assuming "ssd" semantics. However, here this also means that
6896 		 * the FC HBA is not supporting the "interconnect-type" property
6897 		 * like we expect it to, so log this occurrence.
6898 		 */
6899 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
6900 		if (!SD_IS_PARALLEL_SCSI(un)) {
6901 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6902 			    "sd_unit_attach: un:0x%p Assuming "
6903 			    "INTERCONNECT_FIBRE\n", un);
6904 		} else {
6905 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6906 			    "sd_unit_attach: un:0x%p Assuming "
6907 			    "INTERCONNECT_PARALLEL\n", un);
6908 			un->un_f_is_fibre = FALSE;
6909 		}
6910 		break;
6911 	}
6912 
6913 	if (un->un_f_is_fibre == TRUE) {
6914 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
6915 		    SCSI_VERSION_3) {
6916 			switch (un->un_interconnect_type) {
6917 			case SD_INTERCONNECT_FIBRE:
6918 			case SD_INTERCONNECT_SSA:
6919 				un->un_node_type = DDI_NT_BLOCK_WWN;
6920 				break;
6921 			default:
6922 				break;
6923 			}
6924 		}
6925 	}
6926 
6927 	/*
6928 	 * Initialize the Request Sense command for the target
6929 	 */
6930 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
6931 		goto alloc_rqs_failed;
6932 	}
6933 
6934 	/*
6935 	 * Set un_retry_count with SD_RETRY_COUNT, this is ok for Sparc
6936 	 * with separate binary for sd and ssd.
6937 	 *
6938 	 * x86 has 1 binary, un_retry_count is set base on connection type.
6939 	 * The hardcoded values will go away when Sparc uses 1 binary
6940 	 * for sd and ssd.  This hardcoded values need to match
6941 	 * SD_RETRY_COUNT in sddef.h
6942 	 * The value used is base on interconnect type.
6943 	 * fibre = 3, parallel = 5
6944 	 */
6945 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
6946 
6947 	/*
6948 	 * Set the per disk retry count to the default number of retries
6949 	 * for disks and CDROMs. This value can be overridden by the
6950 	 * disk property list or an entry in sd.conf.
6951 	 */
6952 	un->un_notready_retry_count =
6953 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
6954 	    : DISK_NOT_READY_RETRY_COUNT(un);
6955 
6956 	/*
6957 	 * Set the busy retry count to the default value of un_retry_count.
6958 	 * This can be overridden by entries in sd.conf or the device
6959 	 * config table.
6960 	 */
6961 	un->un_busy_retry_count = un->un_retry_count;
6962 
6963 	/*
6964 	 * Init the reset threshold for retries.  This number determines
6965 	 * how many retries must be performed before a reset can be issued
6966 	 * (for certain error conditions). This can be overridden by entries
6967 	 * in sd.conf or the device config table.
6968 	 */
6969 	un->un_reset_retry_count = (un->un_retry_count / 2);
6970 
6971 	/*
6972 	 * Set the victim_retry_count to the default un_retry_count
6973 	 */
6974 	un->un_victim_retry_count = (2 * un->un_retry_count);
6975 
6976 	/*
6977 	 * Set the reservation release timeout to the default value of
6978 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
6979 	 * device config table.
6980 	 */
6981 	un->un_reserve_release_time = 5;
6982 
6983 	/*
6984 	 * Set up the default maximum transfer size. Note that this may
6985 	 * get updated later in the attach, when setting up default wide
6986 	 * operations for disks.
6987 	 */
6988 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
6989 	un->un_partial_dma_supported = 1;
6990 
6991 	/*
6992 	 * Get "allow bus device reset" property (defaults to "enabled" if
6993 	 * the property was not defined). This is to disable bus resets for
6994 	 * certain kinds of error recovery. Note: In the future when a run-time
6995 	 * fibre check is available the soft state flag should default to
6996 	 * enabled.
6997 	 */
6998 	if (un->un_f_is_fibre == TRUE) {
6999 		un->un_f_allow_bus_device_reset = TRUE;
7000 	} else {
7001 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7002 		    "allow-bus-device-reset", 1) != 0) {
7003 			un->un_f_allow_bus_device_reset = TRUE;
7004 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7005 			    "sd_unit_attach: un:0x%p Bus device reset "
7006 			    "enabled\n", un);
7007 		} else {
7008 			un->un_f_allow_bus_device_reset = FALSE;
7009 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7010 			    "sd_unit_attach: un:0x%p Bus device reset "
7011 			    "disabled\n", un);
7012 		}
7013 	}
7014 
7015 	/*
7016 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7017 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7018 	 *
7019 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7020 	 * property. The new "variant" property with a value of "atapi" has been
7021 	 * introduced so that future 'variants' of standard SCSI behavior (like
7022 	 * atapi) could be specified by the underlying HBA drivers by supplying
7023 	 * a new value for the "variant" property, instead of having to define a
7024 	 * new property.
7025 	 */
7026 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7027 		un->un_f_cfg_is_atapi = TRUE;
7028 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7029 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7030 	}
7031 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7032 	    &variantp) == DDI_PROP_SUCCESS) {
7033 		if (strcmp(variantp, "atapi") == 0) {
7034 			un->un_f_cfg_is_atapi = TRUE;
7035 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7036 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7037 		}
7038 		ddi_prop_free(variantp);
7039 	}
7040 
7041 	un->un_cmd_timeout	= SD_IO_TIME;
7042 
7043 	un->un_busy_timeout  = SD_BSY_TIMEOUT;
7044 
7045 	/* Info on current states, statuses, etc. (Updated frequently) */
7046 	un->un_state		= SD_STATE_NORMAL;
7047 	un->un_last_state	= SD_STATE_NORMAL;
7048 
7049 	/* Control & status info for command throttling */
7050 	un->un_throttle		= sd_max_throttle;
7051 	un->un_saved_throttle	= sd_max_throttle;
7052 	un->un_min_throttle	= sd_min_throttle;
7053 
7054 	if (un->un_f_is_fibre == TRUE) {
7055 		un->un_f_use_adaptive_throttle = TRUE;
7056 	} else {
7057 		un->un_f_use_adaptive_throttle = FALSE;
7058 	}
7059 
7060 	/* Removable media support. */
7061 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7062 	un->un_mediastate		= DKIO_NONE;
7063 	un->un_specified_mediastate	= DKIO_NONE;
7064 
7065 	/* CVs for suspend/resume (PM or DR) */
7066 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7067 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7068 
7069 	/* Power management support. */
7070 	un->un_power_level = SD_SPINDLE_UNINIT;
7071 
7072 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
7073 	un->un_f_wcc_inprog = 0;
7074 
7075 	/*
7076 	 * The conf file entry and softstate variable is a forceful override,
7077 	 * meaning a non-zero value must be entered to change the default.
7078 	 */
7079 	un->un_f_disksort_disabled = FALSE;
7080 	un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT;
7081 	un->un_f_enable_rmw = FALSE;
7082 
7083 	/*
7084 	 * GET EVENT STATUS NOTIFICATION media polling enabled by default, but
7085 	 * can be overridden via [s]sd-config-list "mmc-gesn-polling" property.
7086 	 */
7087 	un->un_f_mmc_gesn_polling = TRUE;
7088 
7089 	/*
7090 	 * physical sector size defaults to DEV_BSIZE currently. We can
7091 	 * override this value via the driver configuration file so we must
7092 	 * set it before calling sd_read_unit_properties().
7093 	 */
7094 	un->un_phy_blocksize = DEV_BSIZE;
7095 
7096 	/*
7097 	 * Retrieve the properties from the static driver table or the driver
7098 	 * configuration file (.conf) for this unit and update the soft state
7099 	 * for the device as needed for the indicated properties.
7100 	 * Note: the property configuration needs to occur here as some of the
7101 	 * following routines may have dependencies on soft state flags set
7102 	 * as part of the driver property configuration.
7103 	 */
7104 	sd_read_unit_properties(un);
7105 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7106 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
7107 
7108 	/*
7109 	 * Only if a device has "hotpluggable" property, it is
7110 	 * treated as hotpluggable device. Otherwise, it is
7111 	 * regarded as non-hotpluggable one.
7112 	 */
7113 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
7114 	    -1) != -1) {
7115 		un->un_f_is_hotpluggable = TRUE;
7116 	}
7117 
7118 	/*
7119 	 * set unit's attributes(flags) according to "hotpluggable" and
7120 	 * RMB bit in INQUIRY data.
7121 	 */
7122 	sd_set_unit_attributes(un, devi);
7123 
7124 	/*
7125 	 * By default, we mark the capacity, lbasize, and geometry
7126 	 * as invalid. Only if we successfully read a valid capacity
7127 	 * will we update the un_blockcount and un_tgt_blocksize with the
7128 	 * valid values (the geometry will be validated later).
7129 	 */
7130 	un->un_f_blockcount_is_valid	= FALSE;
7131 	un->un_f_tgt_blocksize_is_valid	= FALSE;
7132 
7133 	/*
7134 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
7135 	 * otherwise.
7136 	 */
7137 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
7138 	un->un_blockcount = 0;
7139 
7140 	/*
7141 	 * Set up the per-instance info needed to determine the correct
7142 	 * CDBs and other info for issuing commands to the target.
7143 	 */
7144 	sd_init_cdb_limits(un);
7145 
7146 	/*
7147 	 * Set up the IO chains to use, based upon the target type.
7148 	 */
7149 	if (un->un_f_non_devbsize_supported) {
7150 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7151 	} else {
7152 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7153 	}
7154 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7155 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
7156 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
7157 
7158 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
7159 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
7160 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
7161 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
7162 
7163 
7164 	if (ISCD(un)) {
7165 		un->un_additional_codes = sd_additional_codes;
7166 	} else {
7167 		un->un_additional_codes = NULL;
7168 	}
7169 
7170 	/*
7171 	 * Create the kstats here so they can be available for attach-time
7172 	 * routines that send commands to the unit (either polled or via
7173 	 * sd_send_scsi_cmd).
7174 	 *
7175 	 * Note: This is a critical sequence that needs to be maintained:
7176 	 *	1) Instantiate the kstats here, before any routines using the
7177 	 *	   iopath (i.e. sd_send_scsi_cmd).
7178 	 *	2) Instantiate and initialize the partition stats
7179 	 *	   (sd_set_pstats).
7180 	 *	3) Initialize the error stats (sd_set_errstats), following
7181 	 *	   sd_validate_geometry(),sd_register_devid(),
7182 	 *	   and sd_cache_control().
7183 	 */
7184 
7185 	un->un_stats = kstat_create(sd_label, instance,
7186 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
7187 	if (un->un_stats != NULL) {
7188 		un->un_stats->ks_lock = SD_MUTEX(un);
7189 		kstat_install(un->un_stats);
7190 	}
7191 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7192 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
7193 
7194 	un->un_unmapstats_ks = kstat_create(sd_label, instance, "unmapstats",
7195 	    "misc", KSTAT_TYPE_NAMED, sizeof (*un->un_unmapstats) /
7196 	    sizeof (kstat_named_t), 0);
7197 	if (un->un_unmapstats_ks) {
7198 		un->un_unmapstats = un->un_unmapstats_ks->ks_data;
7199 
7200 		kstat_named_init(&un->un_unmapstats->us_cmds,
7201 		    "commands", KSTAT_DATA_UINT64);
7202 		kstat_named_init(&un->un_unmapstats->us_errs,
7203 		    "errors", KSTAT_DATA_UINT64);
7204 		kstat_named_init(&un->un_unmapstats->us_extents,
7205 		    "extents", KSTAT_DATA_UINT64);
7206 		kstat_named_init(&un->un_unmapstats->us_bytes,
7207 		    "bytes", KSTAT_DATA_UINT64);
7208 
7209 		kstat_install(un->un_unmapstats_ks);
7210 	} else {
7211 		cmn_err(CE_NOTE, "!Cannot create unmap kstats for disk %d",
7212 		    instance);
7213 	}
7214 
7215 	sd_create_errstats(un, instance);
7216 	if (un->un_errstats == NULL) {
7217 		goto create_errstats_failed;
7218 	}
7219 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7220 	    "sd_unit_attach: un:0x%p errstats created\n", un);
7221 
7222 	/*
7223 	 * The following if/else code was relocated here from below as part
7224 	 * of the fix for bug (4430280). However with the default setup added
7225 	 * on entry to this routine, it's no longer absolutely necessary for
7226 	 * this to be before the call to sd_spin_up_unit.
7227 	 */
7228 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
7229 		int tq_trigger_flag = (((devp->sd_inq->inq_ansi == 4) ||
7230 		    (devp->sd_inq->inq_ansi == 5)) &&
7231 		    devp->sd_inq->inq_bque) || devp->sd_inq->inq_cmdque;
7232 
7233 		/*
7234 		 * If tagged queueing is supported by the target
7235 		 * and by the host adapter then we will enable it
7236 		 */
7237 		un->un_tagflags = 0;
7238 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) && tq_trigger_flag &&
7239 		    (un->un_f_arq_enabled == TRUE)) {
7240 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
7241 			    1, 1) == 1) {
7242 				un->un_tagflags = FLAG_STAG;
7243 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7244 				    "sd_unit_attach: un:0x%p tag queueing "
7245 				    "enabled\n", un);
7246 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
7247 			    "untagged-qing", 0) == 1) {
7248 				un->un_f_opt_queueing = TRUE;
7249 				un->un_saved_throttle = un->un_throttle =
7250 				    min(un->un_throttle, 3);
7251 			} else {
7252 				un->un_f_opt_queueing = FALSE;
7253 				un->un_saved_throttle = un->un_throttle = 1;
7254 			}
7255 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
7256 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
7257 			/* The Host Adapter supports internal queueing. */
7258 			un->un_f_opt_queueing = TRUE;
7259 			un->un_saved_throttle = un->un_throttle =
7260 			    min(un->un_throttle, 3);
7261 		} else {
7262 			un->un_f_opt_queueing = FALSE;
7263 			un->un_saved_throttle = un->un_throttle = 1;
7264 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7265 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
7266 		}
7267 
7268 		/*
7269 		 * Enable large transfers for SATA/SAS drives
7270 		 */
7271 		if (SD_IS_SERIAL(un)) {
7272 			un->un_max_xfer_size =
7273 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7274 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7275 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7276 			    "sd_unit_attach: un:0x%p max transfer "
7277 			    "size=0x%x\n", un, un->un_max_xfer_size);
7278 
7279 		}
7280 
7281 		/* Setup or tear down default wide operations for disks */
7282 
7283 		/*
7284 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
7285 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
7286 		 * system and be set to different values. In the future this
7287 		 * code may need to be updated when the ssd module is
7288 		 * obsoleted and removed from the system. (4299588)
7289 		 */
7290 		if (SD_IS_PARALLEL_SCSI(un) &&
7291 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
7292 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
7293 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7294 			    1, 1) == 1) {
7295 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7296 				    "sd_unit_attach: un:0x%p Wide Transfer "
7297 				    "enabled\n", un);
7298 			}
7299 
7300 			/*
7301 			 * If tagged queuing has also been enabled, then
7302 			 * enable large xfers
7303 			 */
7304 			if (un->un_saved_throttle == sd_max_throttle) {
7305 				un->un_max_xfer_size =
7306 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7307 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7308 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7309 				    "sd_unit_attach: un:0x%p max transfer "
7310 				    "size=0x%x\n", un, un->un_max_xfer_size);
7311 			}
7312 		} else {
7313 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7314 			    0, 1) == 1) {
7315 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7316 				    "sd_unit_attach: un:0x%p "
7317 				    "Wide Transfer disabled\n", un);
7318 			}
7319 		}
7320 	} else {
7321 		un->un_tagflags = FLAG_STAG;
7322 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
7323 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
7324 	}
7325 
7326 	/*
7327 	 * If this target supports LUN reset, try to enable it.
7328 	 */
7329 	if (un->un_f_lun_reset_enabled) {
7330 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
7331 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7332 			    "un:0x%p lun_reset capability set\n", un);
7333 		} else {
7334 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7335 			    "un:0x%p lun-reset capability not set\n", un);
7336 		}
7337 	}
7338 
7339 	/*
7340 	 * Adjust the maximum transfer size. This is to fix
7341 	 * the problem of partial DMA support on SPARC. Some
7342 	 * HBA driver, like aac, has very small dma_attr_maxxfer
7343 	 * size, which requires partial DMA support on SPARC.
7344 	 * In the future the SPARC pci nexus driver may solve
7345 	 * the problem instead of this fix.
7346 	 */
7347 	max_xfer_size = scsi_ifgetcap(SD_ADDRESS(un), "dma-max", 1);
7348 	if ((max_xfer_size > 0) && (max_xfer_size < un->un_max_xfer_size)) {
7349 		/* We need DMA partial even on sparc to ensure sddump() works */
7350 		un->un_max_xfer_size = max_xfer_size;
7351 		if (un->un_partial_dma_supported == 0)
7352 			un->un_partial_dma_supported = 1;
7353 	}
7354 	if (ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7355 	    DDI_PROP_DONTPASS, "buf_break", 0) == 1) {
7356 		if (ddi_xbuf_attr_setup_brk(un->un_xbuf_attr,
7357 		    un->un_max_xfer_size) == 1) {
7358 			un->un_buf_breakup_supported = 1;
7359 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7360 			    "un:0x%p Buf breakup enabled\n", un);
7361 		}
7362 	}
7363 
7364 	/*
7365 	 * Set PKT_DMA_PARTIAL flag.
7366 	 */
7367 	if (un->un_partial_dma_supported == 1) {
7368 		un->un_pkt_flags = PKT_DMA_PARTIAL;
7369 	} else {
7370 		un->un_pkt_flags = 0;
7371 	}
7372 
7373 	/* Initialize sd_ssc_t for internal uscsi commands */
7374 	ssc = sd_ssc_init(un);
7375 	scsi_fm_init(devp);
7376 
7377 	/*
7378 	 * Allocate memory for SCSI FMA stuffs.
7379 	 */
7380 	un->un_fm_private =
7381 	    kmem_zalloc(sizeof (struct sd_fm_internal), KM_SLEEP);
7382 	sfip = (struct sd_fm_internal *)un->un_fm_private;
7383 	sfip->fm_ssc.ssc_uscsi_cmd = &sfip->fm_ucmd;
7384 	sfip->fm_ssc.ssc_uscsi_info = &sfip->fm_uinfo;
7385 	sfip->fm_ssc.ssc_un = un;
7386 
7387 	if (ISCD(un) ||
7388 	    un->un_f_has_removable_media ||
7389 	    devp->sd_fm_capable == DDI_FM_NOT_CAPABLE) {
7390 		/*
7391 		 * We don't touch CDROM or the DDI_FM_NOT_CAPABLE device.
		 * Their logs are unchanged.
7393 		 */
7394 		sfip->fm_log_level = SD_FM_LOG_NSUP;
7395 	} else {
7396 		/*
		 * If we enter here, it should be a non-CDROM, FM-capable
7398 		 * device, and it will not keep the old scsi_log as before
7399 		 * in /var/adm/messages. However, the property
7400 		 * "fm-scsi-log" will control whether the FM telemetry will
7401 		 * be logged in /var/adm/messages.
7402 		 */
7403 		int fm_scsi_log;
7404 		fm_scsi_log = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7405 		    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "fm-scsi-log", 0);
7406 
7407 		if (fm_scsi_log)
7408 			sfip->fm_log_level = SD_FM_LOG_EREPORT;
7409 		else
7410 			sfip->fm_log_level = SD_FM_LOG_SILENT;
7411 	}
7412 
7413 	/*
7414 	 * At this point in the attach, we have enough info in the
7415 	 * soft state to be able to issue commands to the target.
7416 	 *
7417 	 * All command paths used below MUST issue their commands as
7418 	 * SD_PATH_DIRECT. This is important as intermediate layers
7419 	 * are not all initialized yet (such as PM).
7420 	 */
7421 
7422 	/*
7423 	 * Send a TEST UNIT READY command to the device. This should clear
7424 	 * any outstanding UNIT ATTENTION that may be present.
7425 	 *
7426 	 * Note: Don't check for success, just track if there is a reservation,
7427 	 * this is a throw away command to clear any unit attentions.
7428 	 *
7429 	 * Note: This MUST be the first command issued to the target during
7430 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
7431 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
7432 	 * with attempts at spinning up a device with no media.
7433 	 */
7434 	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
7435 	if (status != 0) {
7436 		if (status == EACCES)
7437 			reservation_flag = SD_TARGET_IS_RESERVED;
7438 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7439 	}
7440 
7441 	/*
7442 	 * If the device is NOT a removable media device, attempt to spin
7443 	 * it up (using the START_STOP_UNIT command) and read its capacity
7444 	 * (using the READ CAPACITY command).  Note, however, that either
7445 	 * of these could fail and in some cases we would continue with
7446 	 * the attach despite the failure (see below).
7447 	 */
7448 	if (un->un_f_descr_format_supported) {
7449 
7450 		switch (sd_spin_up_unit(ssc)) {
7451 		case 0:
7452 			/*
7453 			 * Spin-up was successful; now try to read the
7454 			 * capacity.  If successful then save the results
7455 			 * and mark the capacity & lbasize as valid.
7456 			 */
7457 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7458 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
7459 
7460 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
7461 			    &lbasize, SD_PATH_DIRECT);
7462 
7463 			switch (status) {
7464 			case 0: {
7465 				if (capacity > DK_MAX_BLOCKS) {
7466 					if ((capacity + 1) >
7467 					    SD_GROUP1_MAX_ADDRESS) {
7468 						/*
7469 						 * Enable descriptor format
7470 						 * sense data so that we can
7471 						 * get 64 bit sense data
7472 						 * fields.
7473 						 */
7474 						sd_enable_descr_sense(ssc);
7475 					}
7476 				}
7477 
7478 				/*
7479 				 * Here it's not necessary to check the case:
7480 				 * the capacity of the device is bigger than
7481 				 * what the max hba cdb can support. Because
7482 				 * sd_send_scsi_READ_CAPACITY will retrieve
7483 				 * the capacity by sending USCSI command, which
7484 				 * is constrained by the max hba cdb. Actually,
7485 				 * sd_send_scsi_READ_CAPACITY will return
7486 				 * EINVAL when using bigger cdb than required
7487 				 * cdb length. Will handle this case in
7488 				 * "case EINVAL".
7489 				 */
7490 
7491 				/*
7492 				 * The following relies on
7493 				 * sd_send_scsi_READ_CAPACITY never
7494 				 * returning 0 for capacity and/or lbasize.
7495 				 */
7496 				sd_update_block_info(un, lbasize, capacity);
7497 
7498 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7499 				    "sd_unit_attach: un:0x%p capacity = %ld "
7500 				    "blocks; lbasize= %ld.\n", un,
7501 				    un->un_blockcount, un->un_tgt_blocksize);
7502 
7503 				break;
7504 			}
7505 			case EINVAL:
7506 				/*
7507 				 * In the case where the max-cdb-length property
7508 				 * is smaller than the required CDB length for
7509 				 * a SCSI device, a target driver can fail to
7510 				 * attach to that device.
7511 				 */
7512 				scsi_log(SD_DEVINFO(un),
7513 				    sd_label, CE_WARN,
7514 				    "disk capacity is too large "
7515 				    "for current cdb length");
7516 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7517 
7518 				goto spinup_failed;
7519 			case EACCES:
7520 				/*
7521 				 * Should never get here if the spin-up
7522 				 * succeeded, but code it in anyway.
7523 				 * From here, just continue with the attach...
7524 				 */
7525 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7526 				    "sd_unit_attach: un:0x%p "
7527 				    "sd_send_scsi_READ_CAPACITY "
7528 				    "returned reservation conflict\n", un);
7529 				reservation_flag = SD_TARGET_IS_RESERVED;
7530 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7531 				break;
7532 			default:
7533 				/*
7534 				 * Likewise, should never get here if the
7535 				 * spin-up succeeded. Just continue with
7536 				 * the attach...
7537 				 */
7538 				if (status == EIO)
7539 					sd_ssc_assessment(ssc,
7540 					    SD_FMT_STATUS_CHECK);
7541 				else
7542 					sd_ssc_assessment(ssc,
7543 					    SD_FMT_IGNORE);
7544 				break;
7545 			}
7546 			break;
7547 		case EACCES:
7548 			/*
7549 			 * Device is reserved by another host.  In this case
7550 			 * we could not spin it up or read the capacity, but
7551 			 * we continue with the attach anyway.
7552 			 */
7553 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7554 			    "sd_unit_attach: un:0x%p spin-up reservation "
7555 			    "conflict.\n", un);
7556 			reservation_flag = SD_TARGET_IS_RESERVED;
7557 			break;
7558 		default:
7559 			/* Fail the attach if the spin-up failed. */
7560 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7561 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7562 			goto spinup_failed;
7563 		}
7564 
7565 	}
7566 
7567 	/*
7568 	 * Check to see if this is a MMC drive
7569 	 */
7570 	if (ISCD(un)) {
7571 		sd_set_mmc_caps(ssc);
7572 	}
7573 
7574 	/*
7575 	 * Add a zero-length attribute to tell the world we support
7576 	 * kernel ioctls (for layered drivers)
7577 	 */
7578 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7579 	    DDI_KERNEL_IOCTL, NULL, 0);
7580 
7581 	/*
7582 	 * Add a boolean property to tell the world we support
7583 	 * the B_FAILFAST flag (for layered drivers)
7584 	 */
7585 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7586 	    "ddi-failfast-supported", NULL, 0);
7587 
7588 	/*
7589 	 * Initialize power management
7590 	 */
7591 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7592 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7593 	sd_setup_pm(ssc, devi);
7594 	if (un->un_f_pm_is_enabled == FALSE) {
7595 		/*
7596 		 * For performance, point to a jump table that does
7597 		 * not include pm.
7598 		 * The direct and priority chains don't change with PM.
7599 		 *
7600 		 * Note: this is currently done based on individual device
7601 		 * capabilities. When an interface for determining system
7602 		 * power enabled state becomes available, or when additional
7603 		 * layers are added to the command chain, these values will
7604 		 * have to be re-evaluated for correctness.
7605 		 */
7606 		if (un->un_f_non_devbsize_supported) {
7607 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7608 		} else {
7609 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7610 		}
7611 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7612 	}
7613 
7614 	/*
7615 	 * This property is set to 0 by HA software to avoid retries
7616 	 * on a reserved disk. (The preferred property name is
7617 	 * "retry-on-reservation-conflict") (1189689)
7618 	 *
7619 	 * Note: The use of a global here can have unintended consequences. A
7620 	 * per instance variable is preferable to match the capabilities of
7621 	 * different underlying hba's (4402600)
7622 	 */
7623 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7624 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7625 	    sd_retry_on_reservation_conflict);
7626 	if (sd_retry_on_reservation_conflict != 0) {
7627 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7628 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7629 		    sd_retry_on_reservation_conflict);
7630 	}
7631 
7632 	/* Set up options for QFULL handling. */
7633 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7634 	    "qfull-retries", -1)) != -1) {
7635 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7636 		    rval, 1);
7637 	}
7638 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7639 	    "qfull-retry-interval", -1)) != -1) {
7640 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7641 		    rval, 1);
7642 	}
7643 
7644 	/*
7645 	 * This just prints a message that announces the existence of the
7646 	 * device. The message is always printed in the system logfile, but
7647 	 * only appears on the console if the system is booted with the
7648 	 * -v (verbose) argument.
7649 	 */
7650 	ddi_report_dev(devi);
7651 
7652 	un->un_mediastate = DKIO_NONE;
7653 
7654 	/*
7655 	 * Check Block Device Characteristics VPD.
7656 	 */
7657 	sd_check_bdc_vpd(ssc);
7658 
7659 	/*
7660 	 * Check whether the drive is in emulation mode.
7661 	 */
7662 	sd_check_emulation_mode(ssc);
7663 
7664 	cmlb_alloc_handle(&un->un_cmlbhandle);
7665 
7666 #if defined(__x86)
7667 	/*
7668 	 * On x86, compensate for off-by-1 legacy error
7669 	 */
7670 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7671 	    (lbasize == un->un_sys_blocksize))
7672 		offbyone = CMLB_OFF_BY_ONE;
7673 #endif
7674 
7675 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7676 	    VOID2BOOLEAN(un->un_f_has_removable_media != 0),
7677 	    VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
7678 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7679 	    (void *)SD_PATH_DIRECT) != 0) {
7680 		goto cmlb_attach_failed;
7681 	}
7682 
7683 
7684 	/*
7685 	 * Read and validate the device's geometry (ie, disk label)
7686 	 * A new unformatted drive will not have a valid geometry, but
7687 	 * the driver needs to successfully attach to this device so
7688 	 * the drive can be formatted via ioctls.
7689 	 */
7690 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7691 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
7692 
7693 	mutex_enter(SD_MUTEX(un));
7694 
7695 	/*
7696 	 * Read and initialize the devid for the unit.
7697 	 */
7698 	if (un->un_f_devid_supported) {
7699 		sd_register_devid(ssc, devi, reservation_flag);
7700 	}
7701 	mutex_exit(SD_MUTEX(un));
7702 
7703 
7704 	if (un->un_f_opt_disable_cache == TRUE) {
7705 		/*
7706 		 * Disable both read cache and write cache.  This is
7707 		 * the historic behavior of the keywords in the config file.
7708 		 */
7709 		if (sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7710 		    0) {
7711 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7712 			    "sd_unit_attach: un:0x%p Could not disable "
7713 			    "caching", un);
7714 			goto devid_failed;
7715 		}
7716 	}
7717 
7718 	/*
7719 	 * Check the value of the WCE bit and if it's allowed to be changed,
7720 	 * set un_f_write_cache_enabled and un_f_cache_mode_changeable
7721 	 * accordingly.
7722 	 */
7723 	(void) sd_get_write_cache_enabled(ssc, &wc_enabled);
7724 	sd_get_write_cache_changeable(ssc, &wc_changeable);
7725 	mutex_enter(SD_MUTEX(un));
7726 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7727 	un->un_f_cache_mode_changeable = (wc_changeable != 0);
7728 	mutex_exit(SD_MUTEX(un));
7729 
7730 	if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
7731 	    un->un_tgt_blocksize != DEV_BSIZE) ||
7732 	    un->un_f_enable_rmw) {
7733 		if (!(un->un_wm_cache)) {
7734 			(void) snprintf(name_str, sizeof (name_str),
7735 			    "%s%d_cache",
7736 			    ddi_driver_name(SD_DEVINFO(un)),
7737 			    ddi_get_instance(SD_DEVINFO(un)));
7738 			un->un_wm_cache = kmem_cache_create(
7739 			    name_str, sizeof (struct sd_w_map),
7740 			    8, sd_wm_cache_constructor,
7741 			    sd_wm_cache_destructor, NULL,
7742 			    (void *)un, NULL, 0);
7743 			if (!(un->un_wm_cache)) {
7744 				goto wm_cache_failed;
7745 			}
7746 		}
7747 	}
7748 
7749 	/*
7750 	 * Check the value of the NV_SUP bit and set
7751 	 * un_f_suppress_cache_flush accordingly.
7752 	 */
7753 	sd_get_nv_sup(ssc);
7754 
7755 	/*
7756 	 * Find out what type of reservation this disk supports.
7757 	 */
7758 	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS, 0, NULL);
7759 
7760 	switch (status) {
7761 	case 0:
7762 		/*
7763 		 * SCSI-3 reservations are supported.
7764 		 */
7765 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7766 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7767 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7768 		break;
7769 	case ENOTSUP:
7770 		/*
7771 		 * The PERSISTENT RESERVE IN command would not be recognized by
7772 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7773 		 */
7774 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7775 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7776 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7777 
7778 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7779 		break;
7780 	default:
7781 		/*
7782 		 * default to SCSI-3 reservations
7783 		 */
7784 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7785 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7786 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7787 
7788 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7789 		break;
7790 	}
7791 
7792 	/*
7793 	 * Set the pstat and error stat values here, so data obtained during the
7794 	 * previous attach-time routines is available.
7795 	 *
7796 	 * Note: This is a critical sequence that needs to be maintained:
7797 	 *	1) Instantiate the kstats before any routines using the iopath
7798 	 *	   (i.e. sd_send_scsi_cmd).
7799 	 *	2) Initialize the error stats (sd_set_errstats) and partition
	 *	   stats (sd_set_pstats) here, following
7801 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7802 	 *	   sd_cache_control().
7803 	 */
7804 
7805 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7806 		sd_set_pstats(un);
7807 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7808 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7809 	}
7810 
7811 	sd_set_errstats(un);
7812 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7813 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7814 
7815 	sd_setup_blk_limits(ssc);
7816 
7817 	/*
7818 	 * After successfully attaching an instance, we record the information
7819 	 * of how many luns have been attached on the relative target and
7820 	 * controller for parallel SCSI. This information is used when sd tries
7821 	 * to set the tagged queuing capability in HBA.
7822 	 */
7823 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7824 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7825 	}
7826 
7827 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7828 	    "sd_unit_attach: un:0x%p exit success\n", un);
7829 
7830 	/* Uninitialize sd_ssc_t pointer */
7831 	sd_ssc_fini(ssc);
7832 
7833 	return (DDI_SUCCESS);
7834 
7835 	/*
7836 	 * An error occurred during the attach; clean up & return failure.
7837 	 */
7838 wm_cache_failed:
7839 devid_failed:
7840 	ddi_remove_minor_node(devi, NULL);
7841 
7842 cmlb_attach_failed:
7843 	/*
7844 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7845 	 */
7846 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7847 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7848 
7849 	/*
7850 	 * Refer to the comments of setting tagged-qing in the beginning of
7851 	 * sd_unit_attach. We can only disable tagged queuing when there is
7852 	 * no lun attached on the target.
7853 	 */
7854 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7855 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7856 	}
7857 
7858 	if (un->un_f_is_fibre == FALSE) {
7859 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7860 	}
7861 
7862 spinup_failed:
7863 
7864 	/* Uninitialize sd_ssc_t pointer */
7865 	sd_ssc_fini(ssc);
7866 
7867 	mutex_enter(SD_MUTEX(un));
7868 
7869 	/* Deallocate SCSI FMA memory spaces */
7870 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
7871 
7872 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
7873 	if (un->un_direct_priority_timeid != NULL) {
7874 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7875 		un->un_direct_priority_timeid = NULL;
7876 		mutex_exit(SD_MUTEX(un));
7877 		(void) untimeout(temp_id);
7878 		mutex_enter(SD_MUTEX(un));
7879 	}
7880 
7881 	/* Cancel any pending start/stop timeouts */
7882 	if (un->un_startstop_timeid != NULL) {
7883 		timeout_id_t temp_id = un->un_startstop_timeid;
7884 		un->un_startstop_timeid = NULL;
7885 		mutex_exit(SD_MUTEX(un));
7886 		(void) untimeout(temp_id);
7887 		mutex_enter(SD_MUTEX(un));
7888 	}
7889 
7890 	/* Cancel any pending reset-throttle timeouts */
7891 	if (un->un_reset_throttle_timeid != NULL) {
7892 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7893 		un->un_reset_throttle_timeid = NULL;
7894 		mutex_exit(SD_MUTEX(un));
7895 		(void) untimeout(temp_id);
7896 		mutex_enter(SD_MUTEX(un));
7897 	}
7898 
7899 	/* Cancel rmw warning message timeouts */
7900 	if (un->un_rmw_msg_timeid != NULL) {
7901 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
7902 		un->un_rmw_msg_timeid = NULL;
7903 		mutex_exit(SD_MUTEX(un));
7904 		(void) untimeout(temp_id);
7905 		mutex_enter(SD_MUTEX(un));
7906 	}
7907 
7908 	/* Cancel any pending retry timeouts */
7909 	if (un->un_retry_timeid != NULL) {
7910 		timeout_id_t temp_id = un->un_retry_timeid;
7911 		un->un_retry_timeid = NULL;
7912 		mutex_exit(SD_MUTEX(un));
7913 		(void) untimeout(temp_id);
7914 		mutex_enter(SD_MUTEX(un));
7915 	}
7916 
7917 	/* Cancel any pending delayed cv broadcast timeouts */
7918 	if (un->un_dcvb_timeid != NULL) {
7919 		timeout_id_t temp_id = un->un_dcvb_timeid;
7920 		un->un_dcvb_timeid = NULL;
7921 		mutex_exit(SD_MUTEX(un));
7922 		(void) untimeout(temp_id);
7923 		mutex_enter(SD_MUTEX(un));
7924 	}
7925 
7926 	mutex_exit(SD_MUTEX(un));
7927 
7928 	/* There should not be any in-progress I/O so ASSERT this check */
7929 	ASSERT(un->un_ncmds_in_transport == 0);
7930 	ASSERT(un->un_ncmds_in_driver == 0);
7931 
7932 	/* Do not free the softstate if the callback routine is active */
7933 	sd_sync_with_callback(un);
7934 
7935 	/*
7936 	 * Partition stats apparently are not used with removables. These would
7937 	 * not have been created during attach, so no need to clean them up...
7938 	 */
7939 	if (un->un_errstats != NULL) {
7940 		kstat_delete(un->un_errstats);
7941 		un->un_errstats = NULL;
7942 	}
7943 
7944 create_errstats_failed:
7945 
7946 	if (un->un_stats != NULL) {
7947 		kstat_delete(un->un_stats);
7948 		un->un_stats = NULL;
7949 	}
7950 
7951 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7952 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7953 
7954 	ddi_prop_remove_all(devi);
7955 	cv_destroy(&un->un_state_cv);
7956 
7957 	sd_free_rqs(un);
7958 
7959 alloc_rqs_failed:
7960 
7961 	devp->sd_private = NULL;
7962 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
7963 
7964 	/*
7965 	 * Note: the man pages are unclear as to whether or not doing a
7966 	 * ddi_soft_state_free(sd_state, instance) is the right way to
7967 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
7968 	 * ddi_get_soft_state() fails.  The implication seems to be
7969 	 * that the get_soft_state cannot fail if the zalloc succeeds.
7970 	 */
7971 #ifndef XPV_HVM_DRIVER
7972 	ddi_soft_state_free(sd_state, instance);
7973 #endif /* !XPV_HVM_DRIVER */
7974 
7975 probe_failed:
7976 	scsi_unprobe(devp);
7977 
7978 	return (DDI_FAILURE);
7979 }
7980 
7981 
7982 /*
7983  *    Function: sd_unit_detach
7984  *
7985  * Description: Performs DDI_DETACH processing for sddetach().
7986  *
7987  * Return Code: DDI_SUCCESS
7988  *		DDI_FAILURE
7989  *
7990  *     Context: Kernel thread context
7991  */
7992 
7993 static int
sd_unit_detach(dev_info_t * devi)7994 sd_unit_detach(dev_info_t *devi)
7995 {
7996 	struct scsi_device	*devp;
7997 	struct sd_lun		*un;
7998 	int			i;
7999 	int			tgt;
8000 	dev_t			dev;
8001 	dev_info_t		*pdip = ddi_get_parent(devi);
8002 	int			instance = ddi_get_instance(devi);
8003 
8004 	/*
8005 	 * Fail the detach for any of the following:
8006 	 *  - Unable to get the sd_lun struct for the instance
8007 	 *  - There is pending I/O
8008 	 */
8009 	devp = ddi_get_driver_private(devi);
8010 	if ((devp == NULL) ||
8011 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8012 	    (un->un_ncmds_in_driver != 0)) {
8013 		return (DDI_FAILURE);
8014 	}
8015 
8016 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8017 
8018 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8019 	    SCSI_ADDR_PROP_TARGET, -1);
8020 
8021 	dev = sd_make_device(SD_DEVINFO(un));
8022 
8023 #ifndef lint
8024 	_NOTE(COMPETING_THREADS_NOW);
8025 #endif
8026 
8027 	mutex_enter(SD_MUTEX(un));
8028 
8029 	/*
8030 	 * Fail the detach if there are any outstanding layered
8031 	 * opens on this device.
8032 	 */
8033 	for (i = 0; i < NDKMAP; i++) {
8034 		if (un->un_ocmap.lyropen[i] != 0) {
8035 			goto err_notclosed;
8036 		}
8037 	}
8038 
8039 	/*
8040 	 * Verify there are NO outstanding commands issued to this device.
8041 	 * ie, un_ncmds_in_transport == 0.
8042 	 * It's possible to have outstanding commands through the physio
8043 	 * code path, even though everything's closed.
8044 	 */
8045 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8046 	    (un->un_direct_priority_timeid != NULL) ||
8047 	    (un->un_state == SD_STATE_RWAIT)) {
8048 		mutex_exit(SD_MUTEX(un));
8049 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8050 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8051 		goto err_stillbusy;
8052 	}
8053 
8054 	/*
8055 	 * If we have the device reserved, release the reservation.
8056 	 */
8057 	if ((un->un_resvd_status & SD_RESERVE) &&
8058 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8059 		mutex_exit(SD_MUTEX(un));
8060 		/*
8061 		 * Note: sd_reserve_release sends a command to the device
8062 		 * via the sd_ioctlcmd() path, and can sleep.
8063 		 */
8064 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8065 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8066 			    "sd_dr_detach: Cannot release reservation \n");
8067 		}
8068 	} else {
8069 		mutex_exit(SD_MUTEX(un));
8070 	}
8071 
8072 	/*
8073 	 * Untimeout any reserve recover, throttle reset, restart unit
8074 	 * and delayed broadcast timeout threads. Protect the timeout pointer
8075 	 * from getting nulled by their callback functions.
8076 	 */
8077 	mutex_enter(SD_MUTEX(un));
8078 	if (un->un_resvd_timeid != NULL) {
8079 		timeout_id_t temp_id = un->un_resvd_timeid;
8080 		un->un_resvd_timeid = NULL;
8081 		mutex_exit(SD_MUTEX(un));
8082 		(void) untimeout(temp_id);
8083 		mutex_enter(SD_MUTEX(un));
8084 	}
8085 
8086 	if (un->un_reset_throttle_timeid != NULL) {
8087 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8088 		un->un_reset_throttle_timeid = NULL;
8089 		mutex_exit(SD_MUTEX(un));
8090 		(void) untimeout(temp_id);
8091 		mutex_enter(SD_MUTEX(un));
8092 	}
8093 
8094 	if (un->un_startstop_timeid != NULL) {
8095 		timeout_id_t temp_id = un->un_startstop_timeid;
8096 		un->un_startstop_timeid = NULL;
8097 		mutex_exit(SD_MUTEX(un));
8098 		(void) untimeout(temp_id);
8099 		mutex_enter(SD_MUTEX(un));
8100 	}
8101 
8102 	if (un->un_rmw_msg_timeid != NULL) {
8103 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
8104 		un->un_rmw_msg_timeid = NULL;
8105 		mutex_exit(SD_MUTEX(un));
8106 		(void) untimeout(temp_id);
8107 		mutex_enter(SD_MUTEX(un));
8108 	}
8109 
8110 	if (un->un_dcvb_timeid != NULL) {
8111 		timeout_id_t temp_id = un->un_dcvb_timeid;
8112 		un->un_dcvb_timeid = NULL;
8113 		mutex_exit(SD_MUTEX(un));
8114 		(void) untimeout(temp_id);
8115 	} else {
8116 		mutex_exit(SD_MUTEX(un));
8117 	}
8118 
8119 	/* Remove any pending reservation reclaim requests for this device */
8120 	sd_rmv_resv_reclaim_req(dev);
8121 
8122 	mutex_enter(SD_MUTEX(un));
8123 
8124 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8125 	if (un->un_direct_priority_timeid != NULL) {
8126 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8127 		un->un_direct_priority_timeid = NULL;
8128 		mutex_exit(SD_MUTEX(un));
8129 		(void) untimeout(temp_id);
8130 		mutex_enter(SD_MUTEX(un));
8131 	}
8132 
8133 	/* Cancel any active multi-host disk watch thread requests */
8134 	if (un->un_mhd_token != NULL) {
8135 		mutex_exit(SD_MUTEX(un));
8136 		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8137 		if (scsi_watch_request_terminate(un->un_mhd_token,
8138 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8139 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8140 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8141 			/*
8142 			 * Note: We are returning here after having removed
8143 			 * some driver timeouts above. This is consistent with
8144 			 * the legacy implementation but perhaps the watch
8145 			 * terminate call should be made with the wait flag set.
8146 			 */
8147 			goto err_stillbusy;
8148 		}
8149 		mutex_enter(SD_MUTEX(un));
8150 		un->un_mhd_token = NULL;
8151 	}
8152 
8153 	if (un->un_swr_token != NULL) {
8154 		mutex_exit(SD_MUTEX(un));
8155 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8156 		if (scsi_watch_request_terminate(un->un_swr_token,
8157 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8158 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8159 			    "sd_dr_detach: Cannot cancel swr watch request\n");
8160 			/*
8161 			 * Note: We are returning here after having removed
8162 			 * some driver timeouts above. This is consistent with
8163 			 * the legacy implementation but perhaps the watch
8164 			 * terminate call should be made with the wait flag set.
8165 			 */
8166 			goto err_stillbusy;
8167 		}
8168 		mutex_enter(SD_MUTEX(un));
8169 		un->un_swr_token = NULL;
8170 	}
8171 
8172 	mutex_exit(SD_MUTEX(un));
8173 
8174 	/*
8175 	 * Clear any scsi_reset_notifies. We clear the reset notifies
8176 	 * if we have not registered one.
8177 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8178 	 */
8179 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8180 	    sd_mhd_reset_notify_cb, (caddr_t)un);
8181 
8182 	/*
8183 	 * protect the timeout pointers from getting nulled by
8184 	 * their callback functions during the cancellation process.
8185 	 * In such a scenario untimeout can be invoked with a null value.
8186 	 */
8187 	_NOTE(NO_COMPETING_THREADS_NOW);
8188 
8189 	mutex_enter(&un->un_pm_mutex);
8190 	if (un->un_pm_idle_timeid != NULL) {
8191 		timeout_id_t temp_id = un->un_pm_idle_timeid;
8192 		un->un_pm_idle_timeid = NULL;
8193 		mutex_exit(&un->un_pm_mutex);
8194 
8195 		/*
8196 		 * Timeout is active; cancel it.
8197 		 * Note that it'll never be active on a device
8198 		 * that does not support PM therefore we don't
8199 		 * have to check before calling pm_idle_component.
8200 		 */
8201 		(void) untimeout(temp_id);
8202 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8203 		mutex_enter(&un->un_pm_mutex);
8204 	}
8205 
8206 	/*
8207 	 * Check whether there is already a timeout scheduled for power
	 * management. If yes, then don't lower the power here; that's
	 * the timeout handler's job.
8210 	 */
8211 	if (un->un_pm_timeid != NULL) {
8212 		timeout_id_t temp_id = un->un_pm_timeid;
8213 		un->un_pm_timeid = NULL;
8214 		mutex_exit(&un->un_pm_mutex);
8215 		/*
8216 		 * Timeout is active; cancel it.
8217 		 * Note that it'll never be active on a device
8218 		 * that does not support PM therefore we don't
8219 		 * have to check before calling pm_idle_component.
8220 		 */
8221 		(void) untimeout(temp_id);
8222 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8223 
8224 	} else {
8225 		mutex_exit(&un->un_pm_mutex);
8226 		if ((un->un_f_pm_is_enabled == TRUE) &&
8227 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_PM_STATE_STOPPED(un))
8228 		    != DDI_SUCCESS)) {
8229 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8230 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8231 			/*
8232 			 * Fix for bug: 4297749, item # 13
8233 			 * The above test now includes a check to see if PM is
8234 			 * supported by this device before call
8235 			 * pm_lower_power().
8236 			 * Note, the following is not dead code. The call to
8237 			 * pm_lower_power above will generate a call back into
8238 			 * our sdpower routine which might result in a timeout
8239 			 * handler getting activated. Therefore the following
8240 			 * code is valid and necessary.
8241 			 */
8242 			mutex_enter(&un->un_pm_mutex);
8243 			if (un->un_pm_timeid != NULL) {
8244 				timeout_id_t temp_id = un->un_pm_timeid;
8245 				un->un_pm_timeid = NULL;
8246 				mutex_exit(&un->un_pm_mutex);
8247 				(void) untimeout(temp_id);
8248 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8249 			} else {
8250 				mutex_exit(&un->un_pm_mutex);
8251 			}
8252 		}
8253 	}
8254 
8255 	/*
8256 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8257 	 * Relocated here from above to be after the call to
8258 	 * pm_lower_power, which was getting errors.
8259 	 */
8260 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8261 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8262 
8263 	/*
8264 	 * Currently, tagged queuing is supported per target based by HBA.
8265 	 * Setting this per lun instance actually sets the capability of this
8266 	 * target in HBA, which affects those luns already attached on the
	 * same target. So during detach, we can only disable this capability
	 * when this is the only lun left on this target. By doing
8269 	 * this, we assume a target has the same tagged queuing capability
8270 	 * for every lun. The condition can be removed when HBA is changed to
8271 	 * support per lun based tagged queuing capability.
8272 	 */
8273 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
8274 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8275 	}
8276 
8277 	if (un->un_f_is_fibre == FALSE) {
8278 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8279 	}
8280 
8281 	/*
8282 	 * Remove any event callbacks, fibre only
8283 	 */
8284 	if (un->un_f_is_fibre == TRUE) {
8285 		if ((un->un_insert_event != NULL) &&
8286 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
8287 		    DDI_SUCCESS)) {
8288 			/*
8289 			 * Note: We are returning here after having done
8290 			 * substantial cleanup above. This is consistent
8291 			 * with the legacy implementation but this may not
8292 			 * be the right thing to do.
8293 			 */
8294 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8295 			    "sd_dr_detach: Cannot cancel insert event\n");
8296 			goto err_remove_event;
8297 		}
8298 		un->un_insert_event = NULL;
8299 
8300 		if ((un->un_remove_event != NULL) &&
8301 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
8302 		    DDI_SUCCESS)) {
8303 			/*
8304 			 * Note: We are returning here after having done
8305 			 * substantial cleanup above. This is consistent
8306 			 * with the legacy implementation but this may not
8307 			 * be the right thing to do.
8308 			 */
8309 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8310 			    "sd_dr_detach: Cannot cancel remove event\n");
8311 			goto err_remove_event;
8312 		}
8313 		un->un_remove_event = NULL;
8314 	}
8315 
8316 	/* Do not free the softstate if the callback routine is active */
8317 	sd_sync_with_callback(un);
8318 
8319 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
8320 	cmlb_free_handle(&un->un_cmlbhandle);
8321 
8322 	/*
8323 	 * Clean up the soft state struct.
8324 	 * Cleanup is done in reverse order of allocs/inits.
8325 	 * At this point there should be no competing threads anymore.
8326 	 */
8327 
8328 	scsi_fm_fini(devp);
8329 
8330 	/*
8331 	 * Deallocate memory for SCSI FMA.
8332 	 */
8333 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8334 
8335 	/*
8336 	 * Unregister and free device id if it was not registered
8337 	 * by the transport.
8338 	 */
8339 	if (un->un_f_devid_transport_defined == FALSE)
8340 		ddi_devid_unregister(devi);
8341 
8342 	/*
8343 	 * free the devid structure if allocated before (by ddi_devid_init()
8344 	 * or ddi_devid_get()).
8345 	 */
8346 	if (un->un_devid) {
8347 		ddi_devid_free(un->un_devid);
8348 		un->un_devid = NULL;
8349 	}
8350 
8351 	/*
8352 	 * Destroy wmap cache if it exists.
8353 	 */
8354 	if (un->un_wm_cache != NULL) {
8355 		kmem_cache_destroy(un->un_wm_cache);
8356 		un->un_wm_cache = NULL;
8357 	}
8358 
8359 	/*
8360 	 * kstat cleanup is done in detach for all device types (4363169).
8361 	 * We do not want to fail detach if the device kstats are not deleted
8362 	 * since there is a confusion about the devo_refcnt for the device.
8363 	 * We just delete the kstats and let detach complete successfully.
8364 	 */
8365 	if (un->un_stats != NULL) {
8366 		kstat_delete(un->un_stats);
8367 		un->un_stats = NULL;
8368 	}
8369 	if (un->un_unmapstats != NULL) {
8370 		kstat_delete(un->un_unmapstats_ks);
8371 		un->un_unmapstats_ks = NULL;
8372 		un->un_unmapstats = NULL;
8373 	}
8374 	if (un->un_errstats != NULL) {
8375 		kstat_delete(un->un_errstats);
8376 		un->un_errstats = NULL;
8377 	}
8378 
8379 	/* Remove partition stats */
8380 	if (un->un_f_pkstats_enabled) {
8381 		for (i = 0; i < NSDMAP; i++) {
8382 			if (un->un_pstats[i] != NULL) {
8383 				kstat_delete(un->un_pstats[i]);
8384 				un->un_pstats[i] = NULL;
8385 			}
8386 		}
8387 	}
8388 
8389 	/* Remove xbuf registration */
8390 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8391 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8392 
8393 	/* Remove driver properties */
8394 	ddi_prop_remove_all(devi);
8395 
8396 	mutex_destroy(&un->un_pm_mutex);
8397 	cv_destroy(&un->un_pm_busy_cv);
8398 
8399 	cv_destroy(&un->un_wcc_cv);
8400 
8401 	/* Removable media condvar. */
8402 	cv_destroy(&un->un_state_cv);
8403 
8404 	/* Suspend/resume condvar. */
8405 	cv_destroy(&un->un_suspend_cv);
8406 	cv_destroy(&un->un_disk_busy_cv);
8407 
8408 	sd_free_rqs(un);
8409 
8410 	/* Free up soft state */
8411 	devp->sd_private = NULL;
8412 
8413 	bzero(un, sizeof (struct sd_lun));
8414 
8415 	ddi_soft_state_free(sd_state, instance);
8416 
8417 	/* This frees up the INQUIRY data associated with the device. */
8418 	scsi_unprobe(devp);
8419 
8420 	/*
8421 	 * After successfully detaching an instance, we update the information
8422 	 * of how many luns have been attached in the relative target and
8423 	 * controller for parallel SCSI. This information is used when sd tries
8424 	 * to set the tagged queuing capability in HBA.
8425 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
8426 	 * check if the device is parallel SCSI. However, we don't need to
8427 	 * check here because we've already checked during attach. No device
8428 	 * that is not parallel SCSI is in the chain.
8429 	 */
8430 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8431 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
8432 	}
8433 
8434 	return (DDI_SUCCESS);
8435 
8436 err_notclosed:
8437 	mutex_exit(SD_MUTEX(un));
8438 
8439 err_stillbusy:
8440 	_NOTE(NO_COMPETING_THREADS_NOW);
8441 
8442 err_remove_event:
8443 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
8444 	return (DDI_FAILURE);
8445 }
8446 
8447 
8448 /*
8449  *    Function: sd_create_errstats
8450  *
8451  * Description: This routine instantiates the device error stats.
8452  *
8453  *		Note: During attach the stats are instantiated first so they are
8454  *		available for attach-time routines that utilize the driver
8455  *		iopath to send commands to the device. The stats are initialized
8456  *		separately so data obtained during some attach-time routines is
8457  *		available. (4362483)
8458  *
8459  *   Arguments: un - driver soft state (unit) structure
8460  *		instance - driver instance
8461  *
8462  *     Context: Kernel thread context
8463  */
8464 
8465 static void
sd_create_errstats(struct sd_lun * un,int instance)8466 sd_create_errstats(struct sd_lun *un, int instance)
8467 {
8468 	struct	sd_errstats	*stp;
8469 	char	kstatmodule_err[KSTAT_STRLEN];
8470 	char	kstatname[KSTAT_STRLEN];
8471 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
8472 
8473 	ASSERT(un != NULL);
8474 
8475 	if (un->un_errstats != NULL) {
8476 		return;
8477 	}
8478 
8479 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
8480 	    "%serr", sd_label);
8481 	(void) snprintf(kstatname, sizeof (kstatname),
8482 	    "%s%d,err", sd_label, instance);
8483 
8484 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
8485 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
8486 
8487 	if (un->un_errstats == NULL) {
8488 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8489 		    "sd_create_errstats: Failed kstat_create\n");
8490 		return;
8491 	}
8492 
8493 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8494 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
8495 	    KSTAT_DATA_UINT32);
8496 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
8497 	    KSTAT_DATA_UINT32);
8498 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
8499 	    KSTAT_DATA_UINT32);
8500 	kstat_named_init(&stp->sd_vid,		"Vendor",
8501 	    KSTAT_DATA_CHAR);
8502 	kstat_named_init(&stp->sd_pid,		"Product",
8503 	    KSTAT_DATA_CHAR);
8504 	kstat_named_init(&stp->sd_revision,	"Revision",
8505 	    KSTAT_DATA_CHAR);
8506 	kstat_named_init(&stp->sd_serial,	"Serial No",
8507 	    KSTAT_DATA_CHAR);
8508 	kstat_named_init(&stp->sd_capacity,	"Size",
8509 	    KSTAT_DATA_ULONGLONG);
8510 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
8511 	    KSTAT_DATA_UINT32);
8512 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
8513 	    KSTAT_DATA_UINT32);
8514 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
8515 	    KSTAT_DATA_UINT32);
8516 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
8517 	    KSTAT_DATA_UINT32);
8518 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
8519 	    KSTAT_DATA_UINT32);
8520 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
8521 	    KSTAT_DATA_UINT32);
8522 
8523 	un->un_errstats->ks_private = un;
8524 	un->un_errstats->ks_update  = nulldev;
8525 
8526 	kstat_install(un->un_errstats);
8527 }
8528 
8529 
8530 /*
8531  *    Function: sd_set_errstats
8532  *
8533  * Description: This routine sets the value of the vendor id, product id,
8534  *		revision, serial number, and capacity device error stats.
8535  *
8536  *		Note: During attach the stats are instantiated first so they are
8537  *		available for attach-time routines that utilize the driver
8538  *		iopath to send commands to the device. The stats are initialized
8539  *		separately so data obtained during some attach-time routines is
8540  *		available. (4362483)
8541  *
8542  *   Arguments: un - driver soft state (unit) structure
8543  *
8544  *     Context: Kernel thread context
8545  */
8546 
static void
sd_set_errstats(struct sd_lun *un)
{
	struct	sd_errstats	*stp;
	char			*sn;

	ASSERT(un != NULL);
	ASSERT(un->un_errstats != NULL);
	stp = (struct sd_errstats *)un->un_errstats->ks_data;
	ASSERT(stp != NULL);
	/*
	 * Copy the identity fields from the cached INQUIRY data into the
	 * named kstats.  The lengths (8/16/4) are the fixed INQUIRY field
	 * widths, not the size of the kstat char buffers; bytes beyond the
	 * copied length are assumed to already be zero in ks_data.
	 */
	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
	(void) strncpy(stp->sd_revision.value.c,
	    un->un_sd->sd_inq->inq_revision, 4);

	/*
	 * All the errstats are persistent across detach/attach,
	 * so reset all the errstats here in case of the hot
	 * replacement of disk drives, except for not changed
	 * Sun qualified drives.
	 */
	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
		/* Non-Sun drive, or the serial number changed: zero counts. */
		stp->sd_softerrs.value.ui32 = 0;
		stp->sd_harderrs.value.ui32 = 0;
		stp->sd_transerrs.value.ui32 = 0;
		stp->sd_rq_media_err.value.ui32 = 0;
		stp->sd_rq_ntrdy_err.value.ui32 = 0;
		stp->sd_rq_nodev_err.value.ui32 = 0;
		stp->sd_rq_recov_err.value.ui32 = 0;
		stp->sd_rq_illrq_err.value.ui32 = 0;
		stp->sd_rq_pfa_err.value.ui32 = 0;
	}

	/*
	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
	 * (4376302))
	 */
	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
		    sizeof (SD_INQUIRY(un)->inq_serial));
	} else {
		/*
		 * Set the "Serial No" kstat for non-Sun qualified drives
		 * from the devinfo property (set earlier from VPD page 0x80,
		 * presumably — confirm against the property's producer).
		 */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, SD_DEVINFO(un),
		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
		    INQUIRY_SERIAL_NO, &sn) == DDI_SUCCESS) {
			(void) strlcpy(stp->sd_serial.value.c, sn,
			    sizeof (stp->sd_serial.value.c));
			ddi_prop_free(sn);
		}
	}

	if (un->un_f_blockcount_is_valid != TRUE) {
		/*
		 * Set capacity error stat to 0 for no media. This ensures
		 * a valid capacity is displayed in response to 'iostat -E'
		 * when no media is present in the device.
		 */
		stp->sd_capacity.value.ui64 = 0;
	} else {
		/*
		 * Multiply un_blockcount by un->un_sys_blocksize to get
		 * capacity.
		 *
		 * Note: for non-512 blocksize devices "un_blockcount" has been
		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
		 * (un_tgt_blocksize / un->un_sys_blocksize).
		 */
		stp->sd_capacity.value.ui64 = (uint64_t)
		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
	}
}
8623 
8624 
8625 /*
8626  *    Function: sd_set_pstats
8627  *
8628  * Description: This routine instantiates and initializes the partition
8629  *              stats for each partition with more than zero blocks.
8630  *		(4363169)
8631  *
8632  *   Arguments: un - driver soft state (unit) structure
8633  *
8634  *     Context: Kernel thread context
8635  */
8636 
8637 static void
sd_set_pstats(struct sd_lun * un)8638 sd_set_pstats(struct sd_lun *un)
8639 {
8640 	char	kstatname[KSTAT_STRLEN];
8641 	int	instance;
8642 	int	i;
8643 	diskaddr_t	nblks = 0;
8644 	char	*partname = NULL;
8645 
8646 	ASSERT(un != NULL);
8647 
8648 	instance = ddi_get_instance(SD_DEVINFO(un));
8649 
8650 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8651 	for (i = 0; i < NSDMAP; i++) {
8652 
8653 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8654 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8655 			continue;
8656 		mutex_enter(SD_MUTEX(un));
8657 
8658 		if ((un->un_pstats[i] == NULL) &&
8659 		    (nblks != 0)) {
8660 
8661 			(void) snprintf(kstatname, sizeof (kstatname),
8662 			    "%s%d,%s", sd_label, instance,
8663 			    partname);
8664 
8665 			un->un_pstats[i] = kstat_create(sd_label,
8666 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8667 			    1, KSTAT_FLAG_PERSISTENT);
8668 			if (un->un_pstats[i] != NULL) {
8669 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8670 				kstat_install(un->un_pstats[i]);
8671 			}
8672 		}
8673 		mutex_exit(SD_MUTEX(un));
8674 	}
8675 }
8676 
/*
 * Values related to the caching mode page depending on whether the unit
 * is ATAPI (group 1 CDBs with the larger group 2 mode header) or plain
 * SCSI (group 0 CDBs with the 4-byte mode header).
 */
#define	SDC_CDB_GROUP(un) ((un->un_f_cfg_is_atapi == TRUE) ? \
	CDB_GROUP1 : CDB_GROUP0)
#define	SDC_HDRLEN(un) ((un->un_f_cfg_is_atapi == TRUE) ? \
	MODE_HEADER_LENGTH_GRP2 : MODE_HEADER_LENGTH)
/*
 * Use mode_cache_scsi3 to ensure we get all of the mode sense data, otherwise
 * the mode select will fail (mode_cache_scsi3 is a superset of mode_caching).
 */
#define	SDC_BUFLEN(un) (SDC_HDRLEN(un) + MODE_BLK_DESC_LENGTH + \
	sizeof (struct mode_cache_scsi3))
8690 
/*
 * Fetch the caching mode page (MODEPAGE_CACHING) from the device.
 *
 *   Arguments: ssc		- pointer to the target's ssc/soft state
 *		page_control	- page control field for the MODE SENSE,
 *				  e.g. MODEPAGE_CURRENT or MODEPAGE_CHANGEABLE
 *		header		- on success, set to a kmem_zalloc'd buffer of
 *				  SDC_BUFLEN(un) bytes holding the mode header,
 *				  block descriptor and caching page; the caller
 *				  must kmem_free() it.  Set to NULL on failure.
 *		bdlen		- on success, set to the block descriptor
 *				  length reported by the device; 0 on failure
 *
 * Return Code: 0 on success; EIO or the sd_send_scsi_MODE_SENSE error
 *		otherwise.
 */
static int
sd_get_caching_mode_page(sd_ssc_t *ssc, uchar_t page_control, uchar_t **header,
    int *bdlen)
{
	struct sd_lun	*un = ssc->ssc_un;
	struct mode_caching *mode_caching_page;
	size_t		buflen = SDC_BUFLEN(un);
	int		hdrlen = SDC_HDRLEN(un);
	int		rval;

	/*
	 * Do a test unit ready, otherwise a mode sense may not work if this
	 * is the first command sent to the device after boot.
	 */
	if (sd_send_scsi_TEST_UNIT_READY(ssc, 0) != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	/*
	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.
	 */
	*header = kmem_zalloc(buflen, KM_SLEEP);

	/* Get the information from the device */
	rval = sd_send_scsi_MODE_SENSE(ssc, SDC_CDB_GROUP(un), *header, buflen,
	    page_control | MODEPAGE_CACHING, SD_PATH_DIRECT);
	if (rval != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un, "%s: Mode Sense Failed\n",
		    __func__);
		goto mode_sense_failed;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		/* Group 2 header carries the descriptor length in two bytes */
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)(*header);
		*bdlen = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		*bdlen = ((struct mode_header *)(*header))->bdesc_length;
	}

	if (*bdlen > MODE_BLK_DESC_LENGTH) {
		/*
		 * A larger descriptor would push the page past the space we
		 * allocated for it; treat as invalid data from the device.
		 */
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
		    "%s: Mode Sense returned invalid block descriptor length\n",
		    __func__);
		rval = EIO;
		goto mode_sense_failed;
	}

	/* Sanity-check that the device actually returned the caching page */
	mode_caching_page = (struct mode_caching *)(*header + hdrlen + *bdlen);
	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "%s: Mode Sense caching page code mismatch %d\n",
		    __func__, mode_caching_page->mode_page.code);
		rval = EIO;
	}

mode_sense_failed:
	/* On any failure, release the buffer and normalize the outputs. */
	if (rval != 0) {
		kmem_free(*header, buflen);
		*header = NULL;
		*bdlen = 0;
	}
	return (rval);
}
8760 
8761 /*
8762  *    Function: sd_cache_control()
8763  *
8764  * Description: This routine is the driver entry point for setting
8765  *		read and write caching by modifying the WCE (write cache
8766  *		enable) and RCD (read cache disable) bits of mode
8767  *		page 8 (MODEPAGE_CACHING).
8768  *
8769  *   Arguments: ssc		- ssc contains pointer to driver soft state
8770  *				  (unit) structure for this target.
8771  *		rcd_flag	- flag for controlling the read cache
8772  *		wce_flag	- flag for controlling the write cache
8773  *
8774  * Return Code: EIO
8775  *		code returned by sd_send_scsi_MODE_SENSE and
8776  *		sd_send_scsi_MODE_SELECT
8777  *
8778  *     Context: Kernel Thread
8779  */
8780 
static int
sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag)
{
	struct sd_lun	*un = ssc->ssc_un;
	struct mode_caching *mode_caching_page;
	uchar_t		*header;
	size_t		buflen = SDC_BUFLEN(un);
	int		hdrlen = SDC_HDRLEN(un);
	int		bdlen;
	int		rval;

	/* Read the current caching page; on success we own 'header'. */
	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CURRENT, &header, &bdlen);
	switch (rval) {
	case 0:
		/* Check the relevant bits on successful mode sense */
		mode_caching_page = (struct mode_caching *)(header + hdrlen +
		    bdlen);
		/*
		 * Only issue a mode select if at least one of the requested
		 * settings differs from the device's current state (note RCD
		 * is a *disable* bit, so ENABLE means rcd == 0).
		 */
		if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
		    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
		    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
		    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
			size_t sbuflen;
			uchar_t save_pg;

			/*
			 * Construct select buffer length based on the
			 * length of the sense data returned.
			 */
			sbuflen = hdrlen + bdlen + sizeof (struct mode_page) +
			    (int)mode_caching_page->mode_page.length;

			/* Set the caching bits as requested */
			if (rcd_flag == SD_CACHE_ENABLE)
				mode_caching_page->rcd = 0;
			else if (rcd_flag == SD_CACHE_DISABLE)
				mode_caching_page->rcd = 1;

			if (wce_flag == SD_CACHE_ENABLE)
				mode_caching_page->wce = 1;
			else if (wce_flag == SD_CACHE_DISABLE)
				mode_caching_page->wce = 0;

			/*
			 * Save the page if the mode sense says the
			 * drive supports it.
			 */
			save_pg = mode_caching_page->mode_page.ps ?
			    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;

			/* Clear reserved bits before mode select */
			mode_caching_page->mode_page.ps = 0;

			/*
			 * Clear out mode header for mode select.
			 * The rest of the retrieved page will be reused.
			 */
			bzero(header, hdrlen);

			/* Re-establish the block descriptor length in the
			 * (now zeroed) header, in the proper header format. */
			if (un->un_f_cfg_is_atapi == TRUE) {
				struct mode_header_grp2 *mhp =
				    (struct mode_header_grp2 *)header;
				mhp->bdesc_length_hi = bdlen >> 8;
				mhp->bdesc_length_lo = (uchar_t)bdlen & 0xff;
			} else {
				((struct mode_header *)header)->bdesc_length =
				    bdlen;
			}

			sd_ssc_assessment(ssc, SD_FMT_IGNORE);

			/* Issue mode select to change the cache settings */
			rval = sd_send_scsi_MODE_SELECT(ssc, SDC_CDB_GROUP(un),
			    header, sbuflen, save_pg, SD_PATH_DIRECT);
		}
		kmem_free(header, buflen);
		break;
	case EIO:
		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		break;
	default:
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		break;
	}

	return (rval);
}
8867 
8868 
8869 /*
8870  *    Function: sd_get_write_cache_enabled()
8871  *
8872  * Description: This routine is the driver entry point for determining if write
8873  *		caching is enabled.  It examines the WCE (write cache enable)
8874  *		bits of mode page 8 (MODEPAGE_CACHING) with Page Control field
8875  *		bits set to MODEPAGE_CURRENT.
8876  *
8877  *   Arguments: ssc		- ssc contains pointer to driver soft state
8878  *				  (unit) structure for this target.
8879  *		is_enabled	- pointer to int where write cache enabled state
8880  *				  is returned (non-zero -> write cache enabled)
8881  *
8882  * Return Code: EIO
8883  *		code returned by sd_send_scsi_MODE_SENSE
8884  *
8885  *     Context: Kernel Thread
8886  *
8887  * NOTE: If ioctl is added to disable write cache, this sequence should
8888  * be followed so that no locking is required for accesses to
8889  * un->un_f_write_cache_enabled:
8890  *	do mode select to clear wce
8891  *	do synchronize cache to flush cache
8892  *	set un->un_f_write_cache_enabled = FALSE
8893  *
8894  * Conversely, an ioctl to enable the write cache should be done
8895  * in this order:
8896  *	set un->un_f_write_cache_enabled = TRUE
8897  *	do mode select to set wce
8898  */
8899 
static int
sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled)
{
	struct sd_lun	*un = ssc->ssc_un;
	struct mode_caching *mode_caching_page;
	uchar_t		*header;
	size_t		buflen = SDC_BUFLEN(un);
	int		hdrlen = SDC_HDRLEN(un);
	int		bdlen;
	int		rval;

	/* In case of error, flag as enabled */
	*is_enabled = TRUE;

	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CURRENT, &header, &bdlen);
	switch (rval) {
	case 0:
		/* Read the WCE bit out of the returned caching page. */
		mode_caching_page = (struct mode_caching *)(header + hdrlen +
		    bdlen);
		*is_enabled = mode_caching_page->wce;
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		kmem_free(header, buflen);
		break;
	case EIO: {
		/*
		 * Some disks do not support Mode Sense(6), we
		 * should ignore this kind of error (sense key is
		 * 0x5 - illegal request).
		 */
		uint8_t *sensep;
		int senlen;

		/* Length of valid sense data actually transferred */
		sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
		senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
		    ssc->ssc_uscsi_cmd->uscsi_rqresid);

		if (senlen > 0 &&
		    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
		} else {
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		}
		break;
	}
	default:
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		break;
	}

	return (rval);
}
8951 
8952 /*
8953  *    Function: sd_get_write_cache_changeable()
8954  *
8955  * Description: This routine is the driver entry point for determining if write
8956  *		caching is changeable.  It examines the WCE (write cache enable)
8957  *		bits of mode page 8 (MODEPAGE_CACHING) with Page Control field
8958  *		bits set to MODEPAGE_CHANGEABLE.
8959  *
8960  *   Arguments: ssc		- ssc contains pointer to driver soft state
8961  *				  (unit) structure for this target.
8962  *		is_changeable	- pointer to int where write cache changeable
8963  *				  state is returned (non-zero -> write cache
8964  *				  changeable)
8965  *
8966  *     Context: Kernel Thread
8967  */
8968 
8969 static void
sd_get_write_cache_changeable(sd_ssc_t * ssc,int * is_changeable)8970 sd_get_write_cache_changeable(sd_ssc_t *ssc, int *is_changeable)
8971 {
8972 	struct sd_lun	*un = ssc->ssc_un;
8973 	struct mode_caching *mode_caching_page;
8974 	uchar_t		*header;
8975 	size_t		buflen = SDC_BUFLEN(un);
8976 	int		hdrlen = SDC_HDRLEN(un);
8977 	int		bdlen;
8978 	int		rval;
8979 
8980 	/* In case of error, flag as enabled */
8981 	*is_changeable = TRUE;
8982 
8983 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CHANGEABLE, &header,
8984 	    &bdlen);
8985 	switch (rval) {
8986 	case 0:
8987 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
8988 		    bdlen);
8989 		*is_changeable = mode_caching_page->wce;
8990 		kmem_free(header, buflen);
8991 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
8992 		break;
8993 	case EIO:
8994 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
8995 		break;
8996 	default:
8997 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8998 		break;
8999 	}
9000 }
9001 
9002 /*
9003  *    Function: sd_get_nv_sup()
9004  *
9005  * Description: This routine is the driver entry point for
9006  * determining whether non-volatile cache is supported. This
9007  * determination process works as follows:
9008  *
9009  * 1. sd first queries sd.conf on whether
9010  * suppress_cache_flush bit is set for this device.
9011  *
9012  * 2. if not there, then queries the internal disk table.
9013  *
9014  * 3. if either sd.conf or internal disk table specifies
9015  * cache flush be suppressed, we don't bother checking
9016  * NV_SUP bit.
9017  *
9018  * If SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
9019  * the optional INQUIRY VPD page 0x86. If the device
9020  * supports VPD page 0x86, sd examines the NV_SUP
9021  * (non-volatile cache support) bit in the INQUIRY VPD page
9022  * 0x86:
9023  *   o If NV_SUP bit is set, sd assumes the device has a
9024  *   non-volatile cache and set the
9025  *   un_f_sync_nv_supported to TRUE.
9026  *   o Otherwise cache is not non-volatile,
9027  *   un_f_sync_nv_supported is set to FALSE.
9028  *
9029  * Arguments: un - driver soft state (unit) structure
9030  *
9031  * Return Code:
9032  *
9033  *     Context: Kernel Thread
9034  */
9035 
static void
sd_get_nv_sup(sd_ssc_t *ssc)
{
	int		rval		= 0;
	uchar_t		*inq86		= NULL;
	size_t		inq86_len	= MAX_INQUIRY_SIZE;
	size_t		inq86_resid	= 0;
	struct		dk_callback *dkc;
	struct sd_lun	*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/* SD_MUTEX guards un_f_sync_nv_supported and the other un_f_* flags */
	mutex_enter(SD_MUTEX(un));

	/*
	 * Be conservative on the device's support of
	 * SYNC_NV bit: un_f_sync_nv_supported is
	 * initialized to be false.
	 */
	un->un_f_sync_nv_supported = FALSE;

	/*
	 * If either sd.conf or internal disk table
	 * specifies cache flush be suppressed, then
	 * we don't bother checking NV_SUP bit.
	 */
	if (un->un_f_suppress_cache_flush == TRUE) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	if (sd_check_vpd_page_support(ssc) == 0 &&
	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
		/* Drop the mutex around the INQUIRY, which may block. */
		mutex_exit(SD_MUTEX(un));
		/* collect page 86 data if available */
		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);

		/* EVPD (0x01) INQUIRY for page 0x86 (Extended INQUIRY Data) */
		rval = sd_send_scsi_INQUIRY(ssc, inq86, inq86_len,
		    0x01, 0x86, &inq86_resid);

		/* Need at least 7 bytes so that inq86[6] (NV_SUP) is valid */
		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_get_nv_sup: \
			    successfully get VPD page: %x \
			    PAGE LENGTH: %x BYTE 6: %x\n",
			    inq86[1], inq86[3], inq86[6]);

			mutex_enter(SD_MUTEX(un));
			/*
			 * check the value of NV_SUP bit: only if the device
			 * reports NV_SUP bit to be 1, the
			 * un_f_sync_nv_supported bit will be set to true.
			 */
			if (inq86[6] & SD_VPD_NV_SUP) {
				un->un_f_sync_nv_supported = TRUE;
			}
			mutex_exit(SD_MUTEX(un));
		} else if (rval != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		kmem_free(inq86, inq86_len);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Send a SYNC CACHE command to check whether
	 * SYNC_NV bit is supported. This command should have
	 * un_f_sync_nv_supported set to correct value.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_sync_nv_supported) {
		mutex_exit(SD_MUTEX(un));
		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
		dkc->dkc_flag = FLUSH_VOLATILE;
		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);

		/*
		 * Send a TEST UNIT READY command to the device. This should
		 * clear any outstanding UNIT ATTENTION that may be present.
		 */
		rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
		if (rval != 0)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		kmem_free(dkc, sizeof (struct dk_callback));
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	SD_TRACE(SD_LOG_COMMON, un, "sd_get_nv_sup: \
	    un_f_suppress_cache_flush is set to %d\n",
	    un->un_f_suppress_cache_flush);
}
9133 
9134 /*
9135  *    Function: sd_make_device
9136  *
9137  * Description: Utility routine to return the Solaris device number from
9138  *		the data in the device's dev_info structure.
9139  *
9140  * Return Code: The Solaris device number
9141  *
9142  *     Context: Any
9143  */
9144 
9145 static dev_t
sd_make_device(dev_info_t * devi)9146 sd_make_device(dev_info_t *devi)
9147 {
9148 	return (makedevice(ddi_driver_major(devi),
9149 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9150 }
9151 
9152 
9153 /*
9154  *    Function: sd_pm_entry
9155  *
9156  * Description: Called at the start of a new command to manage power
9157  *		and busy status of a device. This includes determining whether
9158  *		the current power state of the device is sufficient for
9159  *		performing the command or whether it must be changed.
9160  *		The PM framework is notified appropriately.
9161  *		Only with a return status of DDI_SUCCESS will the
9162  *		component be busy to the framework.
9163  *
9164  *		All callers of sd_pm_entry must check the return status
9165  *		and only call sd_pm_exit it it was DDI_SUCCESS. A status
9166  *		of DDI_FAILURE indicates the device failed to power up.
9167  *		In this case un_pm_count has been adjusted so the result
9168  *		on exit is still powered down, ie. count is less than 0.
9169  *		Calling sd_pm_exit with this count value hits an ASSERT.
9170  *
9171  * Return Code: DDI_SUCCESS or DDI_FAILURE
9172  *
9173  *     Context: Kernel thread context.
9174  */
9175 
9176 static int
sd_pm_entry(struct sd_lun * un)9177 sd_pm_entry(struct sd_lun *un)
9178 {
9179 	int return_status = DDI_SUCCESS;
9180 
9181 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9182 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9183 
9184 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9185 
9186 	if (un->un_f_pm_is_enabled == FALSE) {
9187 		SD_TRACE(SD_LOG_IO_PM, un,
9188 		    "sd_pm_entry: exiting, PM not enabled\n");
9189 		return (return_status);
9190 	}
9191 
9192 	/*
9193 	 * Just increment a counter if PM is enabled. On the transition from
9194 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9195 	 * the count with each IO and mark the device as idle when the count
9196 	 * hits 0.
9197 	 *
9198 	 * If the count is less than 0 the device is powered down. If a powered
9199 	 * down device is successfully powered up then the count must be
9200 	 * incremented to reflect the power up. Note that it'll get incremented
9201 	 * a second time to become busy.
9202 	 *
9203 	 * Because the following has the potential to change the device state
9204 	 * and must release the un_pm_mutex to do so, only one thread can be
9205 	 * allowed through at a time.
9206 	 */
9207 
9208 	mutex_enter(&un->un_pm_mutex);
9209 	while (un->un_pm_busy == TRUE) {
9210 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
9211 	}
9212 	un->un_pm_busy = TRUE;
9213 
9214 	if (un->un_pm_count < 1) {
9215 
9216 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
9217 
9218 		/*
9219 		 * Indicate we are now busy so the framework won't attempt to
9220 		 * power down the device. This call will only fail if either
9221 		 * we passed a bad component number or the device has no
9222 		 * components. Neither of these should ever happen.
9223 		 */
9224 		mutex_exit(&un->un_pm_mutex);
9225 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
9226 		ASSERT(return_status == DDI_SUCCESS);
9227 
9228 		mutex_enter(&un->un_pm_mutex);
9229 
9230 		if (un->un_pm_count < 0) {
9231 			mutex_exit(&un->un_pm_mutex);
9232 
9233 			SD_TRACE(SD_LOG_IO_PM, un,
9234 			    "sd_pm_entry: power up component\n");
9235 
9236 			/*
9237 			 * pm_raise_power will cause sdpower to be called
9238 			 * which brings the device power level to the
9239 			 * desired state, If successful, un_pm_count and
9240 			 * un_power_level will be updated appropriately.
9241 			 */
9242 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
9243 			    SD_PM_STATE_ACTIVE(un));
9244 
9245 			mutex_enter(&un->un_pm_mutex);
9246 
9247 			if (return_status != DDI_SUCCESS) {
9248 				/*
9249 				 * Power up failed.
9250 				 * Idle the device and adjust the count
9251 				 * so the result on exit is that we're
9252 				 * still powered down, ie. count is less than 0.
9253 				 */
9254 				SD_TRACE(SD_LOG_IO_PM, un,
9255 				    "sd_pm_entry: power up failed,"
9256 				    " idle the component\n");
9257 
9258 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9259 				un->un_pm_count--;
9260 			} else {
9261 				/*
9262 				 * Device is powered up, verify the
9263 				 * count is non-negative.
9264 				 * This is debug only.
9265 				 */
9266 				ASSERT(un->un_pm_count == 0);
9267 			}
9268 		}
9269 
9270 		if (return_status == DDI_SUCCESS) {
9271 			/*
9272 			 * For performance, now that the device has been tagged
9273 			 * as busy, and it's known to be powered up, update the
9274 			 * chain types to use jump tables that do not include
9275 			 * pm. This significantly lowers the overhead and
9276 			 * therefore improves performance.
9277 			 */
9278 
9279 			mutex_exit(&un->un_pm_mutex);
9280 			mutex_enter(SD_MUTEX(un));
9281 			SD_TRACE(SD_LOG_IO_PM, un,
9282 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
9283 			    un->un_uscsi_chain_type);
9284 
9285 			if (un->un_f_non_devbsize_supported) {
9286 				un->un_buf_chain_type =
9287 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
9288 			} else {
9289 				un->un_buf_chain_type =
9290 				    SD_CHAIN_INFO_DISK_NO_PM;
9291 			}
9292 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
9293 
9294 			SD_TRACE(SD_LOG_IO_PM, un,
9295 			    "             changed  uscsi_chain_type to   %d\n",
9296 			    un->un_uscsi_chain_type);
9297 			mutex_exit(SD_MUTEX(un));
9298 			mutex_enter(&un->un_pm_mutex);
9299 
9300 			if (un->un_pm_idle_timeid == NULL) {
9301 				/* 300 ms. */
9302 				un->un_pm_idle_timeid =
9303 				    timeout(sd_pm_idletimeout_handler, un,
9304 				    (drv_usectohz((clock_t)300000)));
9305 				/*
9306 				 * Include an extra call to busy which keeps the
9307 				 * device busy with-respect-to the PM layer
9308 				 * until the timer fires, at which time it'll
9309 				 * get the extra idle call.
9310 				 */
9311 				(void) pm_busy_component(SD_DEVINFO(un), 0);
9312 			}
9313 		}
9314 	}
9315 	un->un_pm_busy = FALSE;
9316 	/* Next... */
9317 	cv_signal(&un->un_pm_busy_cv);
9318 
9319 	un->un_pm_count++;
9320 
9321 	SD_TRACE(SD_LOG_IO_PM, un,
9322 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
9323 
9324 	mutex_exit(&un->un_pm_mutex);
9325 
9326 	return (return_status);
9327 }
9328 
9329 
9330 /*
9331  *    Function: sd_pm_exit
9332  *
9333  * Description: Called at the completion of a command to manage busy
9334  *		status for the device. If the device becomes idle the
9335  *		PM framework is notified.
9336  *
9337  *     Context: Kernel thread context
9338  */
9339 
9340 static void
sd_pm_exit(struct sd_lun * un)9341 sd_pm_exit(struct sd_lun *un)
9342 {
9343 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9344 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9345 
9346 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
9347 
9348 	/*
9349 	 * After attach the following flag is only read, so don't
9350 	 * take the penalty of acquiring a mutex for it.
9351 	 */
9352 	if (un->un_f_pm_is_enabled == TRUE) {
9353 
9354 		mutex_enter(&un->un_pm_mutex);
9355 		un->un_pm_count--;
9356 
9357 		SD_TRACE(SD_LOG_IO_PM, un,
9358 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
9359 
9360 		ASSERT(un->un_pm_count >= 0);
9361 		if (un->un_pm_count == 0) {
9362 			mutex_exit(&un->un_pm_mutex);
9363 
9364 			SD_TRACE(SD_LOG_IO_PM, un,
9365 			    "sd_pm_exit: idle component\n");
9366 
9367 			(void) pm_idle_component(SD_DEVINFO(un), 0);
9368 
9369 		} else {
9370 			mutex_exit(&un->un_pm_mutex);
9371 		}
9372 	}
9373 
9374 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
9375 }
9376 
9377 
9378 /*
9379  *    Function: sdopen
9380  *
9381  * Description: Driver's open(9e) entry point function.
9382  *
9383  *   Arguments: dev_i   - pointer to device number
9384  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
9385  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9386  *		cred_p  - user credential pointer
9387  *
9388  * Return Code: EINVAL
9389  *		ENXIO
9390  *		EIO
9391  *		EROFS
9392  *		EBUSY
9393  *
9394  *     Context: Kernel thread context
9395  */
9396 /* ARGSUSED */
9397 static int
sdopen(dev_t * dev_p,int flag,int otyp,cred_t * cred_p)9398 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
9399 {
9400 	struct sd_lun	*un;
9401 	int		nodelay;
9402 	int		part;
9403 	uint64_t	partmask;
9404 	int		instance;
9405 	dev_t		dev;
9406 	int		rval = EIO;
9407 	diskaddr_t	nblks = 0;
9408 	diskaddr_t	label_cap;
9409 
9410 	/* Validate the open type */
9411 	if (otyp >= OTYPCNT) {
9412 		return (EINVAL);
9413 	}
9414 
9415 	dev = *dev_p;
9416 	instance = SDUNIT(dev);
9417 
9418 	/*
9419 	 * Fail the open if there is no softstate for the instance.
9420 	 */
9421 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
9422 		/*
9423 		 * The probe cache only needs to be cleared when open (9e) fails
9424 		 * with ENXIO (4238046).
9425 		 */
9426 		/*
9427 		 * un-conditionally clearing probe cache is ok with
9428 		 * separate sd/ssd binaries
9429 		 * x86 platform can be an issue with both parallel
9430 		 * and fibre in 1 binary
9431 		 */
9432 		sd_scsi_clear_probe_cache();
9433 		return (ENXIO);
9434 	}
9435 
9436 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
9437 	part	 = SDPART(dev);
9438 	partmask = 1 << part;
9439 
9440 	mutex_enter(SD_MUTEX(un));
9441 
9442 	/*
9443 	 * All device accesses go thru sdstrategy() where we check
9444 	 * on suspend status but there could be a scsi_poll command,
9445 	 * which bypasses sdstrategy(), so we need to check pm
9446 	 * status.
9447 	 */
9448 
9449 	if (!nodelay) {
9450 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9451 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9452 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9453 		}
9454 
9455 		mutex_exit(SD_MUTEX(un));
9456 		if (sd_pm_entry(un) != DDI_SUCCESS) {
9457 			rval = EIO;
9458 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
9459 			    "sdopen: sd_pm_entry failed\n");
9460 			goto open_failed_with_pm;
9461 		}
9462 		mutex_enter(SD_MUTEX(un));
9463 	}
9464 
9465 	/* check for previous exclusive open */
9466 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
9467 	if (otyp == OTYP_LYR) {
9468 		SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9469 		    "sdopen: exclopen=%x, flag=%x, un_ocmap.lyropen=%x\n",
9470 		    un->un_exclopen, flag, un->un_ocmap.lyropen[part]);
9471 	} else {
9472 		SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9473 		    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
9474 		    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
9475 	}
9476 
9477 	if (un->un_exclopen & (partmask)) {
9478 		goto excl_open_fail;
9479 	}
9480 
9481 	if (flag & FEXCL) {
9482 		int i;
9483 		if (un->un_ocmap.lyropen[part]) {
9484 			goto excl_open_fail;
9485 		}
9486 		for (i = 0; i < (OTYPCNT - 1); i++) {
9487 			if (un->un_ocmap.regopen[i] & (partmask)) {
9488 				goto excl_open_fail;
9489 			}
9490 		}
9491 	}
9492 
9493 	/*
9494 	 * Check the write permission if this is a removable media device,
9495 	 * NDELAY has not been set, and writable permission is requested.
9496 	 *
9497 	 * Note: If NDELAY was set and this is write-protected media the WRITE
9498 	 * attempt will fail with EIO as part of the I/O processing. This is a
9499 	 * more permissive implementation that allows the open to succeed and
9500 	 * WRITE attempts to fail when appropriate.
9501 	 */
9502 	if (un->un_f_chk_wp_open) {
9503 		if ((flag & FWRITE) && (!nodelay)) {
9504 			mutex_exit(SD_MUTEX(un));
9505 			/*
9506 			 * Defer the check for write permission on writable
9507 			 * DVD drive till sdstrategy and will not fail open even
9508 			 * if FWRITE is set as the device can be writable
9509 			 * depending upon the media and the media can change
9510 			 * after the call to open().
9511 			 */
9512 			if (un->un_f_dvdram_writable_device == FALSE) {
9513 				if (ISCD(un) || sr_check_wp(dev)) {
9514 				rval = EROFS;
9515 				mutex_enter(SD_MUTEX(un));
9516 				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9517 				    "write to cd or write protected media\n");
9518 				goto open_fail;
9519 				}
9520 			}
9521 			mutex_enter(SD_MUTEX(un));
9522 		}
9523 	}
9524 
9525 	/*
9526 	 * If opening in NDELAY/NONBLOCK mode, just return.
9527 	 * Check if disk is ready and has a valid geometry later.
9528 	 */
9529 	if (!nodelay) {
9530 		sd_ssc_t	*ssc;
9531 
9532 		mutex_exit(SD_MUTEX(un));
9533 		ssc = sd_ssc_init(un);
9534 		rval = sd_ready_and_valid(ssc, part);
9535 		sd_ssc_fini(ssc);
9536 		mutex_enter(SD_MUTEX(un));
9537 		/*
9538 		 * Fail if device is not ready or if the number of disk
9539 		 * blocks is zero or negative for non CD devices.
9540 		 */
9541 
9542 		nblks = 0;
9543 
9544 		if (rval == SD_READY_VALID && (!ISCD(un))) {
9545 			/* if cmlb_partinfo fails, nblks remains 0 */
9546 			mutex_exit(SD_MUTEX(un));
9547 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
9548 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
9549 			mutex_enter(SD_MUTEX(un));
9550 		}
9551 
9552 		if ((rval != SD_READY_VALID) ||
9553 		    (!ISCD(un) && nblks <= 0)) {
9554 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
9555 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9556 			    "device not ready or invalid disk block value\n");
9557 			goto open_fail;
9558 		}
9559 #if defined(__x86)
9560 	} else {
9561 		uchar_t *cp;
9562 		/*
9563 		 * x86 requires special nodelay handling, so that p0 is
9564 		 * always defined and accessible.
9565 		 * Invalidate geometry only if device is not already open.
9566 		 */
9567 		cp = &un->un_ocmap.chkd[0];
9568 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9569 			if (*cp != (uchar_t)0) {
9570 				break;
9571 			}
9572 			cp++;
9573 		}
9574 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9575 			mutex_exit(SD_MUTEX(un));
9576 			cmlb_invalidate(un->un_cmlbhandle,
9577 			    (void *)SD_PATH_DIRECT);
9578 			mutex_enter(SD_MUTEX(un));
9579 		}
9580 
9581 #endif
9582 	}
9583 
9584 	if (otyp == OTYP_LYR) {
9585 		un->un_ocmap.lyropen[part]++;
9586 	} else {
9587 		un->un_ocmap.regopen[otyp] |= partmask;
9588 	}
9589 
9590 	/* Set up open and exclusive open flags */
9591 	if (flag & FEXCL) {
9592 		un->un_exclopen |= (partmask);
9593 	}
9594 
9595 	/*
9596 	 * If the lun is EFI labeled and lun capacity is greater than the
9597 	 * capacity contained in the label, log a sys-event to notify the
9598 	 * interested module.
9599 	 * To avoid an infinite loop of logging sys-event, we only log the
9600 	 * event when the lun is not opened in NDELAY mode. The event handler
9601 	 * should open the lun in NDELAY mode.
9602 	 */
9603 	if (!nodelay) {
9604 		mutex_exit(SD_MUTEX(un));
9605 		if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
9606 		    (void*)SD_PATH_DIRECT) == 0) {
9607 			mutex_enter(SD_MUTEX(un));
9608 			if (un->un_f_blockcount_is_valid &&
9609 			    un->un_blockcount > label_cap &&
9610 			    un->un_f_expnevent == B_FALSE) {
9611 				un->un_f_expnevent = B_TRUE;
9612 				mutex_exit(SD_MUTEX(un));
9613 				sd_log_lun_expansion_event(un,
9614 				    (nodelay ? KM_NOSLEEP : KM_SLEEP));
9615 				mutex_enter(SD_MUTEX(un));
9616 			}
9617 		} else {
9618 			mutex_enter(SD_MUTEX(un));
9619 		}
9620 	}
9621 
9622 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9623 	    "open of part %d type %d\n", part, otyp);
9624 
9625 	mutex_exit(SD_MUTEX(un));
9626 	if (!nodelay) {
9627 		sd_pm_exit(un);
9628 	}
9629 
9630 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
9631 	return (DDI_SUCCESS);
9632 
9633 excl_open_fail:
9634 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
9635 	rval = EBUSY;
9636 
9637 open_fail:
9638 	mutex_exit(SD_MUTEX(un));
9639 
9640 	/*
9641 	 * On a failed open we must exit the pm management.
9642 	 */
9643 	if (!nodelay) {
9644 		sd_pm_exit(un);
9645 	}
9646 open_failed_with_pm:
9647 
9648 	return (rval);
9649 }
9650 
9651 
9652 /*
9653  *    Function: sdclose
9654  *
9655  * Description: Driver's close(9e) entry point function.
9656  *
9657  *   Arguments: dev    - device number
9658  *		flag   - file status flag, informational only
9659  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9660  *		cred_p - user credential pointer
9661  *
9662  * Return Code: ENXIO
9663  *
9664  *     Context: Kernel thread context
9665  */
9666 /* ARGSUSED */
9667 static int
sdclose(dev_t dev,int flag,int otyp,cred_t * cred_p)9668 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9669 {
9670 	struct sd_lun	*un;
9671 	uchar_t		*cp;
9672 	int		part;
9673 	int		nodelay;
9674 	int		rval = 0;
9675 
9676 	/* Validate the open type */
9677 	if (otyp >= OTYPCNT) {
9678 		return (ENXIO);
9679 	}
9680 
9681 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9682 		return (ENXIO);
9683 	}
9684 
9685 	part = SDPART(dev);
9686 	nodelay = flag & (FNDELAY | FNONBLOCK);
9687 
9688 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9689 	    "sdclose: close of part %d type %d\n", part, otyp);
9690 
9691 	mutex_enter(SD_MUTEX(un));
9692 
9693 	/* Don't proceed if power is being changed. */
9694 	while (un->un_state == SD_STATE_PM_CHANGING) {
9695 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9696 	}
9697 
9698 	if (un->un_exclopen & (1 << part)) {
9699 		un->un_exclopen &= ~(1 << part);
9700 	}
9701 
9702 	/* Update the open partition map */
9703 	if (otyp == OTYP_LYR) {
9704 		un->un_ocmap.lyropen[part] -= 1;
9705 	} else {
9706 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
9707 	}
9708 
9709 	cp = &un->un_ocmap.chkd[0];
9710 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9711 		if (*cp != '\0') {
9712 			break;
9713 		}
9714 		cp++;
9715 	}
9716 
9717 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9718 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
9719 
9720 		/*
9721 		 * We avoid persistance upon the last close, and set
9722 		 * the throttle back to the maximum.
9723 		 */
9724 		un->un_throttle = un->un_saved_throttle;
9725 
9726 		if (un->un_state == SD_STATE_OFFLINE) {
9727 			if (un->un_f_is_fibre == FALSE) {
9728 				scsi_log(SD_DEVINFO(un), sd_label,
9729 				    CE_WARN, "offline\n");
9730 			}
9731 			mutex_exit(SD_MUTEX(un));
9732 			cmlb_invalidate(un->un_cmlbhandle,
9733 			    (void *)SD_PATH_DIRECT);
9734 			mutex_enter(SD_MUTEX(un));
9735 
9736 		} else {
9737 			/*
9738 			 * Flush any outstanding writes in NVRAM cache.
9739 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
9740 			 * cmd, it may not work for non-Pluto devices.
9741 			 * SYNCHRONIZE CACHE is not required for removables,
9742 			 * except DVD-RAM drives.
9743 			 *
9744 			 * Also note: because SYNCHRONIZE CACHE is currently
9745 			 * the only command issued here that requires the
9746 			 * drive be powered up, only do the power up before
9747 			 * sending the Sync Cache command. If additional
9748 			 * commands are added which require a powered up
9749 			 * drive, the following sequence may have to change.
9750 			 *
9751 			 * And finally, note that parallel SCSI on SPARC
9752 			 * only issues a Sync Cache to DVD-RAM, a newly
9753 			 * supported device.
9754 			 */
9755 			if ((un->un_f_sync_cache_supported &&
9756 			    un->un_f_sync_cache_required) ||
9757 			    un->un_f_dvdram_writable_device == TRUE) {
9758 				mutex_exit(SD_MUTEX(un));
9759 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9760 					rval =
9761 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
9762 					    NULL);
9763 					/* ignore error if not supported */
9764 					if (rval == ENOTSUP) {
9765 						rval = 0;
9766 					} else if (rval != 0) {
9767 						rval = EIO;
9768 					}
9769 					sd_pm_exit(un);
9770 				} else {
9771 					rval = EIO;
9772 				}
9773 				mutex_enter(SD_MUTEX(un));
9774 			}
9775 
9776 			/*
9777 			 * For devices which supports DOOR_LOCK, send an ALLOW
9778 			 * MEDIA REMOVAL command, but don't get upset if it
9779 			 * fails. We need to raise the power of the drive before
9780 			 * we can call sd_send_scsi_DOORLOCK()
9781 			 */
9782 			if (un->un_f_doorlock_supported) {
9783 				mutex_exit(SD_MUTEX(un));
9784 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9785 					sd_ssc_t	*ssc;
9786 
9787 					ssc = sd_ssc_init(un);
9788 					rval = sd_send_scsi_DOORLOCK(ssc,
9789 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
9790 					if (rval != 0)
9791 						sd_ssc_assessment(ssc,
9792 						    SD_FMT_IGNORE);
9793 					sd_ssc_fini(ssc);
9794 
9795 					sd_pm_exit(un);
9796 					if (ISCD(un) && (rval != 0) &&
9797 					    (nodelay != 0)) {
9798 						rval = ENXIO;
9799 					}
9800 				} else {
9801 					rval = EIO;
9802 				}
9803 				mutex_enter(SD_MUTEX(un));
9804 			}
9805 
9806 			/*
9807 			 * If a device has removable media, invalidate all
9808 			 * parameters related to media, such as geometry,
9809 			 * blocksize, and blockcount.
9810 			 */
9811 			if (un->un_f_has_removable_media) {
9812 				sr_ejected(un);
9813 			}
9814 
9815 			/*
9816 			 * Destroy the cache (if it exists) which was
9817 			 * allocated for the write maps, as long as no
9818 			 * other outstanding commands for the device exist.
9819 			 * (If we don't destroy it here, we will do so later
9820 			 * on detach.  More likely we'll just reuse it on
9821 			 * a future open.)
9822 			 */
9823 			if ((un->un_wm_cache != NULL) &&
9824 			    (un->un_ncmds_in_driver == 0)) {
9825 				kmem_cache_destroy(un->un_wm_cache);
9826 				un->un_wm_cache = NULL;
9827 			}
9828 		}
9829 	}
9830 
9831 	mutex_exit(SD_MUTEX(un));
9832 
9833 	return (rval);
9834 }
9835 
9836 
9837 /*
9838  *    Function: sd_ready_and_valid
9839  *
9840  * Description: Test if device is ready and has a valid geometry.
9841  *
9842  *   Arguments: ssc - sd_ssc_t will contain un
9843  *		un  - driver soft state (unit) structure
9844  *
9845  * Return Code: SD_READY_VALID		ready and valid label
9846  *		SD_NOT_READY_VALID	not ready, no label
9847  *		SD_RESERVED_BY_OTHERS	reservation conflict
9848  *
9849  *     Context: Never called at interrupt context.
9850  */
9851 
9852 static int
sd_ready_and_valid(sd_ssc_t * ssc,int part)9853 sd_ready_and_valid(sd_ssc_t *ssc, int part)
9854 {
9855 	struct sd_errstats	*stp;
9856 	uint64_t		capacity;
9857 	uint_t			lbasize;
9858 	int			rval = SD_READY_VALID;
9859 	char			name_str[48];
9860 	boolean_t		is_valid;
9861 	struct sd_lun		*un;
9862 	int			status;
9863 
9864 	ASSERT(ssc != NULL);
9865 	un = ssc->ssc_un;
9866 	ASSERT(un != NULL);
9867 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9868 
9869 	mutex_enter(SD_MUTEX(un));
9870 	/*
9871 	 * If a device has removable media, we must check if media is
9872 	 * ready when checking if this device is ready and valid.
9873 	 */
9874 	if (un->un_f_has_removable_media) {
9875 		mutex_exit(SD_MUTEX(un));
9876 		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9877 
9878 		if (status != 0) {
9879 			rval = SD_NOT_READY_VALID;
9880 			mutex_enter(SD_MUTEX(un));
9881 
9882 			/* Ignore all failed status for removalbe media */
9883 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9884 
9885 			goto done;
9886 		}
9887 
9888 		is_valid = SD_IS_VALID_LABEL(un);
9889 		mutex_enter(SD_MUTEX(un));
9890 		if (!is_valid ||
9891 		    (un->un_f_blockcount_is_valid == FALSE) ||
9892 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
9893 
9894 			/* capacity has to be read every open. */
9895 			mutex_exit(SD_MUTEX(un));
9896 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
9897 			    &lbasize, SD_PATH_DIRECT);
9898 
9899 			if (status != 0) {
9900 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9901 
9902 				cmlb_invalidate(un->un_cmlbhandle,
9903 				    (void *)SD_PATH_DIRECT);
9904 				mutex_enter(SD_MUTEX(un));
9905 				rval = SD_NOT_READY_VALID;
9906 
9907 				goto done;
9908 			} else {
9909 				mutex_enter(SD_MUTEX(un));
9910 				sd_update_block_info(un, lbasize, capacity);
9911 			}
9912 		}
9913 
9914 		/*
9915 		 * Check if the media in the device is writable or not.
9916 		 */
9917 		if (!is_valid && ISCD(un)) {
9918 			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
9919 		}
9920 
9921 	} else {
9922 		/*
9923 		 * Do a test unit ready to clear any unit attention from non-cd
9924 		 * devices.
9925 		 */
9926 		mutex_exit(SD_MUTEX(un));
9927 
9928 		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9929 		if (status != 0) {
9930 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9931 		}
9932 
9933 		mutex_enter(SD_MUTEX(un));
9934 	}
9935 
9936 
9937 	/*
9938 	 * If this is a non 512 block device, allocate space for
9939 	 * the wmap cache. This is being done here since every time
9940 	 * a media is changed this routine will be called and the
9941 	 * block size is a function of media rather than device.
9942 	 */
9943 	if (((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
9944 	    un->un_f_non_devbsize_supported) &&
9945 	    un->un_tgt_blocksize != DEV_BSIZE) ||
9946 	    un->un_f_enable_rmw) {
9947 		if (!(un->un_wm_cache)) {
9948 			(void) snprintf(name_str, sizeof (name_str),
9949 			    "%s%d_cache",
9950 			    ddi_driver_name(SD_DEVINFO(un)),
9951 			    ddi_get_instance(SD_DEVINFO(un)));
9952 			un->un_wm_cache = kmem_cache_create(
9953 			    name_str, sizeof (struct sd_w_map),
9954 			    8, sd_wm_cache_constructor,
9955 			    sd_wm_cache_destructor, NULL,
9956 			    (void *)un, NULL, 0);
9957 			if (!(un->un_wm_cache)) {
9958 				rval = ENOMEM;
9959 				goto done;
9960 			}
9961 		}
9962 	}
9963 
9964 	if (un->un_state == SD_STATE_NORMAL) {
9965 		/*
9966 		 * If the target is not yet ready here (defined by a TUR
9967 		 * failure), invalidate the geometry and print an 'offline'
9968 		 * message. This is a legacy message, as the state of the
9969 		 * target is not actually changed to SD_STATE_OFFLINE.
9970 		 *
9971 		 * If the TUR fails for EACCES (Reservation Conflict),
9972 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
9973 		 * reservation conflict. If the TUR fails for other
9974 		 * reasons, SD_NOT_READY_VALID will be returned.
9975 		 */
9976 		int err;
9977 
9978 		mutex_exit(SD_MUTEX(un));
9979 		err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9980 		mutex_enter(SD_MUTEX(un));
9981 
9982 		if (err != 0) {
9983 			mutex_exit(SD_MUTEX(un));
9984 			cmlb_invalidate(un->un_cmlbhandle,
9985 			    (void *)SD_PATH_DIRECT);
9986 			mutex_enter(SD_MUTEX(un));
9987 			if (err == EACCES) {
9988 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9989 				    "reservation conflict\n");
9990 				rval = SD_RESERVED_BY_OTHERS;
9991 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9992 			} else {
9993 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9994 				    "drive offline\n");
9995 				rval = SD_NOT_READY_VALID;
9996 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9997 			}
9998 			goto done;
9999 		}
10000 	}
10001 
10002 	if (un->un_f_format_in_progress == FALSE) {
10003 		mutex_exit(SD_MUTEX(un));
10004 
10005 		(void) cmlb_validate(un->un_cmlbhandle, 0,
10006 		    (void *)SD_PATH_DIRECT);
10007 		if (cmlb_partinfo(un->un_cmlbhandle, part, NULL, NULL, NULL,
10008 		    NULL, (void *) SD_PATH_DIRECT) != 0) {
10009 			rval = SD_NOT_READY_VALID;
10010 			mutex_enter(SD_MUTEX(un));
10011 
10012 			goto done;
10013 		}
10014 		if (un->un_f_pkstats_enabled) {
10015 			sd_set_pstats(un);
10016 			SD_TRACE(SD_LOG_IO_PARTITION, un,
10017 			    "sd_ready_and_valid: un:0x%p pstats created and "
10018 			    "set\n", un);
10019 		}
10020 		mutex_enter(SD_MUTEX(un));
10021 	}
10022 
10023 	/*
10024 	 * If this device supports DOOR_LOCK command, try and send
10025 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
10026 	 * if it fails. For a CD, however, it is an error
10027 	 */
10028 	if (un->un_f_doorlock_supported) {
10029 		mutex_exit(SD_MUTEX(un));
10030 		status = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
10031 		    SD_PATH_DIRECT);
10032 
10033 		if ((status != 0) && ISCD(un)) {
10034 			rval = SD_NOT_READY_VALID;
10035 			mutex_enter(SD_MUTEX(un));
10036 
10037 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10038 
10039 			goto done;
10040 		} else if (status != 0)
10041 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10042 		mutex_enter(SD_MUTEX(un));
10043 	}
10044 
10045 	/* The state has changed, inform the media watch routines */
10046 	un->un_mediastate = DKIO_INSERTED;
10047 	cv_broadcast(&un->un_state_cv);
10048 	rval = SD_READY_VALID;
10049 
10050 done:
10051 
10052 	/*
10053 	 * Initialize the capacity kstat value, if no media previously
10054 	 * (capacity kstat is 0) and a media has been inserted
10055 	 * (un_blockcount > 0).
10056 	 */
10057 	if (un->un_errstats != NULL) {
10058 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10059 		if ((stp->sd_capacity.value.ui64 == 0) &&
10060 		    (un->un_f_blockcount_is_valid == TRUE)) {
10061 			stp->sd_capacity.value.ui64 =
10062 			    (uint64_t)((uint64_t)un->un_blockcount *
10063 			    un->un_sys_blocksize);
10064 		}
10065 	}
10066 
10067 	mutex_exit(SD_MUTEX(un));
10068 	return (rval);
10069 }
10070 
10071 
10072 /*
10073  *    Function: sdmin
10074  *
10075  * Description: Routine to limit the size of a data transfer. Used in
10076  *		conjunction with physio(9F).
10077  *
10078  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10079  *
10080  *     Context: Kernel thread context.
10081  */
10082 
10083 static void
sdmin(struct buf * bp)10084 sdmin(struct buf *bp)
10085 {
10086 	struct sd_lun	*un;
10087 	int		instance;
10088 
10089 	instance = SDUNIT(bp->b_edev);
10090 
10091 	un = ddi_get_soft_state(sd_state, instance);
10092 	ASSERT(un != NULL);
10093 
10094 	/*
10095 	 * We depend on buf breakup to restrict
10096 	 * IO size if it is enabled.
10097 	 */
10098 	if (un->un_buf_breakup_supported) {
10099 		return;
10100 	}
10101 
10102 	if (bp->b_bcount > un->un_max_xfer_size) {
10103 		bp->b_bcount = un->un_max_xfer_size;
10104 	}
10105 }
10106 
10107 
10108 /*
10109  *    Function: sdread
10110  *
10111  * Description: Driver's read(9e) entry point function.
10112  *
10113  *   Arguments: dev   - device number
10114  *		uio   - structure pointer describing where data is to be stored
10115  *			in user's space
10116  *		cred_p  - user credential pointer
10117  *
10118  * Return Code: ENXIO
10119  *		EIO
10120  *		EINVAL
10121  *		value returned by physio
10122  *
10123  *     Context: Kernel thread context.
10124  */
10125 /* ARGSUSED */
10126 static int
sdread(dev_t dev,struct uio * uio,cred_t * cred_p)10127 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10128 {
10129 	struct sd_lun	*un = NULL;
10130 	int		secmask;
10131 	int		err = 0;
10132 	sd_ssc_t	*ssc;
10133 
10134 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10135 		return (ENXIO);
10136 	}
10137 
10138 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10139 
10140 
10141 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10142 		mutex_enter(SD_MUTEX(un));
10143 		/*
10144 		 * Because the call to sd_ready_and_valid will issue I/O we
10145 		 * must wait here if either the device is suspended or
10146 		 * if it's power level is changing.
10147 		 */
10148 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10149 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10150 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10151 		}
10152 		un->un_ncmds_in_driver++;
10153 		mutex_exit(SD_MUTEX(un));
10154 
10155 		/* Initialize sd_ssc_t for internal uscsi commands */
10156 		ssc = sd_ssc_init(un);
10157 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10158 			err = EIO;
10159 		} else {
10160 			err = 0;
10161 		}
10162 		sd_ssc_fini(ssc);
10163 
10164 		mutex_enter(SD_MUTEX(un));
10165 		un->un_ncmds_in_driver--;
10166 		ASSERT(un->un_ncmds_in_driver >= 0);
10167 		mutex_exit(SD_MUTEX(un));
10168 		if (err != 0)
10169 			return (err);
10170 	}
10171 
10172 	/*
10173 	 * Read requests are restricted to multiples of the system block size.
10174 	 */
10175 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10176 	    !un->un_f_enable_rmw)
10177 		secmask = un->un_tgt_blocksize - 1;
10178 	else
10179 		secmask = DEV_BSIZE - 1;
10180 
10181 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10182 		SD_ERROR(SD_LOG_READ_WRITE, un,
10183 		    "sdread: file offset not modulo %d\n",
10184 		    secmask + 1);
10185 		err = EINVAL;
10186 	} else if (uio->uio_iov->iov_len & (secmask)) {
10187 		SD_ERROR(SD_LOG_READ_WRITE, un,
10188 		    "sdread: transfer length not modulo %d\n",
10189 		    secmask + 1);
10190 		err = EINVAL;
10191 	} else {
10192 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10193 	}
10194 
10195 	return (err);
10196 }
10197 
10198 
10199 /*
10200  *    Function: sdwrite
10201  *
10202  * Description: Driver's write(9e) entry point function.
10203  *
10204  *   Arguments: dev   - device number
10205  *		uio   - structure pointer describing where data is stored in
10206  *			user's space
10207  *		cred_p  - user credential pointer
10208  *
10209  * Return Code: ENXIO
10210  *		EIO
10211  *		EINVAL
10212  *		value returned by physio
10213  *
10214  *     Context: Kernel thread context.
10215  */
10216 /* ARGSUSED */
10217 static int
sdwrite(dev_t dev,struct uio * uio,cred_t * cred_p)10218 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10219 {
10220 	struct sd_lun	*un = NULL;
10221 	int		secmask;
10222 	int		err = 0;
10223 	sd_ssc_t	*ssc;
10224 
10225 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10226 		return (ENXIO);
10227 	}
10228 
10229 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10230 
10231 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10232 		mutex_enter(SD_MUTEX(un));
10233 		/*
10234 		 * Because the call to sd_ready_and_valid will issue I/O we
10235 		 * must wait here if either the device is suspended or
10236 		 * if it's power level is changing.
10237 		 */
10238 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10239 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10240 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10241 		}
10242 		un->un_ncmds_in_driver++;
10243 		mutex_exit(SD_MUTEX(un));
10244 
10245 		/* Initialize sd_ssc_t for internal uscsi commands */
10246 		ssc = sd_ssc_init(un);
10247 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10248 			err = EIO;
10249 		} else {
10250 			err = 0;
10251 		}
10252 		sd_ssc_fini(ssc);
10253 
10254 		mutex_enter(SD_MUTEX(un));
10255 		un->un_ncmds_in_driver--;
10256 		ASSERT(un->un_ncmds_in_driver >= 0);
10257 		mutex_exit(SD_MUTEX(un));
10258 		if (err != 0)
10259 			return (err);
10260 	}
10261 
10262 	/*
10263 	 * Write requests are restricted to multiples of the system block size.
10264 	 */
10265 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10266 	    !un->un_f_enable_rmw)
10267 		secmask = un->un_tgt_blocksize - 1;
10268 	else
10269 		secmask = DEV_BSIZE - 1;
10270 
10271 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10272 		SD_ERROR(SD_LOG_READ_WRITE, un,
10273 		    "sdwrite: file offset not modulo %d\n",
10274 		    secmask + 1);
10275 		err = EINVAL;
10276 	} else if (uio->uio_iov->iov_len & (secmask)) {
10277 		SD_ERROR(SD_LOG_READ_WRITE, un,
10278 		    "sdwrite: transfer length not modulo %d\n",
10279 		    secmask + 1);
10280 		err = EINVAL;
10281 	} else {
10282 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10283 	}
10284 
10285 	return (err);
10286 }
10287 
10288 
10289 /*
10290  *    Function: sdaread
10291  *
10292  * Description: Driver's aread(9e) entry point function.
10293  *
10294  *   Arguments: dev   - device number
10295  *		aio   - structure pointer describing where data is to be stored
10296  *		cred_p  - user credential pointer
10297  *
10298  * Return Code: ENXIO
10299  *		EIO
10300  *		EINVAL
10301  *		value returned by aphysio
10302  *
10303  *     Context: Kernel thread context.
10304  */
/* ARGSUSED */
static int
sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
{
	struct sd_lun	*un = NULL;
	struct uio	*uio = aio->aio_uio;
	int		secmask;
	int		err = 0;
	sd_ssc_t	*ssc;

	/* Look up the soft state for this unit; fail if not attached. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * if its power level is changing.
		 */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}
		/* Count this command so suspend/detach can wait for it. */
		un->un_ncmds_in_driver++;
		mutex_exit(SD_MUTEX(un));

		/* Initialize sd_ssc_t for internal uscsi commands */
		ssc = sd_ssc_init(un);
		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
			err = EIO;
		} else {
			err = 0;
		}
		sd_ssc_fini(ssc);

		mutex_enter(SD_MUTEX(un));
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		mutex_exit(SD_MUTEX(un));
		if (err != 0)
			return (err);
	}

	/*
	 * Read requests are restricted to multiples of the system block size.
	 * When read-modify-write will not be performed, enforce alignment to
	 * the target's block size rather than DEV_BSIZE.
	 */
	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
	    !un->un_f_enable_rmw)
		secmask = un->un_tgt_blocksize - 1;
	else
		secmask = DEV_BSIZE - 1;

	if (uio->uio_loffset & ((offset_t)(secmask))) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdaread: file offset not modulo %d\n",
		    secmask + 1);
		err = EINVAL;
	} else if (uio->uio_iov->iov_len & (secmask)) {
		/* NOTE(review): only the first iovec's length is checked */
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdaread: transfer length not modulo %d\n",
		    secmask + 1);
		err = EINVAL;
	} else {
		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
	}

	return (err);
}
10377 
10378 
10379 /*
10380  *    Function: sdawrite
10381  *
10382  * Description: Driver's awrite(9e) entry point function.
10383  *
10384  *   Arguments: dev   - device number
10385  *		aio   - structure pointer describing where data is stored
10386  *		cred_p  - user credential pointer
10387  *
10388  * Return Code: ENXIO
10389  *		EIO
10390  *		EINVAL
10391  *		value returned by aphysio
10392  *
10393  *     Context: Kernel thread context.
10394  */
10395 /* ARGSUSED */
10396 static int
sdawrite(dev_t dev,struct aio_req * aio,cred_t * cred_p)10397 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10398 {
10399 	struct sd_lun	*un = NULL;
10400 	struct uio	*uio = aio->aio_uio;
10401 	int		secmask;
10402 	int		err = 0;
10403 	sd_ssc_t	*ssc;
10404 
10405 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10406 		return (ENXIO);
10407 	}
10408 
10409 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10410 
10411 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10412 		mutex_enter(SD_MUTEX(un));
10413 		/*
10414 		 * Because the call to sd_ready_and_valid will issue I/O we
10415 		 * must wait here if either the device is suspended or
10416 		 * if it's power level is changing.
10417 		 */
10418 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10419 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10420 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10421 		}
10422 		un->un_ncmds_in_driver++;
10423 		mutex_exit(SD_MUTEX(un));
10424 
10425 		/* Initialize sd_ssc_t for internal uscsi commands */
10426 		ssc = sd_ssc_init(un);
10427 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10428 			err = EIO;
10429 		} else {
10430 			err = 0;
10431 		}
10432 		sd_ssc_fini(ssc);
10433 
10434 		mutex_enter(SD_MUTEX(un));
10435 		un->un_ncmds_in_driver--;
10436 		ASSERT(un->un_ncmds_in_driver >= 0);
10437 		mutex_exit(SD_MUTEX(un));
10438 		if (err != 0)
10439 			return (err);
10440 	}
10441 
10442 	/*
10443 	 * Write requests are restricted to multiples of the system block size.
10444 	 */
10445 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10446 	    !un->un_f_enable_rmw)
10447 		secmask = un->un_tgt_blocksize - 1;
10448 	else
10449 		secmask = DEV_BSIZE - 1;
10450 
10451 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10452 		SD_ERROR(SD_LOG_READ_WRITE, un,
10453 		    "sdawrite: file offset not modulo %d\n",
10454 		    secmask + 1);
10455 		err = EINVAL;
10456 	} else if (uio->uio_iov->iov_len & (secmask)) {
10457 		SD_ERROR(SD_LOG_READ_WRITE, un,
10458 		    "sdawrite: transfer length not modulo %d\n",
10459 		    secmask + 1);
10460 		err = EINVAL;
10461 	} else {
10462 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
10463 	}
10464 
10465 	return (err);
10466 }
10467 
10468 
10469 
10470 
10471 
10472 /*
10473  * Driver IO processing follows the following sequence:
10474  *
10475  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
10476  *         |                |                     ^
10477  *         v                v                     |
10478  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
10479  *         |                |                     |                   |
10480  *         v                |                     |                   |
10481  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
10482  *         |                |                     ^                   ^
10483  *         v                v                     |                   |
10484  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
10485  *         |                |                     |                   |
10486  *     +---+                |                     +------------+      +-------+
10487  *     |                    |                                  |              |
10488  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10489  *     |                    v                                  |              |
10490  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
10491  *     |                    |                                  ^              |
10492  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10493  *     |                    v                                  |              |
10494  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
10495  *     |                    |                                  ^              |
10496  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10497  *     |                    v                                  |              |
10498  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
10499  *     |                    |                                  ^              |
10500  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
10501  *     |                    v                                  |              |
10502  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
10503  *     |                    |                                  ^              |
10504  *     |                    |                                  |              |
10505  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
10506  *                          |                           ^
10507  *                          v                           |
10508  *                   sd_core_iostart()                  |
10509  *                          |                           |
10510  *                          |                           +------>(*destroypkt)()
10511  *                          +-> sd_start_cmds() <-+     |           |
10512  *                          |                     |     |           v
10513  *                          |                     |     |  scsi_destroy_pkt(9F)
10514  *                          |                     |     |
10515  *                          +->(*initpkt)()       +- sdintr()
10516  *                          |  |                        |  |
10517  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
10518  *                          |  +-> scsi_setup_cdb(9F)   |
10519  *                          |                           |
10520  *                          +--> scsi_transport(9F)     |
10521  *                                     |                |
10522  *                                     +----> SCSA ---->+
10523  *
10524  *
10525  * This code is based upon the following presumptions:
10526  *
10527  *   - iostart and iodone functions operate on buf(9S) structures. These
10528  *     functions perform the necessary operations on the buf(9S) and pass
10529  *     them along to the next function in the chain by using the macros
10530  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
10531  *     (for iodone side functions).
10532  *
10533  *   - The iostart side functions may sleep. The iodone side functions
10534  *     are called under interrupt context and may NOT sleep. Therefore
10535  *     iodone side functions also may not call iostart side functions.
10536  *     (NOTE: iostart side functions should NOT sleep for memory, as
10537  *     this could result in deadlock.)
10538  *
10539  *   - An iostart side function may call its corresponding iodone side
10540  *     function directly (if necessary).
10541  *
10542  *   - In the event of an error, an iostart side function can return a buf(9S)
10543  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
10544  *     b_error in the usual way of course).
10545  *
10546  *   - The taskq mechanism may be used by the iodone side functions to dispatch
10547  *     requests to the iostart side functions.  The iostart side functions in
10548  *     this case would be called under the context of a taskq thread, so it's
10549  *     OK for them to block/sleep/spin in this case.
10550  *
10551  *   - iostart side functions may allocate "shadow" buf(9S) structs and
10552  *     pass them along to the next function in the chain.  The corresponding
10553  *     iodone side functions must coalesce the "shadow" bufs and return
10554  *     the "original" buf to the next higher layer.
10555  *
10556  *   - The b_private field of the buf(9S) struct holds a pointer to
10557  *     an sd_xbuf struct, which contains information needed to
10558  *     construct the scsi_pkt for the command.
10559  *
10560  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
10561  *     layer must acquire & release the SD_MUTEX(un) as needed.
10562  */
10563 
10564 
10565 /*
10566  * Create taskq for all targets in the system. This is created at
10567  * _init(9E) and destroyed at _fini(9E).
10568  *
10569  * Note: here we set the minalloc to a reasonably high number to ensure that
10570  * we will have an adequate supply of task entries available at interrupt time.
10571  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
 * sd_taskq_create().  Since we do not want to sleep for allocations at
 * interrupt time, set maxalloc equal to minalloc. That way we will just fail
 * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
 * requests at any one instant in time.
10576  */
#define	SD_TASKQ_NUMTHREADS	8
#define	SD_TASKQ_MINALLOC	256
#define	SD_TASKQ_MAXALLOC	256

/* General-purpose driver taskq; created in _init(9E), torn down in _fini(9E) */
static taskq_t	*sd_tq = NULL;
_NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))

/* Tunables defaulting to the MINALLOC/MAXALLOC values defined above */
static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;

/*
 * The following task queue is being created for the write part of
 * read-modify-write of non-512 block size devices.
 * Limit the number of threads to 1 for now. This number has been chosen
 * considering the fact that it applies only to dvd ram drives/MO drives
 * currently. Performance for which is not main criteria at this stage.
 * Note: It needs to be explored if we can use a single taskq in future
 */
#define	SD_WMR_TASKQ_NUMTHREADS	1
static taskq_t	*sd_wmr_tq = NULL;
_NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
10598 
10599 /*
10600  *    Function: sd_taskq_create
10601  *
10602  * Description: Create taskq thread(s) and preallocate task entries
10603  *
10604  * Return Code: Returns a pointer to the allocated taskq_t.
10605  *
10606  *     Context: Can sleep. Requires blockable context.
10607  *
10608  *       Notes: - The taskq() facility currently is NOT part of the DDI.
 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
10610  *		- taskq_create() will block for memory, also it will panic
10611  *		  if it cannot create the requested number of threads.
10612  *		- Currently taskq_create() creates threads that cannot be
10613  *		  swapped.
10614  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
10615  *		  supply of taskq entries at interrupt time (ie, so that we
10616  *		  do not have to sleep for memory)
10617  */
10618 
10619 static void
sd_taskq_create(void)10620 sd_taskq_create(void)
10621 {
10622 	char	taskq_name[TASKQ_NAMELEN];
10623 
10624 	ASSERT(sd_tq == NULL);
10625 	ASSERT(sd_wmr_tq == NULL);
10626 
10627 	(void) snprintf(taskq_name, sizeof (taskq_name),
10628 	    "%s_drv_taskq", sd_label);
10629 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
10630 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10631 	    TASKQ_PREPOPULATE));
10632 
10633 	(void) snprintf(taskq_name, sizeof (taskq_name),
10634 	    "%s_rmw_taskq", sd_label);
10635 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
10636 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10637 	    TASKQ_PREPOPULATE));
10638 }
10639 
10640 
10641 /*
10642  *    Function: sd_taskq_delete
10643  *
10644  * Description: Complementary cleanup routine for sd_taskq_create().
10645  *
10646  *     Context: Kernel thread context.
10647  */
10648 
10649 static void
sd_taskq_delete(void)10650 sd_taskq_delete(void)
10651 {
10652 	ASSERT(sd_tq != NULL);
10653 	ASSERT(sd_wmr_tq != NULL);
10654 	taskq_destroy(sd_tq);
10655 	taskq_destroy(sd_wmr_tq);
10656 	sd_tq = NULL;
10657 	sd_wmr_tq = NULL;
10658 }
10659 
10660 
10661 /*
10662  *    Function: sdstrategy
10663  *
10664  * Description: Driver's strategy (9E) entry point function.
10665  *
10666  *   Arguments: bp - pointer to buf(9S)
10667  *
10668  * Return Code: Always returns zero
10669  *
10670  *     Context: Kernel thread context.
10671  */
10672 
static int
sdstrategy(struct buf *bp)
{
	struct sd_lun *un;

	/* No soft state for this instance: fail the buf with EIO. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* As was done in the past, fail new cmds. if state is dumping. */
	if (un->un_state == SD_STATE_DUMPING) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Commands may sneak in while we released the mutex in
	 * DDI_SUSPEND, we should block new commands. However, old
	 * commands that are still in the driver at this point should
	 * still be allowed to drain.
	 */
	mutex_enter(SD_MUTEX(un));
	/*
	 * Must wait here if either the device is suspended or
	 * if its power level is changing.
	 */
	while ((un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
	}

	/* Count this command so suspend/detach can wait for it to drain. */
	un->un_ncmds_in_driver++;

	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		/* Drop the mutex across bp_mapin since it may block. */
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/* A write may dirty the device's cache; remember to flush it. */
	if (bp->b_flags & B_WRITE)
		un->un_f_sync_cache_required = TRUE;

	mutex_exit(SD_MUTEX(un));

	/*
	 * This will (eventually) allocate the sd_xbuf area and
	 * call sd_xbuf_strategy().  We just want to return the
	 * result of ddi_xbuf_qstrategy so that we have an opt-
	 * imized tail call which saves us a stack frame.
	 */
	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
}
10740 
10741 
10742 /*
10743  *    Function: sd_xbuf_strategy
10744  *
10745  * Description: Function for initiating IO operations via the
10746  *		ddi_xbuf_qstrategy() mechanism.
10747  *
10748  *     Context: Kernel thread context.
10749  */
10750 
10751 static void
sd_xbuf_strategy(struct buf * bp,ddi_xbuf_t xp,void * arg)10752 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
10753 {
10754 	struct sd_lun *un = arg;
10755 
10756 	ASSERT(bp != NULL);
10757 	ASSERT(xp != NULL);
10758 	ASSERT(un != NULL);
10759 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10760 
10761 	/*
10762 	 * Initialize the fields in the xbuf and save a pointer to the
10763 	 * xbuf in bp->b_private.
10764 	 */
10765 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
10766 
10767 	/* Send the buf down the iostart chain */
10768 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
10769 }
10770 
10771 
10772 /*
10773  *    Function: sd_xbuf_init
10774  *
10775  * Description: Prepare the given sd_xbuf struct for use.
10776  *
10777  *   Arguments: un - ptr to softstate
10778  *		bp - ptr to associated buf(9S)
10779  *		xp - ptr to associated sd_xbuf
10780  *		chain_type - IO chain type to use:
10781  *			SD_CHAIN_NULL
10782  *			SD_CHAIN_BUFIO
10783  *			SD_CHAIN_USCSI
10784  *			SD_CHAIN_DIRECT
10785  *			SD_CHAIN_DIRECT_PRIORITY
10786  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
10787  *			initialization; may be NULL if none.
10788  *
10789  *     Context: Kernel thread context
10790  */
10791 
static void
sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
    uchar_t chain_type, void *pktinfop)
{
	int index;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
	    bp, chain_type);

	xp->xb_un	= un;
	xp->xb_pktp	= NULL;
	xp->xb_pktinfo	= pktinfop;
	/* Preserve the caller's b_private; it is restored at iodone time. */
	xp->xb_private	= bp->b_private;
	xp->xb_blkno	= (daddr_t)bp->b_blkno;

	/*
	 * Set up the iostart and iodone chain indexes in the xbuf, based
	 * upon the specified chain type to use.
	 */
	switch (chain_type) {
	case SD_CHAIN_NULL:
		/*
		 * Fall thru to just use the values for the buf type, even
		 * tho for the NULL chain these values will never be used.
		 */
		/* FALLTHRU */
	case SD_CHAIN_BUFIO:
		index = un->un_buf_chain_type;
		if ((!un->un_f_has_removable_media) &&
		    (un->un_tgt_blocksize != 0) &&
		    (un->un_tgt_blocksize != DEV_BSIZE ||
		    un->un_f_enable_rmw)) {
			/*
			 * Fixed media with a non-DEV_BSIZE block size (or
			 * RMW emulation enabled): unaligned requests must
			 * be routed through the multi-sector-size chain.
			 */
			int secmask = 0, blknomask = 0;
			if (un->un_f_enable_rmw) {
				blknomask =
				    (un->un_phy_blocksize / DEV_BSIZE) - 1;
				secmask = un->un_phy_blocksize - 1;
			} else {
				blknomask =
				    (un->un_tgt_blocksize / DEV_BSIZE) - 1;
				secmask = un->un_tgt_blocksize - 1;
			}

			if ((bp->b_lblkno & (blknomask)) ||
			    (bp->b_bcount & (secmask))) {
				if ((un->un_f_rmw_type !=
				    SD_RMW_TYPE_RETURN_ERROR) ||
				    un->un_f_enable_rmw) {
					if (un->un_f_pm_is_enabled == FALSE)
						index =
						    SD_CHAIN_INFO_MSS_DSK_NO_PM;
					else
						index =
						    SD_CHAIN_INFO_MSS_DISK;
				}
			}
		}
		break;
	case SD_CHAIN_USCSI:
		index = un->un_uscsi_chain_type;
		break;
	case SD_CHAIN_DIRECT:
		index = un->un_direct_chain_type;
		break;
	case SD_CHAIN_DIRECT_PRIORITY:
		index = un->un_priority_chain_type;
		break;
	default:
		/* We're really broken if we ever get here... */
		panic("sd_xbuf_init: illegal chain type!");
		/*NOTREACHED*/
	}

	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;

	/*
	 * It might be a bit easier to simply bzero the entire xbuf above,
	 * but it turns out that since we init a fair number of members anyway,
	 * we save a fair number cycles by doing explicit assignment of zero.
	 */
	xp->xb_pkt_flags	= 0;
	xp->xb_dma_resid	= 0;
	xp->xb_retry_count	= 0;
	xp->xb_victim_retry_count = 0;
	xp->xb_ua_retry_count	= 0;
	xp->xb_nr_retry_count	= 0;
	xp->xb_sense_bp		= NULL;
	xp->xb_sense_status	= 0;
	xp->xb_sense_state	= 0;
	xp->xb_sense_resid	= 0;
	xp->xb_ena		= 0;

	/* Hang the xbuf off the buf and reset the buf for (re)use. */
	bp->b_private	= xp;
	bp->b_flags	&= ~(B_DONE | B_ERROR);
	bp->b_resid	= 0;
	bp->av_forw	= NULL;
	bp->av_back	= NULL;
	bioerror(bp, 0);

	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
}
10898 
10899 
10900 /*
10901  *    Function: sd_uscsi_strategy
10902  *
10903  * Description: Wrapper for calling into the USCSI chain via physio(9F)
10904  *
10905  *   Arguments: bp - buf struct ptr
10906  *
10907  * Return Code: Always returns 0
10908  *
10909  *     Context: Kernel thread context
10910  */
10911 
static int
sd_uscsi_strategy(struct buf *bp)
{
	struct sd_lun		*un;
	struct sd_uscsi_info	*uip;
	struct sd_xbuf		*xp;
	uchar_t			chain_type;
	uchar_t			cmd;

	ASSERT(bp != NULL);

	/* No soft state for this instance: fail the buf with EIO. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);

	/*
	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
	 */
	ASSERT(bp->b_private != NULL);
	uip = (struct sd_uscsi_info *)bp->b_private;
	cmd = ((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_cdb[0];

	mutex_enter(SD_MUTEX(un));
	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	/* Count this command so suspend/detach can wait for it to drain. */
	un->un_ncmds_in_driver++;
	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/*
	 * Writes of nonzero length, other than MODE SELECT commands, may
	 * dirty the device's cache; remember to flush it later.
	 */
	if ((bp->b_flags & B_WRITE) && (bp->b_bcount != 0) &&
	    (cmd != SCMD_MODE_SELECT) && (cmd != SCMD_MODE_SELECT_G1))
		un->un_f_sync_cache_required = TRUE;

	mutex_exit(SD_MUTEX(un));

	/* Map the caller's path flag to the corresponding IO chain type. */
	switch (uip->ui_flags) {
	case SD_PATH_DIRECT:
		chain_type = SD_CHAIN_DIRECT;
		break;
	case SD_PATH_DIRECT_PRIORITY:
		chain_type = SD_CHAIN_DIRECT_PRIORITY;
		break;
	default:
		chain_type = SD_CHAIN_USCSI;
		break;
	}

	/*
	 * We may allocate extra buf for external USCSI commands. If the
	 * application asks for bigger than 20-byte sense data via USCSI,
	 * SCSA layer will allocate 252 bytes sense buf for that command.
	 */
	if (((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_rqlen >
	    SENSE_LENGTH) {
		xp = kmem_zalloc(sizeof (struct sd_xbuf) - SENSE_LENGTH +
		    MAX_SENSE_LENGTH, KM_SLEEP);
	} else {
		xp = kmem_zalloc(sizeof (struct sd_xbuf), KM_SLEEP);
	}

	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);

	/* Use the index obtained within xbuf_init */
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);

	return (0);
}
10997 
10998 /*
10999  *    Function: sd_send_scsi_cmd
11000  *
11001  * Description: Runs a USCSI command for user (when called thru sdioctl),
11002  *		or for the driver
11003  *
11004  *   Arguments: dev - the dev_t for the device
11005  *		incmd - ptr to a valid uscsi_cmd struct
11006  *		flag - bit flag, indicating open settings, 32/64 bit type
11007  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11008  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11009  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11010  *			to use the USCSI "direct" chain and bypass the normal
11011  *			command waitq.
11012  *
11013  * Return Code: 0 -  successful completion of the given command
11014  *		EIO - scsi_uscsi_handle_command() failed
11015  *		ENXIO  - soft state not found for specified dev
11016  *		EINVAL
11017  *		EFAULT - copyin/copyout error
11018  *		return code of scsi_uscsi_handle_command():
11019  *			EIO
11020  *			ENXIO
11021  *			EACCES
11022  *
11023  *     Context: Waits for command to complete. Can sleep.
11024  */
11025 
11026 static int
sd_send_scsi_cmd(dev_t dev,struct uscsi_cmd * incmd,int flag,enum uio_seg dataspace,int path_flag)11027 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
11028     enum uio_seg dataspace, int path_flag)
11029 {
11030 	struct sd_lun	*un;
11031 	sd_ssc_t	*ssc;
11032 	int		rval;
11033 
11034 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11035 	if (un == NULL) {
11036 		return (ENXIO);
11037 	}
11038 
11039 	/*
11040 	 * Using sd_ssc_send to handle uscsi cmd
11041 	 */
11042 	ssc = sd_ssc_init(un);
11043 	rval = sd_ssc_send(ssc, incmd, flag, dataspace, path_flag);
11044 	sd_ssc_fini(ssc);
11045 
11046 	return (rval);
11047 }
11048 
11049 /*
11050  *    Function: sd_ssc_init
11051  *
11052  * Description: Uscsi end-user call this function to initialize necessary
11053  *              fields, such as uscsi_cmd and sd_uscsi_info struct.
11054  *
11055  *              The return value of sd_send_scsi_cmd will be treated as a
 *              fault in various conditions. Even if it is not zero, some
11057  *              callers may ignore the return value. That is to say, we can
11058  *              not make an accurate assessment in sdintr, since if a
11059  *              command is failed in sdintr it does not mean the caller of
11060  *              sd_send_scsi_cmd will treat it as a real failure.
11061  *
11062  *              To avoid printing too many error logs for a failed uscsi
11063  *              packet that the caller may not treat it as a failure, the
11064  *              sd will keep silent for handling all uscsi commands.
11065  *
11066  *              During detach->attach and attach-open, for some types of
11067  *              problems, the driver should be providing information about
11068  *              the problem encountered. Device use USCSI_SILENT, which
11069  *              suppresses all driver information. The result is that no
11070  *              information about the problem is available. Being
11071  *              completely silent during this time is inappropriate. The
11072  *              driver needs a more selective filter than USCSI_SILENT, so
11073  *              that information related to faults is provided.
11074  *
 *              To make an accurate assessment, the caller of
11076  *              sd_send_scsi_USCSI_CMD should take the ownership and
11077  *              get necessary information to print error messages.
11078  *
11079  *              If we want to print necessary info of uscsi command, we need to
11080  *              keep the uscsi_cmd and sd_uscsi_info till we can make the
11081  *              assessment. We use sd_ssc_init to alloc necessary
11082  *              structs for sending an uscsi command and we are also
11083  *              responsible for free the memory by calling
11084  *              sd_ssc_fini.
11085  *
 *              The calling sequences will look like:
11087  *              sd_ssc_init->
11088  *
11089  *                  ...
11090  *
11091  *                  sd_send_scsi_USCSI_CMD->
11092  *                      sd_ssc_send-> - - - sdintr
11093  *                  ...
11094  *
11095  *                  if we think the return value should be treated as a
 *                  failure, we make the assessment here and print out
 *                  the necessary information by retrieving uscsi_cmd and
 *                  sd_uscsi_info.
11098  *
11099  *                  ...
11100  *
11101  *              sd_ssc_fini
11102  *
11103  *
11104  *   Arguments: un - pointer to driver soft state (unit) structure for this
11105  *                   target.
11106  *
 * Return code: sd_ssc_t - pointer to allocated sd_ssc_t struct, it contains
 *                         uscsi_cmd and sd_uscsi_info.
 *                         Note: the allocations below use KM_SLEEP, so in
 *                         practice this routine does not return NULL.
11110  *
11111  *     Context: Kernel Thread.
11112  */
11113 static sd_ssc_t *
sd_ssc_init(struct sd_lun * un)11114 sd_ssc_init(struct sd_lun *un)
11115 {
11116 	sd_ssc_t		*ssc;
11117 	struct uscsi_cmd	*ucmdp;
11118 	struct sd_uscsi_info	*uip;
11119 
11120 	ASSERT(un != NULL);
11121 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11122 
11123 	/*
11124 	 * Allocate sd_ssc_t structure
11125 	 */
11126 	ssc = kmem_zalloc(sizeof (sd_ssc_t), KM_SLEEP);
11127 
11128 	/*
11129 	 * Allocate uscsi_cmd by calling scsi_uscsi_alloc common routine
11130 	 */
11131 	ucmdp = scsi_uscsi_alloc();
11132 
11133 	/*
11134 	 * Allocate sd_uscsi_info structure
11135 	 */
11136 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11137 
11138 	ssc->ssc_uscsi_cmd = ucmdp;
11139 	ssc->ssc_uscsi_info = uip;
11140 	ssc->ssc_un = un;
11141 
11142 	return (ssc);
11143 }
11144 
11145 /*
11146  * Function: sd_ssc_fini
11147  *
 * Description: Free the sd_ssc_t struct and the structures hanging off it.
11149  *
11150  * Arguments: ssc - struct pointer of sd_ssc_t.
11151  */
11152 static void
sd_ssc_fini(sd_ssc_t * ssc)11153 sd_ssc_fini(sd_ssc_t *ssc)
11154 {
11155 	scsi_uscsi_free(ssc->ssc_uscsi_cmd);
11156 
11157 	if (ssc->ssc_uscsi_info != NULL) {
11158 		kmem_free(ssc->ssc_uscsi_info, sizeof (struct sd_uscsi_info));
11159 		ssc->ssc_uscsi_info = NULL;
11160 	}
11161 
11162 	kmem_free(ssc, sizeof (sd_ssc_t));
11163 	ssc = NULL;
11164 }
11165 
11166 /*
11167  * Function: sd_ssc_send
11168  *
11169  * Description: Runs a USCSI command for user when called through sdioctl,
11170  *              or for the driver.
11171  *
11172  *   Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11173  *                    sd_uscsi_info in.
11174  *		incmd - ptr to a valid uscsi_cmd struct
11175  *		flag - bit flag, indicating open settings, 32/64 bit type
11176  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11177  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11178  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11179  *			to use the USCSI "direct" chain and bypass the normal
11180  *			command waitq.
11181  *
11182  * Return Code: 0 -  successful completion of the given command
11183  *		EIO - scsi_uscsi_handle_command() failed
11184  *		ENXIO  - soft state not found for specified dev
11185  *		ECANCELED - command cancelled due to low power
11186  *		EINVAL
11187  *		EFAULT - copyin/copyout error
11188  *		return code of scsi_uscsi_handle_command():
11189  *			EIO
11190  *			ENXIO
11191  *			EACCES
11192  *
11193  *     Context: Kernel Thread;
11194  *              Waits for command to complete. Can sleep.
11195  */
11196 static int
sd_ssc_send(sd_ssc_t * ssc,struct uscsi_cmd * incmd,int flag,enum uio_seg dataspace,int path_flag)11197 sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd, int flag,
11198     enum uio_seg dataspace, int path_flag)
11199 {
11200 	struct sd_uscsi_info	*uip;
11201 	struct uscsi_cmd	*uscmd;
11202 	struct sd_lun		*un;
11203 	dev_t			dev;
11204 
11205 	int	format = 0;
11206 	int	rval;
11207 
11208 	ASSERT(ssc != NULL);
11209 	un = ssc->ssc_un;
11210 	ASSERT(un != NULL);
11211 	uscmd = ssc->ssc_uscsi_cmd;
11212 	ASSERT(uscmd != NULL);
11213 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11214 	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
11215 		/*
11216 		 * If enter here, it indicates that the previous uscsi
11217 		 * command has not been processed by sd_ssc_assessment.
11218 		 * This is violating our rules of FMA telemetry processing.
11219 		 * We should print out this message and the last undisposed
11220 		 * uscsi command.
11221 		 */
11222 		if (uscmd->uscsi_cdb != NULL) {
11223 			SD_INFO(SD_LOG_SDTEST, un,
11224 			    "sd_ssc_send is missing the alternative "
11225 			    "sd_ssc_assessment when running command 0x%x.\n",
11226 			    uscmd->uscsi_cdb[0]);
11227 		}
11228 		/*
11229 		 * Set the ssc_flags to SSC_FLAGS_UNKNOWN, which should be
11230 		 * the initial status.
11231 		 */
11232 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11233 	}
11234 
11235 	/*
11236 	 * We need to make sure sd_ssc_send will have sd_ssc_assessment
11237 	 * followed to avoid missing FMA telemetries.
11238 	 */
11239 	ssc->ssc_flags |= SSC_FLAGS_NEED_ASSESSMENT;
11240 
11241 	/*
11242 	 * if USCSI_PMFAILFAST is set and un is in low power, fail the
11243 	 * command immediately.
11244 	 */
11245 	mutex_enter(SD_MUTEX(un));
11246 	mutex_enter(&un->un_pm_mutex);
11247 	if ((uscmd->uscsi_flags & USCSI_PMFAILFAST) &&
11248 	    SD_DEVICE_IS_IN_LOW_POWER(un)) {
11249 		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send:"
11250 		    "un:0x%p is in low power\n", un);
11251 		mutex_exit(&un->un_pm_mutex);
11252 		mutex_exit(SD_MUTEX(un));
11253 		return (ECANCELED);
11254 	}
11255 	mutex_exit(&un->un_pm_mutex);
11256 	mutex_exit(SD_MUTEX(un));
11257 
11258 #ifdef SDDEBUG
11259 	switch (dataspace) {
11260 	case UIO_USERSPACE:
11261 		SD_TRACE(SD_LOG_IO, un,
11262 		    "sd_ssc_send: entry: un:0x%p UIO_USERSPACE\n", un);
11263 		break;
11264 	case UIO_SYSSPACE:
11265 		SD_TRACE(SD_LOG_IO, un,
11266 		    "sd_ssc_send: entry: un:0x%p UIO_SYSSPACE\n", un);
11267 		break;
11268 	default:
11269 		SD_TRACE(SD_LOG_IO, un,
11270 		    "sd_ssc_send: entry: un:0x%p UNEXPECTED SPACE\n", un);
11271 		break;
11272 	}
11273 #endif
11274 
11275 	rval = scsi_uscsi_copyin((intptr_t)incmd, flag,
11276 	    SD_ADDRESS(un), &uscmd);
11277 	if (rval != 0) {
11278 		SD_TRACE(SD_LOG_IO, un, "sd_sense_scsi_cmd: "
11279 		    "scsi_uscsi_alloc_and_copyin failed\n", un);
11280 		return (rval);
11281 	}
11282 
11283 	if ((uscmd->uscsi_cdb != NULL) &&
11284 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
11285 		mutex_enter(SD_MUTEX(un));
11286 		un->un_f_format_in_progress = TRUE;
11287 		mutex_exit(SD_MUTEX(un));
11288 		format = 1;
11289 	}
11290 
11291 	/*
11292 	 * Allocate an sd_uscsi_info struct and fill it with the info
11293 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11294 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11295 	 * since we allocate the buf here in this function, we do not
11296 	 * need to preserve the prior contents of b_private.
11297 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11298 	 */
11299 	uip = ssc->ssc_uscsi_info;
11300 	uip->ui_flags = path_flag;
11301 	uip->ui_cmdp = uscmd;
11302 
11303 	/*
11304 	 * Commands sent with priority are intended for error recovery
11305 	 * situations, and do not have retries performed.
11306 	 */
11307 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
11308 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
11309 	}
11310 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
11311 
11312 	dev = SD_GET_DEV(un);
11313 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
11314 	    sd_uscsi_strategy, NULL, uip);
11315 
11316 	/*
11317 	 * mark ssc_flags right after handle_cmd to make sure
11318 	 * the uscsi has been sent
11319 	 */
11320 	ssc->ssc_flags |= SSC_FLAGS_CMD_ISSUED;
11321 
11322 #ifdef SDDEBUG
11323 	SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
11324 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
11325 	    uscmd->uscsi_status, uscmd->uscsi_resid);
11326 	if (uscmd->uscsi_bufaddr != NULL) {
11327 		SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
11328 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
11329 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
11330 		if (dataspace == UIO_SYSSPACE) {
11331 			SD_DUMP_MEMORY(un, SD_LOG_IO,
11332 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
11333 			    uscmd->uscsi_buflen, SD_LOG_HEX);
11334 		}
11335 	}
11336 #endif
11337 
11338 	if (format == 1) {
11339 		mutex_enter(SD_MUTEX(un));
11340 		un->un_f_format_in_progress = FALSE;
11341 		mutex_exit(SD_MUTEX(un));
11342 	}
11343 
11344 	(void) scsi_uscsi_copyout((intptr_t)incmd, uscmd);
11345 
11346 	return (rval);
11347 }
11348 
11349 /*
11350  *     Function: sd_ssc_print
11351  *
11352  * Description: Print information available to the console.
11353  *
11354  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11355  *                    sd_uscsi_info in.
11356  *            sd_severity - log level.
11357  *     Context: Kernel thread or interrupt context.
11358  */
11359 static void
sd_ssc_print(sd_ssc_t * ssc,int sd_severity)11360 sd_ssc_print(sd_ssc_t *ssc, int sd_severity)
11361 {
11362 	struct uscsi_cmd	*ucmdp;
11363 	struct scsi_device	*devp;
11364 	dev_info_t		*devinfo;
11365 	uchar_t			*sensep;
11366 	int			senlen;
11367 	union scsi_cdb		*cdbp;
11368 	uchar_t			com;
11369 	extern struct scsi_key_strings scsi_cmds[];
11370 
11371 	ASSERT(ssc != NULL);
11372 	ASSERT(ssc->ssc_un != NULL);
11373 
11374 	if (SD_FM_LOG(ssc->ssc_un) != SD_FM_LOG_EREPORT)
11375 		return;
11376 	ucmdp = ssc->ssc_uscsi_cmd;
11377 	devp = SD_SCSI_DEVP(ssc->ssc_un);
11378 	devinfo = SD_DEVINFO(ssc->ssc_un);
11379 	ASSERT(ucmdp != NULL);
11380 	ASSERT(devp != NULL);
11381 	ASSERT(devinfo != NULL);
11382 	sensep = (uint8_t *)ucmdp->uscsi_rqbuf;
11383 	senlen = ucmdp->uscsi_rqlen - ucmdp->uscsi_rqresid;
11384 	cdbp = (union scsi_cdb *)ucmdp->uscsi_cdb;
11385 
11386 	/* In certain case (like DOORLOCK), the cdb could be NULL. */
11387 	if (cdbp == NULL)
11388 		return;
11389 	/* We don't print log if no sense data available. */
11390 	if (senlen == 0)
11391 		sensep = NULL;
11392 	com = cdbp->scc_cmd;
11393 	scsi_generic_errmsg(devp, sd_label, sd_severity, 0, 0, com,
11394 	    scsi_cmds, sensep, ssc->ssc_un->un_additional_codes, NULL);
11395 }
11396 
11397 /*
11398  *     Function: sd_ssc_assessment
11399  *
11400  * Description: We use this function to make an assessment at the point
11401  *              where SD driver may encounter a potential error.
11402  *
11403  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11404  *                  sd_uscsi_info in.
11405  *            tp_assess - a hint of strategy for ereport posting.
11406  *            Possible values of tp_assess include:
11407  *                SD_FMT_IGNORE - we don't post any ereport because we're
11408  *                sure that it is ok to ignore the underlying problems.
11409  *                SD_FMT_IGNORE_COMPROMISE - we don't post any ereport for now
11410  *                but it might be not correct to ignore the underlying hardware
11411  *                error.
11412  *                SD_FMT_STATUS_CHECK - we will post an ereport with the
11413  *                payload driver-assessment of value "fail" or
11414  *                "fatal"(depending on what information we have here). This
11415  *                assessment value is usually set when SD driver think there
11416  *                is a potential error occurred(Typically, when return value
11417  *                of the SCSI command is EIO).
11418  *                SD_FMT_STANDARD - we will post an ereport with the payload
11419  *                driver-assessment of value "info". This assessment value is
11420  *                set when the SCSI command returned successfully and with
11421  *                sense data sent back.
11422  *
11423  *     Context: Kernel thread.
11424  */
static void
sd_ssc_assessment(sd_ssc_t *ssc, enum sd_type_assessment tp_assess)
{
	int senlen = 0;
	struct uscsi_cmd *ucmdp = NULL;
	struct sd_lun *un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ucmdp = ssc->ssc_uscsi_cmd;
	ASSERT(ucmdp != NULL);

	/*
	 * The normal pairing is: sd_ssc_send sets
	 * SSC_FLAGS_NEED_ASSESSMENT, and exactly one sd_ssc_assessment
	 * call consumes it.  Detect (and log) any violation of that
	 * pairing before doing real work.
	 */
	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
		ssc->ssc_flags &= ~SSC_FLAGS_NEED_ASSESSMENT;
	} else {
		/*
		 * If enter here, it indicates that we have a wrong
		 * calling sequence of sd_ssc_send and sd_ssc_assessment,
		 * both of which should be called in a pair in case of
		 * loss of FMA telemetries.
		 */
		if (ucmdp->uscsi_cdb != NULL) {
			SD_INFO(SD_LOG_SDTEST, un,
			    "sd_ssc_assessment is missing the "
			    "alternative sd_ssc_send when running 0x%x, "
			    "or there are superfluous sd_ssc_assessment for "
			    "the same sd_ssc_send.\n",
			    ucmdp->uscsi_cdb[0]);
		}
		/*
		 * Set the ssc_flags to the initial value to avoid passing
		 * down dirty flags to the following sd_ssc_send function.
		 */
		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
		return;
	}

	/*
	 * Only handle an issued command which is waiting for assessment.
	 * A command which was never issued will not have
	 * SSC_FLAGS_CMD_ISSUED set, so it's ok to just log at INFO
	 * severity and return here.
	 */
	if (!(ssc->ssc_flags & SSC_FLAGS_CMD_ISSUED)) {
		sd_ssc_print(ssc, SCSI_ERR_INFO);
		return;
	} else {
		/*
		 * For an issued command, we should clear this flag in
		 * order to make the sd_ssc_t structure be used off
		 * multiple uscsi commands.
		 */
		ssc->ssc_flags &= ~SSC_FLAGS_CMD_ISSUED;
	}

	/*
	 * We will not deal with non-retryable(flag USCSI_DIAGNOSE set)
	 * commands here. And we should clear the ssc_flags before return.
	 */
	if (ucmdp->uscsi_flags & USCSI_DIAGNOSE) {
		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
		return;
	}

	switch (tp_assess) {
	case SD_FMT_IGNORE:
	case SD_FMT_IGNORE_COMPROMISE:
		/* Caller decided no ereport is warranted. */
		break;
	case SD_FMT_STATUS_CHECK:
		/*
		 * For a failed command(including the succeeded command
		 * with invalid data sent back).
		 */
		sd_ssc_post(ssc, SD_FM_DRV_FATAL);
		break;
	case SD_FMT_STANDARD:
		/*
		 * Always for the succeeded commands probably with sense
		 * data sent back.
		 * Limitation:
		 *	We can only handle a succeeded command with sense
		 *	data sent back when auto-request-sense is enabled.
		 */
		senlen = ssc->ssc_uscsi_cmd->uscsi_rqlen -
		    ssc->ssc_uscsi_cmd->uscsi_rqresid;
		if ((ssc->ssc_uscsi_info->ui_pkt_state & STATE_ARQ_DONE) &&
		    (un->un_f_arq_enabled == TRUE) &&
		    senlen > 0 &&
		    ssc->ssc_uscsi_cmd->uscsi_rqbuf != NULL) {
			sd_ssc_post(ssc, SD_FM_DRV_NOTICE);
		}
		break;
	default:
		/*
		 * Should not have other type of assessment.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "sd_ssc_assessment got wrong "
		    "sd_type_assessment %d.\n", tp_assess);
		break;
	}
	/*
	 * Clear up the ssc_flags before return.
	 */
	ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
}
11531 
11532 /*
11533  *    Function: sd_ssc_post
11534  *
11535  * Description: 1. read the driver property to get fm-scsi-log flag.
11536  *              2. print log if fm_log_capable is non-zero.
11537  *              3. call sd_ssc_ereport_post to post ereport if possible.
11538  *
11539  *    Context: May be called from kernel thread or interrupt context.
11540  */
11541 static void
sd_ssc_post(sd_ssc_t * ssc,enum sd_driver_assessment sd_assess)11542 sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess)
11543 {
11544 	struct sd_lun	*un;
11545 	int		sd_severity;
11546 
11547 	ASSERT(ssc != NULL);
11548 	un = ssc->ssc_un;
11549 	ASSERT(un != NULL);
11550 
11551 	/*
11552 	 * We may enter here from sd_ssc_assessment(for USCSI command) or
11553 	 * by directly called from sdintr context.
11554 	 * We don't handle a non-disk drive(CD-ROM, removable media).
11555 	 * Clear the ssc_flags before return in case we've set
11556 	 * SSC_FLAGS_INVALID_XXX which should be skipped for a non-disk
11557 	 * driver.
11558 	 */
11559 	if (ISCD(un) || un->un_f_has_removable_media) {
11560 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11561 		return;
11562 	}
11563 
11564 	switch (sd_assess) {
11565 		case SD_FM_DRV_FATAL:
11566 			sd_severity = SCSI_ERR_FATAL;
11567 			break;
11568 		case SD_FM_DRV_RECOVERY:
11569 			sd_severity = SCSI_ERR_RECOVERED;
11570 			break;
11571 		case SD_FM_DRV_RETRY:
11572 			sd_severity = SCSI_ERR_RETRYABLE;
11573 			break;
11574 		case SD_FM_DRV_NOTICE:
11575 			sd_severity = SCSI_ERR_INFO;
11576 			break;
11577 		default:
11578 			sd_severity = SCSI_ERR_UNKNOWN;
11579 	}
11580 	/* print log */
11581 	sd_ssc_print(ssc, sd_severity);
11582 
11583 	/* always post ereport */
11584 	sd_ssc_ereport_post(ssc, sd_assess);
11585 }
11586 
11587 /*
11588  *    Function: sd_ssc_set_info
11589  *
11590  * Description: Mark ssc_flags and set ssc_info which would be the
11591  *              payload of uderr ereport. This function will cause
11592  *              sd_ssc_ereport_post to post uderr ereport only.
11593  *              Besides, when ssc_flags == SSC_FLAGS_INVALID_DATA(USCSI),
11594  *              the function will also call SD_ERROR or scsi_log for a
11595  *              CDROM/removable-media/DDI_FM_NOT_CAPABLE device.
11596  *
11597  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11598  *                  sd_uscsi_info in.
11599  *            ssc_flags - indicate the sub-category of a uderr.
11600  *            comp - this argument is meaningful only when
11601  *                   ssc_flags == SSC_FLAGS_INVALID_DATA, and its possible
11602  *                   values include:
11603  *                   > 0, SD_ERROR is used with comp as the driver logging
11604  *                   component;
11605  *                   = 0, scsi-log is used to log error telemetries;
11606  *                   < 0, no log available for this telemetry.
11607  *
11608  *    Context: Kernel thread or interrupt context
11609  */
11610 static void
sd_ssc_set_info(sd_ssc_t * ssc,int ssc_flags,uint_t comp,const char * fmt,...)11611 sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp, const char *fmt, ...)
11612 {
11613 	va_list	ap;
11614 
11615 	ASSERT(ssc != NULL);
11616 	ASSERT(ssc->ssc_un != NULL);
11617 
11618 	ssc->ssc_flags |= ssc_flags;
11619 	va_start(ap, fmt);
11620 	(void) vsnprintf(ssc->ssc_info, sizeof (ssc->ssc_info), fmt, ap);
11621 	va_end(ap);
11622 
11623 	/*
11624 	 * If SSC_FLAGS_INVALID_DATA is set, it should be a uscsi command
11625 	 * with invalid data sent back. For non-uscsi command, the
11626 	 * following code will be bypassed.
11627 	 */
11628 	if (ssc_flags & SSC_FLAGS_INVALID_DATA) {
11629 		if (SD_FM_LOG(ssc->ssc_un) == SD_FM_LOG_NSUP) {
11630 			/*
11631 			 * If the error belong to certain component and we
11632 			 * do not want it to show up on the console, we
11633 			 * will use SD_ERROR, otherwise scsi_log is
11634 			 * preferred.
11635 			 */
11636 			if (comp > 0) {
11637 				SD_ERROR(comp, ssc->ssc_un, ssc->ssc_info);
11638 			} else if (comp == 0) {
11639 				scsi_log(SD_DEVINFO(ssc->ssc_un), sd_label,
11640 				    CE_WARN, ssc->ssc_info);
11641 			}
11642 		}
11643 	}
11644 }
11645 
11646 /*
11647  *    Function: sd_buf_iodone
11648  *
11649  * Description: Frees the sd_xbuf & returns the buf to its originator.
11650  *
11651  *     Context: May be called from interrupt context.
11652  */
11653 /* ARGSUSED */
11654 static void
sd_buf_iodone(int index,struct sd_lun * un,struct buf * bp)11655 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
11656 {
11657 	struct sd_xbuf *xp;
11658 
11659 	ASSERT(un != NULL);
11660 	ASSERT(bp != NULL);
11661 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11662 
11663 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
11664 
11665 	xp = SD_GET_XBUF(bp);
11666 	ASSERT(xp != NULL);
11667 
11668 	/* xbuf is gone after this */
11669 	if (ddi_xbuf_done(bp, un->un_xbuf_attr)) {
11670 		mutex_enter(SD_MUTEX(un));
11671 
11672 		/*
11673 		 * Grab time when the cmd completed.
11674 		 * This is used for determining if the system has been
11675 		 * idle long enough to make it idle to the PM framework.
11676 		 * This is for lowering the overhead, and therefore improving
11677 		 * performance per I/O operation.
11678 		 */
11679 		un->un_pm_idle_time = gethrtime();
11680 
11681 		un->un_ncmds_in_driver--;
11682 		ASSERT(un->un_ncmds_in_driver >= 0);
11683 		SD_INFO(SD_LOG_IO, un,
11684 		    "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
11685 		    un->un_ncmds_in_driver);
11686 
11687 		mutex_exit(SD_MUTEX(un));
11688 	}
11689 
11690 	biodone(bp);				/* bp is gone after this */
11691 
11692 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
11693 }
11694 
11695 
11696 /*
11697  *    Function: sd_uscsi_iodone
11698  *
11699  * Description: Frees the sd_xbuf & returns the buf to its originator.
11700  *
11701  *     Context: May be called from interrupt context.
11702  */
11703 /* ARGSUSED */
11704 static void
sd_uscsi_iodone(int index,struct sd_lun * un,struct buf * bp)11705 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11706 {
11707 	struct sd_xbuf *xp;
11708 
11709 	ASSERT(un != NULL);
11710 	ASSERT(bp != NULL);
11711 
11712 	xp = SD_GET_XBUF(bp);
11713 	ASSERT(xp != NULL);
11714 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11715 
11716 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
11717 
11718 	bp->b_private = xp->xb_private;
11719 
11720 	mutex_enter(SD_MUTEX(un));
11721 
11722 	/*
11723 	 * Grab time when the cmd completed.
11724 	 * This is used for determining if the system has been
11725 	 * idle long enough to make it idle to the PM framework.
11726 	 * This is for lowering the overhead, and therefore improving
11727 	 * performance per I/O operation.
11728 	 */
11729 	un->un_pm_idle_time = gethrtime();
11730 
11731 	un->un_ncmds_in_driver--;
11732 	ASSERT(un->un_ncmds_in_driver >= 0);
11733 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
11734 	    un->un_ncmds_in_driver);
11735 
11736 	mutex_exit(SD_MUTEX(un));
11737 
11738 	if (((struct uscsi_cmd *)(xp->xb_pktinfo))->uscsi_rqlen >
11739 	    SENSE_LENGTH) {
11740 		kmem_free(xp, sizeof (struct sd_xbuf) - SENSE_LENGTH +
11741 		    MAX_SENSE_LENGTH);
11742 	} else {
11743 		kmem_free(xp, sizeof (struct sd_xbuf));
11744 	}
11745 
11746 	biodone(bp);
11747 
11748 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
11749 }
11750 
11751 
11752 /*
11753  *    Function: sd_mapblockaddr_iostart
11754  *
11755  * Description: Verify request lies within the partition limits for
11756  *		the indicated minor device.  Issue "overrun" buf if
11757  *		request would exceed partition range.  Converts
11758  *		partition-relative block address to absolute.
11759  *
11760  *              Upon exit of this function:
11761  *              1.I/O is aligned
11762  *                 xp->xb_blkno represents the absolute sector address
11763  *              2.I/O is misaligned
11764  *                 xp->xb_blkno represents the absolute logical block address
11765  *                 based on DEV_BSIZE. The logical block address will be
11766  *                 converted to physical sector address in sd_mapblocksize_\
11767  *                 iostart.
11768  *              3.I/O is misaligned but is aligned in "overrun" buf
11769  *                 xp->xb_blkno represents the absolute logical block address
11770  *                 based on DEV_BSIZE. The logical block address will be
11771  *                 converted to physical sector address in sd_mapblocksize_\
11772  *                 iostart. But no RMW will be issued in this case.
11773  *
11774  *     Context: Can sleep
11775  *
11776  *      Issues: This follows what the old code did, in terms of accessing
11777  *		some of the partition info in the unit struct without holding
 *		the mutex.  This is a general issue, if the partition info
11779  *		can be altered while IO is in progress... as soon as we send
11780  *		a buf, its partitioning can be invalid before it gets to the
11781  *		device.  Probably the right fix is to move partitioning out
11782  *		of the driver entirely.
11783  */
11784 
static void
sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	diskaddr_t	nblocks;	/* #blocks in the given partition */
	daddr_t	blocknum;	/* Block number specified by the buf */
	size_t	requested_nblocks;
	size_t	available_nblocks;
	int	partition;
	diskaddr_t	partition_offset;
	struct sd_xbuf *xp;
	int secmask = 0, blknomask = 0;
	ushort_t is_aligned = TRUE;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * If the geometry is not indicated as valid, attempt to access
	 * the unit & verify the geometry/label. This can be the case for
	 * removable-media devices, or if the device was opened in
	 * NDELAY/NONBLOCK mode.
	 */
	partition = SDPART(bp->b_edev);

	if (!SD_IS_VALID_LABEL(un)) {
		sd_ssc_t *ssc;
		/*
		 * Initialize sd_ssc_t for internal uscsi commands.
		 * To avoid a potential performance issue, we only
		 * allocate this memory when the label is invalid.
		 */
		ssc = sd_ssc_init(un);

		if (sd_ready_and_valid(ssc, partition) != SD_READY_VALID) {
			/*
			 * For removable devices it is possible to start an
			 * I/O without a media by opening the device in nodelay
			 * mode. Also for writable CDs there can be many
			 * scenarios where there is no geometry yet but volume
			 * manager is trying to issue a read() just because
			 * it can see TOC on the CD. So do not print a message
			 * for removables.
			 */
			if (!un->un_f_has_removable_media) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "i/o to invalid geometry\n");
			}
			bioerror(bp, EIO);
			bp->b_resid = bp->b_bcount;
			SD_BEGIN_IODONE(index, un, bp);

			sd_ssc_fini(ssc);
			return;
		}
		sd_ssc_fini(ssc);
	}

	nblocks = 0;
	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);

	/*
	 * Alignment is judged against the physical sector size when RMW
	 * is forced on, otherwise against the target block size.  Both
	 * masks are expressed in DEV_BSIZE units.
	 */
	if (un->un_f_enable_rmw) {
		blknomask = (un->un_phy_blocksize / DEV_BSIZE) - 1;
		secmask = un->un_phy_blocksize - 1;
	} else {
		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
		secmask = un->un_tgt_blocksize - 1;
	}

	/* Misaligned if either the start block or the byte count is off. */
	if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) {
		is_aligned = FALSE;
	}

	if (!(NOT_DEVBSIZE(un)) || un->un_f_enable_rmw) {
		/*
		 * If I/O is aligned, no need to involve RMW(Read Modify Write)
		 * Convert the logical block number to target's physical sector
		 * number.
		 */
		if (is_aligned) {
			xp->xb_blkno = SD_SYS2TGTBLOCK(un, xp->xb_blkno);
		} else {
			/*
			 * There is no RMW if we're just reading, so don't
			 * warn or error out because of it.
			 */
			if (bp->b_flags & B_READ) {
				/*EMPTY*/
			} else if (!un->un_f_enable_rmw &&
			    un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR) {
				bp->b_flags |= B_ERROR;
				goto error_exit;
			} else if (un->un_f_rmw_type == SD_RMW_TYPE_DEFAULT) {
				/*
				 * Warn (rate-limited via un_rmw_msg_timeid)
				 * that misaligned writes fall back to the
				 * slow Read-Modify-Write path.
				 */
				mutex_enter(SD_MUTEX(un));
				if (!un->un_f_enable_rmw &&
				    un->un_rmw_msg_timeid == NULL) {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "I/O request is not "
					    "aligned with %d disk sector size. "
					    "It is handled through Read Modify "
					    "Write but the performance is "
					    "very low.\n",
					    un->un_tgt_blocksize);
					un->un_rmw_msg_timeid =
					    timeout(sd_rmw_msg_print_handler,
					    un, SD_RMW_MSG_PRINT_TIMEOUT);
				} else {
					un->un_rmw_incre_count ++;
				}
				mutex_exit(SD_MUTEX(un));
			}

			/*
			 * For the misaligned case the partition bounds are
			 * kept in DEV_BSIZE units, matching xb_blkno.
			 */
			nblocks = SD_TGT2SYSBLOCK(un, nblocks);
			partition_offset = SD_TGT2SYSBLOCK(un,
			    partition_offset);
		}
	}

	/*
	 * blocknum is the starting block number of the request. At this
	 * point it is still relative to the start of the minor device.
	 */
	blocknum = xp->xb_blkno;

	/*
	 * Legacy: If the starting block number is one past the last block
	 * in the partition, do not set B_ERROR in the buf.
	 */
	if (blocknum == nblocks)  {
		goto error_exit;
	}

	/*
	 * Confirm that the first block of the request lies within the
	 * partition limits. Also the requested number of bytes must be
	 * a multiple of the system block size.
	 */
	if ((blocknum < 0) || (blocknum >= nblocks) ||
	    ((bp->b_bcount & (DEV_BSIZE - 1)) != 0)) {
		bp->b_flags |= B_ERROR;
		goto error_exit;
	}

	/*
	 * If the requested # blocks exceeds the available # blocks, that
	 * is an overrun of the partition.
	 */
	if ((!NOT_DEVBSIZE(un)) && is_aligned) {
		requested_nblocks = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
	} else {
		requested_nblocks = SD_BYTES2SYSBLOCKS(bp->b_bcount);
	}

	available_nblocks = (size_t)(nblocks - blocknum);
	ASSERT(nblocks >= blocknum);

	if (requested_nblocks > available_nblocks) {
		size_t resid;

		/*
		 * Allocate an "overrun" buf to allow the request to proceed
		 * for the amount of space available in the partition. The
		 * amount not transferred will be added into the b_resid
		 * when the operation is complete. The overrun buf
		 * replaces the original buf here, and the original buf
		 * is saved inside the overrun buf, for later use.
		 */
		if ((!NOT_DEVBSIZE(un)) && is_aligned) {
			resid = SD_TGTBLOCKS2BYTES(un,
			    (offset_t)(requested_nblocks - available_nblocks));
		} else {
			resid = SD_SYSBLOCKS2BYTES(
			    (offset_t)(requested_nblocks - available_nblocks));
		}

		size_t count = bp->b_bcount - resid;
		/*
		 * Note: count is an unsigned entity thus it'll NEVER
		 * be less than 0 so ASSERT the original values are
		 * correct.
		 */
		ASSERT(bp->b_bcount >= resid);

		bp = sd_bioclone_alloc(bp, count, blocknum,
		    (int (*)(struct buf *))(uintptr_t)sd_mapblockaddr_iodone);
		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
		ASSERT(xp != NULL);
	}

	/* At this point there should be no residual for this buf. */
	ASSERT(bp->b_resid == 0);

	/* Convert the block number to an absolute address. */
	xp->xb_blkno += partition_offset;

	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);

	return;

error_exit:
	bp->b_resid = bp->b_bcount;
	SD_BEGIN_IODONE(index, un, bp);
	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
}
12000 
12001 
12002 /*
12003  *    Function: sd_mapblockaddr_iodone
12004  *
12005  * Description: Completion-side processing for partition management.
12006  *
12007  *     Context: May be called under interrupt context
12008  */
12009 
static void
sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* int	partition; */	/* Not used, see below. */
	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);

	/*
	 * sd_mapblockaddr_iostart() installs this function as the b_iodone
	 * of the clone it gets from sd_bioclone_alloc(), so a matching
	 * b_iodone tells us that bp is an overrun clone rather than the
	 * original buf.
	 */
	if ((uintptr_t)bp->b_iodone == (uintptr_t)sd_mapblockaddr_iodone) {
		/*
		 * We have an "overrun" buf to deal with...
		 */
		struct sd_xbuf	*xp;
		struct buf	*obp;	/* ptr to the original buf */

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

		/* Retrieve the pointer to the original buf */
		obp = (struct buf *)xp->xb_private;
		ASSERT(obp != NULL);

		/*
		 * The original residual is the requested byte count minus
		 * the bytes the (shorter) clone actually transferred.
		 */
		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
		bioerror(obp, bp->b_error);

		/* Free the clone; everything below operates on the original. */
		sd_bioclone_free(bp);

		/*
		 * Get back the original buf.
		 * Note that since the restoration of xb_blkno below
		 * was removed, the sd_xbuf is not needed.
		 */
		bp = obp;
		/*
		 * xp = SD_GET_XBUF(bp);
		 * ASSERT(xp != NULL);
		 */
	}

	/*
	 * Convert sd->xb_blkno back to a minor-device relative value.
	 * Note: this has been commented out, as it is not needed in the
	 * current implementation of the driver (ie, since this function
	 * is at the top of the layering chains, so the info will be
	 * discarded) and it is in the "hot" IO path.
	 *
	 * partition = getminor(bp->b_edev) & SDPART_MASK;
	 * xp->xb_blkno -= un->un_offset[partition];
	 */

	SD_NEXT_IODONE(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
}
12068 
12069 
12070 /*
12071  *    Function: sd_mapblocksize_iostart
12072  *
12073  * Description: Convert between system block size (un->un_sys_blocksize)
12074  *		and target block size (un->un_tgt_blocksize).
12075  *
12076  *     Context: Can sleep to allocate resources.
12077  *
12078  * Assumptions: A higher layer has already performed any partition validation,
12079  *		and converted the xp->xb_blkno to an absolute value relative
12080  *		to the start of the device.
12081  *
12082  *		It is also assumed that the higher layer has implemented
12083  *		an "overrun" mechanism for the case where the request would
12084  *		read/write beyond the end of a partition.  In this case we
12085  *		assume (and ASSERT) that bp->b_resid == 0.
12086  *
12087  *		Note: The implementation for this routine assumes the target
12088  *		block size remains constant between allocation and transport.
12089  */
12090 
static void
sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf			*xp;
	offset_t first_byte;
	daddr_t	start_block, end_block;
	daddr_t	request_bytes;
	ushort_t is_aligned = FALSE;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);

	/*
	 * For a non-writable CD, a write request is an error
	 */
	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
	    (un->un_f_mmc_writable_media == FALSE)) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	/*
	 * We do not need a shadow buf if the device is using
	 * un->un_sys_blocksize as its block size or if bcount == 0.
	 * In this case there is no layer-private data block allocated.
	 */
	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
	    (bp->b_bcount == 0)) {
		goto done;
	}

#if defined(__x86)
	/* We do not support non-block-aligned transfers for ROD devices */
	ASSERT(!ISROD(un));
#endif

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
	    un->un_tgt_blocksize, DEV_BSIZE);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request len:0x%x\n", bp->b_bcount);

	/*
	 * Allocate the layer-private data area for the mapblocksize layer.
	 * Layers are allowed to use the xp_private member of the sd_xbuf
	 * struct to store the pointer to their layer-private data block, but
	 * each layer also has the responsibility of restoring the prior
	 * contents of xb_private before returning the buf/xbuf to the
	 * higher layer that sent it.
	 *
	 * Here we save the prior contents of xp->xb_private into the
	 * bsp->mbs_oprivate field of our layer-private data area. This value
	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
	 * the layer-private area and returning the buf/xbuf to the layer
	 * that sent it.
	 *
	 * Note that here we use kmem_zalloc for the allocation as there are
	 * parts of the mapblocksize code that expect certain fields to be
	 * zero unless explicitly set to a required value.
	 */
	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
	bsp->mbs_oprivate = xp->xb_private;
	xp->xb_private = bsp;

	/*
	 * This treats the data on the disk (target) as an array of bytes.
	 * first_byte is the byte offset, from the beginning of the device,
	 * to the location of the request. This is converted from a
	 * un->un_sys_blocksize block address to a byte offset, and then back
	 * to a block address based upon a un->un_tgt_blocksize block size.
	 *
	 * xp->xb_blkno should be absolute upon entry into this function,
	 * but, but it is based upon partitions that use the "system"
	 * block size. It must be adjusted to reflect the block size of
	 * the target.
	 *
	 * Note that end_block is actually the block that follows the last
	 * block of the request, but that's what is needed for the computation.
	 */
	first_byte  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
	if (un->un_f_enable_rmw) {
		/*
		 * RMW mode: round the start down and the end up to physical
		 * block boundaries, expressed in DEV_BSIZE units.
		 */
		start_block = xp->xb_blkno =
		    (first_byte / un->un_phy_blocksize) *
		    (un->un_phy_blocksize / DEV_BSIZE);
		end_block   = ((first_byte + bp->b_bcount +
		    un->un_phy_blocksize - 1) / un->un_phy_blocksize) *
		    (un->un_phy_blocksize / DEV_BSIZE);
	} else {
		start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
		end_block   = (first_byte + bp->b_bcount +
		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
	}

	/* request_bytes is rounded up to a multiple of the target block size */
	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;

	/*
	 * See if the starting address of the request and the request
	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
	 * then we do not need to allocate a shadow buf to handle the request.
	 */
	if (un->un_f_enable_rmw) {
		if (((first_byte % un->un_phy_blocksize) == 0) &&
		    ((bp->b_bcount % un->un_phy_blocksize) == 0)) {
			is_aligned = TRUE;
		}
	} else {
		if (((first_byte % un->un_tgt_blocksize) == 0) &&
		    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
			is_aligned = TRUE;
		}
	}

	if ((bp->b_flags & B_READ) == 0) {
		/*
		 * Lock the range for a write operation. An aligned request is
		 * considered a simple write; otherwise the request must be a
		 * read-modify-write.
		 */
		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
	}

	/*
	 * Alloc a shadow buf if the request is not aligned. Also, this is
	 * where the READ command is generated for a read-modify-write. (The
	 * write phase is deferred until after the read completes.)
	 */
	if (is_aligned == FALSE) {

		struct sd_mapblocksize_info	*shadow_bsp;
		struct sd_xbuf	*shadow_xp;
		struct buf	*shadow_bp;

		/*
		 * Allocate the shadow buf and it associated xbuf. Note that
		 * after this call the xb_blkno value in both the original
		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
		 * same: absolute relative to the start of the device, and
		 * adjusted for the target block size. The b_blkno in the
		 * shadow buf will also be set to this value. We should never
		 * change b_blkno in the original bp however.
		 *
		 * Note also that the shadow buf will always need to be a
		 * READ command, regardless of whether the incoming command
		 * is a READ or a WRITE.
		 */
		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
		    xp->xb_blkno,
		    (int (*)(struct buf *))(uintptr_t)sd_mapblocksize_iodone);

		shadow_xp = SD_GET_XBUF(shadow_bp);

		/*
		 * Allocate the layer-private data for the shadow buf.
		 * (No need to preserve xb_private in the shadow xbuf.)
		 */
		shadow_xp->xb_private = shadow_bsp =
		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);

		/*
		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
		 * to figure out where the start of the user data is (based upon
		 * the system block size) in the data returned by the READ
		 * command (which will be based upon the target blocksize). Note
		 * that this is only really used if the request is unaligned.
		 */
		if (un->un_f_enable_rmw) {
			bsp->mbs_copy_offset = (ssize_t)(first_byte -
			    ((offset_t)xp->xb_blkno * un->un_sys_blocksize));
			ASSERT((bsp->mbs_copy_offset >= 0) &&
			    (bsp->mbs_copy_offset < un->un_phy_blocksize));
		} else {
			bsp->mbs_copy_offset = (ssize_t)(first_byte -
			    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
			ASSERT((bsp->mbs_copy_offset >= 0) &&
			    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
		}

		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;

		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;

		/* Transfer the wmap (if any) to the shadow buf */
		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
		bsp->mbs_wmp = NULL;

		/*
		 * The shadow buf goes on from here in place of the
		 * original buf.
		 */
		shadow_bsp->mbs_orig_bp = bp;
		bp = shadow_bp;
	}

	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
	    request_bytes);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);

done:
	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
}
12313 
12314 
12315 /*
12316  *    Function: sd_mapblocksize_iodone
12317  *
12318  * Description: Completion side processing for block-size mapping.
12319  *
12320  *     Context: May be called under interrupt context
12321  */
12322 
static void
sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf	*xp;
	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
	struct buf	*orig_bp;	/* ptr to the original buf */
	offset_t	shadow_end;
	offset_t	request_end;
	offset_t	shadow_start;
	ssize_t		copy_offset;
	size_t		copy_length;
	size_t		shortfall;
	uint_t		is_write;	/* TRUE if this bp is a WRITE */
	uint_t		has_wmap;	/* TRUE if this bp has a wmap */

	ASSERT(un != NULL);
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);

	/*
	 * There is no shadow buf or layer-private data if the target is
	 * using un->un_sys_blocksize as its block size or if bcount == 0.
	 * (Mirrors the early-out in sd_mapblocksize_iostart().)
	 */
	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
	    (bp->b_bcount == 0)) {
		goto exit;
	}

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* Retrieve the pointer to the layer-private data area from the xbuf. */
	bsp = xp->xb_private;

	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;

	if (is_write) {
		/*
		 * For a WRITE request we must free up the block range that
		 * we have locked up.  This holds regardless of whether this is
		 * an aligned write request or a read-modify-write request.
		 */
		sd_range_unlock(un, bsp->mbs_wmp);
		bsp->mbs_wmp = NULL;
	}

	if ((uintptr_t)bp->b_iodone != (uintptr_t)sd_mapblocksize_iodone) {
		/*
		 * An aligned read or write command will have no shadow buf;
		 * there is not much else to do with it.
		 */
		goto done;
	}

	/* bp is a shadow buf; recover the original from the private data. */
	orig_bp = bsp->mbs_orig_bp;
	ASSERT(orig_bp != NULL);
	orig_xp = SD_GET_XBUF(orig_bp);
	ASSERT(orig_xp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (!is_write && has_wmap) {
		/*
		 * A READ with a wmap means this is the READ phase of a
		 * read-modify-write. If an error occurred on the READ then
		 * we do not proceed with the WRITE phase or copy any data.
		 * Just release the write maps and return with an error.
		 */
		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
			orig_bp->b_resid = orig_bp->b_bcount;
			bioerror(orig_bp, bp->b_error);
			sd_range_unlock(un, bsp->mbs_wmp);
			goto freebuf_done;
		}
	}

	/*
	 * Here is where we set up to copy the data from the shadow buf
	 * into the space associated with the original buf.
	 *
	 * To deal with the conversion between block sizes, these
	 * computations treat the data as an array of bytes, with the
	 * first byte (byte 0) corresponding to the first byte in the
	 * first block on the disk.
	 */

	/*
	 * shadow_start and shadow_len indicate the location and size of
	 * the data returned with the shadow IO request.
	 */
	if (un->un_f_enable_rmw) {
		shadow_start  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
	} else {
		shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	}
	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;

	/*
	 * copy_offset gives the offset (in bytes) from the start of the first
	 * block of the READ request to the beginning of the data.  We retrieve
	 * this value from xb_pktp in the ORIGINAL xbuf, as it has been saved
	 * there by sd_mapblockize_iostart(). copy_length gives the amount of
	 * data to be copied (in bytes).
	 */
	copy_offset  = bsp->mbs_copy_offset;
	if (un->un_f_enable_rmw) {
		ASSERT((copy_offset >= 0) &&
		    (copy_offset < un->un_phy_blocksize));
	} else {
		ASSERT((copy_offset >= 0) &&
		    (copy_offset < un->un_tgt_blocksize));
	}

	copy_length  = orig_bp->b_bcount;
	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;

	/*
	 * Set up the resid and error fields of orig_bp as appropriate.
	 */
	if (shadow_end >= request_end) {
		/* We got all the requested data; set resid to zero */
		orig_bp->b_resid = 0;
	} else {
		/*
		 * We failed to get enough data to fully satisfy the original
		 * request. Just copy back whatever data we got and set
		 * up the residual and error code as required.
		 *
		 * 'shortfall' is the amount by which the data received with the
		 * shadow buf has "fallen short" of the requested amount.
		 */
		shortfall = (size_t)(request_end - shadow_end);

		if (shortfall > orig_bp->b_bcount) {
			/*
			 * We did not get enough data to even partially
			 * fulfill the original request.  The residual is
			 * equal to the amount requested.
			 */
			orig_bp->b_resid = orig_bp->b_bcount;
		} else {
			/*
			 * We did not get all the data that we requested
			 * from the device, but we will try to return what
			 * portion we did get.
			 */
			orig_bp->b_resid = shortfall;
		}
		ASSERT(copy_length >= orig_bp->b_resid);
		copy_length  -= orig_bp->b_resid;
	}

	/* Propagate the error code from the shadow buf to the original buf */
	bioerror(orig_bp, bp->b_error);

	if (is_write) {
		goto freebuf_done;	/* No data copying for a WRITE */
	}

	if (has_wmap) {
		/*
		 * This is a READ command from the READ phase of a
		 * read-modify-write request. We have to copy the data given
		 * by the user OVER the data returned by the READ command,
		 * then convert the command from a READ to a WRITE and send
		 * it back to the target.
		 */
		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
		    copy_length);

		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */

		/*
		 * Dispatch the WRITE command to the taskq thread, which
		 * will in turn send the command to the target. When the
		 * WRITE command completes, we (sd_mapblocksize_iodone())
		 * will get called again as part of the iodone chain
		 * processing for it. Note that we will still be dealing
		 * with the shadow buf at that point.
		 */
		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
		    KM_NOSLEEP) != TASKQID_INVALID) {
			/*
			 * Dispatch was successful so we are done. Return
			 * without going any higher up the iodone chain. Do
			 * not free up any layer-private data until after the
			 * WRITE completes.
			 */
			return;
		}

		/*
		 * Dispatch of the WRITE command failed; set up the error
		 * condition and send this IO back up the iodone chain.
		 */
		bioerror(orig_bp, EIO);
		orig_bp->b_resid = orig_bp->b_bcount;

	} else {
		/*
		 * This is a regular READ request (ie, not a RMW). Copy the
		 * data from the shadow buf into the original buf. The
		 * copy_offset compensates for any "misalignment" between the
		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
		 * original buf (with its un->un_sys_blocksize blocks).
		 */
		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
		    copy_length);
	}

freebuf_done:

	/*
	 * At this point we still have both the shadow buf AND the original
	 * buf to deal with, as well as the layer-private data area in each.
	 * Local variables are as follows:
	 *
	 * bp -- points to shadow buf
	 * xp -- points to xbuf of shadow buf
	 * bsp -- points to layer-private data area of shadow buf
	 * orig_bp -- points to original buf
	 *
	 * First free the shadow buf and its associated xbuf, then free the
	 * layer-private data area from the shadow buf. There is no need to
	 * restore xb_private in the shadow xbuf.
	 */
	sd_shadow_buf_free(bp);
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

	/*
	 * Now update the local variables to point to the original buf, xbuf,
	 * and layer-private area.
	 */
	bp = orig_bp;
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp == orig_xp);
	bsp = xp->xb_private;
	ASSERT(bsp != NULL);

done:
	/*
	 * Restore xb_private to whatever it was set to by the next higher
	 * layer in the chain, then free the layer-private data area.
	 */
	xp->xb_private = bsp->mbs_oprivate;
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

exit:
	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);

	SD_NEXT_IODONE(index, un, bp);
}
12580 
12581 
12582 /*
12583  *    Function: sd_checksum_iostart
12584  *
12585  * Description: A stub function for a layer that's currently not used.
12586  *		For now just a placeholder.
12587  *
12588  *     Context: Kernel thread context
12589  */
12590 
12591 static void
sd_checksum_iostart(int index,struct sd_lun * un,struct buf * bp)12592 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12593 {
12594 	ASSERT(un != NULL);
12595 	ASSERT(bp != NULL);
12596 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12597 	SD_NEXT_IOSTART(index, un, bp);
12598 }
12599 
12600 
12601 /*
12602  *    Function: sd_checksum_iodone
12603  *
12604  * Description: A stub function for a layer that's currently not used.
12605  *		For now just a placeholder.
12606  *
12607  *     Context: May be called under interrupt context
12608  */
12609 
12610 static void
sd_checksum_iodone(int index,struct sd_lun * un,struct buf * bp)12611 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12612 {
12613 	ASSERT(un != NULL);
12614 	ASSERT(bp != NULL);
12615 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12616 	SD_NEXT_IODONE(index, un, bp);
12617 }
12618 
12619 
12620 /*
12621  *    Function: sd_checksum_uscsi_iostart
12622  *
12623  * Description: A stub function for a layer that's currently not used.
12624  *		For now just a placeholder.
12625  *
12626  *     Context: Kernel thread context
12627  */
12628 
12629 static void
sd_checksum_uscsi_iostart(int index,struct sd_lun * un,struct buf * bp)12630 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12631 {
12632 	ASSERT(un != NULL);
12633 	ASSERT(bp != NULL);
12634 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12635 	SD_NEXT_IOSTART(index, un, bp);
12636 }
12637 
12638 
12639 /*
12640  *    Function: sd_checksum_uscsi_iodone
12641  *
12642  * Description: A stub function for a layer that's currently not used.
12643  *		For now just a placeholder.
12644  *
12645  *     Context: May be called under interrupt context
12646  */
12647 
12648 static void
sd_checksum_uscsi_iodone(int index,struct sd_lun * un,struct buf * bp)12649 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12650 {
12651 	ASSERT(un != NULL);
12652 	ASSERT(bp != NULL);
12653 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12654 	SD_NEXT_IODONE(index, un, bp);
12655 }
12656 
12657 
12658 /*
12659  *    Function: sd_pm_iostart
12660  *
12661  * Description: iostart-side routine for Power mangement.
12662  *
12663  *     Context: Kernel thread context
12664  */
12665 
12666 static void
sd_pm_iostart(int index,struct sd_lun * un,struct buf * bp)12667 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12668 {
12669 	ASSERT(un != NULL);
12670 	ASSERT(bp != NULL);
12671 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12672 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12673 
12674 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12675 
12676 	if (sd_pm_entry(un) != DDI_SUCCESS) {
12677 		/*
12678 		 * Set up to return the failed buf back up the 'iodone'
12679 		 * side of the calling chain.
12680 		 */
12681 		bioerror(bp, EIO);
12682 		bp->b_resid = bp->b_bcount;
12683 
12684 		SD_BEGIN_IODONE(index, un, bp);
12685 
12686 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12687 		return;
12688 	}
12689 
12690 	SD_NEXT_IOSTART(index, un, bp);
12691 
12692 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12693 }
12694 
12695 
12696 /*
12697  *    Function: sd_pm_iodone
12698  *
12699  * Description: iodone-side routine for power mangement.
12700  *
12701  *     Context: may be called from interrupt context
12702  */
12703 
static void
sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(&un->un_pm_mutex));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");

	/*
	 * After attach the following flag is only read, so don't
	 * take the penalty of acquiring a mutex for it.
	 */
	if (un->un_f_pm_is_enabled == TRUE) {
		/* Release the PM "busy" reference taken by sd_pm_iostart(). */
		sd_pm_exit(un);
	}

	SD_NEXT_IODONE(index, un, bp);

	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
}
12725 
12726 
12727 /*
12728  *    Function: sd_core_iostart
12729  *
12730  * Description: Primary driver function for enqueuing buf(9S) structs from
12731  *		the system and initiating IO to the target device
12732  *
12733  *     Context: Kernel thread context. Can sleep.
12734  *
12735  * Assumptions:  - The given xp->xb_blkno is absolute
12736  *		   (ie, relative to the start of the device).
12737  *		 - The IO is to be done using the native blocksize of
12738  *		   the device, as specified in un->un_tgt_blocksize.
12739  */
12740 /* ARGSUSED */
static void
sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	mutex_enter(SD_MUTEX(un));

	/*
	 * If we are currently in the failfast state, fail any new IO
	 * that has B_FAILFAST set, then return.
	 */
	if ((bp->b_flags & B_FAILFAST) &&
	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
		/* Drop the mutex before completing the buf with EIO. */
		mutex_exit(SD_MUTEX(un));
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	if (SD_IS_DIRECT_PRIORITY(xp)) {
		/*
		 * Priority command -- transport it immediately.
		 *
		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
		 * because all direct priority commands should be associated
		 * with error recovery actions which we don't want to retry.
		 */
		sd_start_cmds(un, bp);
	} else {
		/*
		 * Normal command -- add it to the wait queue, then start
		 * transporting commands from the wait queue.
		 */
		sd_add_buf_to_waitq(un, bp);
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		sd_start_cmds(un, NULL);
	}

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
}
12794 
12795 
12796 /*
12797  *    Function: sd_init_cdb_limits
12798  *
12799  * Description: This is to handle scsi_pkt initialization differences
12800  *		between the driver platforms.
12801  *
12802  *		Legacy behaviors:
12803  *
12804  *		If the block number or the sector count exceeds the
12805  *		capabilities of a Group 0 command, shift over to a
12806  *		Group 1 command. We don't blindly use Group 1
12807  *		commands because a) some drives (CDC Wren IVs) get a
12808  *		bit confused, and b) there is probably a fair amount
12809  *		of speed difference for a target to receive and decode
12810  *		a 10 byte command instead of a 6 byte command.
12811  *
12812  *		The xfer time difference of 6 vs 10 byte CDBs is
12813  *		still significant so this code is still worthwhile.
12814  *		10 byte CDBs are very inefficient with the fas HBA driver
12815  *		and older disks. Each CDB byte took 1 usec with some
12816  *		popular disks.
12817  *
12818  *     Context: Must be called at attach time
12819  */
12820 
12821 static void
sd_init_cdb_limits(struct sd_lun * un)12822 sd_init_cdb_limits(struct sd_lun *un)
12823 {
12824 	int hba_cdb_limit;
12825 
12826 	/*
12827 	 * Use CDB_GROUP1 commands for most devices except for
12828 	 * parallel SCSI fixed drives in which case we get better
12829 	 * performance using CDB_GROUP0 commands (where applicable).
12830 	 */
12831 	un->un_mincdb = SD_CDB_GROUP1;
12832 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
12833 	    !un->un_f_has_removable_media) {
12834 		un->un_mincdb = SD_CDB_GROUP0;
12835 	}
12836 
12837 	/*
12838 	 * Try to read the max-cdb-length supported by HBA.
12839 	 */
12840 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
12841 	if (0 >= un->un_max_hba_cdb) {
12842 		un->un_max_hba_cdb = CDB_GROUP4;
12843 		hba_cdb_limit = SD_CDB_GROUP4;
12844 	} else if (0 < un->un_max_hba_cdb &&
12845 	    un->un_max_hba_cdb < CDB_GROUP1) {
12846 		hba_cdb_limit = SD_CDB_GROUP0;
12847 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
12848 	    un->un_max_hba_cdb < CDB_GROUP5) {
12849 		hba_cdb_limit = SD_CDB_GROUP1;
12850 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
12851 	    un->un_max_hba_cdb < CDB_GROUP4) {
12852 		hba_cdb_limit = SD_CDB_GROUP5;
12853 	} else {
12854 		hba_cdb_limit = SD_CDB_GROUP4;
12855 	}
12856 
12857 	/*
12858 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
12859 	 * commands for fixed disks.
12860 	 */
12861 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
12862 	    min(hba_cdb_limit, SD_CDB_GROUP4);
12863 
12864 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
12865 	    ? sizeof (struct scsi_arq_status) : 1);
12866 	if (!ISCD(un))
12867 		un->un_cmd_timeout = (ushort_t)sd_io_time;
12868 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
12869 }
12870 
12871 
12872 /*
12873  *    Function: sd_initpkt_for_buf
12874  *
12875  * Description: Allocate and initialize for transport a scsi_pkt struct,
12876  *		based upon the info specified in the given buf struct.
12877  *
12878  *		Assumes the xb_blkno in the request is absolute (ie,
12879  *		relative to the start of the device (NOT partition!).
12880  *		Also assumes that the request is using the native block
12881  *		size of the device (as returned by the READ CAPACITY
12882  *		command).
12883  *
12884  * Return Code: SD_PKT_ALLOC_SUCCESS
12885  *		SD_PKT_ALLOC_FAILURE
12886  *		SD_PKT_ALLOC_FAILURE_NO_DMA
12887  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
12888  *
12889  *     Context: Kernel thread and may be called from software interrupt context
12890  *		as part of a sdrunout callback. This function may not block or
12891  *		call routines that block
12892  */
12893 
static int
sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp = NULL;
	struct sd_lun	*un;
	size_t		blockcount;
	daddr_t		startblock;
	int		rval;
	int		cmd_flags;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);

	/*
	 * Drop the unit mutex across the packet/DMA allocation; it is
	 * re-acquired on every return path below.
	 */
	mutex_exit(SD_MUTEX(un));

#if defined(__x86)	/* DMAFREE for x86 only */
	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
		/*
		 * Already have a scsi_pkt -- just need DMA resources.
		 * We must recompute the CDB in case the mapping returns
		 * a nonzero pkt_resid.
		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
		 * that is being retried, the unmap/remap of the DMA resouces
		 * will result in the entire transfer starting over again
		 * from the very first block.
		 */
		ASSERT(xp->xb_pktp != NULL);
		pktp = xp->xb_pktp;
	} else {
		pktp = NULL;
	}
#endif /* __x86 */

	startblock = xp->xb_blkno;	/* Absolute block num. */
	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);

	/* Combine per-device pkt flags with the per-request init flags. */
	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);

	/*
	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
	 * call scsi_init_pkt, and build the CDB.
	 */
	rval = sd_setup_rw_pkt(un, &pktp, bp,
	    cmd_flags, sdrunout, (caddr_t)un,
	    startblock, blockcount);

	if (rval == 0) {
		/*
		 * Success.
		 *
		 * If partial DMA is being used and required for this transfer.
		 * set it up here.
		 */
		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
		    (pktp->pkt_resid != 0)) {

			/*
			 * Save the CDB length and pkt_resid for the
			 * next xfer
			 */
			xp->xb_dma_resid = pktp->pkt_resid;

			/* rezero resid */
			pktp->pkt_resid = 0;

		} else {
			xp->xb_dma_resid = 0;
		}

		/* Finish initializing the pkt: tag flags, timeout, callback. */
		pktp->pkt_flags = un->un_tagflags;
		pktp->pkt_time  = un->un_cmd_timeout;
		pktp->pkt_comp  = sdintr;

		pktp->pkt_private = bp;
		*pktpp = pktp;

		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);

#if defined(__x86)	/* DMAFREE for x86 only */
		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
#endif

		mutex_enter(SD_MUTEX(un));
		return (SD_PKT_ALLOC_SUCCESS);

	}

	/*
	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
	 * from sd_setup_rw_pkt.
	 */
	ASSERT(rval == SD_PKT_ALLOC_FAILURE);

	if (rval == SD_PKT_ALLOC_FAILURE) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detatch while the state is RWAIT.
		 */
		mutex_enter(SD_MUTEX(un));
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);

		/* B_ERROR set here indicates DMA resources were the problem. */
		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	} else {
		/*
		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
		 *
		 * This should never happen.  Maybe someone messed with the
		 * kernel's minphys?
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Request rejected: too large for CDB: "
		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
		mutex_enter(SD_MUTEX(un));
		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);

	}
}
13032 
13033 
13034 /*
13035  *    Function: sd_destroypkt_for_buf
13036  *
13037  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13038  *
13039  *     Context: Kernel thread or interrupt context
13040  */
13041 
13042 static void
sd_destroypkt_for_buf(struct buf * bp)13043 sd_destroypkt_for_buf(struct buf *bp)
13044 {
13045 	ASSERT(bp != NULL);
13046 	ASSERT(SD_GET_UN(bp) != NULL);
13047 
13048 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13049 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13050 
13051 	ASSERT(SD_GET_PKTP(bp) != NULL);
13052 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13053 
13054 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13055 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13056 }
13057 
13058 /*
13059  *    Function: sd_setup_rw_pkt
13060  *
13061  * Description: Determines appropriate CDB group for the requested LBA
13062  *		and transfer length, calls scsi_init_pkt, and builds
13063  *		the CDB.  Do not use for partial DMA transfers except
13064  *		for the initial transfer since the CDB size must
13065  *		remain constant.
13066  *
13067  *     Context: Kernel thread and may be called from software interrupt
13068  *		context as part of a sdrunout callback. This function may not
13069  *		block or call routines that block
13070  */
13071 
13072 
int
sd_setup_rw_pkt(struct sd_lun *un,
    struct scsi_pkt **pktpp, struct buf *bp, int flags,
    int (*callback)(caddr_t), caddr_t callback_arg,
    diskaddr_t lba, uint32_t blockcount)
{
	struct scsi_pkt *return_pktp;
	union scsi_cdb *cdbp;
	struct sd_cdbinfo *cp = NULL;
	int i;

	/*
	 * See which size CDB to use, based upon the request.
	 * Iterate from the smallest permitted group (un_mincdb) up to
	 * the largest (un_maxcdb), using the first one that fits.
	 */
	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {

		/*
		 * Check lba and block count against sd_cdbtab limits.
		 * In the partial DMA case, we have to use the same size
		 * CDB for all the transfers.  Check lba + blockcount
		 * against the max LBA so we know that segment of the
		 * transfer can use the CDB we select.
		 */
		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {

			/*
			 * The command will fit into the CDB type
			 * specified by sd_cdbtab[i].
			 */
			cp = sd_cdbtab + i;

			/*
			 * Call scsi_init_pkt so we can fill in the
			 * CDB.
			 */
			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
			    bp, cp->sc_grpcode, un->un_status_len, 0,
			    flags, callback, callback_arg);

			if (return_pktp != NULL) {

				/*
				 * Return new value of pkt
				 */
				*pktpp = return_pktp;

				/*
				 * To be safe, zero the CDB insuring there is
				 * no leftover data from a previous command.
				 */
				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);

				/*
				 * Handle partial DMA mapping
				 */
				if (return_pktp->pkt_resid != 0) {

					/*
					 * Not going to xfer as many blocks as
					 * originally expected
					 */
					blockcount -=
					    SD_BYTES2TGTBLOCKS(un,
					    return_pktp->pkt_resid);
				}

				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;

				/*
				 * Set command byte based on the CDB
				 * type we matched.
				 */
				cdbp->scc_cmd = cp->sc_grpmask |
				    ((bp->b_flags & B_READ) ?
				    SCMD_READ : SCMD_WRITE);

				SD_FILL_SCSI1_LUN(un, return_pktp);

				/*
				 * Fill in LBA and length
				 */
				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
				    (cp->sc_grpcode == CDB_GROUP4) ||
				    (cp->sc_grpcode == CDB_GROUP0) ||
				    (cp->sc_grpcode == CDB_GROUP5));

				if (cp->sc_grpcode == CDB_GROUP1) {
					FORMG1ADDR(cdbp, lba);
					FORMG1COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP4) {
					FORMG4LONGADDR(cdbp, lba);
					FORMG4COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP0) {
					FORMG0ADDR(cdbp, lba);
					FORMG0COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP5) {
					FORMG5ADDR(cdbp, lba);
					FORMG5COUNT(cdbp, blockcount);
					return (0);
				}

				/*
				 * It should be impossible to not match one
				 * of the CDB types above, so we should never
				 * reach this point.  Set the CDB command byte
				 * to test-unit-ready to avoid writing
				 * to somewhere we don't intend.
				 */
				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
			} else {
				/*
				 * Couldn't get scsi_pkt
				 */
				return (SD_PKT_ALLOC_FAILURE);
			}
		}
	}

	/*
	 * None of the available CDB types were suitable.  This really
	 * should never happen:  on a 64 bit system we support
	 * READ16/WRITE16 which will hold an entire 64 bit disk address
	 * and on a 32 bit system we will refuse to bind to a device
	 * larger than 2TB so addresses will never be larger than 32 bits.
	 */
	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
}
13205 
13206 /*
13207  *    Function: sd_setup_next_rw_pkt
13208  *
13209  * Description: Setup packet for partial DMA transfers, except for the
13210  *		initial transfer.  sd_setup_rw_pkt should be used for
13211  *		the initial transfer.
13212  *
13213  *     Context: Kernel thread and may be called from interrupt context.
13214  */
13215 
13216 int
sd_setup_next_rw_pkt(struct sd_lun * un,struct scsi_pkt * pktp,struct buf * bp,diskaddr_t lba,uint32_t blockcount)13217 sd_setup_next_rw_pkt(struct sd_lun *un,
13218     struct scsi_pkt *pktp, struct buf *bp,
13219     diskaddr_t lba, uint32_t blockcount)
13220 {
13221 	uchar_t com;
13222 	union scsi_cdb *cdbp;
13223 	uchar_t cdb_group_id;
13224 
13225 	ASSERT(pktp != NULL);
13226 	ASSERT(pktp->pkt_cdbp != NULL);
13227 
13228 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13229 	com = cdbp->scc_cmd;
13230 	cdb_group_id = CDB_GROUPID(com);
13231 
13232 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13233 	    (cdb_group_id == CDB_GROUPID_1) ||
13234 	    (cdb_group_id == CDB_GROUPID_4) ||
13235 	    (cdb_group_id == CDB_GROUPID_5));
13236 
13237 	/*
13238 	 * Move pkt to the next portion of the xfer.
13239 	 * func is NULL_FUNC so we do not have to release
13240 	 * the disk mutex here.
13241 	 */
13242 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13243 	    NULL_FUNC, NULL) == pktp) {
13244 		/* Success.  Handle partial DMA */
13245 		if (pktp->pkt_resid != 0) {
13246 			blockcount -=
13247 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13248 		}
13249 
13250 		cdbp->scc_cmd = com;
13251 		SD_FILL_SCSI1_LUN(un, pktp);
13252 		if (cdb_group_id == CDB_GROUPID_1) {
13253 			FORMG1ADDR(cdbp, lba);
13254 			FORMG1COUNT(cdbp, blockcount);
13255 			return (0);
13256 		} else if (cdb_group_id == CDB_GROUPID_4) {
13257 			FORMG4LONGADDR(cdbp, lba);
13258 			FORMG4COUNT(cdbp, blockcount);
13259 			return (0);
13260 		} else if (cdb_group_id == CDB_GROUPID_0) {
13261 			FORMG0ADDR(cdbp, lba);
13262 			FORMG0COUNT(cdbp, blockcount);
13263 			return (0);
13264 		} else if (cdb_group_id == CDB_GROUPID_5) {
13265 			FORMG5ADDR(cdbp, lba);
13266 			FORMG5COUNT(cdbp, blockcount);
13267 			return (0);
13268 		}
13269 
13270 		/* Unreachable */
13271 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13272 	}
13273 
13274 	/*
13275 	 * Error setting up next portion of cmd transfer.
13276 	 * Something is definitely very wrong and this
13277 	 * should not happen.
13278 	 */
13279 	return (SD_PKT_ALLOC_FAILURE);
13280 }
13281 
13282 /*
13283  *    Function: sd_initpkt_for_uscsi
13284  *
13285  * Description: Allocate and initialize for transport a scsi_pkt struct,
13286  *		based upon the info specified in the given uscsi_cmd struct.
13287  *
13288  * Return Code: SD_PKT_ALLOC_SUCCESS
13289  *		SD_PKT_ALLOC_FAILURE
13290  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13291  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13292  *
13293  *     Context: Kernel thread and may be called from software interrupt context
13294  *		as part of a sdrunout callback. This function may not block or
13295  *		call routines that block
13296  */
13297 
static int
sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct uscsi_cmd *uscmd;
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_lun	*un;
	uint32_t	flags = 0;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
	ASSERT(uscmd != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);

	/*
	 * Allocate the scsi_pkt for the command.
	 *
	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
	 *	 during scsi_init_pkt time and will continue to use the
	 *	 same path as long as the same scsi_pkt is used without
	 *	 intervening scsi_dmafree(). Since uscsi command does
	 *	 not call scsi_dmafree() before retry failed command, it
	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
	 *	 set such that scsi_vhci can use other available path for
	 *	 retry. Besides, ucsci command does not allow DMA breakup,
	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
	 *
	 *	 More fundamentally, we can't support breaking up this DMA into
	 *	 multiple windows on x86. There is, in general, no guarantee
	 *	 that arbitrary SCSI commands are idempotent, which is required
	 *	 if we want to use multiple windows for a given command.
	 */
	if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
		/*
		 * Caller wants more sense data than the default; use
		 * PKT_XARQ and enlarge the status area accordingly.
		 */
		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
		    ((int)(uscmd->uscsi_rqlen) + sizeof (struct scsi_arq_status)
		    - sizeof (struct scsi_extended_sense)), 0,
		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL) | PKT_XARQ,
		    sdrunout, (caddr_t)un);
	} else {
		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
		    sizeof (struct scsi_arq_status), 0,
		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
		    sdrunout, (caddr_t)un);
	}

	if (pktp == NULL) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detatch while the state is RWAIT.
		 */
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);

		/* B_ERROR here indicates the failure was in DMA setup. */
		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	}

	/*
	 * We do not do DMA breakup for USCSI commands, so return failure
	 * here if all the needed DMA resources were not allocated.
	 */
	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
		scsi_destroy_pkt(pktp);
		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
	}

	/* Init the cdb from the given uscsi struct */
	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
	    uscmd->uscsi_cdb[0], 0, 0, 0);

	SD_FILL_SCSI1_LUN(un, pktp);

	/*
	 * Set up the optional USCSI flags. See the uscsi(4I) man page
	 * for listing of the supported flags.
	 */

	if (uscmd->uscsi_flags & USCSI_SILENT) {
		flags |= FLAG_SILENT;
	}

	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
		flags |= FLAG_DIAGNOSE;
	}

	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
		flags |= FLAG_ISOLATE;
	}

	/* Wide/sync renegotiation applies to parallel SCSI only. */
	if (un->un_f_is_fibre == FALSE) {
		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
		}
	}

	/*
	 * Set the pkt flags here so we save time later.
	 * Note: These flags are NOT in the uscsi man page!!!
	 */
	if (uscmd->uscsi_flags & USCSI_HEAD) {
		flags |= FLAG_HEAD;
	}

	if (uscmd->uscsi_flags & USCSI_NOINTR) {
		flags |= FLAG_NOINTR;
	}

	/*
	 * For tagged queueing, things get a bit complicated.
	 * Check first for head of queue and last for ordered queue.
	 * If neither head nor order, use the default driver tag flags.
	 */
	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
		if (uscmd->uscsi_flags & USCSI_HTAG) {
			flags |= FLAG_HTAG;
		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
			flags |= FLAG_OTAG;
		} else {
			flags |= un->un_tagflags & FLAG_TAGMASK;
		}
	}

	/* NODISCON overrides any tag flags chosen above. */
	if (uscmd->uscsi_flags & USCSI_NODISCON) {
		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
	}

	pktp->pkt_flags = flags;

	/* Transfer uscsi information to scsi_pkt */
	(void) scsi_uscsi_pktinit(uscmd, pktp);

	/* Copy the caller's CDB into the pkt... */
	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);

	/* A zero uscsi timeout means "use the driver default". */
	if (uscmd->uscsi_timeout == 0) {
		pktp->pkt_time = un->un_uscsi_timeout;
	} else {
		pktp->pkt_time = uscmd->uscsi_timeout;
	}

	/* need it later to identify USCSI request in sdintr */
	xp->xb_pkt_flags |= SD_XB_USCSICMD;

	xp->xb_sense_resid = uscmd->uscsi_rqresid;

	pktp->pkt_private = bp;
	pktp->pkt_comp = sdintr;
	*pktpp = pktp;

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);

	return (SD_PKT_ALLOC_SUCCESS);
}
13473 
13474 
13475 /*
13476  *    Function: sd_destroypkt_for_uscsi
13477  *
13478  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13479  *		IOs.. Also saves relevant info into the associated uscsi_cmd
13480  *		struct.
13481  *
13482  *     Context: May be called under interrupt context
13483  */
13484 
13485 static void
sd_destroypkt_for_uscsi(struct buf * bp)13486 sd_destroypkt_for_uscsi(struct buf *bp)
13487 {
13488 	struct uscsi_cmd *uscmd;
13489 	struct sd_xbuf	*xp;
13490 	struct scsi_pkt	*pktp;
13491 	struct sd_lun	*un;
13492 	struct sd_uscsi_info *suip;
13493 
13494 	ASSERT(bp != NULL);
13495 	xp = SD_GET_XBUF(bp);
13496 	ASSERT(xp != NULL);
13497 	un = SD_GET_UN(bp);
13498 	ASSERT(un != NULL);
13499 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13500 	pktp = SD_GET_PKTP(bp);
13501 	ASSERT(pktp != NULL);
13502 
13503 	SD_TRACE(SD_LOG_IO_CORE, un,
13504 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13505 
13506 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13507 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13508 	ASSERT(uscmd != NULL);
13509 
13510 	/* Save the status and the residual into the uscsi_cmd struct */
13511 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13512 	uscmd->uscsi_resid  = bp->b_resid;
13513 
13514 	/* Transfer scsi_pkt information to uscsi */
13515 	(void) scsi_uscsi_pktfini(pktp, uscmd);
13516 
13517 	/*
13518 	 * If enabled, copy any saved sense data into the area specified
13519 	 * by the uscsi command.
13520 	 */
13521 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13522 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13523 		/*
13524 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13525 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13526 		 */
13527 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13528 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13529 		if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
13530 			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
13531 			    MAX_SENSE_LENGTH);
13532 		} else {
13533 			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
13534 			    SENSE_LENGTH);
13535 		}
13536 	}
13537 	/*
13538 	 * The following assignments are for SCSI FMA.
13539 	 */
13540 	ASSERT(xp->xb_private != NULL);
13541 	suip = (struct sd_uscsi_info *)xp->xb_private;
13542 	suip->ui_pkt_reason = pktp->pkt_reason;
13543 	suip->ui_pkt_state = pktp->pkt_state;
13544 	suip->ui_pkt_statistics = pktp->pkt_statistics;
13545 	suip->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
13546 
13547 	/* We are done with the scsi_pkt; free it now */
13548 	ASSERT(SD_GET_PKTP(bp) != NULL);
13549 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13550 
13551 	SD_TRACE(SD_LOG_IO_CORE, un,
13552 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13553 }
13554 
13555 
13556 /*
13557  *    Function: sd_bioclone_alloc
13558  *
13559  * Description: Allocate a buf(9S) and init it as per the given buf
13560  *		and the various arguments.  The associated sd_xbuf
13561  *		struct is (nearly) duplicated.  The struct buf *bp
13562  *		argument is saved in new_xp->xb_private.
13563  *
13564  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
13565  *		datalen - size of data area for the shadow bp
13566  *		blkno - starting LBA
13567  *		func - function pointer for b_iodone in the shadow buf. (May
13568  *			be NULL if none.)
13569  *
13570  * Return Code: Pointer to allocates buf(9S) struct
13571  *
13572  *     Context: Can sleep.
13573  */
13574 
13575 static struct buf *
sd_bioclone_alloc(struct buf * bp,size_t datalen,daddr_t blkno,int (* func)(struct buf *))13576 sd_bioclone_alloc(struct buf *bp, size_t datalen, daddr_t blkno,
13577     int (*func)(struct buf *))
13578 {
13579 	struct	sd_lun	*un;
13580 	struct	sd_xbuf	*xp;
13581 	struct	sd_xbuf	*new_xp;
13582 	struct	buf	*new_bp;
13583 
13584 	ASSERT(bp != NULL);
13585 	xp = SD_GET_XBUF(bp);
13586 	ASSERT(xp != NULL);
13587 	un = SD_GET_UN(bp);
13588 	ASSERT(un != NULL);
13589 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13590 
13591 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13592 	    NULL, KM_SLEEP);
13593 
13594 	new_bp->b_lblkno	= blkno;
13595 
13596 	/*
13597 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13598 	 * original xbuf into it.
13599 	 */
13600 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13601 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13602 
13603 	/*
13604 	 * The given bp is automatically saved in the xb_private member
13605 	 * of the new xbuf.  Callers are allowed to depend on this.
13606 	 */
13607 	new_xp->xb_private = bp;
13608 
13609 	new_bp->b_private  = new_xp;
13610 
13611 	return (new_bp);
13612 }
13613 
13614 /*
13615  *    Function: sd_shadow_buf_alloc
13616  *
13617  * Description: Allocate a buf(9S) and init it as per the given buf
13618  *		and the various arguments.  The associated sd_xbuf
13619  *		struct is (nearly) duplicated.  The struct buf *bp
13620  *		argument is saved in new_xp->xb_private.
13621  *
13622  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
13623  *		datalen - size of data area for the shadow bp
13624  *		bflags - B_READ or B_WRITE (pseudo flag)
13625  *		blkno - starting LBA
13626  *		func - function pointer for b_iodone in the shadow buf. (May
13627  *			be NULL if none.)
13628  *
13629  * Return Code: Pointer to allocates buf(9S) struct
13630  *
13631  *     Context: Can sleep.
13632  */
13633 
static struct buf *
sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
    daddr_t blkno, int (*func)(struct buf *))
{
	struct	sd_lun	*un;
	struct	sd_xbuf	*xp;
	struct	sd_xbuf	*new_xp;
	struct	buf	*new_bp;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Map in the original buf's data so it can be copied to/from
	 * the shadow buf later.
	 */
	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
		bp_mapin(bp);
	}

	bflags &= (B_READ | B_WRITE);
#if defined(__x86)
	/*
	 * x86: allocate a plain zeroed kernel buffer; other platforms
	 * use a DMA-consistent buffer from the HBA framework.
	 */
	new_bp = getrbuf(KM_SLEEP);
	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
	new_bp->b_bcount = datalen;
	new_bp->b_flags = bflags |
	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
#else
	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
	    datalen, bflags, SLEEP_FUNC, NULL);
#endif
	new_bp->av_forw	= NULL;
	new_bp->av_back	= NULL;
	new_bp->b_dev	= bp->b_dev;
	new_bp->b_blkno	= blkno;
	new_bp->b_iodone = func;
	new_bp->b_edev	= bp->b_edev;
	new_bp->b_resid	= 0;

	/* We need to preserve the B_FAILFAST flag */
	if (bp->b_flags & B_FAILFAST) {
		new_bp->b_flags |= B_FAILFAST;
	}

	/*
	 * Allocate an xbuf for the shadow bp and copy the contents of the
	 * original xbuf into it.
	 */
	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	bcopy(xp, new_xp, sizeof (struct sd_xbuf));

	/* Need later to copy data between the shadow buf & original buf! */
	new_xp->xb_pkt_flags |= PKT_CONSISTENT;

	/*
	 * The given bp is automatically saved in the xb_private member
	 * of the new xbuf.  Callers are allowed to depend on this.
	 */
	new_xp->xb_private = bp;

	new_bp->b_private  = new_xp;

	return (new_bp);
}
13698 
13699 /*
13700  *    Function: sd_bioclone_free
13701  *
13702  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
13703  *		in the larger than partition operation.
13704  *
13705  *     Context: May be called under interrupt context
13706  */
13707 
13708 static void
sd_bioclone_free(struct buf * bp)13709 sd_bioclone_free(struct buf *bp)
13710 {
13711 	struct sd_xbuf	*xp;
13712 
13713 	ASSERT(bp != NULL);
13714 	xp = SD_GET_XBUF(bp);
13715 	ASSERT(xp != NULL);
13716 
13717 	/*
13718 	 * Call bp_mapout() before freeing the buf,  in case a lower
13719 	 * layer or HBA  had done a bp_mapin().  we must do this here
13720 	 * as we are the "originator" of the shadow buf.
13721 	 */
13722 	bp_mapout(bp);
13723 
13724 	/*
13725 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13726 	 * never gets confused by a stale value in this field. (Just a little
13727 	 * extra defensiveness here.)
13728 	 */
13729 	bp->b_iodone = NULL;
13730 
13731 	freerbuf(bp);
13732 
13733 	kmem_free(xp, sizeof (struct sd_xbuf));
13734 }
13735 
13736 /*
13737  *    Function: sd_shadow_buf_free
13738  *
13739  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
13740  *
13741  *     Context: May be called under interrupt context
13742  */
13743 
13744 static void
sd_shadow_buf_free(struct buf * bp)13745 sd_shadow_buf_free(struct buf *bp)
13746 {
13747 	struct sd_xbuf	*xp;
13748 
13749 	ASSERT(bp != NULL);
13750 	xp = SD_GET_XBUF(bp);
13751 	ASSERT(xp != NULL);
13752 
13753 #if defined(__sparc)
13754 	/*
13755 	 * Call bp_mapout() before freeing the buf,  in case a lower
13756 	 * layer or HBA  had done a bp_mapin().  we must do this here
13757 	 * as we are the "originator" of the shadow buf.
13758 	 */
13759 	bp_mapout(bp);
13760 #endif
13761 
13762 	/*
13763 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13764 	 * never gets confused by a stale value in this field. (Just a little
13765 	 * extra defensiveness here.)
13766 	 */
13767 	bp->b_iodone = NULL;
13768 
13769 #if defined(__x86)
13770 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
13771 	freerbuf(bp);
13772 #else
13773 	scsi_free_consistent_buf(bp);
13774 #endif
13775 
13776 	kmem_free(xp, sizeof (struct sd_xbuf));
13777 }
13778 
13779 
13780 /*
13781  *    Function: sd_print_transport_rejected_message
13782  *
13783  * Description: This implements the ludicrously complex rules for printing
13784  *		a "transport rejected" message.  This is to address the
13785  *		specific problem of having a flood of this error message
13786  *		produced when a failover occurs.
13787  *
13788  *     Context: Any.
13789  */
13790 
13791 static void
sd_print_transport_rejected_message(struct sd_lun * un,struct sd_xbuf * xp,int code)13792 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13793     int code)
13794 {
13795 	ASSERT(un != NULL);
13796 	ASSERT(mutex_owned(SD_MUTEX(un)));
13797 	ASSERT(xp != NULL);
13798 
13799 	/*
13800 	 * Print the "transport rejected" message under the following
13801 	 * conditions:
13802 	 *
13803 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13804 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13805 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13806 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13807 	 *   scsi_transport(9F) (which indicates that the target might have
13808 	 *   gone off-line).  This uses the un->un_tran_fatal_count
13809 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
13810 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
13811 	 *   from scsi_transport().
13812 	 *
13813 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
13814 	 * the preceeding cases in order for the message to be printed.
13815 	 */
13816 	if (((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) &&
13817 	    (SD_FM_LOG(un) == SD_FM_LOG_NSUP)) {
13818 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
13819 		    (code != TRAN_FATAL_ERROR) ||
13820 		    (un->un_tran_fatal_count == 1)) {
13821 			switch (code) {
13822 			case TRAN_BADPKT:
13823 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13824 				    "transport rejected bad packet\n");
13825 				break;
13826 			case TRAN_FATAL_ERROR:
13827 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13828 				    "transport rejected fatal error\n");
13829 				break;
13830 			default:
13831 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13832 				    "transport rejected (%d)\n", code);
13833 				break;
13834 			}
13835 		}
13836 	}
13837 }
13838 
13839 
13840 /*
13841  *    Function: sd_add_buf_to_waitq
13842  *
13843  * Description: Add the given buf(9S) struct to the wait queue for the
13844  *		instance.  If sorting is enabled, then the buf is added
13845  *		to the queue via an elevator sort algorithm (a la
13846  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
13847  *		If sorting is not enabled, then the buf is just added
13848  *		to the end of the wait queue.
13849  *
13850  * Return Code: void
13851  *
13852  *     Context: Does not sleep/block, therefore technically can be called
13853  *		from any context.  However if sorting is enabled then the
13854  *		execution time is indeterminate, and may take long if
13855  *		the wait queue grows large.
13856  */
13857 
13858 static void
sd_add_buf_to_waitq(struct sd_lun * un,struct buf * bp)13859 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
13860 {
13861 	struct buf *ap;
13862 
13863 	ASSERT(bp != NULL);
13864 	ASSERT(un != NULL);
13865 	ASSERT(mutex_owned(SD_MUTEX(un)));
13866 
13867 	/* If the queue is empty, add the buf as the only entry & return. */
13868 	if (un->un_waitq_headp == NULL) {
13869 		ASSERT(un->un_waitq_tailp == NULL);
13870 		un->un_waitq_headp = un->un_waitq_tailp = bp;
13871 		bp->av_forw = NULL;
13872 		return;
13873 	}
13874 
13875 	ASSERT(un->un_waitq_tailp != NULL);
13876 
13877 	/*
13878 	 * If sorting is disabled, just add the buf to the tail end of
13879 	 * the wait queue and return.
13880 	 */
13881 	if (un->un_f_disksort_disabled || un->un_f_enable_rmw) {
13882 		un->un_waitq_tailp->av_forw = bp;
13883 		un->un_waitq_tailp = bp;
13884 		bp->av_forw = NULL;
13885 		return;
13886 	}
13887 
13888 	/*
13889 	 * Sort thru the list of requests currently on the wait queue
13890 	 * and add the new buf request at the appropriate position.
13891 	 *
13892 	 * The un->un_waitq_headp is an activity chain pointer on which
13893 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
13894 	 * first queue holds those requests which are positioned after
13895 	 * the current SD_GET_BLKNO() (in the first request); the second holds
13896 	 * requests which came in after their SD_GET_BLKNO() number was passed.
13897 	 * Thus we implement a one way scan, retracting after reaching
13898 	 * the end of the drive to the first request on the second
13899 	 * queue, at which time it becomes the first queue.
13900 	 * A one-way scan is natural because of the way UNIX read-ahead
13901 	 * blocks are allocated.
13902 	 *
13903 	 * If we lie after the first request, then we must locate the
13904 	 * second request list and add ourselves to it.
13905 	 */
13906 	ap = un->un_waitq_headp;
13907 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
13908 		while (ap->av_forw != NULL) {
13909 			/*
13910 			 * Look for an "inversion" in the (normally
13911 			 * ascending) block numbers. This indicates
13912 			 * the start of the second request list.
13913 			 */
13914 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
13915 				/*
13916 				 * Search the second request list for the
13917 				 * first request at a larger block number.
13918 				 * We go before that; however if there is
13919 				 * no such request, we go at the end.
13920 				 */
13921 				do {
13922 					if (SD_GET_BLKNO(bp) <
13923 					    SD_GET_BLKNO(ap->av_forw)) {
13924 						goto insert;
13925 					}
13926 					ap = ap->av_forw;
13927 				} while (ap->av_forw != NULL);
13928 				goto insert;		/* after last */
13929 			}
13930 			ap = ap->av_forw;
13931 		}
13932 
13933 		/*
13934 		 * No inversions... we will go after the last, and
13935 		 * be the first request in the second request list.
13936 		 */
13937 		goto insert;
13938 	}
13939 
13940 	/*
13941 	 * Request is at/after the current request...
13942 	 * sort in the first request list.
13943 	 */
13944 	while (ap->av_forw != NULL) {
13945 		/*
13946 		 * We want to go after the current request (1) if
13947 		 * there is an inversion after it (i.e. it is the end
13948 		 * of the first request list), or (2) if the next
13949 		 * request is a larger block no. than our request.
13950 		 */
13951 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
13952 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
13953 			goto insert;
13954 		}
13955 		ap = ap->av_forw;
13956 	}
13957 
13958 	/*
13959 	 * Neither a second list nor a larger request, therefore
13960 	 * we go at the end of the first list (which is the same
13961 	 * as the end of the whole schebang).
13962 	 */
13963 insert:
13964 	bp->av_forw = ap->av_forw;
13965 	ap->av_forw = bp;
13966 
13967 	/*
13968 	 * If we inserted onto the tail end of the waitq, make sure the
13969 	 * tail pointer is updated.
13970 	 */
13971 	if (ap == un->un_waitq_tailp) {
13972 		un->un_waitq_tailp = bp;
13973 	}
13974 }
13975 
13976 
13977 /*
13978  *    Function: sd_start_cmds
13979  *
13980  * Description: Remove and transport cmds from the driver queues.
13981  *
13982  *   Arguments: un - pointer to the unit (soft state) struct for the target.
13983  *
13984  *		immed_bp - ptr to a buf to be transported immediately. Only
13985  *		the immed_bp is transported; bufs on the waitq are not
13986  *		processed and the un_retry_bp is not checked.  If immed_bp is
13987  *		NULL, then normal queue processing is performed.
13988  *
13989  *     Context: May be called from kernel thread context, interrupt context,
13990  *		or runout callback context. This function may not block or
13991  *		call routines that block.
13992  */
13993 
static void
sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
{
	struct	sd_xbuf	*xp;
	struct	buf	*bp;
	void	(*statp)(kstat_io_t *);
#if defined(__x86)	/* DMAFREE for x86 only */
	void	(*saved_statp)(kstat_io_t *);
#endif
	int	rval;
	/* FMA private data; used to post an ereport on fatal transport errors */
	struct sd_fm_internal *sfip = NULL;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_ncmds_in_transport >= 0);
	ASSERT(un->un_throttle >= 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");

	do {
#if defined(__x86)	/* DMAFREE for x86 only */
		saved_statp = NULL;
#endif

		/*
		 * If we are syncing or dumping, fail the command to
		 * avoid recursively calling back into scsi_transport().
		 * The dump I/O itself uses a separate code path so this
		 * only prevents non-dump I/O from being sent while dumping.
		 * File system sync takes place before dumping begins.
		 * During panic, filesystem I/O is allowed provided
		 * un_in_callback is <= 1.  This is to prevent recursion
		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
		 * sd_start_cmds and so on.  See panic.c for more information
		 * about the states the system can be in during panic.
		 */
		if ((un->un_state == SD_STATE_DUMPING) ||
		    (ddi_in_panic() && (un->un_in_callback > 1))) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_start_cmds: panicking\n");
			goto exit;
		}

		if ((bp = immed_bp) != NULL) {
			/*
			 * We have a bp that must be transported immediately.
			 * It's OK to transport the immed_bp here without doing
			 * the throttle limit check because the immed_bp is
			 * always used in a retry/recovery case. This means
			 * that we know we are not at the throttle limit by
			 * virtue of the fact that to get here we must have
			 * already gotten a command back via sdintr(). This also
			 * relies on (1) the command on un_retry_bp preventing
			 * further commands from the waitq from being issued;
			 * and (2) the code in sd_retry_command checking the
			 * throttle limit before issuing a delayed or immediate
			 * retry. This holds even if the throttle limit is
			 * currently ratcheted down from its maximum value.
			 */
			statp = kstat_runq_enter;
			if (bp == un->un_retry_bp) {
				ASSERT((un->un_retry_statp == NULL) ||
				    (un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq));
				/*
				 * If the waitq kstat was incremented when
				 * sd_set_retry_bp() queued this bp for a retry,
				 * then we must set up statp so that the waitq
				 * count will get decremented correctly below.
				 * Also we must clear un->un_retry_statp to
				 * ensure that we do not act on a stale value
				 * in this field.
				 */
				if ((un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq)) {
					statp = kstat_waitq_to_runq;
				}
#if defined(__x86)	/* DMAFREE for x86 only */
				saved_statp = un->un_retry_statp;
#endif
				un->un_retry_statp = NULL;

				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
				    "un_throttle:%d un_ncmds_in_transport:%d\n",
				    un, un->un_retry_bp, un->un_throttle,
				    un->un_ncmds_in_transport);
			} else {
				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
				    "processing priority bp:0x%p\n", bp);
			}

		} else if ((bp = un->un_waitq_headp) != NULL) {
			/*
			 * A command on the waitq is ready to go, but do not
			 * send it if:
			 *
			 * (1) the throttle limit has been reached, or
			 * (2) a retry is pending, or
			 * (3) a START_STOP_UNIT callback pending, or
			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
			 *	command is pending.
			 *
			 * For all of these conditions, IO processing will
			 * restart after the condition is cleared.
			 */
			if (un->un_ncmds_in_transport >= un->un_throttle) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "throttle limit reached!\n");
				goto exit;
			}
			if (un->un_retry_bp != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, retry pending!\n");
				goto exit;
			}
			if (un->un_startstop_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "START_STOP pending!\n");
				goto exit;
			}
			if (un->un_direct_priority_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
				goto exit;
			}

			/* Dequeue the command */
			un->un_waitq_headp = bp->av_forw;
			if (un->un_waitq_headp == NULL) {
				un->un_waitq_tailp = NULL;
			}
			bp->av_forw = NULL;
			statp = kstat_waitq_to_runq;
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);

		} else {
			/* No work to do so bail out now */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: no more work, exiting!\n");
			goto exit;
		}

		/*
		 * Reset the state to normal. This is the mechanism by which
		 * the state transitions from either SD_STATE_RWAIT or
		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
		 * If state is SD_STATE_PM_CHANGING then this command is
		 * part of the device power control and the state must
		 * not be put back to normal. Doing so would allow new
		 * commands to proceed when they shouldn't, the device
		 * may be going off.
		 */
		if ((un->un_state != SD_STATE_SUSPENDED) &&
		    (un->un_state != SD_STATE_PM_CHANGING)) {
			New_state(un, SD_STATE_NORMAL);
		}

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

#if defined(__x86)	/* DMAFREE for x86 only */
		/*
		 * Allocate the scsi_pkt if we need one, or attach DMA
		 * resources if we have a scsi_pkt that needs them. The
		 * latter should only occur for commands that are being
		 * retried.
		 */
		if ((xp->xb_pktp == NULL) ||
		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
#else
		if (xp->xb_pktp == NULL) {
#endif
			/*
			 * There is no scsi_pkt allocated for this buf. Call
			 * the initpkt function to allocate & init one.
			 *
			 * The scsi_init_pkt runout callback functionality is
			 * implemented as follows:
			 *
			 * 1) The initpkt function always calls
			 *    scsi_init_pkt(9F) with sdrunout specified as the
			 *    callback routine.
			 * 2) A successful packet allocation is initialized and
			 *    the I/O is transported.
			 * 3) The I/O associated with an allocation resource
			 *    failure is left on its queue to be retried via
			 *    runout or the next I/O.
			 * 4) The I/O associated with a DMA error is removed
			 *    from the queue and failed with EIO. Processing of
			 *    the transport queues is also halted to be
			 *    restarted via runout or the next I/O.
			 * 5) The I/O associated with a CDB size or packet
			 *    size error is removed from the queue and failed
			 *    with EIO. Processing of the transport queues is
			 *    continued.
			 *
			 * Note: there is no interface for canceling a runout
			 * callback. To prevent the driver from detaching or
			 * suspending while a runout is pending the driver
			 * state is set to SD_STATE_RWAIT
			 *
			 * Note: using the scsi_init_pkt callback facility can
			 * result in an I/O request persisting at the head of
			 * the list which cannot be satisfied even after
			 * multiple retries. In the future the driver may
			 * implement some kind of maximum runout count before
			 * failing an I/O.
			 *
			 * Note: the use of funcp below may seem superfluous,
			 * but it helps warlock figure out the correct
			 * initpkt function calls (see [s]sd.wlcmd).
			 */
			struct scsi_pkt	*pktp;
			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);

			ASSERT(bp != un->un_rqs_bp);

			funcp = sd_initpkt_map[xp->xb_chain_iostart];
			switch ((*funcp)(bp, &pktp)) {
			case  SD_PKT_ALLOC_SUCCESS:
				xp->xb_pktp = pktp;
				SD_TRACE(SD_LOG_IO_CORE, un,
				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
				    pktp);
				goto got_pkt;

			case SD_PKT_ALLOC_FAILURE:
				/*
				 * Temporary (hopefully) resource depletion.
				 * Since retries and RQS commands always have a
				 * scsi_pkt allocated, these cases should never
				 * get here. So the only cases this needs to
				 * handle is a bp from the waitq (which we put
				 * back onto the waitq for sdrunout), or a bp
				 * sent as an immed_bp (which we just fail).
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");

#if defined(__x86)	/* DMAFREE for x86 only */

				if (bp == immed_bp) {
					/*
					 * If SD_XB_DMA_FREED is clear, then
					 * this is a failure to allocate a
					 * scsi_pkt, and we must fail the
					 * command.
					 */
					if ((xp->xb_pkt_flags &
					    SD_XB_DMA_FREED) == 0) {
						break;
					}

					/*
					 * If this immediate command is NOT our
					 * un_retry_bp, then we must fail it.
					 */
					if (bp != un->un_retry_bp) {
						break;
					}

					/*
					 * We get here if this cmd is our
					 * un_retry_bp that was DMAFREED, but
					 * scsi_init_pkt() failed to reallocate
					 * DMA resources when we attempted to
					 * retry it. This can happen when an
					 * mpxio failover is in progress, but
					 * we don't want to just fail the
					 * command in this case.
					 *
					 * Use timeout(9F) to restart it after
					 * a 100ms delay.  We don't want to
					 * let sdrunout() restart it, because
					 * sdrunout() is just supposed to start
					 * commands that are sitting on the
					 * wait queue.  The un_retry_bp stays
					 * set until the command completes, but
					 * sdrunout can be called many times
					 * before that happens.  Since sdrunout
					 * cannot tell if the un_retry_bp is
					 * already in the transport, it could
					 * end up calling scsi_transport() for
					 * the un_retry_bp multiple times.
					 *
					 * Also: don't schedule the callback
					 * if some other callback is already
					 * pending.
					 */
					if (un->un_retry_statp == NULL) {
						/*
						 * restore the kstat pointer to
						 * keep kstat counts coherent
						 * when we do retry the command.
						 */
						un->un_retry_statp =
						    saved_statp;
					}

					if ((un->un_startstop_timeid == NULL) &&
					    (un->un_retry_timeid == NULL) &&
					    (un->un_direct_priority_timeid ==
					    NULL)) {

						un->un_retry_timeid =
						    timeout(
						    sd_start_retry_command,
						    un, SD_RESTART_TIMEOUT);
					}
					goto exit;
				}

#else
				if (bp == immed_bp) {
					break;	/* Just fail the command */
				}
#endif

				/* Add the buf back to the head of the waitq */
				bp->av_forw = un->un_waitq_headp;
				un->un_waitq_headp = bp;
				if (un->un_waitq_tailp == NULL) {
					un->un_waitq_tailp = bp;
				}
				goto exit;

			case SD_PKT_ALLOC_FAILURE_NO_DMA:
				/*
				 * HBA DMA resource failure. Fail the command
				 * and continue processing of the queues.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
				break;

			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
				/*
				 * Note:x86: Partial DMA mapping not supported
				 * for USCSI commands, and all the needed DMA
				 * resources were not allocated.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
				break;

			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
				/*
				 * Note:x86: Request cannot fit into CDB based
				 * on lba and len.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
				break;

			default:
				/* Should NEVER get here! */
				panic("scsi_initpkt error");
				/*NOTREACHED*/
			}

			/*
			 * Fatal error in allocating a scsi_pkt for this buf.
			 * Update kstats & return the buf with an error code.
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			if (statp == kstat_waitq_to_runq) {
				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
			}
			sd_return_failed_command_no_restart(un, bp, EIO);
			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			continue;
		}
got_pkt:
		if (bp == immed_bp) {
			/* goto the head of the class.... */
			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
		}

		un->un_ncmds_in_transport++;
		SD_UPDATE_KSTATS(un, statp, bp);

		/*
		 * Call scsi_transport() to send the command to the target.
		 * According to SCSA architecture, we must drop the mutex here
		 * before calling scsi_transport() in order to avoid deadlock.
		 * Note that the scsi_pkt's completion routine can be executed
		 * (from interrupt context) even before the call to
		 * scsi_transport() returns.
		 */
		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_start_cmds: calling scsi_transport()\n");
		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);

		mutex_exit(SD_MUTEX(un));
		rval = scsi_transport(xp->xb_pktp);
		mutex_enter(SD_MUTEX(un));

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_start_cmds: scsi_transport() returned %d\n", rval);

		switch (rval) {
		case TRAN_ACCEPT:
			/* Clear this with every pkt accepted by the HBA */
			un->un_tran_fatal_count = 0;
			break;	/* Success; try the next cmd (if any) */

		case TRAN_BUSY:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * Don't retry request sense, the sense data
			 * is lost when another request is sent.
			 * Free up the rqs buf and retry
			 * the original failed cmd.  Update kstat.
			 */
			if (bp == un->un_rqs_bp) {
				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				bp = sd_mark_rqs_idle(un, xp);
				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
				    NULL, NULL, EIO, un->un_busy_timeout / 500,
				    kstat_waitq_enter);
				goto exit;
			}

#if defined(__x86)	/* DMAFREE for x86 only */
			/*
			 * Free the DMA resources for the  scsi_pkt. This will
			 * allow mpxio to select another path the next time
			 * we call scsi_transport() with this scsi_pkt.
			 * See sdintr() for the rationalization behind this.
			 */
			if ((un->un_f_is_fibre == TRUE) &&
			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
				scsi_dmafree(xp->xb_pktp);
				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
			}
#endif

			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
				/*
				 * Commands that are SD_PATH_DIRECT_PRIORITY
				 * are for error recovery situations. These do
				 * not use the normal command waitq, so if they
				 * get a TRAN_BUSY we cannot put them back onto
				 * the waitq for later retry. One possible
				 * problem is that there could already be some
				 * other command on un_retry_bp that is waiting
				 * for this one to complete, so we would be
				 * deadlocked if we put this command back onto
				 * the waitq for later retry (since un_retry_bp
				 * must complete before the driver gets back to
				 * commands on the waitq).
				 *
				 * To avoid deadlock we must schedule a callback
				 * that will restart this command after a set
				 * interval.  This should keep retrying for as
				 * long as the underlying transport keeps
				 * returning TRAN_BUSY (just like for other
				 * commands).  Use the same timeout interval as
				 * for the ordinary TRAN_BUSY retry.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: scsi_transport() returned "
				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");

				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				un->un_direct_priority_timeid =
				    timeout(sd_start_direct_priority_command,
				    bp, un->un_busy_timeout / 500);

				goto exit;
			}

			/*
			 * For TRAN_BUSY, we want to reduce the throttle value,
			 * unless we are retrying a command.
			 */
			if (bp != un->un_retry_bp) {
				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
			}

			/*
			 * Set up the bp to be tried again 10 ms later.
			 * Note:x86: Is there a timeout value in the sd_lun
			 * for this condition?
			 */
			sd_set_retry_bp(un, bp, un->un_busy_timeout / 500,
			    kstat_runq_back_to_waitq);
			goto exit;

		case TRAN_FATAL_ERROR:
			un->un_tran_fatal_count++;
			/* FALLTHRU */

		case TRAN_BADPKT:
		default:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * If this is our REQUEST SENSE command with a
			 * transport error, we must get back the pointers
			 * to the original buf, and mark the REQUEST
			 * SENSE command as "available".
			 */
			if (bp == un->un_rqs_bp) {
				bp = sd_mark_rqs_idle(un, xp);
				xp = SD_GET_XBUF(bp);
			} else {
				/*
				 * Legacy behavior: do not update transport
				 * error count for request sense commands.
				 */
				SD_UPDATE_ERRSTATS(un, sd_transerrs);
			}

			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
			sd_print_transport_rejected_message(un, xp, rval);

			/*
			 * This command will be terminated by SD driver due
			 * to a fatal transport error. We should post
			 * ereport.io.scsi.cmd.disk.tran with driver-assessment
			 * of "fail" for any command to indicate this
			 * situation.
			 */
			if (xp->xb_ena > 0) {
				ASSERT(un->un_fm_private != NULL);
				sfip = un->un_fm_private;
				sfip->fm_ssc.ssc_flags |= SSC_FLAGS_TRAN_ABORT;
				sd_ssc_extract_info(&sfip->fm_ssc, un,
				    xp->xb_pktp, bp, xp);
				sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
			}

			/*
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			sd_return_failed_command_no_restart(un, bp, EIO);

			/*
			 * Notify any threads waiting in sd_ddi_suspend() that
			 * a command completion has occurred.
			 */
			if (un->un_state == SD_STATE_SUSPENDED) {
				cv_broadcast(&un->un_disk_busy_cv);
			}

			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			break;
		}

	} while (immed_bp == NULL);

	/*
	 * Common exit: every "goto exit" path lands here with SD_MUTEX(un)
	 * still held, as asserted below.
	 */
exit:
	ASSERT(mutex_owned(SD_MUTEX(un)));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
}
14576 
14577 
14578 /*
14579  *    Function: sd_return_command
14580  *
14581  * Description: Returns a command to its originator (with or without an
14582  *		error).  Also starts commands waiting to be transported
14583  *		to the target.
14584  *
14585  *     Context: May be called from interrupt, kernel, or timeout context
14586  */
14587 
static void
sd_return_command(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;
	struct scsi_pkt *pktp;
	struct sd_fm_internal *sfip;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	/* The internal REQUEST SENSE buf is never returned this way */
	ASSERT(bp != un->un_rqs_bp);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	pktp = SD_GET_PKTP(bp);
	sfip = (struct sd_fm_internal *)un->un_fm_private;
	ASSERT(sfip != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");

	/*
	 * Note: check for the "sdrestart failed" case.
	 *
	 * If partial DMA is supported, the command completed without error,
	 * and there is still untransferred residual (xb_dma_resid != 0)
	 * while the pkt itself reports no residual, then only part of the
	 * transfer has been done: set up and send the next portion instead
	 * of completing the buf.  USCSI commands are excluded.
	 */
	if ((un->un_partial_dma_supported == 1) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
	    (xp->xb_pktp->pkt_resid == 0)) {

		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
			/*
			 * Successfully set up next portion of cmd
			 * transfer, try sending it
			 */
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, (clock_t)0, NULL);
			sd_start_cmds(un, NULL);
			return;	/* Note:x86: need a return here? */
		}
	}

	/*
	 * If this is the failfast bp, clear it from un_failfast_bp. This
	 * can happen if upon being re-tried the failfast bp either
	 * succeeded or encountered another error (possibly even a different
	 * error than the one that precipitated the failfast state, but in
	 * that case it would have had to exhaust retries as well). Regardless,
	 * this should not occur whenever the instance is in the active
	 * failfast state.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	/*
	 * Clear the failfast state upon successful completion of ANY cmd.
	 */
	if (bp->b_error == 0) {
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
		/*
		 * If this is a successful command, but used to be retried,
		 * we will take it as a recovered command and post an
		 * ereport with driver-assessment of "recovered".
		 */
		if (xp->xb_ena > 0) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RECOVERY);
		}
	} else {
		/*
		 * If this is a failed non-USCSI command we will post an
		 * ereport with driver-assessment set accordingly("fail" or
		 * "fatal").
		 */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
		}
	}

	/*
	 * This is used if the command was retried one or more times. Show that
	 * we are done with it, and allow processing of the waitq to resume.
	 */
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_command: un:0x%p: "
		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
		/*
		 * Notify any threads waiting in sd_ddi_suspend() that
		 * a command completion has occurred.
		 */
		cv_broadcast(&un->un_disk_busy_cv);
		break;
	default:
		sd_start_cmds(un, NULL);
		break;
	}

	/*
	 * Return this command up the iodone chain to its originator.
	 * The mutex must be dropped here: the destroypkt routine and the
	 * iodone chain may block or re-enter the driver.  xb_pktp is
	 * cleared so the xbuf no longer references the destroyed pkt.
	 */
	mutex_exit(SD_MUTEX(un));

	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
	xp->xb_pktp = NULL;

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
}
14709 
14710 
14711 /*
14712  *    Function: sd_return_failed_command
14713  *
14714  * Description: Command completion when an error occurred.
14715  *
14716  *     Context: May be called from interrupt context
14717  */
14718 
14719 static void
14720 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
14721 {
14722 	ASSERT(bp != NULL);
14723 	ASSERT(un != NULL);
14724 	ASSERT(mutex_owned(SD_MUTEX(un)));
14725 
14726 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14727 	    "sd_return_failed_command: entry\n");
14728 
14729 	/*
14730 	 * b_resid could already be nonzero due to a partial data
14731 	 * transfer, so do not change it here.
14732 	 */
14733 	SD_BIOERROR(bp, errcode);
14734 
14735 	sd_return_command(un, bp);
14736 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14737 	    "sd_return_failed_command: exit\n");
14738 }
14739 
14740 
14741 /*
14742  *    Function: sd_return_failed_command_no_restart
14743  *
14744  * Description: Same as sd_return_failed_command, but ensures that no
14745  *		call back into sd_start_cmds will be issued.
14746  *
14747  *     Context: May be called from interrupt context
14748  */
14749 
static void
sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
    int errcode)
{
	struct sd_xbuf *xp;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(errcode != 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: entry\n");

	/*
	 * b_resid could already be nonzero due to a partial data
	 * transfer, so do not change it here.
	 */
	SD_BIOERROR(bp, errcode);

	/*
	 * If this is the failfast bp, clear it. This can happen if the
	 * failfast bp encountered a fatal error when we attempted to
	 * re-try it (such as a scsi_transport(9F) failure).  However
	 * we should NOT be in an active failfast state if the failfast
	 * bp is not NULL.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	if (bp == un->un_retry_bp) {
		/*
		 * This command was retried one or more times. Show that we are
		 * done with it, and allow processing of the waitq to resume.
		 */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_failed_command_no_restart: "
		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	/*
	 * Drop the softstate mutex before destroying the pkt and running
	 * the iodone chain; those may block or call back into the driver.
	 * Unlike sd_return_command(), no call to sd_start_cmds() is made.
	 */
	mutex_exit(SD_MUTEX(un));

	/*
	 * The pkt may be NULL here if the command failed before a pkt was
	 * ever allocated (e.g. an early transport setup failure).
	 */
	if (xp->xb_pktp != NULL) {
		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
		xp->xb_pktp = NULL;
	}

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: exit\n");
}
14813 
14814 
14815 /*
14816  *    Function: sd_retry_command
14817  *
14818  * Description: queue up a command for retry, or (optionally) fail it
14819  *		if retry counts are exhausted.
14820  *
14821  *   Arguments: un - Pointer to the sd_lun struct for the target.
14822  *
14823  *		bp - Pointer to the buf for the command to be retried.
14824  *
14825  *		retry_check_flag - Flag to see which (if any) of the retry
14826  *		   counts should be decremented/checked. If the indicated
14827  *		   retry count is exhausted, then the command will not be
14828  *		   retried; it will be failed instead. This should use a
14829  *		   value equal to one of the following:
14830  *
14831  *			SD_RETRIES_NOCHECK
 *			SD_RETRIES_STANDARD
14833  *			SD_RETRIES_VICTIM
14834  *
14835  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
 *		   if the check should be made to see if FLAG_ISOLATE is set
14837  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
14838  *		   not retried, it is simply failed.
14839  *
14840  *		user_funcp - Ptr to function to call before dispatching the
14841  *		   command. May be NULL if no action needs to be performed.
14842  *		   (Primarily intended for printing messages.)
14843  *
14844  *		user_arg - Optional argument to be passed along to
14845  *		   the user_funcp call.
14846  *
14847  *		failure_code - errno return code to set in the bp if the
14848  *		   command is going to be failed.
14849  *
14850  *		retry_delay - Retry delay interval in (clock_t) units. May
14851  *		   be zero which indicates that the retry should be retried
14852  *		   immediately (ie, without an intervening delay).
14853  *
14854  *		statp - Ptr to kstat function to be updated if the command
14855  *		   is queued for a delayed retry. May be NULL if no kstat
14856  *		   update is desired.
14857  *
14858  *     Context: May be called from interrupt context.
14859  */
14860 
static void
sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
    void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int code),
    void *user_arg, int failure_code, clock_t retry_delay,
    void (*statp)(kstat_io_t *))
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_fm_internal *sfip;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	sfip = (struct sd_fm_internal *)un->un_fm_private;
	ASSERT(sfip != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If we are syncing or dumping, fail the command to avoid
	 * recursively calling back into scsi_transport().
	 */
	if (ddi_in_panic()) {
		goto fail_command_no_log;
	}

	/*
	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
	 * log an error and fail the command.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
		sd_dump_memory(un, SD_LOG_IO, "CDB",
		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		goto fail_command;
	}

	/*
	 * If we are suspended, then put the command onto head of the
	 * wait queue since we don't want to start more commands, and
	 * clear the un_retry_bp. Next time when we are resumed, will
	 * handle the command in the wait queue.
	 */
	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
	case SD_STATE_DUMPING:
		bp->av_forw = un->un_waitq_headp;
		un->un_waitq_headp = bp;
		if (un->un_waitq_tailp == NULL) {
			un->un_waitq_tailp = bp;
		}
		if (bp == un->un_retry_bp) {
			un->un_retry_bp = NULL;
			un->un_retry_statp = NULL;
		}
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
		return;
	default:
		break;
	}

	/*
	 * If the caller wants us to check FLAG_ISOLATE, then see if that
	 * is set; if it is then we do not want to retry the command.
	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
	 */
	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
			goto fail_command;
		}
	}


	/*
	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
	 * command timeout or a selection timeout has occurred. This means
	 * that we were unable to establish any kind of communication with
	 * the target, and subsequent retries and/or commands are likely
	 * to encounter similar results and take a long time to complete.
	 *
	 * If this is a failfast error condition, we need to update the
	 * failfast state, even if this bp does not have B_FAILFAST set.
	 */
	if (retry_check_flag & SD_RETRIES_FAILFAST) {
		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
			ASSERT(un->un_failfast_bp == NULL);
			/*
			 * If we are already in the active failfast state, and
			 * another failfast error condition has been detected,
			 * then fail this command if it has B_FAILFAST set.
			 * If B_FAILFAST is clear, then maintain the legacy
			 * behavior of retrying heroically, even though this
			 * will take a lot more time to fail the command.
			 */
			if (bp->b_flags & B_FAILFAST) {
				goto fail_command;
			}
		} else {
			/*
			 * We're not in the active failfast state, but we
			 * have a failfast error condition, so we must begin
			 * transition to the next state. We do this regardless
			 * of whether or not this bp has B_FAILFAST set.
			 */
			if (un->un_failfast_bp == NULL) {
				/*
				 * This is the first bp to meet a failfast
				 * condition so save it on un_failfast_bp &
				 * do normal retry processing. Do not enter
				 * active failfast state yet. This marks
				 * entry into the "failfast pending" state.
				 */
				un->un_failfast_bp = bp;

			} else if (un->un_failfast_bp == bp) {
				/*
				 * This is the second time *this* bp has
				 * encountered a failfast error condition,
				 * so enter active failfast state & flush
				 * queues as appropriate.
				 */
				un->un_failfast_state = SD_FAILFAST_ACTIVE;
				un->un_failfast_bp = NULL;
				sd_failfast_flushq(un);

				/*
				 * Fail this bp now if B_FAILFAST set;
				 * otherwise continue with retries. (It would
				 * be pretty ironic if this bp succeeded on a
				 * subsequent retry after we just flushed all
				 * the queues).
				 */
				if (bp->b_flags & B_FAILFAST) {
					goto fail_command;
				}

#if !defined(lint) && !defined(__lint)
			} else {
				/*
				 * If neither of the preceding conditionals
				 * was true, it means that there is some
				 * *other* bp that has met an initial failfast
				 * condition and is currently either being
				 * retried or is waiting to be retried. In
				 * that case we should perform normal retry
				 * processing on *this* bp, since there is a
				 * chance that the current failfast condition
				 * is transient and recoverable. If that does
				 * not turn out to be the case, then retries
				 * will be cleared when the wait queue is
				 * flushed anyway.
				 */
#endif
			}
		}
	} else {
		/*
		 * SD_RETRIES_FAILFAST is clear, which indicates that we
		 * likely were able to at least establish some level of
		 * communication with the target and subsequent commands
		 * and/or retries are likely to get through to the target,
		 * In this case we want to be aggressive about clearing
		 * the failfast state. Note that this does not affect
		 * the "failfast pending" condition.
		 */
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}


	/*
	 * Check the specified retry count to see if we can still do
	 * any retries with this pkt before we should fail it.
	 */
	switch (retry_check_flag & SD_RETRIES_MASK) {
	case SD_RETRIES_VICTIM:
		/*
		 * Check the victim retry count. If exhausted, then fall
		 * thru & check against the standard retry count.
		 */
		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
			/* Increment count & proceed with the retry */
			xp->xb_victim_retry_count++;
			break;
		}
		/* Victim retries exhausted, fall back to std. retries... */
		/* FALLTHRU */

	case SD_RETRIES_STANDARD:
		if (xp->xb_retry_count >= un->un_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			/*
			 * update b_resid for failed SCMD_READ & SCMD_WRITE
			 * commands with nonzero pkt_resid.
			 */
			if ((pktp->pkt_reason == CMD_CMPLT) &&
			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
			    (pktp->pkt_resid != 0)) {
				/* Low 5 bits of the opcode (group 0 cmds) */
				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
					SD_UPDATE_B_RESID(bp, pktp);
				}
			}
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_UA:
		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
			/* Retries exhausted, fail the command */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unit Attention retries exhausted. "
			    "Check the target.\n");
			goto fail_command;
		}
		xp->xb_ua_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n",
		    xp->xb_ua_retry_count);
		break;

	case SD_RETRIES_BUSY:
		if (xp->xb_retry_count >= un->un_busy_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_NOCHECK:
	default:
		/* No retry count to check. Just proceed with the retry */
		break;
	}

	/* Transport the retry at the head of the HBA queue. */
	xp->xb_pktp->pkt_flags |= FLAG_HEAD;

	/*
	 * If this is a non-USCSI command being retried
	 * during execution last time, we should post an ereport with
	 * driver-assessment of the value "retry".
	 * For partial DMA, request sense and STATUS_QFULL, there are no
	 * hardware errors, we bypass ereport posting.
	 */
	if (failure_code != 0) {
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RETRY);
		}
	}

	/*
	 * If we were given a zero timeout, we must attempt to retry the
	 * command immediately (ie, without a delay).
	 */
	if (retry_delay == 0) {
		/*
		 * Check some limiting conditions to see if we can actually
		 * do the immediate retry.  If we cannot, then we must
		 * fall back to queueing up a delayed retry.
		 */
		if (un->un_ncmds_in_transport >= un->un_throttle) {
			/*
			 * We are at the throttle limit for the target,
			 * fall back to delayed retry.
			 */
			retry_delay = un->un_busy_timeout;
			statp = kstat_waitq_enter;
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: immed. retry hit "
			    "throttle!\n");
		} else {
			/*
			 * We're clear to proceed with the immediate retry.
			 * First call the user-provided function (if any)
			 */
			if (user_funcp != NULL) {
				(*user_funcp)(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#ifdef __lock_lint
				sd_print_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_cmd_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_sense_failed_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#endif
			}

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: issuing immediate retry\n");

			/*
			 * Call sd_start_cmds() to transport the command to
			 * the target.
			 */
			sd_start_cmds(un, bp);

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command exit\n");
			return;
		}
	}

	/*
	 * Set up to retry the command after a delay.
	 * First call the user-provided function (if any)
	 */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
	}

	sd_set_retry_bp(un, bp, retry_delay, statp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
	return;

fail_command:

	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
	}

fail_command_no_log:

	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_retry_command: returning failed command\n");

	sd_return_failed_command(un, bp, failure_code);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
}
15212 
15213 
15214 /*
15215  *    Function: sd_set_retry_bp
15216  *
15217  * Description: Set up the given bp for retry.
15218  *
15219  *   Arguments: un - ptr to associated softstate
15220  *		bp - ptr to buf(9S) for the command
15221  *		retry_delay - time interval before issuing retry (may be 0)
15222  *		statp - optional pointer to kstat function
15223  *
15224  *     Context: May be called under interrupt context
15225  */
15226 
static void
sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
    void (*statp)(kstat_io_t *))
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);

	/*
	 * Indicate that the command is being retried. This will not allow any
	 * other commands on the wait queue to be transported to the target
	 * until this command has been completed (success or failure). The
	 * "retry command" is not transported to the target until the given
	 * time delay expires, unless the user specified a 0 retry_delay.
	 *
	 * Note: the timeout(9F) callback routine is what actually calls
	 * sd_start_cmds() to transport the command, with the exception of a
	 * zero retry_delay. The only current implementor of a zero retry delay
	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
	 */
	if (un->un_retry_bp == NULL) {
		ASSERT(un->un_retry_statp == NULL);
		un->un_retry_bp = bp;

		/*
		 * If the user has not specified a delay the command should
		 * be queued and no timeout should be scheduled.
		 */
		if (retry_delay == 0) {
			/*
			 * Save the kstat pointer that will be used in the
			 * call to SD_UPDATE_KSTATS() below, so that
			 * sd_start_cmds() can correctly decrement the waitq
			 * count when it is time to transport this command.
			 */
			un->un_retry_statp = statp;
			goto done;
		}
	}

	if (un->un_retry_bp == bp) {
		/*
		 * Save the kstat pointer that will be used in the call to
		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
		 * correctly decrement the waitq count when it is time to
		 * transport this command.
		 */
		un->un_retry_statp = statp;

		/*
		 * Schedule a timeout if:
		 *   1) The user has specified a delay.
		 *   2) There is not a START_STOP_UNIT callback pending.
		 *
		 * If no delay has been specified, then it is up to the caller
		 * to ensure that IO processing continues without stalling.
		 * Effectively, this means that the caller will issue the
		 * required call to sd_start_cmds(). The START_STOP_UNIT
		 * callback does this after the START STOP UNIT command has
		 * completed. In either of these cases we should not schedule
		 * a timeout callback here.  Also don't schedule the timeout if
		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
		 */
		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
		    (un->un_direct_priority_timeid == NULL)) {
			un->un_retry_timeid =
			    timeout(sd_start_retry_command, un, retry_delay);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_set_retry_bp: setting timeout: un: 0x%p"
			    " bp:0x%p un_retry_timeid:0x%p\n",
			    un, bp, un->un_retry_timeid);
		}
	} else {
		/*
		 * We only get in here if there is already another command
		 * waiting to be retried.  In this case, we just put the
		 * given command onto the wait queue, so it can be transported
		 * after the current retry command has completed.
		 *
		 * Also we have to make sure that if the command at the head
		 * of the wait queue is the un_failfast_bp, that we do not
		 * put ahead of it any other commands that are to be retried.
		 */
		if ((un->un_failfast_bp != NULL) &&
		    (un->un_failfast_bp == un->un_waitq_headp)) {
			/*
			 * Enqueue this command AFTER the first command on
			 * the wait queue (which is also un_failfast_bp).
			 */
			bp->av_forw = un->un_waitq_headp->av_forw;
			un->un_waitq_headp->av_forw = bp;
			if (un->un_waitq_headp == un->un_waitq_tailp) {
				un->un_waitq_tailp = bp;
			}
		} else {
			/* Enqueue this command at the head of the waitq. */
			bp->av_forw = un->un_waitq_headp;
			un->un_waitq_headp = bp;
			if (un->un_waitq_tailp == NULL) {
				un->un_waitq_tailp = bp;
			}
		}

		/* Default to a waitq-enter kstat update for queued bufs. */
		if (statp == NULL) {
			statp = kstat_waitq_enter;
		}
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
	}

done:
	/* Update the per-device kstats (if any) for this bp's new state. */
	if (statp != NULL) {
		SD_UPDATE_KSTATS(un, statp, bp);
	}

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: exit un:0x%p\n", un);
}
15348 
15349 
15350 /*
15351  *    Function: sd_start_retry_command
15352  *
15353  * Description: Start the command that has been waiting on the target's
15354  *		retry queue.  Called from timeout(9F) context after the
15355  *		retry delay interval has expired.
15356  *
15357  *   Arguments: arg - pointer to associated softstate for the device.
15358  *
15359  *     Context: timeout(9F) thread context.  May not sleep.
15360  */
15361 
15362 static void
15363 sd_start_retry_command(void *arg)
15364 {
15365 	struct sd_lun *un = arg;
15366 
15367 	ASSERT(un != NULL);
15368 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15369 
15370 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15371 	    "sd_start_retry_command: entry\n");
15372 
15373 	mutex_enter(SD_MUTEX(un));
15374 
15375 	un->un_retry_timeid = NULL;
15376 
15377 	if (un->un_retry_bp != NULL) {
15378 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15379 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15380 		    un, un->un_retry_bp);
15381 		sd_start_cmds(un, un->un_retry_bp);
15382 	}
15383 
15384 	mutex_exit(SD_MUTEX(un));
15385 
15386 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15387 	    "sd_start_retry_command: exit\n");
15388 }
15389 
15390 /*
15391  *    Function: sd_rmw_msg_print_handler
15392  *
15393  * Description: If RMW mode is enabled and warning message is triggered
15394  *              print I/O count during a fixed interval.
15395  *
15396  *   Arguments: arg - pointer to associated softstate for the device.
15397  *
15398  *     Context: timeout(9F) thread context. May not sleep.
15399  */
15400 static void
15401 sd_rmw_msg_print_handler(void *arg)
15402 {
15403 	struct sd_lun *un = arg;
15404 
15405 	ASSERT(un != NULL);
15406 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15407 
15408 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15409 	    "sd_rmw_msg_print_handler: entry\n");
15410 
15411 	mutex_enter(SD_MUTEX(un));
15412 
15413 	if (un->un_rmw_incre_count > 0) {
15414 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15415 		    "%"PRIu64" I/O requests are not aligned with %d disk "
15416 		    "sector size in %ld seconds. They are handled through "
15417 		    "Read Modify Write but the performance is very low!\n",
15418 		    un->un_rmw_incre_count, un->un_tgt_blocksize,
15419 		    drv_hztousec(SD_RMW_MSG_PRINT_TIMEOUT) / 1000000);
15420 		un->un_rmw_incre_count = 0;
15421 		un->un_rmw_msg_timeid = timeout(sd_rmw_msg_print_handler,
15422 		    un, SD_RMW_MSG_PRINT_TIMEOUT);
15423 	} else {
15424 		un->un_rmw_msg_timeid = NULL;
15425 	}
15426 
15427 	mutex_exit(SD_MUTEX(un));
15428 
15429 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15430 	    "sd_rmw_msg_print_handler: exit\n");
15431 }
15432 
15433 /*
15434  *    Function: sd_start_direct_priority_command
15435  *
15436  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15437  *		received TRAN_BUSY when we called scsi_transport() to send it
15438  *		to the underlying HBA. This function is called from timeout(9F)
15439  *		context after the delay interval has expired.
15440  *
15441  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15442  *
15443  *     Context: timeout(9F) thread context.  May not sleep.
15444  */
15445 
15446 static void
15447 sd_start_direct_priority_command(void *arg)
15448 {
15449 	struct buf	*priority_bp = arg;
15450 	struct sd_lun	*un;
15451 
15452 	ASSERT(priority_bp != NULL);
15453 	un = SD_GET_UN(priority_bp);
15454 	ASSERT(un != NULL);
15455 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15456 
15457 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15458 	    "sd_start_direct_priority_command: entry\n");
15459 
15460 	mutex_enter(SD_MUTEX(un));
15461 	un->un_direct_priority_timeid = NULL;
15462 	sd_start_cmds(un, priority_bp);
15463 	mutex_exit(SD_MUTEX(un));
15464 
15465 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15466 	    "sd_start_direct_priority_command: exit\n");
15467 }
15468 
15469 
15470 /*
15471  *    Function: sd_send_request_sense_command
15472  *
15473  * Description: Sends a REQUEST SENSE command to the target
15474  *
15475  *     Context: May be called from interrupt context.
15476  */
15477 
static void
sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
    struct scsi_pkt *pktp)
{
	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
	    "entry: buf:0x%p\n", bp);

	/*
	 * If we are syncing or dumping, then fail the command to avoid a
	 * recursive callback into scsi_transport(). Also fail the command
	 * if we are suspended (legacy behavior).
	 */
	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_DUMPING)) {
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_send_request_sense_command: syncing/dumping, exit\n");
		return;
	}

	/*
	 * Retry the failed command and don't issue the request sense if:
	 *    1) the sense buf is busy
	 *    2) we have 1 or more outstanding commands on the target
	 *    (the sense data will be cleared or invalidated any way)
	 *
	 * Note: There could be an issue with not checking a retry limit here,
	 * the problem is determining which retry limit to check.
	 */
	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
		/* Don't retry if the command is flagged as non-retryable */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, un->un_busy_timeout,
			    kstat_waitq_enter);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_send_request_sense_command: "
			    "at full throttle, retrying exit\n");
		} else {
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_send_request_sense_command: "
			    "at full throttle, non-retryable exit\n");
		}
		return;
	}

	/*
	 * Claim the instance's dedicated request-sense resources for this
	 * bp, then transport the pre-allocated REQUEST SENSE pkt.
	 */
	sd_mark_rqs_busy(un, bp);
	sd_start_cmds(un, un->un_rqs_bp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_send_request_sense_command: exit\n");
}
15535 
15536 
15537 /*
15538  *    Function: sd_mark_rqs_busy
15539  *
15540  * Description: Indicate that the request sense bp for this instance is
15541  *		in use.
15542  *
15543  *     Context: May be called under interrupt context
15544  */
15545 
static void
sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf	*sense_xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_sense_isbusy == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);

	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
	ASSERT(sense_xp != NULL);

	SD_INFO(SD_LOG_IO, un,
	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);

	ASSERT(sense_xp->xb_pktp != NULL);
	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
	    == (FLAG_SENSING | FLAG_HEAD));

	/* Reset the RQS pkt/buf fields left over from any prior use. */
	un->un_sense_isbusy = 1;
	un->un_rqs_bp->b_resid = 0;
	sense_xp->xb_pktp->pkt_resid  = 0;
	sense_xp->xb_pktp->pkt_reason = 0;

	/* So we can get back the bp at interrupt time! */
	sense_xp->xb_sense_bp = bp;

	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);

	/*
	 * Mark this buf as awaiting sense data. (This is already set in
	 * the pkt_flags for the RQS packet.)
	 */
	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;

	/* Request sense down same path */
	if (scsi_pkt_allocated_correctly((SD_GET_XBUF(bp))->xb_pktp) &&
	    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance)
		sense_xp->xb_pktp->pkt_path_instance =
		    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance;

	/* Reset all retry counters for the fresh REQUEST SENSE attempt. */
	sense_xp->xb_retry_count = 0;
	sense_xp->xb_victim_retry_count = 0;
	sense_xp->xb_ua_retry_count = 0;
	sense_xp->xb_nr_retry_count = 0;
	sense_xp->xb_dma_resid  = 0;

	/* Clean up the fields for auto-request sense */
	sense_xp->xb_sense_status = 0;
	sense_xp->xb_sense_state = 0;
	sense_xp->xb_sense_resid = 0;
	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
}
15605 
15606 
15607 /*
15608  *    Function: sd_mark_rqs_idle
15609  *
15610  * Description: SD_MUTEX must be held continuously through this routine
15611  *		to prevent reuse of the rqs struct before the caller can
 *		complete its processing.
15613  *
15614  * Return Code: Pointer to the RQS buf
15615  *
15616  *     Context: May be called under interrupt context
15617  */
15618 
15619 static struct buf *
15620 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15621 {
15622 	struct buf *bp;
15623 	ASSERT(un != NULL);
15624 	ASSERT(sense_xp != NULL);
15625 	ASSERT(mutex_owned(SD_MUTEX(un)));
15626 	ASSERT(un->un_sense_isbusy != 0);
15627 
15628 	un->un_sense_isbusy = 0;
15629 	bp = sense_xp->xb_sense_bp;
15630 	sense_xp->xb_sense_bp = NULL;
15631 
15632 	/* This pkt is no longer interested in getting sense data */
15633 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15634 
15635 	return (bp);
15636 }
15637 
15638 
15639 
15640 /*
15641  *    Function: sd_alloc_rqs
15642  *
15643  * Description: Set up the unit to receive auto request sense data
15644  *
15645  * Return Code: DDI_SUCCESS or DDI_FAILURE
15646  *
15647  *     Context: Called under attach(9E) context
15648  */
15649 
static int
sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_rqs_bp == NULL);
	ASSERT(un->un_rqs_pktp == NULL);

	/*
	 * First allocate the required buf and scsi_pkt structs, then set up
	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
	 */
	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
	    MAX_SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
	if (un->un_rqs_bp == NULL) {
		return (DDI_FAILURE);
	}

	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);

	if (un->un_rqs_pktp == NULL) {
		/* Undo the buf allocation above (sd_free_rqs handles both) */
		sd_free_rqs(un);
		return (DDI_FAILURE);
	}

	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
	    SCMD_REQUEST_SENSE, 0, MAX_SENSE_LENGTH, 0);

	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);

	/* Set up the other needed members in the ARQ scsi_pkt. */
	un->un_rqs_pktp->pkt_comp   = sdintr;
	un->un_rqs_pktp->pkt_time   = sd_io_time;
	un->un_rqs_pktp->pkt_flags |=
	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */

	/*
	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
	 * provide any intpkt, destroypkt routines as we take care of
	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
	 */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
	xp->xb_pktp = un->un_rqs_pktp;
	SD_INFO(SD_LOG_ATTACH_DETACH, un,
	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);

	/*
	 * Save the pointer to the request sense private bp so it can
	 * be retrieved in sdintr.
	 */
	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
	ASSERT(un->un_rqs_bp->b_private == xp);

	/*
	 * See if the HBA supports auto-request sense for the specified
	 * target/lun. If it does, then try to enable it (if not already
	 * enabled).
	 *
	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
	 * return success.  However, in both of these cases ARQ is always
	 * enabled and scsi_ifgetcap will always return true. The best approach
	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
	 *
	 * The 3rd case is an HBA (adp) that always reports ARQ as enabled
	 * from scsi_ifgetcap() even when it is not; the best approach there
	 * is to issue a scsi_ifsetcap() and then a scsi_ifgetcap().
	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
	 */

	if (un->un_f_is_fibre == TRUE) {
		/* Fibre channel HBAs always support and enable ARQ. */
		un->un_f_arq_enabled = TRUE;
	} else {
#if defined(__x86)
		/*
		 * Circumvent the Adaptec bug, remove this code when
		 * the bug is fixed
		 */
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
#endif
		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
		case 0:
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: HBA supports ARQ\n");
			/*
			 * ARQ is supported by this HBA but currently is not
			 * enabled. Attempt to enable it and if successful then
			 * mark this instance as ARQ enabled.
			 */
			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
			    == 1) {
				/* Successfully enabled ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_alloc_rqs: ARQ enabled\n");
				un->un_f_arq_enabled = TRUE;
			} else {
				/* Could not enable ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_alloc_rqs: failed ARQ enable\n");
				un->un_f_arq_enabled = FALSE;
			}
			break;
		case 1:
			/*
			 * ARQ is supported by this HBA and is already enabled.
			 * Just mark ARQ as enabled for this instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: ARQ already enabled\n");
			un->un_f_arq_enabled = TRUE;
			break;
		default:
			/*
			 * ARQ is not supported by this HBA; disable it for this
			 * instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: HBA does not support ARQ\n");
			un->un_f_arq_enabled = FALSE;
			break;
		}
	}

	return (DDI_SUCCESS);
}
15781 
15782 
15783 /*
15784  *    Function: sd_free_rqs
15785  *
15786  * Description: Cleanup for the pre-instance RQS command.
15787  *
15788  *     Context: Kernel thread context
15789  */
15790 
15791 static void
15792 sd_free_rqs(struct sd_lun *un)
15793 {
15794 	ASSERT(un != NULL);
15795 
15796 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15797 
15798 	/*
15799 	 * If consistent memory is bound to a scsi_pkt, the pkt
15800 	 * has to be destroyed *before* freeing the consistent memory.
15801 	 * Don't change the sequence of this operations.
15802 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
15803 	 * after it was freed in scsi_free_consistent_buf().
15804 	 */
15805 	if (un->un_rqs_pktp != NULL) {
15806 		scsi_destroy_pkt(un->un_rqs_pktp);
15807 		un->un_rqs_pktp = NULL;
15808 	}
15809 
15810 	if (un->un_rqs_bp != NULL) {
15811 		struct sd_xbuf *xp = SD_GET_XBUF(un->un_rqs_bp);
15812 		if (xp != NULL) {
15813 			kmem_free(xp, sizeof (struct sd_xbuf));
15814 		}
15815 		scsi_free_consistent_buf(un->un_rqs_bp);
15816 		un->un_rqs_bp = NULL;
15817 	}
15818 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15819 }
15820 
15821 
15822 
15823 /*
15824  *    Function: sd_reduce_throttle
15825  *
15826  * Description: Reduces the maximum # of outstanding commands on a
15827  *		target to the current number of outstanding commands.
15828  *		Queues a timeout(9F) callback to restore the limit
15829  *		after a specified interval has elapsed.
15830  *		Typically used when we get a TRAN_BUSY return code
15831  *		back from scsi_transport().
15832  *
15833  *   Arguments: un - ptr to the sd_lun softstate struct
15834  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15835  *
15836  *     Context: May be called from interrupt context
15837  */
15838 
15839 static void
15840 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15841 {
15842 	ASSERT(un != NULL);
15843 	ASSERT(mutex_owned(SD_MUTEX(un)));
15844 	ASSERT(un->un_ncmds_in_transport >= 0);
15845 
15846 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15847 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15848 	    un, un->un_throttle, un->un_ncmds_in_transport);
15849 
15850 	if (un->un_throttle > 1) {
15851 		if (un->un_f_use_adaptive_throttle == TRUE) {
15852 			switch (throttle_type) {
15853 			case SD_THROTTLE_TRAN_BUSY:
15854 				if (un->un_busy_throttle == 0) {
15855 					un->un_busy_throttle = un->un_throttle;
15856 				}
15857 				break;
15858 			case SD_THROTTLE_QFULL:
15859 				un->un_busy_throttle = 0;
15860 				break;
15861 			default:
15862 				ASSERT(FALSE);
15863 			}
15864 
15865 			if (un->un_ncmds_in_transport > 0) {
15866 				un->un_throttle = un->un_ncmds_in_transport;
15867 			}
15868 
15869 		} else {
15870 			if (un->un_ncmds_in_transport == 0) {
15871 				un->un_throttle = 1;
15872 			} else {
15873 				un->un_throttle = un->un_ncmds_in_transport;
15874 			}
15875 		}
15876 	}
15877 
15878 	/* Reschedule the timeout if none is currently active */
15879 	if (un->un_reset_throttle_timeid == NULL) {
15880 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15881 		    un, SD_THROTTLE_RESET_INTERVAL);
15882 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15883 		    "sd_reduce_throttle: timeout scheduled!\n");
15884 	}
15885 
15886 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15887 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15888 }
15889 
15890 
15891 
15892 /*
15893  *    Function: sd_restore_throttle
15894  *
15895  * Description: Callback function for timeout(9F).  Resets the current
15896  *		value of un->un_throttle to its default.
15897  *
15898  *   Arguments: arg - pointer to associated softstate for the device.
15899  *
15900  *     Context: May be called from interrupt context
15901  */
15902 
static void
sd_restore_throttle(void *arg)
{
	struct sd_lun	*un = arg;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);

	/* This timeout has fired; clear the id so it can be rescheduled. */
	un->un_reset_throttle_timeid = NULL;

	if (un->un_f_use_adaptive_throttle == TRUE) {
		/*
		 * If un_busy_throttle is nonzero, then it contains the
		 * value that un_throttle was when we got a TRAN_BUSY back
		 * from scsi_transport(). We want to revert back to this
		 * value.
		 *
		 * In the QFULL case, the throttle limit will incrementally
		 * increase until it reaches max throttle.
		 */
		if (un->un_busy_throttle > 0) {
			un->un_throttle = un->un_busy_throttle;
			un->un_busy_throttle = 0;
		} else {
			/*
			 * increase throttle by 10% open gate slowly, schedule
			 * another restore if saved throttle has not been
			 * reached
			 */
			short throttle;
			if (sd_qfull_throttle_enable) {
				/* Grow by at least 1 even for tiny values. */
				throttle = un->un_throttle +
				    max((un->un_throttle / 10), 1);
				un->un_throttle =
				    (throttle < un->un_saved_throttle) ?
				    throttle : un->un_saved_throttle;
				if (un->un_throttle < un->un_saved_throttle) {
					un->un_reset_throttle_timeid =
					    timeout(sd_restore_throttle,
					    un,
					    SD_QFULL_THROTTLE_RESET_INTERVAL);
				}
			}
		}

		/*
		 * If un_throttle has fallen below the low-water mark, we
		 * restore the maximum value here (and allow it to ratchet
		 * down again if necessary).
		 */
		if (un->un_throttle < un->un_min_throttle) {
			un->un_throttle = un->un_saved_throttle;
		}
	} else {
		/* Non-adaptive mode: restore the saved limit in one step. */
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
		    "restoring limit from 0x%x to 0x%x\n",
		    un->un_throttle, un->un_saved_throttle);
		un->un_throttle = un->un_saved_throttle;
	}

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_restore_throttle: calling sd_start_cmds!\n");

	/* Kick the start path so queued commands can use the new limit. */
	sd_start_cmds(un, NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
	    un, un->un_throttle);

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
}
15981 
15982 /*
15983  *    Function: sdrunout
15984  *
15985  * Description: Callback routine for scsi_init_pkt when a resource allocation
15986  *		fails.
15987  *
15988  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
15989  *		soft state instance.
15990  *
15991  * Return Code: The scsi_init_pkt routine allows for the callback function to
15992  *		return a 0 indicating the callback should be rescheduled or a 1
15993  *		indicating not to reschedule. This routine always returns 1
15994  *		because the driver always provides a callback function to
15995  *		scsi_init_pkt. This results in a callback always being scheduled
15996  *		(via the scsi_init_pkt callback implementation) if a resource
15997  *		failure occurs.
15998  *
15999  *     Context: This callback function may not block or call routines that block
16000  *
16001  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16002  *		request persisting at the head of the list which cannot be
16003  *		satisfied even after multiple retries. In the future the driver
 *		may implement some type of maximum runout count before failing
16005  *		an I/O.
16006  */
16007 
16008 static int
16009 sdrunout(caddr_t arg)
16010 {
16011 	struct sd_lun	*un = (struct sd_lun *)arg;
16012 
16013 	ASSERT(un != NULL);
16014 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16015 
16016 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16017 
16018 	mutex_enter(SD_MUTEX(un));
16019 	sd_start_cmds(un, NULL);
16020 	mutex_exit(SD_MUTEX(un));
16021 	/*
16022 	 * This callback routine always returns 1 (i.e. do not reschedule)
16023 	 * because we always specify sdrunout as the callback handler for
16024 	 * scsi_init_pkt inside the call to sd_start_cmds.
16025 	 */
16026 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16027 	return (1);
16028 }
16029 
16030 
16031 /*
16032  *    Function: sdintr
16033  *
16034  * Description: Completion callback routine for scsi_pkt(9S) structs
16035  *		sent to the HBA driver via scsi_transport(9F).
16036  *
16037  *     Context: Interrupt context
16038  */
16039 
static void
sdintr(struct scsi_pkt *pktp)
{
	struct buf	*bp;
	struct sd_xbuf	*xp;
	struct sd_lun	*un;
	size_t		actual_len;
	sd_ssc_t	*sscp;

	/* Recover the buf/xbuf/softstate triplet from the completed pkt. */
	ASSERT(pktp != NULL);
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp->xb_pktp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

#ifdef SD_FAULT_INJECTION

	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
	/* SD FaultInjection */
	sd_faultinjection(pktp);

#endif /* SD_FAULT_INJECTION */

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
	    " xp:0x%p, un:0x%p\n", bp, xp, un);

	mutex_enter(SD_MUTEX(un));

	ASSERT(un->un_fm_private != NULL);
	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
	ASSERT(sscp != NULL);

	/* Reduce the count of the #commands currently in transport */
	un->un_ncmds_in_transport--;
	ASSERT(un->un_ncmds_in_transport >= 0);

	/* Increment counter to indicate that the callback routine is active */
	un->un_in_callback++;

	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);

#ifdef	SDDEBUG
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
		    un, un->un_retry_bp, un->un_ncmds_in_transport);
	}
#endif

	/*
	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
	 * state if needed.
	 */
	if (pktp->pkt_reason == CMD_DEV_GONE) {
		/* Prevent multiple console messages for the same failure. */
		if (un->un_last_pkt_reason != CMD_DEV_GONE) {
			un->un_last_pkt_reason = CMD_DEV_GONE;
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Command failed to complete...Device is gone\n");
		}
		/* Wake any waiters watching the media state. */
		if (un->un_mediastate != DKIO_DEV_GONE) {
			un->un_mediastate = DKIO_DEV_GONE;
			cv_broadcast(&un->un_state_cv);
		}
		/*
		 * If the command happens to be the REQUEST SENSE command,
		 * free up the rqs buf and fail the original command.
		 */
		if (bp == un->un_rqs_bp) {
			bp = sd_mark_rqs_idle(un, xp);
		}
		sd_return_failed_command(un, bp, EIO);
		goto exit;
	}

	if (pktp->pkt_state & STATE_XARQ_DONE) {
		SD_TRACE(SD_LOG_COMMON, un,
		    "sdintr: extra sense data received. pkt=%p\n", pktp);
	}

	/*
	 * First see if the pkt has auto-request sense data with it....
	 * Look at the packet state first so we don't take a performance
	 * hit looking at the arq enabled flag unless absolutely necessary.
	 */
	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
	    (un->un_f_arq_enabled == TRUE)) {
		/*
		 * The HBA did an auto request sense for this command so check
		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
		 * driver command that should not be retried.
		 */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
			/*
			 * Save the relevant sense info into the xp for the
			 * original cmd.
			 */
			struct scsi_arq_status *asp;
			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
			xp->xb_sense_status =
			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
			xp->xb_sense_state  = asp->sts_rqpkt_state;
			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
			if (pktp->pkt_state & STATE_XARQ_DONE) {
				/* Extended sense: full MAX_SENSE_LENGTH. */
				actual_len = MAX_SENSE_LENGTH -
				    xp->xb_sense_resid;
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    MAX_SENSE_LENGTH);
			} else {
				if (xp->xb_sense_resid > SENSE_LENGTH) {
					actual_len = MAX_SENSE_LENGTH -
					    xp->xb_sense_resid;
				} else {
					actual_len = SENSE_LENGTH -
					    xp->xb_sense_resid;
				}
				/*
				 * For uscsi commands, recompute the sense
				 * residual relative to the caller-supplied
				 * uscsi_rqlen.
				 */
				if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
					if ((((struct uscsi_cmd *)
					    (xp->xb_pktinfo))->uscsi_rqlen) >
					    actual_len) {
						xp->xb_sense_resid =
						    (((struct uscsi_cmd *)
						    (xp->xb_pktinfo))->
						    uscsi_rqlen) - actual_len;
					} else {
						xp->xb_sense_resid = 0;
					}
				}
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    SENSE_LENGTH);
			}

			/* fail the command */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
			sd_return_failed_command(un, bp, EIO);
			goto exit;
		}

#if (defined(__x86))	/* DMAFREE for x86 only */
		/*
		 * We want to either retry or fail this command, so free
		 * the DMA resources here.  If we retry the command then
		 * the DMA resources will be reallocated in sd_start_cmds().
		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
		 * causes the *entire* transfer to start over again from the
		 * beginning of the request, even for PARTIAL chunks that
		 * have already transferred successfully.
		 */
		if ((un->un_f_is_fibre == TRUE) &&
		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
			scsi_dmafree(pktp);
			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
		}
#endif

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: arq done, sd_handle_auto_request_sense\n");

		sd_handle_auto_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/* Next see if this is the REQUEST SENSE pkt for the instance */
	if (pktp->pkt_flags & FLAG_SENSING)  {
		/* This pktp is from the unit's REQUEST_SENSE command */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: sd_handle_request_sense\n");
		sd_handle_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/*
	 * Check to see if the command successfully completed as requested;
	 * this is the most common case (and also the hot performance path).
	 *
	 * Requirements for successful completion are:
	 * pkt_reason is CMD_CMPLT and packet status is status good.
	 * In addition:
	 * - A residual of zero indicates successful completion no matter what
	 *   the command is.
	 * - If the residual is not zero and the command is not a read or
	 *   write, then it's still defined as successful completion. In other
	 *   words, if the command is a read or write the residual must be
	 *   zero for successful completion.
	 * - If the residual is not zero and the command is a read or
	 *   write, and it's a USCSICMD, then it's still defined as
	 *   successful completion.
	 */
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {

		/*
		 * Return all USCSI commands on good status
		 */
		if (pktp->pkt_resid == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid == 0\n");
		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid != 0\n");
		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning uscsi command\n");
		} else {
			goto not_successful;
		}
		sd_return_command(un, bp);

		/*
		 * Decrement counter to indicate that the callback routine
		 * is done.
		 */
		un->un_in_callback--;
		ASSERT(un->un_in_callback >= 0);
		mutex_exit(SD_MUTEX(un));

		return;
	}

not_successful:

#if (defined(__x86))	/* DMAFREE for x86 only */
	/*
	 * The following is based upon knowledge of the underlying transport
	 * and its use of DMA resources.  This code should be removed when
	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
	 * and sd_start_cmds().
	 *
	 * Free any DMA resources associated with this command if there
	 * is a chance it could be retried or enqueued for later retry.
	 * If we keep the DMA binding then mpxio cannot reissue the
	 * command on another path whenever a path failure occurs.
	 *
	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
	 * causes the *entire* transfer to start over again from the
	 * beginning of the request, even for PARTIAL chunks that
	 * have already transferred successfully.
	 *
	 * This is only done for non-uscsi commands (and also skipped for the
	 * driver's internal RQS command). Also just do this for Fibre Channel
	 * devices as these are the only ones that support mpxio.
	 */
	if ((un->un_f_is_fibre == TRUE) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
		scsi_dmafree(pktp);
		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
	}
#endif

	/*
	 * The command did not successfully complete as requested so check
	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
	 * driver command that should not be retried so just return. If
	 * FLAG_DIAGNOSE is not set the error will be processed below.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
		/*
		 * Issue a request sense if a check condition caused the error
		 * (we handle the auto request sense case above), otherwise
		 * just fail the command.
		 */
		if ((pktp->pkt_reason == CMD_CMPLT) &&
		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
			sd_send_request_sense_command(un, bp, pktp);
		} else {
			sd_return_failed_command(un, bp, EIO);
		}
		goto exit;
	}

	/*
	 * The command did not successfully complete as requested so process
	 * the error, retry, and/or attempt recovery.
	 */
	switch (pktp->pkt_reason) {
	case CMD_CMPLT:
		switch (SD_GET_PKT_STATUS(pktp)) {
		case STATUS_GOOD:
			/*
			 * The command completed successfully with a non-zero
			 * residual
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_GOOD \n");
			sd_pkt_status_good(un, bp, xp, pktp);
			break;

		case STATUS_CHECK:
		case STATUS_TERMINATED:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
			sd_pkt_status_check_condition(un, bp, xp, pktp);
			break;

		case STATUS_BUSY:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_BUSY\n");
			sd_pkt_status_busy(un, bp, xp, pktp);
			break;

		case STATUS_RESERVATION_CONFLICT:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
			break;

		case STATUS_QFULL:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_QFULL\n");
			sd_pkt_status_qfull(un, bp, xp, pktp);
			break;

		case STATUS_MET:
		case STATUS_INTERMEDIATE:
		case STATUS_SCSI2:
		case STATUS_INTERMEDIATE_MET:
		case STATUS_ACA_ACTIVE:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unexpected SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			/*
			 * Mark the ssc_flags when detected invalid status
			 * code for non-USCSI command.
			 */
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Invalid SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		}
		break;

	case CMD_INCOMPLETE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr:  CMD_INCOMPLETE\n");
		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
		break;
	case CMD_TRAN_ERR:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TRAN_ERR\n");
		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
		break;
	case CMD_RESET:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_RESET \n");
		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
		break;
	case CMD_ABORTED:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_ABORTED \n");
		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
		break;
	case CMD_TIMEOUT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TIMEOUT\n");
		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
		break;
	case CMD_UNX_BUS_FREE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_UNX_BUS_FREE \n");
		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
		break;
	case CMD_TAG_REJECT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TAG_REJECT\n");
		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
		break;
	default:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: default\n");
		/*
		 * Mark the ssc_flags for detecting invalid pkt_reason.
		 */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_PKT_REASON,
			    0, "pkt-reason");
		}
		sd_pkt_reason_default(un, bp, xp, pktp);
		break;
	}

exit:
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");

	/* Decrement counter to indicate that the callback routine is done. */
	un->un_in_callback--;
	ASSERT(un->un_in_callback >= 0);

	/*
	 * At this point, the pkt has been dispatched, ie, it is either
	 * being re-tried or has been returned to its caller and should
	 * not be referenced.
	 */

	mutex_exit(SD_MUTEX(un));
}
16462 
16463 
16464 /*
16465  *    Function: sd_print_incomplete_msg
16466  *
16467  * Description: Prints the error message for a CMD_INCOMPLETE error.
16468  *
16469  *   Arguments: un - ptr to associated softstate for the device.
16470  *		bp - ptr to the buf(9S) for the command.
16471  *		arg - message string ptr
16472  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16473  *			or SD_NO_RETRY_ISSUED.
16474  *
16475  *     Context: May be called under interrupt context
16476  */
16477 
16478 static void
16479 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16480 {
16481 	struct scsi_pkt	*pktp;
16482 	char	*msgp;
16483 	char	*cmdp = arg;
16484 
16485 	ASSERT(un != NULL);
16486 	ASSERT(mutex_owned(SD_MUTEX(un)));
16487 	ASSERT(bp != NULL);
16488 	ASSERT(arg != NULL);
16489 	pktp = SD_GET_PKTP(bp);
16490 	ASSERT(pktp != NULL);
16491 
16492 	switch (code) {
16493 	case SD_DELAYED_RETRY_ISSUED:
16494 	case SD_IMMEDIATE_RETRY_ISSUED:
16495 		msgp = "retrying";
16496 		break;
16497 	case SD_NO_RETRY_ISSUED:
16498 	default:
16499 		msgp = "giving up";
16500 		break;
16501 	}
16502 
16503 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16504 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16505 		    "incomplete %s- %s\n", cmdp, msgp);
16506 	}
16507 }
16508 
16509 
16510 
16511 /*
16512  *    Function: sd_pkt_status_good
16513  *
16514  * Description: Processing for a STATUS_GOOD code in pkt_status.
16515  *
16516  *     Context: May be called under interrupt context
16517  */
16518 
16519 static void
16520 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16521     struct sd_xbuf *xp, struct scsi_pkt *pktp)
16522 {
16523 	char	*cmdp;
16524 
16525 	ASSERT(un != NULL);
16526 	ASSERT(mutex_owned(SD_MUTEX(un)));
16527 	ASSERT(bp != NULL);
16528 	ASSERT(xp != NULL);
16529 	ASSERT(pktp != NULL);
16530 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16531 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16532 	ASSERT(pktp->pkt_resid != 0);
16533 
16534 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16535 
16536 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16537 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16538 	case SCMD_READ:
16539 		cmdp = "read";
16540 		break;
16541 	case SCMD_WRITE:
16542 		cmdp = "write";
16543 		break;
16544 	default:
16545 		SD_UPDATE_B_RESID(bp, pktp);
16546 		sd_return_command(un, bp);
16547 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16548 		return;
16549 	}
16550 
16551 	/*
16552 	 * See if we can retry the read/write, preferrably immediately.
16553 	 * If retries are exhaused, then sd_retry_command() will update
16554 	 * the b_resid count.
16555 	 */
16556 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16557 	    cmdp, EIO, (clock_t)0, NULL);
16558 
16559 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16560 }
16561 
16562 
16563 
16564 
16565 
16566 /*
16567  *    Function: sd_handle_request_sense
16568  *
16569  * Description: Processing for non-auto Request Sense command.
16570  *
16571  *   Arguments: un - ptr to associated softstate
16572  *		sense_bp - ptr to buf(9S) for the RQS command
16573  *		sense_xp - ptr to the sd_xbuf for the RQS command
16574  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16575  *
16576  *     Context: May be called under interrupt context
16577  */
16578 
static void
sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
    struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
{
	struct buf	*cmd_bp;	/* buf for the original command */
	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
	size_t		actual_len;	/* actual sense data length */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(sense_bp != NULL);
	ASSERT(sense_xp != NULL);
	ASSERT(sense_pktp != NULL);

	/*
	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
	 * RQS command and not the original command.
	 */
	ASSERT(sense_pktp == un->un_rqs_pktp);
	ASSERT(sense_bp   == un->un_rqs_bp);
	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
	    (FLAG_SENSING | FLAG_HEAD));
	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
	    FLAG_SENSING) == FLAG_SENSING);

	/* These are the bp, xp, and pktp for the original command */
	cmd_bp = sense_xp->xb_sense_bp;
	cmd_xp = SD_GET_XBUF(cmd_bp);
	cmd_pktp = SD_GET_PKTP(cmd_bp);

	if (sense_pktp->pkt_reason != CMD_CMPLT) {
		/*
		 * The REQUEST SENSE command failed.  Release the REQUEST
		 * SENSE command for re-use, get back the bp for the original
		 * command, and attempt to re-try the original command if
		 * FLAG_DIAGNOSE is not set in the original packet.
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
			    NULL, NULL, EIO, (clock_t)0, NULL);
			return;
		}
		/*
		 * NOTE(review): when FLAG_DIAGNOSE is set we fall through
		 * and process the (zeroed, per sd_mark_rqs_busy()) sense
		 * state; the original command is failed further below.
		 */
	}

	/*
	 * Save the relevant sense info into the xp for the original cmd.
	 *
	 * Note: if the request sense failed the state info will be zero
	 * as set in sd_mark_rqs_busy()
	 */
	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
	/* pkt_resid is the part of MAX_SENSE_LENGTH that was NOT filled */
	actual_len = MAX_SENSE_LENGTH - sense_pktp->pkt_resid;
	if ((cmd_xp->xb_pkt_flags & SD_XB_USCSICMD) &&
	    (((struct uscsi_cmd *)cmd_xp->xb_pktinfo)->uscsi_rqlen >
	    SENSE_LENGTH)) {
		/*
		 * A USCSI caller requested more than SENSE_LENGTH bytes:
		 * copy the full extended buffer and report the residual
		 * exactly as the HBA returned it.
		 */
		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
		    MAX_SENSE_LENGTH);
		cmd_xp->xb_sense_resid = sense_pktp->pkt_resid;
	} else {
		/*
		 * Normal path: copy SENSE_LENGTH bytes and express the
		 * residual relative to SENSE_LENGTH rather than to the
		 * (larger) transfer the RQS command actually used.
		 */
		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
		    SENSE_LENGTH);
		if (actual_len < SENSE_LENGTH) {
			cmd_xp->xb_sense_resid = SENSE_LENGTH - actual_len;
		} else {
			cmd_xp->xb_sense_resid = 0;
		}
	}

	/*
	 *  Free up the RQS command....
	 *  NOTE:
	 *	Must do this BEFORE calling sd_validate_sense_data!
	 *	sd_validate_sense_data may return the original command in
	 *	which case the pkt will be freed and the flags can no
	 *	longer be touched.
	 *	SD_MUTEX is held through this process until the command
	 *	is dispatched based upon the sense data, so there are
	 *	no race conditions.
	 */
	(void) sd_mark_rqs_idle(un, sense_xp);

	/*
	 * For a retryable command see if we have valid sense data, if so then
	 * turn it over to sd_decode_sense() to figure out the right course of
	 * action. Just fail a non-retryable command.
	 */
	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
		if (sd_validate_sense_data(un, cmd_bp, cmd_xp, actual_len) ==
		    SD_SENSE_DATA_IS_VALID) {
			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
		}
	} else {
		/* FLAG_DIAGNOSE: log the CDB and sense data, then fail. */
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		sd_return_failed_command(un, cmd_bp, EIO);
	}
}
16682 
16683 
16684 
16685 
16686 /*
16687  *    Function: sd_handle_auto_request_sense
16688  *
16689  * Description: Processing for auto-request sense information.
16690  *
16691  *   Arguments: un - ptr to associated softstate
16692  *		bp - ptr to buf(9S) for the command
16693  *		xp - ptr to the sd_xbuf for the command
16694  *		pktp - ptr to the scsi_pkt(9S) for the command
16695  *
16696  *     Context: May be called under interrupt context
16697  */
16698 
static void
sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct scsi_arq_status *asp;
	size_t actual_len;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);
	/* The manual-RQS resources must never show up on this path. */
	ASSERT(pktp != un->un_rqs_pktp);
	ASSERT(bp   != un->un_rqs_bp);

	/*
	 * For auto-request sense, we get a scsi_arq_status back from
	 * the HBA, with the sense data in the sts_sensedata member.
	 * The pkt_scbp of the packet points to this scsi_arq_status.
	 */
	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);

	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
		/*
		 * The auto REQUEST SENSE failed; see if we can re-try
		 * the original command.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "auto request sense failed (reason=%s)\n",
		    scsi_rname(asp->sts_rqpkt_reason));

		sd_reset_target(un, pktp);

		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
		    NULL, NULL, EIO, (clock_t)0, NULL);
		return;
	}

	/* Save the relevant sense info into the xp for the original cmd. */
	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
	xp->xb_sense_state  = asp->sts_rqpkt_state;
	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
	if (xp->xb_sense_state & STATE_XARQ_DONE) {
		/*
		 * Extended ARQ completed: the resid is relative to
		 * MAX_SENSE_LENGTH and the full buffer is available.
		 */
		actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		bcopy(&asp->sts_sensedata, xp->xb_sense_data,
		    MAX_SENSE_LENGTH);
	} else {
		/*
		 * Without STATE_XARQ_DONE the resid may be relative to
		 * either SENSE_LENGTH or MAX_SENSE_LENGTH; a resid larger
		 * than SENSE_LENGTH implies the latter.
		 */
		if (xp->xb_sense_resid > SENSE_LENGTH) {
			actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		} else {
			actual_len = SENSE_LENGTH - xp->xb_sense_resid;
		}
		if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			/*
			 * For USCSI, report the residual relative to the
			 * caller's requested sense length (uscsi_rqlen).
			 */
			if ((((struct uscsi_cmd *)
			    (xp->xb_pktinfo))->uscsi_rqlen) > actual_len) {
				xp->xb_sense_resid = (((struct uscsi_cmd *)
				    (xp->xb_pktinfo))->uscsi_rqlen) -
				    actual_len;
			} else {
				xp->xb_sense_resid = 0;
			}
		}
		bcopy(&asp->sts_sensedata, xp->xb_sense_data, SENSE_LENGTH);
	}

	/*
	 * See if we have valid sense data, if so then turn it over to
	 * sd_decode_sense() to figure out the right course of action.
	 */
	if (sd_validate_sense_data(un, bp, xp, actual_len) ==
	    SD_SENSE_DATA_IS_VALID) {
		sd_decode_sense(un, bp, xp, pktp);
	}
}
16773 
16774 
16775 /*
16776  *    Function: sd_print_sense_failed_msg
16777  *
16778  * Description: Print log message when RQS has failed.
16779  *
16780  *   Arguments: un - ptr to associated softstate
16781  *		bp - ptr to buf(9S) for the command
16782  *		arg - generic message string ptr
16783  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16784  *			or SD_NO_RETRY_ISSUED
16785  *
16786  *     Context: May be called from interrupt context
16787  */
16788 
16789 static void
16790 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16791     int code)
16792 {
16793 	char	*msgp = arg;
16794 
16795 	ASSERT(un != NULL);
16796 	ASSERT(mutex_owned(SD_MUTEX(un)));
16797 	ASSERT(bp != NULL);
16798 
16799 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16800 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16801 	}
16802 }
16803 
16804 
16805 /*
16806  *    Function: sd_validate_sense_data
16807  *
16808  * Description: Check the given sense data for validity.
16809  *		If the sense data is not valid, the command will
16810  *		be either failed or retried!
16811  *
16812  * Return Code: SD_SENSE_DATA_IS_INVALID
16813  *		SD_SENSE_DATA_IS_VALID
16814  *
16815  *     Context: May be called from interrupt context
16816  */
16817 
static int
sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
    size_t actual_len)
{
	struct scsi_extended_sense *esp;
	struct	scsi_pkt *pktp;
	char	*msgp = NULL;
	sd_ssc_t *sscp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(bp != un->un_rqs_bp);
	ASSERT(xp != NULL);
	ASSERT(un->un_fm_private != NULL);

	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
	ASSERT(sscp != NULL);

	/*
	 * Check the status of the RQS command (auto or manual).
	 */
	switch (xp->xb_sense_status & STATUS_MASK) {
	case STATUS_GOOD:
		break;

	case STATUS_RESERVATION_CONFLICT:
		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_BUSY:
		/* Retry after a fraction of the configured busy timeout. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Busy Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_QFULL:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "QFULL Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_CHECK:
	case STATUS_TERMINATED:
		msgp = "Check Condition on REQUEST SENSE\n";
		goto sense_failed;

	default:
		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
		goto sense_failed;
	}

	/*
	 * See if we got the minimum required amount of sense data.
	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
	 * or less.
	 */
	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
	    (actual_len == 0)) {
		msgp = "Request Sense couldn't get sense data\n";
		goto sense_failed;
	}

	if (actual_len < SUN_MIN_SENSE_LENGTH) {
		msgp = "Not enough sense information\n";
		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}
		goto sense_failed;
	}

	/*
	 * We require the extended sense data
	 */
	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
	if (esp->es_class != CLASS_EXTENDED_SENSE) {
		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
			/*
			 * These buffers are static, so access to them is
			 * serialized under sd_sense_mutex.
			 */
			static char tmp[8];
			static char buf[148];
			char *p = (char *)(xp->xb_sense_data);
			int i;

			mutex_enter(&sd_sense_mutex);
			(void) strcpy(buf, "undecodable sense information:");
			for (i = 0; i < actual_len; i++) {
				(void) sprintf(tmp, " 0x%x", *(p++) & 0xff);
				(void) strcpy(&buf[strlen(buf)], tmp);
			}
			i = strlen(buf);
			(void) strcpy(&buf[i], "-(assumed fatal)\n");

			/*
			 * NOTE(review): buf is passed as the format string,
			 * but it is built here from fixed text and hex
			 * digits only, so it cannot contain '%'.
			 */
			if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
				scsi_log(SD_DEVINFO(un), sd_label,
				    CE_WARN, buf);
			}
			mutex_exit(&sd_sense_mutex);
		}

		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}

		/* Note: Legacy behavior, fail the command with no retry */
		sd_return_failed_command(un, bp, EIO);
		return (SD_SENSE_DATA_IS_INVALID);
	}

	/*
	 * Check that es_code is valid (es_class concatenated with es_code
	 * make up the "response code" field.  es_class will always be 7, so
	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
	 * format.
	 */
	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}
		goto sense_failed;
	}

	return (SD_SENSE_DATA_IS_VALID);

sense_failed:
	/*
	 * If the request sense failed (for whatever reason), attempt
	 * to retry the original command.
	 */
	/*
	 * The SD_RETRY_DELAY value need to be adjusted here
	 * when SD_RETRY_DELAY change in sddef.h
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO,
	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);

	return (SD_SENSE_DATA_IS_INVALID);
}
16970 
16971 /*
16972  *    Function: sd_decode_sense
16973  *
16974  * Description: Take recovery action(s) when SCSI Sense Data is received.
16975  *
16976  *     Context: Interrupt context.
16977  */
16978 
16979 static void
16980 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16981     struct scsi_pkt *pktp)
16982 {
16983 	uint8_t sense_key;
16984 
16985 	ASSERT(un != NULL);
16986 	ASSERT(mutex_owned(SD_MUTEX(un)));
16987 	ASSERT(bp != NULL);
16988 	ASSERT(bp != un->un_rqs_bp);
16989 	ASSERT(xp != NULL);
16990 	ASSERT(pktp != NULL);
16991 
16992 	sense_key = scsi_sense_key(xp->xb_sense_data);
16993 
16994 	switch (sense_key) {
16995 	case KEY_NO_SENSE:
16996 		sd_sense_key_no_sense(un, bp, xp, pktp);
16997 		break;
16998 	case KEY_RECOVERABLE_ERROR:
16999 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17000 		    bp, xp, pktp);
17001 		break;
17002 	case KEY_NOT_READY:
17003 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17004 		    bp, xp, pktp);
17005 		break;
17006 	case KEY_MEDIUM_ERROR:
17007 	case KEY_HARDWARE_ERROR:
17008 		sd_sense_key_medium_or_hardware_error(un,
17009 		    xp->xb_sense_data, bp, xp, pktp);
17010 		break;
17011 	case KEY_ILLEGAL_REQUEST:
17012 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17013 		break;
17014 	case KEY_UNIT_ATTENTION:
17015 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17016 		    bp, xp, pktp);
17017 		break;
17018 	case KEY_WRITE_PROTECT:
17019 	case KEY_VOLUME_OVERFLOW:
17020 	case KEY_MISCOMPARE:
17021 		sd_sense_key_fail_command(un, bp, xp, pktp);
17022 		break;
17023 	case KEY_BLANK_CHECK:
17024 		sd_sense_key_blank_check(un, bp, xp, pktp);
17025 		break;
17026 	case KEY_ABORTED_COMMAND:
17027 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17028 		break;
17029 	case KEY_VENDOR_UNIQUE:
17030 	case KEY_COPY_ABORTED:
17031 	case KEY_EQUAL:
17032 	case KEY_RESERVED:
17033 	default:
17034 		sd_sense_key_default(un, xp->xb_sense_data,
17035 		    bp, xp, pktp);
17036 		break;
17037 	}
17038 }
17039 
17040 
17041 /*
17042  *    Function: sd_dump_memory
17043  *
17044  * Description: Debug logging routine to print the contents of a user provided
17045  *		buffer. The output of the buffer is broken up into 256 byte
 *		segments due to a size constraint of the scsi_log
 *		implementation.
17048  *
17049  *   Arguments: un - ptr to softstate
17050  *		comp - component mask
17051  *		title - "title" string to preceed data when printed
17052  *		data - ptr to data block to be printed
17053  *		len - size of data block to be printed
17054  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17055  *
17056  *     Context: May be called from interrupt context
17057  */
17058 
/* Size of the scratch buffer used to assemble each logged line. */
#define	SD_DUMP_MEMORY_BUF_SIZE	256

/* Per-element format strings: index 0 = SD_LOG_HEX, index 1 = SD_LOG_CHAR */
static char *sd_dump_format_string[] = {
		" 0x%02x",
		" %c"
};

static void
sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
    int len, int fmt)
{
	int	i, j;
	int	avail_count;	/* elements that fit on one logged line */
	int	start_offset;
	int	end_offset;
	size_t	entry_len;	/* printed width of one data element */
	char	*bufp;
	char	*local_buf;
	char	*format_string;

	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));

	/*
	 * In the debug version of the driver, this function is called from a
	 * number of places which are NOPs in the release driver.
	 * The debug driver therefore has additional methods of filtering
	 * debug output.
	 */
#ifdef SDDEBUG
	/*
	 * In the debug version of the driver we can reduce the amount of debug
	 * messages by setting sd_error_level to something other than
	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
	 * sd_component_mask.
	 */
	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
	    (sd_error_level != SCSI_ERR_ALL)) {
		return;
	}
	if (((sd_component_mask & comp) == 0) ||
	    (sd_error_level != SCSI_ERR_ALL)) {
		return;
	}
#else
	if (sd_error_level != SCSI_ERR_ALL) {
		return;
	}
#endif

	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
	bufp = local_buf;
	/*
	 * Available length is the length of local_buf[], minus the
	 * length of the title string, minus one for the ":", minus
	 * one for the newline, minus one for the NULL terminator.
	 * This gives the #bytes available for holding the printed
	 * values from the given data buffer.
	 */
	if (fmt == SD_LOG_HEX) {
		format_string = sd_dump_format_string[0];
	} else /* SD_LOG_CHAR */ {
		format_string = sd_dump_format_string[1];
	}
	/*
	 * Available count is the number of elements from the given
	 * data buffer that we can fit into the available length.
	 * This is based upon the size of the format string used.
	 * Make one entry and find it's size.
	 */
	(void) sprintf(bufp, format_string, data[0]);
	entry_len = strlen(bufp);
	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;

	/* Emit the data one line at a time until all of it is logged. */
	j = 0;
	while (j < len) {
		bufp = local_buf;
		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
		start_offset = j;

		end_offset = start_offset + avail_count;

		(void) sprintf(bufp, "%s:", title);
		bufp += strlen(bufp);
		/*
		 * NOTE(review): bufp is advanced by entry_len, which assumes
		 * every element prints at the same width as data[0] did; this
		 * holds for the two format strings defined above.
		 */
		for (i = start_offset; ((i < end_offset) && (j < len));
		    i++, j++) {
			(void) sprintf(bufp, format_string, data[i]);
			bufp += entry_len;
		}
		(void) sprintf(bufp, "\n");

		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
	}
	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
}
17153 
17154 /*
17155  *    Function: sd_print_sense_msg
17156  *
17157  * Description: Log a message based upon the given sense data.
17158  *
17159  *   Arguments: un - ptr to associated softstate
17160  *		bp - ptr to buf(9S) for the command
17161  *		arg - ptr to associate sd_sense_info struct
17162  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17163  *			or SD_NO_RETRY_ISSUED
17164  *
17165  *     Context: May be called from interrupt context
17166  */
17167 
static void
sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	uint8_t *sensep;
	daddr_t request_blkno;
	diskaddr_t err_blkno;
	int severity;
	int pfa_flag;
	extern struct scsi_key_strings scsi_cmds[];

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	ASSERT(arg != NULL);

	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;

	/* A retry in progress downgrades the severity to "retryable". */
	if ((code == SD_DELAYED_RETRY_ISSUED) ||
	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
		severity = SCSI_ERR_RETRYABLE;
	}

	/* Use absolute block number for the request block number */
	request_blkno = xp->xb_blkno;

	/*
	 * Now try to get the error block number from the sense data
	 */
	sensep = xp->xb_sense_data;

	/*
	 * NOTE(review): the cast below assumes diskaddr_t is 64 bits wide
	 * so that it can alias a uint64_t — confirm against sys/types.h.
	 */
	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
	    (uint64_t *)&err_blkno)) {
		/*
		 * We retrieved the error block number from the information
		 * portion of the sense data.
		 *
		 * For USCSI commands we are better off using the error
		 * block no. as the requested block no. (This is the best
		 * we can estimate.)
		 */
		if ((SD_IS_BUFIO(xp) == FALSE) &&
		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
			request_blkno = err_blkno;
		}
	} else {
		/*
		 * Without the es_valid bit set (for fixed format) or an
		 * information descriptor (for descriptor format) we cannot
		 * be certain of the error blkno, so just use the
		 * request_blkno.
		 */
		err_blkno = (diskaddr_t)request_blkno;
	}

	/*
	 * The following will log the buffer contents for the release driver
	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
	 * level is set to verbose.
	 */
	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);

	if (pfa_flag == FALSE) {
		/* This is normally only set for USCSI */
		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
			return;
		}

		/* Suppress buf I/O messages below the configured level. */
		if ((SD_IS_BUFIO(xp) == TRUE) &&
		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
		    (severity < sd_error_level))) {
			return;
		}
	}

	/*
	 * Log the message unless FMA is handling it, except for fully
	 * recovered errors (no residue), which are always reported here.
	 */
	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP ||
	    ((scsi_sense_key(sensep) == KEY_RECOVERABLE_ERROR) &&
	    (pktp->pkt_resid == 0))) {
		scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
		    request_blkno, err_blkno, scsi_cmds,
		    (struct scsi_extended_sense *)sensep,
		    un->un_additional_codes, NULL);
	}
}
17261 
17262 /*
17263  *    Function: sd_sense_key_no_sense
17264  *
17265  * Description: Recovery action when sense data was not received.
17266  *
17267  *     Context: May be called from interrupt context
17268  */
17269 
17270 static void
17271 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17272     struct scsi_pkt *pktp)
17273 {
17274 	struct sd_sense_info	si;
17275 
17276 	ASSERT(un != NULL);
17277 	ASSERT(mutex_owned(SD_MUTEX(un)));
17278 	ASSERT(bp != NULL);
17279 	ASSERT(xp != NULL);
17280 	ASSERT(pktp != NULL);
17281 
17282 	si.ssi_severity = SCSI_ERR_FATAL;
17283 	si.ssi_pfa_flag = FALSE;
17284 
17285 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17286 
17287 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17288 	    &si, EIO, (clock_t)0, NULL);
17289 }
17290 
17291 
17292 /*
17293  *    Function: sd_sense_key_recoverable_error
17294  *
17295  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17296  *
17297  *     Context: May be called from interrupt context
17298  */
17299 
17300 static void
17301 sd_sense_key_recoverable_error(struct sd_lun *un, uint8_t *sense_datap,
17302     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17303 {
17304 	struct sd_sense_info	si;
17305 	uint8_t asc = scsi_sense_asc(sense_datap);
17306 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17307 
17308 	ASSERT(un != NULL);
17309 	ASSERT(mutex_owned(SD_MUTEX(un)));
17310 	ASSERT(bp != NULL);
17311 	ASSERT(xp != NULL);
17312 	ASSERT(pktp != NULL);
17313 
17314 	/*
17315 	 * 0x00, 0x1D: ATA PASSTHROUGH INFORMATION AVAILABLE
17316 	 */
17317 	if (asc == 0x00 && ascq == 0x1D) {
17318 		sd_return_command(un, bp);
17319 		return;
17320 	}
17321 
17322 	/*
17323 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17324 	 */
17325 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17326 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17327 		si.ssi_severity = SCSI_ERR_INFO;
17328 		si.ssi_pfa_flag = TRUE;
17329 	} else {
17330 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17331 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17332 		si.ssi_severity = SCSI_ERR_RECOVERED;
17333 		si.ssi_pfa_flag = FALSE;
17334 	}
17335 
17336 	if (pktp->pkt_resid == 0) {
17337 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17338 		sd_return_command(un, bp);
17339 		return;
17340 	}
17341 
17342 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17343 	    &si, EIO, (clock_t)0, NULL);
17344 }
17345 
17346 
17347 
17348 
17349 /*
17350  *    Function: sd_sense_key_not_ready
17351  *
17352  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17353  *
17354  *     Context: May be called from interrupt context
17355  */
17356 
17357 static void
17358 sd_sense_key_not_ready(struct sd_lun *un, uint8_t *sense_datap, struct buf *bp,
17359     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17360 {
17361 	struct sd_sense_info	si;
17362 	uint8_t asc = scsi_sense_asc(sense_datap);
17363 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17364 
17365 	ASSERT(un != NULL);
17366 	ASSERT(mutex_owned(SD_MUTEX(un)));
17367 	ASSERT(bp != NULL);
17368 	ASSERT(xp != NULL);
17369 	ASSERT(pktp != NULL);
17370 
17371 	si.ssi_severity = SCSI_ERR_FATAL;
17372 	si.ssi_pfa_flag = FALSE;
17373 
17374 	/*
17375 	 * Update error stats after first NOT READY error. Disks may have
17376 	 * been powered down and may need to be restarted.  For CDROMs,
17377 	 * report NOT READY errors only if media is present.
17378 	 */
17379 	if ((ISCD(un) && (asc == 0x3A)) ||
17380 	    (xp->xb_nr_retry_count > 0)) {
17381 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17382 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17383 	}
17384 
17385 	/*
17386 	 * Just fail if the "not ready" retry limit has been reached.
17387 	 */
17388 	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
17389 		/* Special check for error message printing for removables. */
17390 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17391 		    (ascq >= 0x04)) {
17392 			si.ssi_severity = SCSI_ERR_ALL;
17393 		}
17394 		goto fail_command;
17395 	}
17396 
17397 	/*
17398 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17399 	 * what to do.
17400 	 */
17401 	switch (asc) {
17402 	case 0x04:	/* LOGICAL UNIT NOT READY */
17403 		/*
17404 		 * disk drives that don't spin up result in a very long delay
17405 		 * in format without warning messages. We will log a message
17406 		 * if the error level is set to verbose.
17407 		 */
17408 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17409 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17410 			    "logical unit not ready, resetting disk\n");
17411 		}
17412 
17413 		/*
17414 		 * There are different requirements for CDROMs and disks for
17415 		 * the number of retries.  If a CD-ROM is giving this, it is
17416 		 * probably reading TOC and is in the process of getting
17417 		 * ready, so we should keep on trying for a long time to make
17418 		 * sure that all types of media are taken in account (for
17419 		 * some media the drive takes a long time to read TOC).  For
17420 		 * disks we do not want to retry this too many times as this
17421 		 * can cause a long hang in format when the drive refuses to
17422 		 * spin up (a very common failure).
17423 		 */
17424 		switch (ascq) {
17425 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17426 			/*
17427 			 * Disk drives frequently refuse to spin up which
17428 			 * results in a very long hang in format without
17429 			 * warning messages.
17430 			 *
17431 			 * Note: This code preserves the legacy behavior of
17432 			 * comparing xb_nr_retry_count against zero for fibre
17433 			 * channel targets instead of comparing against the
17434 			 * un_reset_retry_count value.  The reason for this
17435 			 * discrepancy has been so utterly lost beneath the
17436 			 * Sands of Time that even Indiana Jones could not
17437 			 * find it.
17438 			 */
17439 			if (un->un_f_is_fibre == TRUE) {
17440 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17441 				    (xp->xb_nr_retry_count > 0)) &&
17442 				    (un->un_startstop_timeid == NULL)) {
17443 					scsi_log(SD_DEVINFO(un), sd_label,
17444 					    CE_WARN, "logical unit not ready, "
17445 					    "resetting disk\n");
17446 					sd_reset_target(un, pktp);
17447 				}
17448 			} else {
17449 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17450 				    (xp->xb_nr_retry_count >
17451 				    un->un_reset_retry_count)) &&
17452 				    (un->un_startstop_timeid == NULL)) {
17453 					scsi_log(SD_DEVINFO(un), sd_label,
17454 					    CE_WARN, "logical unit not ready, "
17455 					    "resetting disk\n");
17456 					sd_reset_target(un, pktp);
17457 				}
17458 			}
17459 			break;
17460 
17461 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17462 			/*
17463 			 * If the target is in the process of becoming
17464 			 * ready, just proceed with the retry. This can
17465 			 * happen with CD-ROMs that take a long time to
17466 			 * read TOC after a power cycle or reset.
17467 			 */
17468 			goto do_retry;
17469 
17470 		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
17471 			break;
17472 
17473 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17474 			/*
17475 			 * Retries cannot help here so just fail right away.
17476 			 */
17477 			goto fail_command;
17478 
17479 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17480 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17481 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17482 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17483 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17484 		default:    /* Possible future codes in SCSI spec? */
17485 			/*
17486 			 * For removable-media devices, do not retry if
17487 			 * ASCQ > 2 as these result mostly from USCSI commands
17488 			 * on MMC devices issued to check status of an
17489 			 * operation initiated in immediate mode.  Also for
17490 			 * ASCQ >= 4 do not print console messages as these
17491 			 * mainly represent a user-initiated operation
17492 			 * instead of a system failure.
17493 			 */
17494 			if (un->un_f_has_removable_media) {
17495 				si.ssi_severity = SCSI_ERR_ALL;
17496 				goto fail_command;
17497 			}
17498 			break;
17499 		}
17500 
17501 		/*
17502 		 * As part of our recovery attempt for the NOT READY
17503 		 * condition, we issue a START STOP UNIT command. However
17504 		 * we want to wait for a short delay before attempting this
17505 		 * as there may still be more commands coming back from the
17506 		 * target with the check condition. To do this we use
17507 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17508 		 * the delay interval expires. (sd_start_stop_unit_callback()
17509 		 * dispatches sd_start_stop_unit_task(), which will issue
17510 		 * the actual START STOP UNIT command. The delay interval
17511 		 * is one-half of the delay that we will use to retry the
17512 		 * command that generated the NOT READY condition.
17513 		 *
17514 		 * Note that we could just dispatch sd_start_stop_unit_task()
17515 		 * from here and allow it to sleep for the delay interval,
17516 		 * but then we would be tying up the taskq thread
17517 		 * uncesessarily for the duration of the delay.
17518 		 *
17519 		 * Do not issue the START STOP UNIT if the current command
17520 		 * is already a START STOP UNIT.
17521 		 */
17522 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17523 			break;
17524 		}
17525 
17526 		/*
17527 		 * Do not schedule the timeout if one is already pending.
17528 		 */
17529 		if (un->un_startstop_timeid != NULL) {
17530 			SD_INFO(SD_LOG_ERROR, un,
17531 			    "sd_sense_key_not_ready: restart already issued to"
17532 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17533 			    ddi_get_instance(SD_DEVINFO(un)));
17534 			break;
17535 		}
17536 
17537 		/*
17538 		 * Schedule the START STOP UNIT command, then queue the command
17539 		 * for a retry.
17540 		 *
17541 		 * Note: A timeout is not scheduled for this retry because we
17542 		 * want the retry to be serial with the START_STOP_UNIT. The
17543 		 * retry will be started when the START_STOP_UNIT is completed
17544 		 * in sd_start_stop_unit_task.
17545 		 */
17546 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17547 		    un, un->un_busy_timeout / 2);
17548 		xp->xb_nr_retry_count++;
17549 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17550 		return;
17551 
17552 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17553 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17554 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17555 			    "unit does not respond to selection\n");
17556 		}
17557 		break;
17558 
17559 	case 0x3A:	/* MEDIUM NOT PRESENT */
17560 		if (sd_error_level >= SCSI_ERR_FATAL) {
17561 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17562 			    "Caddy not inserted in drive\n");
17563 		}
17564 
17565 		sr_ejected(un);
17566 		un->un_mediastate = DKIO_EJECTED;
17567 		/* The state has changed, inform the media watch routines */
17568 		cv_broadcast(&un->un_state_cv);
17569 		/* Just fail if no media is present in the drive. */
17570 		goto fail_command;
17571 
17572 	default:
17573 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17574 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17575 			    "Unit not Ready. Additional sense code 0x%x\n",
17576 			    asc);
17577 		}
17578 		break;
17579 	}
17580 
17581 do_retry:
17582 
17583 	/*
17584 	 * Retry the command, as some targets may report NOT READY for
17585 	 * several seconds after being reset.
17586 	 */
17587 	xp->xb_nr_retry_count++;
17588 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17589 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17590 	    &si, EIO, un->un_busy_timeout, NULL);
17591 
17592 	return;
17593 
17594 fail_command:
17595 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17596 	sd_return_failed_command(un, bp, EIO);
17597 }
17598 
17599 
17600 
17601 /*
17602  *    Function: sd_sense_key_medium_or_hardware_error
17603  *
17604  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17605  *		sense key.
17606  *
17607  *     Context: May be called from interrupt context
17608  */
17609 
static void
sd_sense_key_medium_or_hardware_error(struct sd_lun *un, uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t sense_key = scsi_sense_key(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/* Medium errors get their own kstat in addition to sd_harderrs. */
	if (sense_key == KEY_MEDIUM_ERROR) {
		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
	}

	SD_UPDATE_ERRSTATS(un, sd_harderrs);

	/*
	 * When the retry count reaches exactly un_reset_retry_count,
	 * attempt to clear the condition with a reset: a LUN reset first
	 * (if enabled), falling back to a target reset if the LUN reset
	 * failed or was not attempted.  SD_MUTEX is dropped across
	 * scsi_reset() since the reset may block.
	 */
	if ((un->un_reset_retry_count != 0) &&
	    (xp->xb_retry_count == un->un_reset_retry_count)) {
		mutex_exit(SD_MUTEX(un));
		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
		if (un->un_f_allow_bus_device_reset == TRUE) {

			int reset_retval = 0;
			if (un->un_f_lun_reset_enabled == TRUE) {
				SD_TRACE(SD_LOG_IO_CORE, un,
				    "sd_sense_key_medium_or_hardware_"
				    "error: issuing RESET_LUN\n");
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				SD_TRACE(SD_LOG_IO_CORE, un,
				    "sd_sense_key_medium_or_hardware_"
				    "error: issuing RESET_TARGET\n");
				(void) scsi_reset(SD_ADDRESS(un),
				    RESET_TARGET);
			}
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * This really ought to be a fatal error, but we will retry anyway
	 * as some drives report this as a spurious error.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}
17664 
17665 
17666 
17667 /*
17668  *    Function: sd_sense_key_illegal_request
17669  *
17670  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17671  *
17672  *     Context: May be called from interrupt context
17673  */
17674 
17675 static void
17676 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17677     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17678 {
17679 	struct sd_sense_info	si;
17680 
17681 	ASSERT(un != NULL);
17682 	ASSERT(mutex_owned(SD_MUTEX(un)));
17683 	ASSERT(bp != NULL);
17684 	ASSERT(xp != NULL);
17685 	ASSERT(pktp != NULL);
17686 
17687 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17688 
17689 	si.ssi_severity = SCSI_ERR_INFO;
17690 	si.ssi_pfa_flag = FALSE;
17691 
17692 	/* Pointless to retry if the target thinks it's an illegal request */
17693 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17694 	sd_return_failed_command(un, bp, EIO);
17695 }
17696 
17697 
17698 
17699 
17700 /*
17701  *    Function: sd_sense_key_unit_attention
17702  *
17703  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17704  *
17705  *     Context: May be called from interrupt context
17706  */
17707 
static void
sd_sense_key_unit_attention(struct sd_lun *un, uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	/*
	 * For UNIT ATTENTION we allow retries for one minute. Devices
	 * like Sonoma can return UNIT ATTENTION close to a minute
	 * under certain conditions.
	 */
	int	retry_check_flag = SD_RETRIES_UA;
	boolean_t	kstat_updated = B_FALSE;
	struct	sd_sense_info		si;
	uint8_t asc = scsi_sense_asc(sense_datap);
	uint8_t	ascq = scsi_sense_ascq(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;


	switch (asc) {
	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
		/*
		 * Predictive failure: when PFA reporting is enabled, tag
		 * the sense message as a PFA notification and retry with
		 * the standard (rather than the UA) retry policy.
		 */
		if (sd_report_pfa != 0) {
			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
			si.ssi_pfa_flag = TRUE;
			retry_check_flag = SD_RETRIES_STANDARD;
			goto do_retry;
		}

		break;

	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
		/*
		 * A reset clears any reservation we held; record that it
		 * was lost and needs to be re-acquired.
		 */
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
		}
		/*
		 * For devices too large for Group 1 command addressing,
		 * re-enable descriptor sense via taskq; we cannot block
		 * here in interrupt context.
		 */
		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
			    un, KM_NOSLEEP) == TASKQID_INVALID) {
				/*
				 * If we can't dispatch the task we'll just
				 * live without descriptor sense.  We can
				 * try again on the next "unit attention"
				 */
				SD_ERROR(SD_LOG_ERROR, un,
				    "sd_sense_key_unit_attention: "
				    "Could not dispatch "
				    "sd_reenable_dsense_task\n");
			}
		}
		/* FALLTHRU */

	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
		if (!un->un_f_has_removable_media) {
			break;
		}

		/*
		 * When we get a unit attention from a removable-media device,
		 * it may be in a state that will take a long time to recover
		 * (e.g., from a reset).  Since we are executing in interrupt
		 * context here, we cannot wait around for the device to come
		 * back. So hand this command off to sd_media_change_task()
		 * for deferred processing under taskq thread context. (Note
		 * that the command still may be failed if a problem is
		 * encountered at a later time.)
		 */
		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
		    KM_NOSLEEP) == TASKQID_INVALID) {
			/*
			 * Cannot dispatch the request so fail the command.
			 */
			SD_UPDATE_ERRSTATS(un, sd_harderrs);
			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
			si.ssi_severity = SCSI_ERR_FATAL;
			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
			sd_return_failed_command(un, bp, EIO);
		}

		/*
		 * If failed to dispatch sd_media_change_task(), we already
		 * updated kstat. If succeed to dispatch sd_media_change_task(),
		 * we should update kstat later if it encounters an error. So,
		 * we update kstat_updated flag here.
		 */
		kstat_updated = B_TRUE;

		/*
		 * Either the command has been successfully dispatched to a
		 * task Q for retrying, or the dispatch failed. In either case
		 * do NOT retry again by calling sd_retry_command. This sets up
		 * two retries of the same command and when one completes and
		 * frees the resources the other will access freed memory,
		 * a bad thing.
		 */
		return;

	default:
		break;
	}

	/*
	 * ASC  ASCQ
	 *  2A   09	Capacity data has changed
	 *  2A   01	Mode parameters changed
	 *  3F   0E	Reported luns data has changed
	 * Arrays that support logical unit expansion should report
	 * capacity changes(2Ah/09). Mode parameters changed and
	 * reported luns data has changed are the approximation.
	 */
	if (((asc == 0x2a) && (ascq == 0x09)) ||
	    ((asc == 0x2a) && (ascq == 0x01)) ||
	    ((asc == 0x3f) && (ascq == 0x0e))) {
		if (taskq_dispatch(sd_tq, sd_target_change_task, un,
		    KM_NOSLEEP) == TASKQID_INVALID) {
			SD_ERROR(SD_LOG_ERROR, un,
			    "sd_sense_key_unit_attention: "
			    "Could not dispatch sd_target_change_task\n");
		}
	}

	/*
	 * Update kstat if we haven't done that.
	 */
	if (!kstat_updated) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
	}

do_retry:
	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
	    EIO, SD_UA_RETRY_DELAY, NULL);
}
17846 
17847 
17848 
17849 /*
17850  *    Function: sd_sense_key_fail_command
17851  *
17852  * Description: Use to fail a command when we don't like the sense key that
17853  *		was returned.
17854  *
17855  *     Context: May be called from interrupt context
17856  */
17857 
17858 static void
17859 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17860     struct scsi_pkt *pktp)
17861 {
17862 	struct sd_sense_info	si;
17863 
17864 	ASSERT(un != NULL);
17865 	ASSERT(mutex_owned(SD_MUTEX(un)));
17866 	ASSERT(bp != NULL);
17867 	ASSERT(xp != NULL);
17868 	ASSERT(pktp != NULL);
17869 
17870 	si.ssi_severity = SCSI_ERR_FATAL;
17871 	si.ssi_pfa_flag = FALSE;
17872 
17873 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17874 	sd_return_failed_command(un, bp, EIO);
17875 }
17876 
17877 
17878 
17879 /*
17880  *    Function: sd_sense_key_blank_check
17881  *
17882  * Description: Recovery actions for a SCSI "Blank Check" sense key.
17883  *		Has no monetary connotation.
17884  *
17885  *     Context: May be called from interrupt context
17886  */
17887 
17888 static void
17889 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17890     struct scsi_pkt *pktp)
17891 {
17892 	struct sd_sense_info	si;
17893 
17894 	ASSERT(un != NULL);
17895 	ASSERT(mutex_owned(SD_MUTEX(un)));
17896 	ASSERT(bp != NULL);
17897 	ASSERT(xp != NULL);
17898 	ASSERT(pktp != NULL);
17899 
17900 	/*
17901 	 * Blank check is not fatal for removable devices, therefore
17902 	 * it does not require a console message.
17903 	 */
17904 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
17905 	    SCSI_ERR_FATAL;
17906 	si.ssi_pfa_flag = FALSE;
17907 
17908 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17909 	sd_return_failed_command(un, bp, EIO);
17910 }
17911 
17912 
17913 
17914 
17915 /*
17916  *    Function: sd_sense_key_aborted_command
17917  *
17918  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
17919  *
17920  *     Context: May be called from interrupt context
17921  */
17922 
17923 static void
17924 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
17925     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17926 {
17927 	struct sd_sense_info	si;
17928 
17929 	ASSERT(un != NULL);
17930 	ASSERT(mutex_owned(SD_MUTEX(un)));
17931 	ASSERT(bp != NULL);
17932 	ASSERT(xp != NULL);
17933 	ASSERT(pktp != NULL);
17934 
17935 	si.ssi_severity = SCSI_ERR_FATAL;
17936 	si.ssi_pfa_flag = FALSE;
17937 
17938 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17939 
17940 	/*
17941 	 * This really ought to be a fatal error, but we will retry anyway
17942 	 * as some drives report this as a spurious error.
17943 	 */
17944 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17945 	    &si, EIO, drv_usectohz(100000), NULL);
17946 }
17947 
17948 
17949 
17950 /*
17951  *    Function: sd_sense_key_default
17952  *
17953  * Description: Default recovery action for several SCSI sense keys (basically
17954  *		attempts a retry).
17955  *
17956  *     Context: May be called from interrupt context
17957  */
17958 
17959 static void
17960 sd_sense_key_default(struct sd_lun *un, uint8_t *sense_datap, struct buf *bp,
17961     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17962 {
17963 	struct sd_sense_info	si;
17964 	uint8_t sense_key = scsi_sense_key(sense_datap);
17965 
17966 	ASSERT(un != NULL);
17967 	ASSERT(mutex_owned(SD_MUTEX(un)));
17968 	ASSERT(bp != NULL);
17969 	ASSERT(xp != NULL);
17970 	ASSERT(pktp != NULL);
17971 
17972 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17973 
17974 	/*
17975 	 * Undecoded sense key.	Attempt retries and hope that will fix
17976 	 * the problem.  Otherwise, we're dead.
17977 	 */
17978 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17979 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17980 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
17981 	}
17982 
17983 	si.ssi_severity = SCSI_ERR_FATAL;
17984 	si.ssi_pfa_flag = FALSE;
17985 
17986 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17987 	    &si, EIO, (clock_t)0, NULL);
17988 }
17989 
17990 
17991 
17992 /*
17993  *    Function: sd_print_retry_msg
17994  *
17995  * Description: Print a message indicating the retry action being taken.
17996  *
17997  *   Arguments: un - ptr to associated softstate
17998  *		bp - ptr to buf(9S) for the command
17999  *		arg - not used.
18000  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18001  *			or SD_NO_RETRY_ISSUED
18002  *
18003  *     Context: May be called from interrupt context
18004  */
18005 /* ARGSUSED */
static void
sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp;
	char *reasonp;
	char *msgp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	ASSERT(!mutex_owned(&un->un_pm_mutex));
	mutex_enter(&un->un_pm_mutex);
	/*
	 * Stay silent while suspended, in low power, or when the command
	 * requested silence; still record pkt_reason below.
	 */
	if ((un->un_state == SD_STATE_SUSPENDED) ||
	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
	    (pktp->pkt_flags & FLAG_SILENT)) {
		mutex_exit(&un->un_pm_mutex);
		goto update_pkt_reason;
	}
	mutex_exit(&un->un_pm_mutex);

	/*
	 * Suppress messages if they are all the same pkt_reason; with
	 * TQ, many (up to 256) are returned with the same pkt_reason.
	 * If we are in panic, then suppress the retry messages.
	 */
	switch (flag) {
	case SD_NO_RETRY_ISSUED:
		msgp = "giving up";
		break;
	case SD_IMMEDIATE_RETRY_ISSUED:
	case SD_DELAYED_RETRY_ISSUED:
		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
		    (sd_error_level != SCSI_ERR_ALL))) {
			return;
		}
		msgp = "retrying command";
		break;
	default:
		goto update_pkt_reason;
	}

	/* A parity error is named explicitly; otherwise decode pkt_reason. */
	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
	    scsi_rname(pktp->pkt_reason));

	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
	}

update_pkt_reason:
	/*
	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
	 * This is to prevent multiple console messages for the same failure
	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
	 * when the command is retried successfully because there still may be
	 * more commands coming back with the same value of pktp->pkt_reason.
	 */
	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
		un->un_last_pkt_reason = pktp->pkt_reason;
	}
}
18074 
18075 
18076 /*
18077  *    Function: sd_print_cmd_incomplete_msg
18078  *
18079  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18080  *
18081  *   Arguments: un - ptr to associated softstate
18082  *		bp - ptr to buf(9S) for the command
18083  *		arg - passed to sd_print_retry_msg()
18084  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18085  *			or SD_NO_RETRY_ISSUED
18086  *
18087  *     Context: May be called from interrupt context
18088  */
18089 
static void
sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
    int code)
{
	dev_info_t	*dip;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);

	switch (code) {
	case SD_NO_RETRY_ISSUED:
		/* Command was failed. Someone turned off this target? */
		if (un->un_state != SD_STATE_OFFLINE) {
			/*
			 * Suppress message if we are detaching and
			 * device has been disconnected
			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
			 * private interface and not part of the DDI
			 */
			dip = un->un_sd->sd_dev;
			if (!(DEVI_IS_DETACHING(dip) &&
			    DEVI_IS_DEVICE_REMOVED(dip))) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				"disk not responding to selection\n");
			}
			/*
			 * Mark the unit offline; the guard above ensures
			 * the message is only logged once per offlining.
			 */
			New_state(un, SD_STATE_OFFLINE);
		}
		break;

	case SD_DELAYED_RETRY_ISSUED:
	case SD_IMMEDIATE_RETRY_ISSUED:
	default:
		/* Command was successfully queued for retry */
		sd_print_retry_msg(un, bp, arg, code);
		break;
	}
}
18128 
18129 
18130 /*
18131  *    Function: sd_pkt_reason_cmd_incomplete
18132  *
18133  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18134  *
18135  *     Context: May be called from interrupt context
18136  */
18137 
static void
sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* Do not do a reset if selection did not complete */
	/* Note: Should this not just check the bit? */
	if (pktp->pkt_state != STATE_GOT_BUS) {
		SD_UPDATE_ERRSTATS(un, sd_transerrs);
		sd_reset_target(un, pktp);
	}

	/*
	 * If the target was not successfully selected, then set
	 * SD_RETRIES_FAILFAST to indicate that we lost communication
	 * with the target, and further retries and/or commands are
	 * likely to take a long time.
	 */
	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
		flag |= SD_RETRIES_FAILFAST;
	}

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/* Queue the retry; sd_print_cmd_incomplete_msg handles logging. */
	sd_retry_command(un, bp, flag,
	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18172 
18173 
18174 
18175 /*
18176  *    Function: sd_pkt_reason_cmd_tran_err
18177  *
18178  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18179  *
18180  *     Context: May be called from interrupt context
18181  */
18182 
static void
sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * Do not reset if we got a parity error, or if
	 * selection did not complete.
	 */
	SD_UPDATE_ERRSTATS(un, sd_harderrs);
	/* Note: Should this not just check the bit for pkt_state? */
	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
	    (pktp->pkt_state != STATE_GOT_BUS)) {
		SD_UPDATE_ERRSTATS(un, sd_transerrs);
		sd_reset_target(un, pktp);
	}

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/* Retry with the standard policy after SD_RESTART_TIMEOUT. */
	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18210 
18211 
18212 
18213 /*
18214  *    Function: sd_pkt_reason_cmd_reset
18215  *
18216  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18217  *
18218  *     Context: May be called from interrupt context
18219  */
18220 
18221 static void
18222 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18223     struct scsi_pkt *pktp)
18224 {
18225 	ASSERT(un != NULL);
18226 	ASSERT(mutex_owned(SD_MUTEX(un)));
18227 	ASSERT(bp != NULL);
18228 	ASSERT(xp != NULL);
18229 	ASSERT(pktp != NULL);
18230 
18231 	/* The target may still be running the command, so try to reset. */
18232 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18233 	sd_reset_target(un, pktp);
18234 
18235 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18236 
18237 	/*
18238 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18239 	 * reset because another target on this bus caused it. The target
18240 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18241 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18242 	 */
18243 
18244 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18245 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18246 }
18247 
18248 
18249 
18250 
18251 /*
18252  *    Function: sd_pkt_reason_cmd_aborted
18253  *
18254  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18255  *
18256  *     Context: May be called from interrupt context
18257  */
18258 
18259 static void
18260 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18261     struct scsi_pkt *pktp)
18262 {
18263 	ASSERT(un != NULL);
18264 	ASSERT(mutex_owned(SD_MUTEX(un)));
18265 	ASSERT(bp != NULL);
18266 	ASSERT(xp != NULL);
18267 	ASSERT(pktp != NULL);
18268 
18269 	/* The target may still be running the command, so try to reset. */
18270 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18271 	sd_reset_target(un, pktp);
18272 
18273 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18274 
18275 	/*
18276 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18277 	 * aborted because another target on this bus caused it. The target
18278 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18279 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18280 	 */
18281 
18282 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18283 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18284 }
18285 
18286 
18287 
18288 /*
18289  *    Function: sd_pkt_reason_cmd_timeout
18290  *
18291  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18292  *
18293  *     Context: May be called from interrupt context
18294  */
18295 
18296 static void
18297 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18298     struct scsi_pkt *pktp)
18299 {
18300 	ASSERT(un != NULL);
18301 	ASSERT(mutex_owned(SD_MUTEX(un)));
18302 	ASSERT(bp != NULL);
18303 	ASSERT(xp != NULL);
18304 	ASSERT(pktp != NULL);
18305 
18306 
18307 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18308 	sd_reset_target(un, pktp);
18309 
18310 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18311 
18312 	/*
18313 	 * A command timeout indicates that we could not establish
18314 	 * communication with the target, so set SD_RETRIES_FAILFAST
18315 	 * as further retries/commands are likely to take a long time.
18316 	 */
18317 	sd_retry_command(un, bp,
18318 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18319 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18320 }
18321 
18322 
18323 
18324 /*
18325  *    Function: sd_pkt_reason_cmd_unx_bus_free
18326  *
18327  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18328  *
18329  *     Context: May be called from interrupt context
18330  */
18331 
18332 static void
18333 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18334     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18335 {
18336 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18337 
18338 	ASSERT(un != NULL);
18339 	ASSERT(mutex_owned(SD_MUTEX(un)));
18340 	ASSERT(bp != NULL);
18341 	ASSERT(xp != NULL);
18342 	ASSERT(pktp != NULL);
18343 
18344 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18345 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18346 
18347 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18348 	    sd_print_retry_msg : NULL;
18349 
18350 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18351 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18352 }
18353 
18354 
18355 /*
18356  *    Function: sd_pkt_reason_cmd_tag_reject
18357  *
18358  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18359  *
18360  *     Context: May be called from interrupt context
18361  */
18362 
static void
sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_UPDATE_ERRSTATS(un, sd_harderrs);
	/*
	 * The tagged command was rejected: disable tagged queueing for
	 * this unit and reduce the throttle to match.
	 */
	pktp->pkt_flags = 0;
	un->un_tagflags = 0;
	if (un->un_f_opt_queueing == TRUE) {
		un->un_throttle = min(un->un_throttle, 3);
	} else {
		un->un_throttle = 1;
	}
	/*
	 * Drop SD_MUTEX while turning off the "tagged-qing" capability
	 * in the HBA.
	 */
	mutex_exit(SD_MUTEX(un));
	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
	mutex_enter(SD_MUTEX(un));

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/* Legacy behavior not to check retry counts here. */
	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18391 
18392 
18393 /*
18394  *    Function: sd_pkt_reason_default
18395  *
18396  * Description: Default recovery actions for SCSA pkt_reason values that
18397  *		do not have more explicit recovery actions.
18398  *
18399  *     Context: May be called from interrupt context
18400  */
18401 
18402 static void
18403 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18404     struct scsi_pkt *pktp)
18405 {
18406 	ASSERT(un != NULL);
18407 	ASSERT(mutex_owned(SD_MUTEX(un)));
18408 	ASSERT(bp != NULL);
18409 	ASSERT(xp != NULL);
18410 	ASSERT(pktp != NULL);
18411 
18412 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18413 	sd_reset_target(un, pktp);
18414 
18415 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18416 
18417 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18418 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18419 }
18420 
18421 
18422 
18423 /*
18424  *    Function: sd_pkt_status_check_condition
18425  *
18426  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18427  *
18428  *     Context: May be called from interrupt context
18429  */
18430 
18431 static void
18432 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18433     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18434 {
18435 	ASSERT(un != NULL);
18436 	ASSERT(mutex_owned(SD_MUTEX(un)));
18437 	ASSERT(bp != NULL);
18438 	ASSERT(xp != NULL);
18439 	ASSERT(pktp != NULL);
18440 
18441 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18442 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18443 
18444 	/*
18445 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18446 	 * command will be retried after the request sense). Otherwise, retry
18447 	 * the command. Note: we are issuing the request sense even though the
18448 	 * retry limit may have been reached for the failed command.
18449 	 */
18450 	if (un->un_f_arq_enabled == FALSE) {
18451 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18452 		    "no ARQ, sending request sense command\n");
18453 		sd_send_request_sense_command(un, bp, pktp);
18454 	} else {
18455 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18456 		    "ARQ,retrying request sense command\n");
18457 		/*
18458 		 * The SD_RETRY_DELAY value need to be adjusted here
18459 		 * when SD_RETRY_DELAY change in sddef.h
18460 		 */
18461 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18462 		    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
18463 		    NULL);
18464 	}
18465 
18466 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18467 }
18468 
18469 
18470 /*
18471  *    Function: sd_pkt_status_busy
18472  *
18473  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18474  *
18475  *     Context: May be called from interrupt context
18476  */
18477 
18478 static void
18479 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18480     struct scsi_pkt *pktp)
18481 {
18482 	ASSERT(un != NULL);
18483 	ASSERT(mutex_owned(SD_MUTEX(un)));
18484 	ASSERT(bp != NULL);
18485 	ASSERT(xp != NULL);
18486 	ASSERT(pktp != NULL);
18487 
18488 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18489 	    "sd_pkt_status_busy: entry\n");
18490 
18491 	/* If retries are exhausted, just fail the command. */
18492 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18493 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18494 		    "device busy too long\n");
18495 		sd_return_failed_command(un, bp, EIO);
18496 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18497 		    "sd_pkt_status_busy: exit\n");
18498 		return;
18499 	}
18500 	xp->xb_retry_count++;
18501 
18502 	/*
18503 	 * Try to reset the target. However, we do not want to perform
18504 	 * more than one reset if the device continues to fail. The reset
18505 	 * will be performed when the retry count reaches the reset
18506 	 * threshold.  This threshold should be set such that at least
18507 	 * one retry is issued before the reset is performed.
18508 	 */
18509 	if (xp->xb_retry_count ==
18510 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18511 		int rval = 0;
18512 		mutex_exit(SD_MUTEX(un));
18513 		if (un->un_f_allow_bus_device_reset == TRUE) {
18514 			/*
18515 			 * First try to reset the LUN; if we cannot then
18516 			 * try to reset the target.
18517 			 */
18518 			if (un->un_f_lun_reset_enabled == TRUE) {
18519 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18520 				    "sd_pkt_status_busy: RESET_LUN\n");
18521 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18522 			}
18523 			if (rval == 0) {
18524 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18525 				    "sd_pkt_status_busy: RESET_TARGET\n");
18526 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18527 			}
18528 		}
18529 		if (rval == 0) {
18530 			/*
18531 			 * If the RESET_LUN and/or RESET_TARGET failed,
18532 			 * try RESET_ALL
18533 			 */
18534 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18535 			    "sd_pkt_status_busy: RESET_ALL\n");
18536 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18537 		}
18538 		mutex_enter(SD_MUTEX(un));
18539 		if (rval == 0) {
18540 			/*
18541 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18542 			 * At this point we give up & fail the command.
18543 			 */
18544 			sd_return_failed_command(un, bp, EIO);
18545 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18546 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18547 			return;
18548 		}
18549 	}
18550 
18551 	/*
18552 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18553 	 * we have already checked the retry counts above.
18554 	 */
18555 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18556 	    EIO, un->un_busy_timeout, NULL);
18557 
18558 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18559 	    "sd_pkt_status_busy: exit\n");
18560 }
18561 
18562 
18563 /*
18564  *    Function: sd_pkt_status_reservation_conflict
18565  *
18566  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18567  *		command status.
18568  *
18569  *     Context: May be called from interrupt context
18570  */
18571 
18572 static void
18573 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18574     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18575 {
18576 	ASSERT(un != NULL);
18577 	ASSERT(mutex_owned(SD_MUTEX(un)));
18578 	ASSERT(bp != NULL);
18579 	ASSERT(xp != NULL);
18580 	ASSERT(pktp != NULL);
18581 
18582 	/*
18583 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
18584 	 * conflict could be due to various reasons like incorrect keys, not
18585 	 * registered or not reserved etc. So, we return EACCES to the caller.
18586 	 */
18587 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18588 		int cmd = SD_GET_PKT_OPCODE(pktp);
18589 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18590 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18591 			sd_return_failed_command(un, bp, EACCES);
18592 			return;
18593 		}
18594 	}
18595 
18596 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18597 
18598 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18599 		if (sd_failfast_enable != 0) {
18600 			/* By definition, we must panic here.... */
18601 			sd_panic_for_res_conflict(un);
18602 			/*NOTREACHED*/
18603 		}
18604 		SD_ERROR(SD_LOG_IO, un,
18605 		    "sd_handle_resv_conflict: Disk Reserved\n");
18606 		sd_return_failed_command(un, bp, EACCES);
18607 		return;
18608 	}
18609 
18610 	/*
18611 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18612 	 * property is set (default is 1). Retries will not succeed
18613 	 * on a disk reserved by another initiator. HA systems
18614 	 * may reset this via sd.conf to avoid these retries.
18615 	 *
18616 	 * Note: The legacy return code for this failure is EIO, however EACCES
18617 	 * seems more appropriate for a reservation conflict.
18618 	 */
18619 	if (sd_retry_on_reservation_conflict == 0) {
18620 		SD_ERROR(SD_LOG_IO, un,
18621 		    "sd_handle_resv_conflict: Device Reserved\n");
18622 		sd_return_failed_command(un, bp, EIO);
18623 		return;
18624 	}
18625 
18626 	/*
18627 	 * Retry the command if we can.
18628 	 *
18629 	 * Note: The legacy return code for this failure is EIO, however EACCES
18630 	 * seems more appropriate for a reservation conflict.
18631 	 */
18632 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18633 	    (clock_t)2, NULL);
18634 }
18635 
18636 
18637 
18638 /*
18639  *    Function: sd_pkt_status_qfull
18640  *
18641  * Description: Handle a QUEUE FULL condition from the target.  This can
18642  *		occur if the HBA does not handle the queue full condition.
18643  *		(Basically this means third-party HBAs as Sun HBAs will
18644  *		handle the queue full condition.)  Note that if there are
18645  *		some commands already in the transport, then the queue full
18646  *		has occurred because the queue for this nexus is actually
18647  *		full. If there are no commands in the transport, then the
18648  *		queue full is resulting from some other initiator or lun
18649  *		consuming all the resources at the target.
18650  *
18651  *     Context: May be called from interrupt context
18652  */
18653 
18654 static void
18655 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18656     struct scsi_pkt *pktp)
18657 {
18658 	ASSERT(un != NULL);
18659 	ASSERT(mutex_owned(SD_MUTEX(un)));
18660 	ASSERT(bp != NULL);
18661 	ASSERT(xp != NULL);
18662 	ASSERT(pktp != NULL);
18663 
18664 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18665 	    "sd_pkt_status_qfull: entry\n");
18666 
18667 	/*
18668 	 * Just lower the QFULL throttle and retry the command.  Note that
18669 	 * we do not limit the number of retries here.
18670 	 */
18671 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18672 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18673 	    SD_RESTART_TIMEOUT, NULL);
18674 
18675 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18676 	    "sd_pkt_status_qfull: exit\n");
18677 }
18678 
18679 
18680 /*
18681  *    Function: sd_reset_target
18682  *
18683  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18684  *		RESET_TARGET, or RESET_ALL.
18685  *
18686  *     Context: May be called under interrupt context.
18687  */
18688 
18689 static void
18690 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18691 {
18692 	int rval = 0;
18693 
18694 	ASSERT(un != NULL);
18695 	ASSERT(mutex_owned(SD_MUTEX(un)));
18696 	ASSERT(pktp != NULL);
18697 
18698 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18699 
18700 	/*
18701 	 * No need to reset if the transport layer has already done so.
18702 	 */
18703 	if ((pktp->pkt_statistics &
18704 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18705 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18706 		    "sd_reset_target: no reset\n");
18707 		return;
18708 	}
18709 
18710 	mutex_exit(SD_MUTEX(un));
18711 
18712 	if (un->un_f_allow_bus_device_reset == TRUE) {
18713 		if (un->un_f_lun_reset_enabled == TRUE) {
18714 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18715 			    "sd_reset_target: RESET_LUN\n");
18716 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18717 		}
18718 		if (rval == 0) {
18719 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18720 			    "sd_reset_target: RESET_TARGET\n");
18721 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18722 		}
18723 	}
18724 
18725 	if (rval == 0) {
18726 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18727 		    "sd_reset_target: RESET_ALL\n");
18728 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18729 	}
18730 
18731 	mutex_enter(SD_MUTEX(un));
18732 
18733 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18734 }
18735 
18736 /*
18737  *    Function: sd_target_change_task
18738  *
18739  * Description: Handle dynamic target change
18740  *
18741  *     Context: Executes in a taskq() thread context
18742  */
18743 static void
18744 sd_target_change_task(void *arg)
18745 {
18746 	struct sd_lun		*un = arg;
18747 	uint64_t		capacity;
18748 	diskaddr_t		label_cap;
18749 	uint_t			lbasize;
18750 	sd_ssc_t		*ssc;
18751 
18752 	ASSERT(un != NULL);
18753 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18754 
18755 	if ((un->un_f_blockcount_is_valid == FALSE) ||
18756 	    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
18757 		return;
18758 	}
18759 
18760 	ssc = sd_ssc_init(un);
18761 
18762 	if (sd_send_scsi_READ_CAPACITY(ssc, &capacity,
18763 	    &lbasize, SD_PATH_DIRECT) != 0) {
18764 		SD_ERROR(SD_LOG_ERROR, un,
18765 		    "sd_target_change_task: fail to read capacity\n");
18766 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
18767 		goto task_exit;
18768 	}
18769 
18770 	mutex_enter(SD_MUTEX(un));
18771 	if (capacity <= un->un_blockcount) {
18772 		mutex_exit(SD_MUTEX(un));
18773 		goto task_exit;
18774 	}
18775 
18776 	sd_update_block_info(un, lbasize, capacity);
18777 	mutex_exit(SD_MUTEX(un));
18778 
18779 	/*
18780 	 * If lun is EFI labeled and lun capacity is greater than the
18781 	 * capacity contained in the label, log a sys event.
18782 	 */
18783 	if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
18784 	    (void*)SD_PATH_DIRECT) == 0) {
18785 		mutex_enter(SD_MUTEX(un));
18786 		if (un->un_f_blockcount_is_valid &&
18787 		    un->un_blockcount > label_cap) {
18788 			mutex_exit(SD_MUTEX(un));
18789 			sd_log_lun_expansion_event(un, KM_SLEEP);
18790 		} else {
18791 			mutex_exit(SD_MUTEX(un));
18792 		}
18793 	}
18794 
18795 task_exit:
18796 	sd_ssc_fini(ssc);
18797 }
18798 
18799 
18800 /*
18801  *    Function: sd_log_dev_status_event
18802  *
18803  * Description: Log EC_dev_status sysevent
18804  *
18805  *     Context: Never called from interrupt context
18806  */
18807 static void
18808 sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag)
18809 {
18810 	int err;
18811 	char			*path;
18812 	nvlist_t		*attr_list;
18813 	size_t			n;
18814 
18815 	/* Allocate and build sysevent attribute list */
18816 	err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, km_flag);
18817 	if (err != 0) {
18818 		SD_ERROR(SD_LOG_ERROR, un,
18819 		    "sd_log_dev_status_event: fail to allocate space\n");
18820 		return;
18821 	}
18822 
18823 	path = kmem_alloc(MAXPATHLEN, km_flag);
18824 	if (path == NULL) {
18825 		nvlist_free(attr_list);
18826 		SD_ERROR(SD_LOG_ERROR, un,
18827 		    "sd_log_dev_status_event: fail to allocate space\n");
18828 		return;
18829 	}
18830 
18831 	n = snprintf(path, MAXPATHLEN, "/devices");
18832 	(void) ddi_pathname(SD_DEVINFO(un), path + n);
18833 	n = strlen(path);
18834 	n += snprintf(path + n, MAXPATHLEN - n, ":x");
18835 
18836 	/*
18837 	 * On receipt of this event, the ZFS sysevent module will scan
18838 	 * active zpools for child vdevs matching this physical path.
18839 	 * In order to catch both whole disk pools and those with an
18840 	 * EFI boot partition, generate separate sysevents for minor
18841 	 * node 'a' and 'b'.
18842 	 */
18843 	for (char c = 'a'; c < 'c'; c++) {
18844 		path[n - 1] = c;
18845 
18846 		err = nvlist_add_string(attr_list, DEV_PHYS_PATH, path);
18847 		if (err != 0) {
18848 			SD_ERROR(SD_LOG_ERROR, un,
18849 			    "sd_log_dev_status_event: fail to add attribute\n");
18850 			break;
18851 		}
18852 
18853 		err = ddi_log_sysevent(SD_DEVINFO(un), SUNW_VENDOR,
18854 		    EC_DEV_STATUS, esc, attr_list, NULL, km_flag);
18855 		if (err != DDI_SUCCESS) {
18856 			SD_ERROR(SD_LOG_ERROR, un,
18857 			    "sd_log_dev_status_event: fail to log sysevent\n");
18858 			break;
18859 		}
18860 	}
18861 
18862 	nvlist_free(attr_list);
18863 	kmem_free(path, MAXPATHLEN);
18864 }
18865 
18866 
18867 /*
18868  *    Function: sd_log_lun_expansion_event
18869  *
18870  * Description: Log lun expansion sys event
18871  *
18872  *     Context: Never called from interrupt context
18873  */
18874 static void
18875 sd_log_lun_expansion_event(struct sd_lun *un, int km_flag)
18876 {
18877 	sd_log_dev_status_event(un, ESC_DEV_DLE, km_flag);
18878 }
18879 
18880 
18881 /*
18882  *    Function: sd_log_eject_request_event
18883  *
18884  * Description: Log eject request sysevent
18885  *
18886  *     Context: Never called from interrupt context
18887  */
18888 static void
18889 sd_log_eject_request_event(struct sd_lun *un, int km_flag)
18890 {
18891 	sd_log_dev_status_event(un, ESC_DEV_EJECT_REQUEST, km_flag);
18892 }
18893 
18894 
18895 /*
18896  *    Function: sd_media_change_task
18897  *
18898  * Description: Recovery action for CDROM to become available.
18899  *
18900  *     Context: Executes in a taskq() thread context
18901  */
18902 
18903 static void
18904 sd_media_change_task(void *arg)
18905 {
18906 	struct	scsi_pkt	*pktp = arg;
18907 	struct	sd_lun		*un;
18908 	struct	buf		*bp;
18909 	struct	sd_xbuf		*xp;
18910 	int	err		= 0;
18911 	int	retry_count	= 0;
18912 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18913 	struct	sd_sense_info	si;
18914 
18915 	ASSERT(pktp != NULL);
18916 	bp = (struct buf *)pktp->pkt_private;
18917 	ASSERT(bp != NULL);
18918 	xp = SD_GET_XBUF(bp);
18919 	ASSERT(xp != NULL);
18920 	un = SD_GET_UN(bp);
18921 	ASSERT(un != NULL);
18922 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18923 	ASSERT(un->un_f_monitor_media_state);
18924 
18925 	si.ssi_severity = SCSI_ERR_INFO;
18926 	si.ssi_pfa_flag = FALSE;
18927 
18928 	/*
18929 	 * When a reset is issued on a CDROM, it takes a long time to
18930 	 * recover. First few attempts to read capacity and other things
18931 	 * related to handling unit attention fail (with a ASC 0x4 and
18932 	 * ASCQ 0x1). In that case we want to do enough retries and we want
18933 	 * to limit the retries in other cases of genuine failures like
18934 	 * no media in drive.
18935 	 */
18936 	while (retry_count++ < retry_limit) {
18937 		if ((err = sd_handle_mchange(un)) == 0) {
18938 			break;
18939 		}
18940 		if (err == EAGAIN) {
18941 			retry_limit = SD_UNIT_ATTENTION_RETRY;
18942 		}
18943 		/* Sleep for 0.5 sec. & try again */
18944 		delay(drv_usectohz(500000));
18945 	}
18946 
18947 	/*
18948 	 * Dispatch (retry or fail) the original command here,
18949 	 * along with appropriate console messages....
18950 	 *
18951 	 * Must grab the mutex before calling sd_retry_command,
18952 	 * sd_print_sense_msg and sd_return_failed_command.
18953 	 */
18954 	mutex_enter(SD_MUTEX(un));
18955 	if (err != SD_CMD_SUCCESS) {
18956 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18957 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18958 		si.ssi_severity = SCSI_ERR_FATAL;
18959 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18960 		sd_return_failed_command(un, bp, EIO);
18961 	} else {
18962 		sd_retry_command(un, bp, SD_RETRIES_UA, sd_print_sense_msg,
18963 		    &si, EIO, (clock_t)0, NULL);
18964 	}
18965 	mutex_exit(SD_MUTEX(un));
18966 }
18967 
18968 
18969 
18970 /*
18971  *    Function: sd_handle_mchange
18972  *
18973  * Description: Perform geometry validation & other recovery when CDROM
18974  *		has been removed from drive.
18975  *
18976  * Return Code: 0 for success
18977  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
18978  *		sd_send_scsi_READ_CAPACITY()
18979  *
18980  *     Context: Executes in a taskq() thread context
18981  */
18982 
18983 static int
18984 sd_handle_mchange(struct sd_lun *un)
18985 {
18986 	uint64_t	capacity;
18987 	uint32_t	lbasize;
18988 	int		rval;
18989 	sd_ssc_t	*ssc;
18990 
18991 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18992 	ASSERT(un->un_f_monitor_media_state);
18993 
18994 	ssc = sd_ssc_init(un);
18995 	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
18996 	    SD_PATH_DIRECT_PRIORITY);
18997 
18998 	if (rval != 0)
18999 		goto failed;
19000 
19001 	mutex_enter(SD_MUTEX(un));
19002 	sd_update_block_info(un, lbasize, capacity);
19003 
19004 	if (un->un_errstats != NULL) {
19005 		struct	sd_errstats *stp =
19006 		    (struct sd_errstats *)un->un_errstats->ks_data;
19007 		stp->sd_capacity.value.ui64 = (uint64_t)
19008 		    ((uint64_t)un->un_blockcount *
19009 		    (uint64_t)un->un_tgt_blocksize);
19010 	}
19011 
19012 	/*
19013 	 * Check if the media in the device is writable or not
19014 	 */
19015 	if (ISCD(un)) {
19016 		sd_check_for_writable_cd(ssc, SD_PATH_DIRECT_PRIORITY);
19017 	}
19018 
19019 	/*
19020 	 * Note: Maybe let the strategy/partitioning chain worry about getting
19021 	 * valid geometry.
19022 	 */
19023 	mutex_exit(SD_MUTEX(un));
19024 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
19025 
19026 
19027 	if (cmlb_validate(un->un_cmlbhandle, 0,
19028 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
19029 		sd_ssc_fini(ssc);
19030 		return (EIO);
19031 	} else {
19032 		if (un->un_f_pkstats_enabled) {
19033 			sd_set_pstats(un);
19034 			SD_TRACE(SD_LOG_IO_PARTITION, un,
19035 			    "sd_handle_mchange: un:0x%p pstats created and "
19036 			    "set\n", un);
19037 		}
19038 	}
19039 
19040 	/*
19041 	 * Try to lock the door
19042 	 */
19043 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
19044 	    SD_PATH_DIRECT_PRIORITY);
19045 failed:
19046 	if (rval != 0)
19047 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19048 	sd_ssc_fini(ssc);
19049 	return (rval);
19050 }
19051 
19052 
19053 /*
19054  *    Function: sd_send_scsi_DOORLOCK
19055  *
19056  * Description: Issue the scsi DOOR LOCK command
19057  *
19058  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19059  *                      structure for this target.
19060  *		flag  - SD_REMOVAL_ALLOW
19061  *			SD_REMOVAL_PREVENT
19062  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19063  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19064  *			to use the USCSI "direct" chain and bypass the normal
19065  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19066  *			command is issued as part of an error recovery action.
19067  *
19068  * Return Code: 0   - Success
19069  *		errno return code from sd_ssc_send()
19070  *
19071  *     Context: Can sleep.
19072  */
19073 
19074 static int
19075 sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag)
19076 {
19077 	struct scsi_extended_sense	sense_buf;
19078 	union scsi_cdb		cdb;
19079 	struct uscsi_cmd	ucmd_buf;
19080 	int			status;
19081 	struct sd_lun		*un;
19082 
19083 	ASSERT(ssc != NULL);
19084 	un = ssc->ssc_un;
19085 	ASSERT(un != NULL);
19086 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19087 
19088 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19089 
19090 	/* already determined doorlock is not supported, fake success */
19091 	if (un->un_f_doorlock_supported == FALSE) {
19092 		return (0);
19093 	}
19094 
19095 	/*
19096 	 * If we are ejecting and see an SD_REMOVAL_PREVENT
19097 	 * ignore the command so we can complete the eject
19098 	 * operation.
19099 	 */
19100 	if (flag == SD_REMOVAL_PREVENT) {
19101 		mutex_enter(SD_MUTEX(un));
19102 		if (un->un_f_ejecting == TRUE) {
19103 			mutex_exit(SD_MUTEX(un));
19104 			return (EAGAIN);
19105 		}
19106 		mutex_exit(SD_MUTEX(un));
19107 	}
19108 
19109 	bzero(&cdb, sizeof (cdb));
19110 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19111 
19112 	cdb.scc_cmd = SCMD_DOORLOCK;
19113 	cdb.cdb_opaque[4] = (uchar_t)flag;
19114 
19115 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19116 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19117 	ucmd_buf.uscsi_bufaddr	= NULL;
19118 	ucmd_buf.uscsi_buflen	= 0;
19119 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19120 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19121 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19122 	ucmd_buf.uscsi_timeout	= 15;
19123 
19124 	SD_TRACE(SD_LOG_IO, un,
19125 	    "sd_send_scsi_DOORLOCK: returning sd_ssc_send\n");
19126 
19127 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19128 	    UIO_SYSSPACE, path_flag);
19129 
19130 	if (status == 0)
19131 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19132 
19133 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19134 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19135 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19136 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19137 
19138 		/* fake success and skip subsequent doorlock commands */
19139 		un->un_f_doorlock_supported = FALSE;
19140 		return (0);
19141 	}
19142 
19143 	return (status);
19144 }
19145 
19146 /*
19147  *    Function: sd_send_scsi_READ_CAPACITY
19148  *
19149  * Description: This routine uses the scsi READ CAPACITY command to determine
19150  *		the device capacity in number of blocks and the device native
19151  *		block size. If this function returns a failure, then the
19152  *		values in *capp and *lbap are undefined.  If the capacity
19153  *		returned is 0xffffffff then the lun is too large for a
19154  *		normal READ CAPACITY command and the results of a
19155  *		READ CAPACITY 16 will be used instead.
19156  *
19157  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19158  *		capp - ptr to unsigned 64-bit variable to receive the
19159  *			capacity value from the command.
19160  *		lbap - ptr to unsigned 32-bit varaible to receive the
19161  *			block size value from the command
19162  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19163  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19164  *			to use the USCSI "direct" chain and bypass the normal
19165  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19166  *			command is issued as part of an error recovery action.
19167  *
19168  * Return Code: 0   - Success
19169  *		EIO - IO error
19170  *		EACCES - Reservation conflict detected
19171  *		EAGAIN - Device is becoming ready
19172  *		errno return code from sd_ssc_send()
19173  *
19174  *     Context: Can sleep.  Blocks until command completes.
19175  */
19176 
19177 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19178 
19179 static int
19180 sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
19181     int path_flag)
19182 {
19183 	struct	scsi_extended_sense	sense_buf;
19184 	struct	uscsi_cmd	ucmd_buf;
19185 	union	scsi_cdb	cdb;
19186 	uint32_t		*capacity_buf;
19187 	uint64_t		capacity;
19188 	uint32_t		lbasize;
19189 	uint32_t		pbsize;
19190 	int			status;
19191 	struct sd_lun		*un;
19192 
19193 	ASSERT(ssc != NULL);
19194 
19195 	un = ssc->ssc_un;
19196 	ASSERT(un != NULL);
19197 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19198 	ASSERT(capp != NULL);
19199 	ASSERT(lbap != NULL);
19200 
19201 	SD_TRACE(SD_LOG_IO, un,
19202 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19203 
19204 	/*
19205 	 * First send a READ_CAPACITY command to the target.
19206 	 * (This command is mandatory under SCSI-2.)
19207 	 *
19208 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19209 	 * Medium Indicator bit is cleared.  The address field must be
19210 	 * zero if the PMI bit is zero.
19211 	 */
19212 	bzero(&cdb, sizeof (cdb));
19213 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19214 
19215 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19216 
19217 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19218 
19219 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19220 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19221 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19222 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19223 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19224 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19225 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19226 	ucmd_buf.uscsi_timeout	= 60;
19227 
19228 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19229 	    UIO_SYSSPACE, path_flag);
19230 
19231 	switch (status) {
19232 	case 0:
19233 		/* Return failure if we did not get valid capacity data. */
19234 		if (ucmd_buf.uscsi_resid != 0) {
19235 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19236 			    "sd_send_scsi_READ_CAPACITY received invalid "
19237 			    "capacity data");
19238 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19239 			return (EIO);
19240 		}
19241 		/*
19242 		 * Read capacity and block size from the READ CAPACITY 10 data.
19243 		 * This data may be adjusted later due to device specific
19244 		 * issues.
19245 		 *
19246 		 * According to the SCSI spec, the READ CAPACITY 10
19247 		 * command returns the following:
19248 		 *
19249 		 *  bytes 0-3: Maximum logical block address available.
19250 		 *		(MSB in byte:0 & LSB in byte:3)
19251 		 *
19252 		 *  bytes 4-7: Block length in bytes
19253 		 *		(MSB in byte:4 & LSB in byte:7)
19254 		 *
19255 		 */
19256 		capacity = BE_32(capacity_buf[0]);
19257 		lbasize = BE_32(capacity_buf[1]);
19258 
19259 		/*
19260 		 * Done with capacity_buf
19261 		 */
19262 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19263 
19264 		/*
19265 		 * if the reported capacity is set to all 0xf's, then
19266 		 * this disk is too large and requires SBC-2 commands.
19267 		 * Reissue the request using READ CAPACITY 16.
19268 		 */
19269 		if (capacity == 0xffffffff) {
19270 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19271 			status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
19272 			    &lbasize, &pbsize, path_flag);
19273 			if (status != 0) {
19274 				return (status);
19275 			} else {
19276 				goto rc16_done;
19277 			}
19278 		}
19279 		break;	/* Success! */
19280 	case EIO:
19281 		switch (ucmd_buf.uscsi_status) {
19282 		case STATUS_RESERVATION_CONFLICT:
19283 			status = EACCES;
19284 			break;
19285 		case STATUS_CHECK:
19286 			/*
19287 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19288 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19289 			 */
19290 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19291 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19292 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19293 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19294 				return (EAGAIN);
19295 			}
19296 			break;
19297 		default:
19298 			break;
19299 		}
19300 		/* FALLTHRU */
19301 	default:
19302 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19303 		return (status);
19304 	}
19305 
19306 	/*
19307 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19308 	 * (2352 and 0 are common) so for these devices always force the value
19309 	 * to 2048 as required by the ATAPI specs.
19310 	 */
19311 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19312 		lbasize = 2048;
19313 	}
19314 
19315 	/*
19316 	 * Get the maximum LBA value from the READ CAPACITY data.
19317 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19318 	 * was cleared when issuing the command. This means that the LBA
19319 	 * returned from the device is the LBA of the last logical block
19320 	 * on the logical unit.  The actual logical block count will be
19321 	 * this value plus one.
19322 	 */
19323 	capacity += 1;
19324 
19325 	/*
19326 	 * Currently, for removable media, the capacity is saved in terms
19327 	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
19328 	 */
19329 	if (un->un_f_has_removable_media)
19330 		capacity *= (lbasize / un->un_sys_blocksize);
19331 
19332 rc16_done:
19333 
19334 	/*
19335 	 * Copy the values from the READ CAPACITY command into the space
19336 	 * provided by the caller.
19337 	 */
19338 	*capp = capacity;
19339 	*lbap = lbasize;
19340 
19341 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19342 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19343 
19344 	/*
19345 	 * Both the lbasize and capacity from the device must be nonzero,
19346 	 * otherwise we assume that the values are not valid and return
19347 	 * failure to the caller. (4203735)
19348 	 */
19349 	if ((capacity == 0) || (lbasize == 0)) {
19350 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19351 		    "sd_send_scsi_READ_CAPACITY received invalid value "
19352 		    "capacity %llu lbasize %d", capacity, lbasize);
19353 		return (EIO);
19354 	}
19355 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19356 	return (0);
19357 }
19358 
19359 /*
19360  *    Function: sd_send_scsi_READ_CAPACITY_16
19361  *
19362  * Description: This routine uses the scsi READ CAPACITY 16 command to
19363  *		determine the device capacity in number of blocks and the
19364  *		device native block size.  If this function returns a failure,
19365  *		then the values in *capp and *lbap are undefined.
19366  *		This routine should be called by sd_send_scsi_READ_CAPACITY
19367  *              which will apply any device specific adjustments to capacity
19368  *              and lbasize. One exception is it is also called by
19369  *              sd_get_media_info_ext. In that function, there is no need to
19370  *              adjust the capacity and lbasize.
19371  *
19372  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19373  *		capp - ptr to unsigned 64-bit variable to receive the
19374  *			capacity value from the command.
19375  *		lbap - ptr to unsigned 32-bit varaible to receive the
19376  *			block size value from the command
19377  *              psp  - ptr to unsigned 32-bit variable to receive the
19378  *                      physical block size value from the command
19379  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19380  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19381  *			to use the USCSI "direct" chain and bypass the normal
19382  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19383  *			this command is issued as part of an error recovery
19384  *			action.
19385  *
19386  * Return Code: 0   - Success
19387  *		EIO - IO error
19388  *		EACCES - Reservation conflict detected
19389  *		EAGAIN - Device is becoming ready
19390  *		errno return code from sd_ssc_send()
19391  *
19392  *     Context: Can sleep.  Blocks until command completes.
19393  */
19394 
19395 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19396 
19397 static int
19398 sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
19399     uint32_t *psp, int path_flag)
19400 {
19401 	struct	scsi_extended_sense	sense_buf;
19402 	struct	uscsi_cmd	ucmd_buf;
19403 	union	scsi_cdb	cdb;
19404 	uint64_t		*capacity16_buf;
19405 	uint64_t		capacity;
19406 	uint32_t		lbasize;
19407 	uint32_t		pbsize;
19408 	uint32_t		lbpb_exp;
19409 	int			status;
19410 	struct sd_lun		*un;
19411 
19412 	ASSERT(ssc != NULL);
19413 
19414 	un = ssc->ssc_un;
19415 	ASSERT(un != NULL);
19416 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19417 	ASSERT(capp != NULL);
19418 	ASSERT(lbap != NULL);
19419 
19420 	SD_TRACE(SD_LOG_IO, un,
19421 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19422 
19423 	/*
19424 	 * First send a READ_CAPACITY_16 command to the target.
19425 	 *
19426 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19427 	 * Medium Indicator bit is cleared.  The address field must be
19428 	 * zero if the PMI bit is zero.
19429 	 */
19430 	bzero(&cdb, sizeof (cdb));
19431 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19432 
19433 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19434 
19435 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19436 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19437 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19438 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19439 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19440 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19441 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19442 	ucmd_buf.uscsi_timeout	= 60;
19443 
19444 	/*
19445 	 * Read Capacity (16) is a Service Action In command.  One
19446 	 * command byte (0x9E) is overloaded for multiple operations,
19447 	 * with the second CDB byte specifying the desired operation
19448 	 */
19449 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19450 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19451 
19452 	/*
19453 	 * Fill in allocation length field
19454 	 */
19455 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19456 
19457 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19458 	    UIO_SYSSPACE, path_flag);
19459 
19460 	switch (status) {
19461 	case 0:
19462 		/* Return failure if we did not get valid capacity data. */
19463 		if (ucmd_buf.uscsi_resid > 20) {
19464 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19465 			    "sd_send_scsi_READ_CAPACITY_16 received invalid "
19466 			    "capacity data");
19467 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19468 			return (EIO);
19469 		}
19470 
19471 		/*
19472 		 * Read capacity and block size from the READ CAPACITY 16 data.
19473 		 * This data may be adjusted later due to device specific
19474 		 * issues.
19475 		 *
19476 		 * According to the SCSI spec, the READ CAPACITY 16
19477 		 * command returns the following:
19478 		 *
19479 		 *  bytes 0-7: Maximum logical block address available.
19480 		 *		(MSB in byte:0 & LSB in byte:7)
19481 		 *
19482 		 *  bytes 8-11: Block length in bytes
19483 		 *		(MSB in byte:8 & LSB in byte:11)
19484 		 *
19485 		 *  byte 13: LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT
19486 		 *
19487 		 *  byte 14:
19488 		 *	bit 7: Thin-Provisioning Enabled
19489 		 *	bit 6: Thin-Provisioning Read Zeros
19490 		 */
19491 		capacity = BE_64(capacity16_buf[0]);
19492 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19493 		lbpb_exp = (BE_64(capacity16_buf[1]) >> 16) & 0x0f;
19494 
19495 		un->un_thin_flags = 0;
19496 		if (((uint8_t *)capacity16_buf)[14] & (1 << 7))
19497 			un->un_thin_flags |= SD_THIN_PROV_ENABLED;
19498 		if (((uint8_t *)capacity16_buf)[14] & (1 << 6))
19499 			un->un_thin_flags |= SD_THIN_PROV_READ_ZEROS;
19500 
19501 		pbsize = lbasize << lbpb_exp;
19502 
19503 		/*
19504 		 * Done with capacity16_buf
19505 		 */
19506 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19507 
19508 		/*
19509 		 * if the reported capacity is set to all 0xf's, then
19510 		 * this disk is too large.  This could only happen with
19511 		 * a device that supports LBAs larger than 64 bits which
19512 		 * are not defined by any current T10 standards.
19513 		 */
19514 		if (capacity == 0xffffffffffffffff) {
19515 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19516 			    "disk is too large");
19517 			return (EIO);
19518 		}
19519 		break;	/* Success! */
19520 	case EIO:
19521 		switch (ucmd_buf.uscsi_status) {
19522 		case STATUS_RESERVATION_CONFLICT:
19523 			status = EACCES;
19524 			break;
19525 		case STATUS_CHECK:
19526 			/*
19527 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19528 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19529 			 */
19530 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19531 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19532 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19533 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19534 				return (EAGAIN);
19535 			}
19536 			break;
19537 		default:
19538 			break;
19539 		}
19540 		/* FALLTHRU */
19541 	default:
19542 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19543 		return (status);
19544 	}
19545 
19546 	/*
19547 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19548 	 * (2352 and 0 are common) so for these devices always force the value
19549 	 * to 2048 as required by the ATAPI specs.
19550 	 */
19551 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19552 		lbasize = 2048;
19553 	}
19554 
19555 	/*
19556 	 * Get the maximum LBA value from the READ CAPACITY 16 data.
19557 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19558 	 * was cleared when issuing the command. This means that the LBA
19559 	 * returned from the device is the LBA of the last logical block
19560 	 * on the logical unit.  The actual logical block count will be
19561 	 * this value plus one.
19562 	 */
19563 	capacity += 1;
19564 
19565 	/*
19566 	 * Currently, for removable media, the capacity is saved in terms
19567 	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
19568 	 */
19569 	if (un->un_f_has_removable_media)
19570 		capacity *= (lbasize / un->un_sys_blocksize);
19571 
19572 	*capp = capacity;
19573 	*lbap = lbasize;
19574 	*psp = pbsize;
19575 
19576 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19577 	    "capacity:0x%llx  lbasize:0x%x, pbsize: 0x%x\n",
19578 	    capacity, lbasize, pbsize);
19579 
19580 	if ((capacity == 0) || (lbasize == 0) || (pbsize == 0)) {
19581 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19582 		    "sd_send_scsi_READ_CAPACITY_16 received invalid value "
19583 		    "capacity %llu lbasize %d pbsize %d", capacity, lbasize);
19584 		return (EIO);
19585 	}
19586 
19587 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19588 	return (0);
19589 }
19590 
19591 
19592 /*
19593  *    Function: sd_send_scsi_START_STOP_UNIT
19594  *
19595  * Description: Issue a scsi START STOP UNIT command to the target.
19596  *
 *   Arguments: ssc    - ssc contains pointer to driver soft state (unit)
19598  *                       structure for this target.
19599  *      pc_flag - SD_POWER_CONDITION
19600  *                SD_START_STOP
19601  *		flag  - SD_TARGET_START
19602  *			SD_TARGET_STOP
19603  *			SD_TARGET_EJECT
19604  *			SD_TARGET_CLOSE
19605  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19606  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19607  *			to use the USCSI "direct" chain and bypass the normal
19608  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19609  *			command is issued as part of an error recovery action.
19610  *
19611  * Return Code: 0   - Success
19612  *		EIO - IO error
19613  *		EACCES - Reservation conflict detected
19614  *		ENXIO  - Not Ready, medium not present
19615  *		errno return code from sd_ssc_send()
19616  *
19617  *     Context: Can sleep.
19618  */
19619 
static int
sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag, int flag,
    int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);

	/*
	 * If the drive is flagged for start/stop checking and is known not
	 * to support START STOP UNIT, silently succeed on plain start/stop
	 * requests rather than issuing a command the device will reject.
	 */
	if (un->un_f_check_start_stop &&
	    (pc_flag == SD_START_STOP) &&
	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
	    (un->un_f_start_stop_supported != TRUE)) {
		return (0);
	}

	/*
	 * If we are performing an eject operation and
	 * we receive any command other than SD_TARGET_EJECT
	 * we should immediately return.
	 */
	if (flag != SD_TARGET_EJECT) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_ejecting == TRUE) {
			mutex_exit(SD_MUTEX(un));
			return (EAGAIN);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	cdb.scc_cmd = SCMD_START_STOP;
	/*
	 * For a power condition request the condition code occupies the
	 * upper nibble of CDB byte 4; otherwise the start/stop/eject/close
	 * flag value is placed in the byte as-is.
	 */
	cdb.cdb_opaque[4] = (pc_flag == SD_POWER_CONDITION) ?
	    (uchar_t)(flag << 4) : (uchar_t)flag;

	/* START STOP UNIT has no data phase; only request sense data. */
	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 200;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	/*
	 * Map SCSI status/sense information onto the errnos documented
	 * in the function header.
	 */
	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
				switch (scsi_sense_key(
				    (uint8_t *)&sense_buf)) {
				case KEY_ILLEGAL_REQUEST:
					status = ENOTSUP;
					break;
				case KEY_NOT_READY:
					/* ASC 0x3A: medium not present */
					if (scsi_sense_asc(
					    (uint8_t *)&sense_buf)
					    == 0x3A) {
						status = ENXIO;
					}
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");

	return (status);
}
19719 
19720 
19721 /*
19722  *    Function: sd_start_stop_unit_callback
19723  *
19724  * Description: timeout(9F) callback to begin recovery process for a
19725  *		device that has spun down.
19726  *
19727  *   Arguments: arg - pointer to associated softstate struct.
19728  *
19729  *     Context: Executes in a timeout(9F) thread context
19730  */
19731 
19732 static void
19733 sd_start_stop_unit_callback(void *arg)
19734 {
19735 	struct sd_lun	*un = arg;
19736 	ASSERT(un != NULL);
19737 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19738 
19739 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19740 
19741 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19742 }
19743 
19744 
19745 /*
19746  *    Function: sd_start_stop_unit_task
19747  *
19748  * Description: Recovery procedure when a drive is spun down.
19749  *
19750  *   Arguments: arg - pointer to associated softstate struct.
19751  *
19752  *     Context: Executes in a taskq() thread context
19753  */
19754 
static void
sd_start_stop_unit_task(void *arg)
{
	struct sd_lun	*un = arg;
	sd_ssc_t	*ssc;
	int		power_level;
	int		rval;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");

	/*
	 * Some unformatted drives report not ready error, no need to
	 * restart if format has been initiated.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_format_in_progress == TRUE) {
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	ssc = sd_ssc_init(un);
	/*
	 * When a START STOP command is issued from here, it is part of a
	 * failure recovery operation and must be issued before any other
	 * commands, including any pending retries. Thus it must be sent
	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
	 * succeeds or not, we will start I/O after the attempt.
	 * If power condition is supported and the current power level
	 * is capable of performing I/O, we should set the power condition
	 * to that level. Otherwise, set the power condition to ACTIVE.
	 */
	if (un->un_f_power_condition_supported) {
		mutex_enter(SD_MUTEX(un));
		ASSERT(SD_PM_IS_LEVEL_VALID(un, un->un_power_level));
		/* A level can perform I/O iff its ran_perf entry is > 0. */
		power_level = sd_pwr_pc.ran_perf[un->un_power_level]
		    > 0 ? un->un_power_level : SD_SPINDLE_ACTIVE;
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
		    sd_pl2pc[power_level], SD_PATH_DIRECT_PRIORITY);
	} else {
		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
		    SD_TARGET_START, SD_PATH_DIRECT_PRIORITY);
	}

	/* The spin-up attempt is best-effort; discard a failed assessment. */
	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	sd_ssc_fini(ssc);
	/*
	 * The above call blocks until the START_STOP_UNIT command completes.
	 * Now that it has completed, we must re-try the original IO that
	 * received the NOT READY condition in the first place. There are
	 * three possible conditions here:
	 *
	 *  (1) The original IO is on un_retry_bp.
	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
	 *	is NULL.
	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
	 *	points to some other, unrelated bp.
	 *
	 * For each case, we must call sd_start_cmds() with un_retry_bp
	 * as the argument. If un_retry_bp is NULL, this will initiate
	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
	 * then this will process the bp on un_retry_bp. That may or may not
	 * be the original IO, but that does not matter: the important thing
	 * is to keep the IO processing going at this point.
	 *
	 * Note: This is a very specific error recovery sequence associated
	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
	 * serialize the I/O with completion of the spin-up.
	 */
	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
	    un, un->un_retry_bp);
	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
	sd_start_cmds(un, un->un_retry_bp);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
}
19839 
19840 
19841 /*
19842  *    Function: sd_send_scsi_INQUIRY
19843  *
19844  * Description: Issue the scsi INQUIRY command.
19845  *
19846  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19847  *                      structure for this target.
19848  *		bufaddr
19849  *		buflen
19850  *		evpd
19851  *		page_code
19852  *		page_length
19853  *
19854  * Return Code: 0   - Success
19855  *		errno return code from sd_ssc_send()
19856  *
19857  *     Context: Can sleep. Does not return until command is completed.
19858  */
19859 
19860 static int
19861 sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, size_t buflen,
19862     uchar_t evpd, uchar_t page_code, size_t *residp)
19863 {
19864 	union scsi_cdb		cdb;
19865 	struct uscsi_cmd	ucmd_buf;
19866 	int			status;
19867 	struct sd_lun		*un;
19868 
19869 	ASSERT(ssc != NULL);
19870 	un = ssc->ssc_un;
19871 	ASSERT(un != NULL);
19872 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19873 	ASSERT(bufaddr != NULL);
19874 
19875 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19876 
19877 	bzero(&cdb, sizeof (cdb));
19878 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19879 	bzero(bufaddr, buflen);
19880 
19881 	cdb.scc_cmd = SCMD_INQUIRY;
19882 	cdb.cdb_opaque[1] = evpd;
19883 	cdb.cdb_opaque[2] = page_code;
19884 	FORMG0COUNT(&cdb, buflen);
19885 
19886 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19887 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19888 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19889 	ucmd_buf.uscsi_buflen	= buflen;
19890 	ucmd_buf.uscsi_rqbuf	= NULL;
19891 	ucmd_buf.uscsi_rqlen	= 0;
19892 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19893 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19894 
19895 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19896 	    UIO_SYSSPACE, SD_PATH_DIRECT);
19897 
19898 	/*
19899 	 * Only handle status == 0, the upper-level caller
19900 	 * will put different assessment based on the context.
19901 	 */
19902 	if (status == 0)
19903 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19904 
19905 	if ((status == 0) && (residp != NULL)) {
19906 		*residp = ucmd_buf.uscsi_resid;
19907 	}
19908 
19909 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19910 
19911 	return (status);
19912 }
19913 
19914 
19915 /*
19916  *    Function: sd_send_scsi_TEST_UNIT_READY
19917  *
19918  * Description: Issue the scsi TEST UNIT READY command.
19919  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19920  *		prevent retrying failed commands. Use this when the intent
19921  *		is either to check for device readiness, to clear a Unit
19922  *		Attention, or to clear any outstanding sense data.
19923  *		However under specific conditions the expected behavior
19924  *		is for retries to bring a device ready, so use the flag
19925  *		with caution.
19926  *
19927  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19928  *                      structure for this target.
19929  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19930  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
 *			0: don't check for media present, do retries on cmd.
19932  *
19933  * Return Code: 0   - Success
19934  *		EIO - IO error
19935  *		EACCES - Reservation conflict detected
19936  *		ENXIO  - Not Ready, medium not present
19937  *		errno return code from sd_ssc_send()
19938  *
19939  *     Context: Can sleep. Does not return until command is completed.
19940  */
19941 
static int
sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);

	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are
	 * pending commands return success, this is a bit arbitrary but is ok
	 * for non-removables (i.e. the eliteI disks) and non-clustering
	 * configurations.
	 */
	if (un->un_f_cfg_tur_check == TRUE) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_ncmds_in_transport != 0) {
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	/* TEST UNIT READY has no data phase; only request sense data. */
	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;

	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
	if ((flag & SD_DONT_RETRY_TUR) != 0) {
		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
	}
	ucmd_buf.uscsi_timeout	= 60;

	/* SD_BYPASS_PM selects the direct chain per the caller's request. */
	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
	    SD_PATH_STANDARD));

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/* Only map sense data if media checking was asked. */
			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
				break;
			}
			/* NOT READY + ASC 0x3A (medium not present) */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_NOT_READY) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
				status = ENXIO;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");

	return (status);
}
20034 
20035 /*
20036  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20037  *
20038  * Description: Issue the scsi PERSISTENT RESERVE IN command.
20039  *
20040  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20041  *                      structure for this target.
20042  *
20043  * Return Code: 0   - Success
20044  *		EACCES
20045  *		ENOTSUP
20046  *		errno return code from sd_ssc_send()
20047  *
20048  *     Context: Can sleep. Does not return until command is completed.
20049  */
20050 
static int
sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc, uchar_t usr_cmd,
    uint16_t data_len, uchar_t *data_bufp)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	int			no_caller_buf = FALSE;	/* we own data_bufp */
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	if (data_bufp == NULL) {
		/* Allocate a default buf if the caller did not give one */
		ASSERT(data_len == 0);
		data_len  = MHIOC_RESV_KEY_SIZE;
		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
		no_caller_buf = TRUE;
	}

	/* Service action (READ KEYS or READ RESERVATION) goes in byte 1. */
	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
	cdb.cdb_opaque[1] = usr_cmd;
	/* Fill in the allocation length field. */
	FORMG1COUNT(&cdb, data_len);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
	ucmd_buf.uscsi_buflen	= data_len;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	/*
	 * Map SCSI status/sense information onto the errnos documented
	 * in the function header.
	 */
	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);

		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/* ILLEGAL REQUEST: command not supported */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_ILLEGAL_REQUEST)) {
				status = ENOTSUP;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");

	/* Free the default buffer if we allocated it above. */
	if (no_caller_buf == TRUE) {
		kmem_free(data_bufp, data_len);
	}

	return (status);
}
20131 
20132 
20133 /*
20134  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20135  *
20136  * Description: This routine is the driver entry point for handling CD-ROM
20137  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
20138  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
20139  *		device.
20140  *
20141  *   Arguments: ssc  -  ssc contains un - pointer to soft state struct
20142  *                      for the target.
20143  *		usr_cmd SCSI-3 reservation facility command (one of
20144  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
20145  *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_CLEAR)
20146  *		usr_bufp - user provided pointer register, reserve descriptor or
20147  *			preempt and abort structure (mhioc_register_t,
20148  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
20149  *
20150  * Return Code: 0   - Success
20151  *		EACCES
20152  *		ENOTSUP
20153  *		errno return code from sd_ssc_send()
20154  *
20155  *     Context: Can sleep. Does not return until command is completed.
20156  */
20157 
static int
sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc, uchar_t usr_cmd,
    uchar_t *usr_bufp)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	uchar_t			data_len = sizeof (sd_prout_t);
	sd_prout_t		*prp;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(data_len == 24);	/* required by scsi spec */

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);

	if (usr_bufp == NULL) {
		return (EINVAL);
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	prp = kmem_zalloc(data_len, KM_SLEEP);

	/* Service action goes in CDB byte 1. */
	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
	cdb.cdb_opaque[1] = usr_cmd;
	/* Fill in the parameter list length field. */
	FORMG1COUNT(&cdb, data_len);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
	ucmd_buf.uscsi_buflen	= data_len;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Build the PROUT parameter list (and, where required, the TYPE
	 * field in CDB byte 2) from the caller-supplied mhioc structure.
	 */
	switch (usr_cmd) {
	case SD_SCSI3_REGISTER: {
		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;

		/* Old key validates the registration; new key replaces it. */
		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->newkey.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	case SD_SCSI3_CLEAR: {
		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;

		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		break;
	}
	case SD_SCSI3_RESERVE:
	case SD_SCSI3_RELEASE: {
		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;

		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->type;
		break;
	}
	case SD_SCSI3_PREEMPTANDABORT: {
		mhioc_preemptandabort_t *ptr =
		    (mhioc_preemptandabort_t *)usr_bufp;

		/* res_key identifies us; service_key is the victim's key. */
		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->victim_key.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->resvdesc.type;
		/* USCSI_HEAD: place this command at the head of the queue. */
		ucmd_buf.uscsi_flags |= USCSI_HEAD;
		break;
	}
	case SD_SCSI3_REGISTERANDIGNOREKEY:
	{
		mhioc_registerandignorekey_t *ptr;
		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
		/* Existing key is ignored; only the new key is supplied. */
		bcopy(ptr->newkey.key,
		    prp->service_key, MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	default:
		ASSERT(FALSE);
		break;
	}

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	/*
	 * Map SCSI status/sense information onto the errnos documented
	 * in the function header.
	 */
	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/* ILLEGAL REQUEST: command not supported */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_ILLEGAL_REQUEST)) {
				status = ENOTSUP;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	kmem_free(prp, data_len);
	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
	return (status);
}
20283 
20284 
20285 /*
20286  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20287  *
20288  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20289  *
20290  *   Arguments: un - pointer to the target's soft state struct
20291  *              dkc - pointer to the callback structure
20292  *
20293  * Return Code: 0 - success
20294  *		errno-type error code
20295  *
20296  *     Context: kernel thread context only.
20297  *
20298  *  _______________________________________________________________
20299  * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
20300  * |FLUSH_VOLATILE|              | operation                       |
20301  * |______________|______________|_________________________________|
20302  * | 0            | NULL         | Synchronous flush on both       |
20303  * |              |              | volatile and non-volatile cache |
20304  * |______________|______________|_________________________________|
20305  * | 1            | NULL         | Synchronous flush on volatile   |
20306  * |              |              | cache; disk drivers may suppress|
20307  * |              |              | flush if disk table indicates   |
20308  * |              |              | non-volatile cache              |
20309  * |______________|______________|_________________________________|
20310  * | 0            | !NULL        | Asynchronous flush on both      |
20311  * |              |              | volatile and non-volatile cache;|
20312  * |______________|______________|_________________________________|
20313  * | 1            | !NULL        | Asynchronous flush on volatile  |
20314  * |              |              | cache; disk drivers may suppress|
20315  * |              |              | flush if disk table indicates   |
20316  * |              |              | non-volatile cache              |
20317  * |______________|______________|_________________________________|
20318  *
20319  */
20320 
20321 static int
20322 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20323 {
20324 	struct sd_uscsi_info	*uip;
20325 	struct uscsi_cmd	*uscmd;
20326 	union scsi_cdb		*cdb;
20327 	struct buf		*bp;
20328 	int			rval = 0;
20329 	int			is_async;
20330 
20331 	SD_TRACE(SD_LOG_IO, un,
20332 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20333 
20334 	ASSERT(un != NULL);
20335 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20336 
20337 	if (dkc == NULL || dkc->dkc_callback == NULL) {
20338 		is_async = FALSE;
20339 	} else {
20340 		is_async = TRUE;
20341 	}
20342 
20343 	mutex_enter(SD_MUTEX(un));
20344 	/* check whether cache flush should be suppressed */
20345 	if (un->un_f_suppress_cache_flush == TRUE) {
20346 		mutex_exit(SD_MUTEX(un));
20347 		/*
20348 		 * suppress the cache flush if the device is told to do
20349 		 * so by sd.conf or disk table
20350 		 */
20351 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: \
20352 		    skip the cache flush since suppress_cache_flush is %d!\n",
20353 		    un->un_f_suppress_cache_flush);
20354 
20355 		if (is_async == TRUE) {
20356 			/* invoke callback for asynchronous flush */
20357 			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
20358 		}
20359 		return (rval);
20360 	}
20361 	mutex_exit(SD_MUTEX(un));
20362 
20363 	/*
20364 	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
20365 	 * set properly
20366 	 */
20367 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20368 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20369 
20370 	mutex_enter(SD_MUTEX(un));
20371 	if (dkc != NULL && un->un_f_sync_nv_supported &&
20372 	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
20373 		/*
20374 		 * if the device supports SYNC_NV bit, turn on
20375 		 * the SYNC_NV bit to only flush volatile cache
20376 		 */
20377 		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
20378 	}
20379 	mutex_exit(SD_MUTEX(un));
20380 
20381 	/*
20382 	 * First get some memory for the uscsi_cmd struct and cdb
20383 	 * and initialize for SYNCHRONIZE_CACHE cmd.
20384 	 */
20385 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20386 	uscmd->uscsi_cdblen = CDB_GROUP1;
20387 	uscmd->uscsi_cdb = (caddr_t)cdb;
20388 	uscmd->uscsi_bufaddr = NULL;
20389 	uscmd->uscsi_buflen = 0;
20390 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20391 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20392 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20393 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20394 	uscmd->uscsi_timeout = sd_io_time;
20395 
20396 	/*
20397 	 * Allocate an sd_uscsi_info struct and fill it with the info
20398 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20399 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20400 	 * since we allocate the buf here in this function, we do not
20401 	 * need to preserve the prior contents of b_private.
20402 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20403 	 */
20404 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20405 	uip->ui_flags = SD_PATH_DIRECT;
20406 	uip->ui_cmdp  = uscmd;
20407 
20408 	bp = getrbuf(KM_SLEEP);
20409 	bp->b_private = uip;
20410 
20411 	/*
20412 	 * Setup buffer to carry uscsi request.
20413 	 */
20414 	bp->b_flags  = B_BUSY;
20415 	bp->b_bcount = 0;
20416 	bp->b_blkno  = 0;
20417 
20418 	if (is_async == TRUE) {
20419 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20420 		uip->ui_dkc = *dkc;
20421 	}
20422 
20423 	bp->b_edev = SD_GET_DEV(un);
20424 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20425 
20426 	/*
20427 	 * Unset un_f_sync_cache_required flag
20428 	 */
20429 	mutex_enter(SD_MUTEX(un));
20430 	un->un_f_sync_cache_required = FALSE;
20431 	mutex_exit(SD_MUTEX(un));
20432 
20433 	(void) sd_uscsi_strategy(bp);
20434 
20435 	/*
20436 	 * If synchronous request, wait for completion
20437 	 * If async just return and let b_iodone callback
20438 	 * cleanup.
20439 	 * NOTE: On return, u_ncmds_in_driver will be decremented,
20440 	 * but it was also incremented in sd_uscsi_strategy(), so
20441 	 * we should be ok.
20442 	 */
20443 	if (is_async == FALSE) {
20444 		(void) biowait(bp);
20445 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20446 	}
20447 
20448 	return (rval);
20449 }
20450 
20451 
20452 static int
20453 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20454 {
20455 	struct sd_uscsi_info *uip;
20456 	struct uscsi_cmd *uscmd;
20457 	uint8_t *sense_buf;
20458 	struct sd_lun *un;
20459 	int status;
20460 	union scsi_cdb *cdb;
20461 
20462 	uip = (struct sd_uscsi_info *)(bp->b_private);
20463 	ASSERT(uip != NULL);
20464 
20465 	uscmd = uip->ui_cmdp;
20466 	ASSERT(uscmd != NULL);
20467 
20468 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20469 	ASSERT(sense_buf != NULL);
20470 
20471 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20472 	ASSERT(un != NULL);
20473 
20474 	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;
20475 
20476 	status = geterror(bp);
20477 	switch (status) {
20478 	case 0:
20479 		break;	/* Success! */
20480 	case EIO:
20481 		switch (uscmd->uscsi_status) {
20482 		case STATUS_RESERVATION_CONFLICT:
20483 			/* Ignore reservation conflict */
20484 			status = 0;
20485 			goto done;
20486 
20487 		case STATUS_CHECK:
20488 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20489 			    (scsi_sense_key(sense_buf) ==
20490 			    KEY_ILLEGAL_REQUEST)) {
20491 				/* Ignore Illegal Request error */
20492 				if (cdb->cdb_un.tag&SD_SYNC_NV_BIT) {
20493 					mutex_enter(SD_MUTEX(un));
20494 					un->un_f_sync_nv_supported = FALSE;
20495 					mutex_exit(SD_MUTEX(un));
20496 					status = 0;
20497 					SD_TRACE(SD_LOG_IO, un,
20498 					    "un_f_sync_nv_supported \
20499 					    is set to false.\n");
20500 					goto done;
20501 				}
20502 
20503 				mutex_enter(SD_MUTEX(un));
20504 				un->un_f_sync_cache_supported = FALSE;
20505 				mutex_exit(SD_MUTEX(un));
20506 				SD_TRACE(SD_LOG_IO, un,
20507 				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: \
20508 				    un_f_sync_cache_supported set to false \
20509 				    with asc = %x, ascq = %x\n",
20510 				    scsi_sense_asc(sense_buf),
20511 				    scsi_sense_ascq(sense_buf));
20512 				status = ENOTSUP;
20513 				goto done;
20514 			}
20515 			break;
20516 		default:
20517 			break;
20518 		}
20519 		/* FALLTHRU */
20520 	default:
20521 		/*
20522 		 * Turn on the un_f_sync_cache_required flag
20523 		 * since the SYNC CACHE command failed
20524 		 */
20525 		mutex_enter(SD_MUTEX(un));
20526 		un->un_f_sync_cache_required = TRUE;
20527 		mutex_exit(SD_MUTEX(un));
20528 
20529 		/*
20530 		 * Don't log an error message if this device
20531 		 * has removable media.
20532 		 */
20533 		if (!un->un_f_has_removable_media) {
20534 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20535 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20536 		}
20537 		break;
20538 	}
20539 
20540 done:
20541 	if (uip->ui_dkc.dkc_callback != NULL) {
20542 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20543 	}
20544 
20545 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20546 	freerbuf(bp);
20547 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20548 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20549 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20550 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20551 
20552 	return (status);
20553 }
20554 
20555 /*
20556  * Issues a single SCSI UNMAP command with a prepared UNMAP parameter list.
20557  * Returns zero on success, or the non-zero command error code on failure.
20558  */
20559 static int
20560 sd_send_scsi_UNMAP_issue_one(sd_ssc_t *ssc, unmap_param_hdr_t *uph,
20561     uint64_t num_descr, uint64_t bytes)
20562 {
20563 	struct sd_lun		*un = ssc->ssc_un;
20564 	struct scsi_extended_sense	sense_buf;
20565 	union scsi_cdb		cdb;
20566 	struct uscsi_cmd	ucmd_buf;
20567 	int			status;
20568 	const uint64_t		param_size = sizeof (unmap_param_hdr_t) +
20569 	    num_descr * sizeof (unmap_blk_descr_t);
20570 
20571 	ASSERT3U(param_size - 2, <=, UINT16_MAX);
20572 	uph->uph_data_len = BE_16(param_size - 2);
20573 	uph->uph_descr_data_len = BE_16(param_size - 8);
20574 
20575 	bzero(&cdb, sizeof (cdb));
20576 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20577 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20578 
20579 	cdb.scc_cmd = SCMD_UNMAP;
20580 	FORMG1COUNT(&cdb, param_size);
20581 
20582 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20583 	ucmd_buf.uscsi_cdblen	= (uchar_t)CDB_GROUP1;
20584 	ucmd_buf.uscsi_bufaddr	= (caddr_t)uph;
20585 	ucmd_buf.uscsi_buflen	= param_size;
20586 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20587 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20588 	ucmd_buf.uscsi_flags	= USCSI_WRITE | USCSI_RQENABLE | USCSI_SILENT;
20589 	ucmd_buf.uscsi_timeout	= un->un_cmd_timeout;
20590 
20591 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL, UIO_SYSSPACE,
20592 	    SD_PATH_STANDARD);
20593 
20594 	switch (status) {
20595 	case 0:
20596 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20597 
20598 		if (un->un_unmapstats) {
20599 			atomic_inc_64(&un->un_unmapstats->us_cmds.value.ui64);
20600 			atomic_add_64(&un->un_unmapstats->us_extents.value.ui64,
20601 			    num_descr);
20602 			atomic_add_64(&un->un_unmapstats->us_bytes.value.ui64,
20603 			    bytes);
20604 		}
20605 		break;	/* Success! */
20606 	case EIO:
20607 		if (un->un_unmapstats)
20608 			atomic_inc_64(&un->un_unmapstats->us_errs.value.ui64);
20609 		switch (ucmd_buf.uscsi_status) {
20610 		case STATUS_RESERVATION_CONFLICT:
20611 			status = EACCES;
20612 			break;
20613 		default:
20614 			break;
20615 		}
20616 		break;
20617 	default:
20618 		if (un->un_unmapstats)
20619 			atomic_inc_64(&un->un_unmapstats->us_errs.value.ui64);
20620 		break;
20621 	}
20622 
20623 	return (status);
20624 }
20625 
20626 /*
20627  * Returns a pointer to the i'th block descriptor inside an UNMAP param list.
20628  */
20629 static inline unmap_blk_descr_t *
20630 UNMAP_blk_descr_i(void *buf, size_t i)
20631 {
20632 	return ((unmap_blk_descr_t *)((uintptr_t)buf +
20633 	    sizeof (unmap_param_hdr_t) + (i * sizeof (unmap_blk_descr_t))));
20634 }
20635 
20636 /*
20637  * Takes the list of extents from sd_send_scsi_UNMAP, chops it up, prepares
20638  * UNMAP block descriptors and issues individual SCSI UNMAP commands. While
20639  * doing so we consult the block limits to determine at most how many
20640  * extents and LBAs we can UNMAP in one command.
20641  * If a command fails for whatever, reason, extent list processing is aborted
20642  * and the failed command's status is returned. Otherwise returns 0 on
20643  * success.
20644  */
static int
sd_send_scsi_UNMAP_issue(dev_t dev, sd_ssc_t *ssc, const dkioc_free_list_t *dfl)
{
	struct sd_lun		*un = ssc->ssc_un;
	unmap_param_hdr_t	*uph;
	sd_blk_limits_t		*lim = &un->un_blk_lim;
	int			rval = 0;
	int			partition;
	/* partition offset & length in system blocks */
	diskaddr_t		part_off_sysblks = 0, part_len_sysblks = 0;
	uint64_t		part_off, part_len;
	uint64_t		descr_cnt_lim, byte_cnt_lim;
	uint64_t		descr_issued = 0, bytes_issued = 0;

	/* Scratch buffer reused for every UNMAP parameter list we build. */
	uph = kmem_zalloc(SD_UNMAP_PARAM_LIST_MAXSZ, KM_SLEEP);

	/*
	 * Extent addresses in dfl are partition-relative; fetch the
	 * partition geometry so they can be validated and later converted
	 * to device-global LBAs.
	 */
	partition = SDPART(dev);
	rval = cmlb_partinfo(un->un_cmlbhandle, partition, &part_len_sysblks,
	    &part_off_sysblks, NULL, NULL, (void *)SD_PATH_DIRECT);
	if (rval != 0)
		goto out;
	part_off = SD_SYSBLOCKS2BYTES(part_off_sysblks);
	part_len = SD_SYSBLOCKS2BYTES(part_len_sysblks);

	ASSERT(un->un_blk_lim.lim_max_unmap_lba_cnt != 0);
	ASSERT(un->un_blk_lim.lim_max_unmap_descr_cnt != 0);
	/* Spec says 0xffffffff are special values, so compute maximums. */
	byte_cnt_lim = lim->lim_max_unmap_lba_cnt < UINT32_MAX ?
	    (uint64_t)lim->lim_max_unmap_lba_cnt * un->un_tgt_blocksize :
	    UINT64_MAX;
	descr_cnt_lim = MIN(lim->lim_max_unmap_descr_cnt, SD_UNMAP_MAX_DESCR);

	/* A list offset past the end of the partition is always invalid. */
	if (dfl->dfl_offset >= part_len) {
		rval = SET_ERROR(EINVAL);
		goto out;
	}

	for (size_t i = 0; i < dfl->dfl_num_exts; i++) {
		const dkioc_free_list_ext_t *ext = &dfl->dfl_exts[i];
		uint64_t ext_start = ext->dfle_start;
		uint64_t ext_length = ext->dfle_length;

		/* Carve each extent into chunks the device can accept. */
		while (ext_length > 0) {
			unmap_blk_descr_t *ubd;
			/* Respect device limit on LBA count per command */
			uint64_t len = MIN(MIN(ext_length, byte_cnt_lim -
			    bytes_issued), SD_TGTBLOCKS2BYTES(un, UINT32_MAX));

			/*
			 * check partition limits; the middle two clauses
			 * also reject uint64_t wraparound in the sums
			 */
			if (ext_start >= part_len ||
			    ext_start + len < ext_start ||
			    dfl->dfl_offset + ext_start + len <
			    dfl->dfl_offset ||
			    dfl->dfl_offset + ext_start + len > part_len) {
				rval = SET_ERROR(EINVAL);
				goto out;
			}

			ASSERT3U(descr_issued, <, descr_cnt_lim);
			ASSERT3U(bytes_issued, <, byte_cnt_lim);
			ubd = UNMAP_blk_descr_i(uph, descr_issued);

			/* adjust in-partition addresses to be device-global */
			ubd->ubd_lba = BE_64(SD_BYTES2TGTBLOCKS(un,
			    dfl->dfl_offset + ext_start + part_off));
			ubd->ubd_lba_cnt = BE_32(SD_BYTES2TGTBLOCKS(un, len));

			descr_issued++;
			bytes_issued += len;

			/* Issue command when device limits reached */
			if (descr_issued == descr_cnt_lim ||
			    bytes_issued == byte_cnt_lim) {
				rval = sd_send_scsi_UNMAP_issue_one(ssc, uph,
				    descr_issued, bytes_issued);
				if (rval != 0)
					goto out;
				/* Start accumulating a fresh list. */
				descr_issued = 0;
				bytes_issued = 0;
			}

			ext_start += len;
			ext_length -= len;
		}
	}

	if (descr_issued > 0) {
		/* issue last command */
		rval = sd_send_scsi_UNMAP_issue_one(ssc, uph, descr_issued,
		    bytes_issued);
	}

out:
	kmem_free(uph, SD_UNMAP_PARAM_LIST_MAXSZ);
	return (rval);
}
20741 
20742 /*
20743  * Issues one or several UNMAP commands based on a list of extents to be
20744  * unmapped. The internal multi-command processing is hidden, as the exact
20745  * number of commands and extents per command is limited by both SCSI
20746  * command syntax and device limits (as expressed in the SCSI Block Limits
20747  * VPD page and un_blk_lim in struct sd_lun).
20748  * Returns zero on success, or the error code of the first failed SCSI UNMAP
20749  * command.
20750  */
20751 static int
20752 sd_send_scsi_UNMAP(dev_t dev, sd_ssc_t *ssc, dkioc_free_list_t *dfl, int flag)
20753 {
20754 	struct sd_lun		*un = ssc->ssc_un;
20755 	int			rval = 0;
20756 
20757 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20758 	ASSERT(dfl != NULL);
20759 
20760 	/* Per spec, any of these conditions signals lack of UNMAP support. */
20761 	if (!(un->un_thin_flags & SD_THIN_PROV_ENABLED) ||
20762 	    un->un_blk_lim.lim_max_unmap_descr_cnt == 0 ||
20763 	    un->un_blk_lim.lim_max_unmap_lba_cnt == 0) {
20764 		return (SET_ERROR(ENOTSUP));
20765 	}
20766 
20767 	/* For userspace calls we must copy in. */
20768 	if (!(flag & FKIOCTL)) {
20769 		int err = dfl_copyin(dfl, &dfl, flag, KM_SLEEP);
20770 		if (err != 0)
20771 			return (err);
20772 	} else if (dfl->dfl_num_exts > DFL_COPYIN_MAX_EXTS) {
20773 		ASSERT3U(dfl->dfl_num_exts, <=, DFL_COPYIN_MAX_EXTS);
20774 		return (SET_ERROR(EINVAL));
20775 	}
20776 
20777 	rval = sd_send_scsi_UNMAP_issue(dev, ssc, dfl);
20778 
20779 	if (!(flag & FKIOCTL)) {
20780 		dfl_free(dfl);
20781 		dfl = NULL;
20782 	}
20783 
20784 	return (rval);
20785 }
20786 
20787 /*
20788  *    Function: sd_send_scsi_GET_CONFIGURATION
20789  *
20790  * Description: Issues the get configuration command to the device.
20791  *		Called from sd_check_for_writable_cd & sd_get_media_info
20792  *		caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
20793  *   Arguments: ssc
20794  *		ucmdbuf
20795  *		rqbuf
20796  *		rqbuflen
20797  *		bufaddr
20798  *		buflen
20799  *		path_flag
20800  *
20801  * Return Code: 0   - Success
20802  *		errno return code from sd_ssc_send()
20803  *
20804  *     Context: Can sleep. Does not return until command is completed.
20805  *
20806  */
20807 
20808 static int
20809 sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
20810     uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
20811     int path_flag)
20812 {
20813 	char	cdb[CDB_GROUP1];
20814 	int	status;
20815 	struct sd_lun	*un;
20816 
20817 	ASSERT(ssc != NULL);
20818 	un = ssc->ssc_un;
20819 	ASSERT(un != NULL);
20820 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20821 	ASSERT(bufaddr != NULL);
20822 	ASSERT(ucmdbuf != NULL);
20823 	ASSERT(rqbuf != NULL);
20824 
20825 	SD_TRACE(SD_LOG_IO, un,
20826 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20827 
20828 	bzero(cdb, sizeof (cdb));
20829 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20830 	bzero(rqbuf, rqbuflen);
20831 	bzero(bufaddr, buflen);
20832 
20833 	/*
20834 	 * Set up cdb field for the get configuration command.
20835 	 */
20836 	cdb[0] = SCMD_GET_CONFIGURATION;
20837 	cdb[1] = 0x02;  /* Requested Type */
20838 	cdb[8] = SD_PROFILE_HEADER_LEN;
20839 	ucmdbuf->uscsi_cdb = cdb;
20840 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20841 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20842 	ucmdbuf->uscsi_buflen = buflen;
20843 	ucmdbuf->uscsi_timeout = sd_io_time;
20844 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20845 	ucmdbuf->uscsi_rqlen = rqbuflen;
20846 	ucmdbuf->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT | USCSI_READ;
20847 
20848 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
20849 	    UIO_SYSSPACE, path_flag);
20850 
20851 	switch (status) {
20852 	case 0:
20853 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20854 		break;  /* Success! */
20855 	case EIO:
20856 		switch (ucmdbuf->uscsi_status) {
20857 		case STATUS_RESERVATION_CONFLICT:
20858 			status = EACCES;
20859 			break;
20860 		default:
20861 			break;
20862 		}
20863 		break;
20864 	default:
20865 		break;
20866 	}
20867 
20868 	if (status == 0) {
20869 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20870 		    "sd_send_scsi_GET_CONFIGURATION: data",
20871 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20872 	}
20873 
20874 	SD_TRACE(SD_LOG_IO, un,
20875 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20876 
20877 	return (status);
20878 }
20879 
20880 /*
20881  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20882  *
20883  * Description: Issues the get configuration command to the device to
20884  *              retrieve a specific feature. Called from
20885  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20886  *   Arguments: ssc
20887  *              ucmdbuf
20888  *              rqbuf
20889  *              rqbuflen
20890  *              bufaddr
20891  *              buflen
20892  *		feature
20893  *
20894  * Return Code: 0   - Success
20895  *              errno return code from sd_ssc_send()
20896  *
20897  *     Context: Can sleep. Does not return until command is completed.
20898  *
20899  */
20900 static int
20901 sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
20902     uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
20903     char feature, int path_flag)
20904 {
20905 	char    cdb[CDB_GROUP1];
20906 	int	status;
20907 	struct sd_lun	*un;
20908 
20909 	ASSERT(ssc != NULL);
20910 	un = ssc->ssc_un;
20911 	ASSERT(un != NULL);
20912 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20913 	ASSERT(bufaddr != NULL);
20914 	ASSERT(ucmdbuf != NULL);
20915 	ASSERT(rqbuf != NULL);
20916 
20917 	SD_TRACE(SD_LOG_IO, un,
20918 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20919 
20920 	bzero(cdb, sizeof (cdb));
20921 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20922 	bzero(rqbuf, rqbuflen);
20923 	bzero(bufaddr, buflen);
20924 
20925 	/*
20926 	 * Set up cdb field for the get configuration command.
20927 	 */
20928 	cdb[0] = SCMD_GET_CONFIGURATION;
20929 	cdb[1] = 0x02;  /* Requested Type */
20930 	cdb[3] = feature;
20931 	cdb[8] = buflen;
20932 	ucmdbuf->uscsi_cdb = cdb;
20933 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20934 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20935 	ucmdbuf->uscsi_buflen = buflen;
20936 	ucmdbuf->uscsi_timeout = sd_io_time;
20937 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20938 	ucmdbuf->uscsi_rqlen = rqbuflen;
20939 	ucmdbuf->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT | USCSI_READ;
20940 
20941 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
20942 	    UIO_SYSSPACE, path_flag);
20943 
20944 	switch (status) {
20945 	case 0:
20946 
20947 		break;  /* Success! */
20948 	case EIO:
20949 		switch (ucmdbuf->uscsi_status) {
20950 		case STATUS_RESERVATION_CONFLICT:
20951 			status = EACCES;
20952 			break;
20953 		default:
20954 			break;
20955 		}
20956 		break;
20957 	default:
20958 		break;
20959 	}
20960 
20961 	if (status == 0) {
20962 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20963 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20964 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20965 	}
20966 
20967 	SD_TRACE(SD_LOG_IO, un,
20968 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20969 
20970 	return (status);
20971 }
20972 
20973 
20974 /*
20975  *    Function: sd_send_scsi_MODE_SENSE
20976  *
20977  * Description: Utility function for issuing a scsi MODE SENSE command.
20978  *		Note: This routine uses a consistent implementation for Group0,
20979  *		Group1, and Group2 commands across all platforms. ATAPI devices
20980  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20981  *
20982  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20983  *                      structure for this target.
20984  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20985  *			  CDB_GROUP[1|2] (10 byte).
20986  *		bufaddr - buffer for page data retrieved from the target.
20987  *		buflen - size of page to be retrieved.
20988  *		page_code - page code of data to be retrieved from the target.
20989  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20990  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20991  *			to use the USCSI "direct" chain and bypass the normal
20992  *			command waitq.
20993  *
20994  * Return Code: 0   - Success
20995  *		errno return code from sd_ssc_send()
20996  *
20997  *     Context: Can sleep. Does not return until command is completed.
20998  */
20999 
21000 static int
21001 sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
21002     size_t buflen,  uchar_t page_code, int path_flag)
21003 {
21004 	struct	scsi_extended_sense	sense_buf;
21005 	union scsi_cdb		cdb;
21006 	struct uscsi_cmd	ucmd_buf;
21007 	int			status;
21008 	int			headlen;
21009 	struct sd_lun		*un;
21010 
21011 	ASSERT(ssc != NULL);
21012 	un = ssc->ssc_un;
21013 	ASSERT(un != NULL);
21014 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21015 	ASSERT(bufaddr != NULL);
21016 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21017 	    (cdbsize == CDB_GROUP2));
21018 
21019 	SD_TRACE(SD_LOG_IO, un,
21020 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
21021 
21022 	bzero(&cdb, sizeof (cdb));
21023 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21024 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21025 	bzero(bufaddr, buflen);
21026 
21027 	if (cdbsize == CDB_GROUP0) {
21028 		cdb.scc_cmd = SCMD_MODE_SENSE;
21029 		cdb.cdb_opaque[2] = page_code;
21030 		FORMG0COUNT(&cdb, buflen);
21031 		headlen = MODE_HEADER_LENGTH;
21032 	} else {
21033 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
21034 		cdb.cdb_opaque[2] = page_code;
21035 		FORMG1COUNT(&cdb, buflen);
21036 		headlen = MODE_HEADER_LENGTH_GRP2;
21037 	}
21038 
21039 	ASSERT(headlen <= buflen);
21040 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21041 
21042 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21043 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21044 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21045 	ucmd_buf.uscsi_buflen	= buflen;
21046 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21047 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21048 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21049 	ucmd_buf.uscsi_timeout	= 60;
21050 
21051 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21052 	    UIO_SYSSPACE, path_flag);
21053 
21054 	switch (status) {
21055 	case 0:
21056 		/*
21057 		 * sr_check_wp() uses 0x3f page code and check the header of
21058 		 * mode page to determine if target device is write-protected.
21059 		 * But some USB devices return 0 bytes for 0x3f page code. For
21060 		 * this case, make sure that mode page header is returned at
21061 		 * least.
21062 		 */
21063 		if (buflen - ucmd_buf.uscsi_resid <  headlen) {
21064 			status = EIO;
21065 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
21066 			    "mode page header is not returned");
21067 		}
21068 		break;	/* Success! */
21069 	case EIO:
21070 		switch (ucmd_buf.uscsi_status) {
21071 		case STATUS_RESERVATION_CONFLICT:
21072 			status = EACCES;
21073 			break;
21074 		default:
21075 			break;
21076 		}
21077 		break;
21078 	default:
21079 		break;
21080 	}
21081 
21082 	if (status == 0) {
21083 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
21084 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21085 	}
21086 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
21087 
21088 	return (status);
21089 }
21090 
21091 
21092 /*
21093  *    Function: sd_send_scsi_MODE_SELECT
21094  *
21095  * Description: Utility function for issuing a scsi MODE SELECT command.
21096  *		Note: This routine uses a consistent implementation for Group0,
21097  *		Group1, and Group2 commands across all platforms. ATAPI devices
21098  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
21099  *
21100  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21101  *                      structure for this target.
21102  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
21103  *			  CDB_GROUP[1|2] (10 byte).
21104  *		bufaddr - buffer for page data retrieved from the target.
21105  *		buflen - size of page to be retrieved.
21106  *		save_page - boolean to determin if SP bit should be set.
21107  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21108  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21109  *			to use the USCSI "direct" chain and bypass the normal
21110  *			command waitq.
21111  *
21112  * Return Code: 0   - Success
21113  *		errno return code from sd_ssc_send()
21114  *
21115  *     Context: Can sleep. Does not return until command is completed.
21116  */
21117 
21118 static int
21119 sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
21120     size_t buflen,  uchar_t save_page, int path_flag)
21121 {
21122 	struct	scsi_extended_sense	sense_buf;
21123 	union scsi_cdb		cdb;
21124 	struct uscsi_cmd	ucmd_buf;
21125 	int			status;
21126 	struct sd_lun		*un;
21127 
21128 	ASSERT(ssc != NULL);
21129 	un = ssc->ssc_un;
21130 	ASSERT(un != NULL);
21131 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21132 	ASSERT(bufaddr != NULL);
21133 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21134 	    (cdbsize == CDB_GROUP2));
21135 
21136 	SD_TRACE(SD_LOG_IO, un,
21137 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
21138 
21139 	bzero(&cdb, sizeof (cdb));
21140 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21141 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21142 
21143 	/* Set the PF bit for many third party drives */
21144 	cdb.cdb_opaque[1] = 0x10;
21145 
21146 	/* Set the savepage(SP) bit if given */
21147 	if (save_page == SD_SAVE_PAGE) {
21148 		cdb.cdb_opaque[1] |= 0x01;
21149 	}
21150 
21151 	if (cdbsize == CDB_GROUP0) {
21152 		cdb.scc_cmd = SCMD_MODE_SELECT;
21153 		FORMG0COUNT(&cdb, buflen);
21154 	} else {
21155 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
21156 		FORMG1COUNT(&cdb, buflen);
21157 	}
21158 
21159 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21160 
21161 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21162 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21163 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21164 	ucmd_buf.uscsi_buflen	= buflen;
21165 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21166 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21167 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
21168 	ucmd_buf.uscsi_timeout	= 60;
21169 
21170 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21171 	    UIO_SYSSPACE, path_flag);
21172 
21173 	switch (status) {
21174 	case 0:
21175 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21176 		break;	/* Success! */
21177 	case EIO:
21178 		switch (ucmd_buf.uscsi_status) {
21179 		case STATUS_RESERVATION_CONFLICT:
21180 			status = EACCES;
21181 			break;
21182 		default:
21183 			break;
21184 		}
21185 		break;
21186 	default:
21187 		break;
21188 	}
21189 
21190 	if (status == 0) {
21191 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
21192 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21193 	}
21194 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
21195 
21196 	return (status);
21197 }
21198 
21199 
21200 /*
21201  *    Function: sd_send_scsi_RDWR
21202  *
21203  * Description: Issue a scsi READ or WRITE command with the given parameters.
21204  *
21205  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21206  *                      structure for this target.
21207  *		cmd:	 SCMD_READ or SCMD_WRITE
21208  *		bufaddr: Address of caller's buffer to receive the RDWR data
21209  *		buflen:  Length of caller's buffer receive the RDWR data.
21210  *		start_block: Block number for the start of the RDWR operation.
21211  *			 (Assumes target-native block size.)
21212  *		residp:  Pointer to variable to receive the redisual of the
21213  *			 RDWR operation (may be NULL of no residual requested).
21214  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21215  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21216  *			to use the USCSI "direct" chain and bypass the normal
21217  *			command waitq.
21218  *
21219  * Return Code: 0   - Success
21220  *		errno return code from sd_ssc_send()
21221  *
21222  *     Context: Can sleep. Does not return until command is completed.
21223  */
21224 
static int
sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
    size_t buflen, daddr_t start_block, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	uint32_t		block_count;
	int			status;
	int			cdbsize;
	uchar_t			flag;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bufaddr != NULL);
	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);

	/*
	 * buflen is converted to a target-block count below, so the
	 * target block size must be known before we can proceed.
	 */
	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
		return (EINVAL);
	}

	/* The bytes-to-blocks conversion reads un state under SD_MUTEX. */
	mutex_enter(SD_MUTEX(un));
	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
	mutex_exit(SD_MUTEX(un));

	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;

	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
	    bufaddr, buflen, start_block, block_count);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * Compute CDB size to use: 16-byte CDBs for LBAs that do not fit
	 * in 32 bits, 10-byte CDBs when the LBA exceeds the 21-bit
	 * Group 0 address field (0xFFE00000 masks the bits above it) or
	 * the device is ATAPI (ATAPI does not support 6-byte CDBs), and
	 * 6-byte CDBs otherwise.
	 */
	if (start_block > 0xffffffff)
		cdbsize = CDB_GROUP4;
	else if ((start_block & 0xFFE00000) ||
	    (un->un_f_cfg_is_atapi == TRUE))
		cdbsize = CDB_GROUP1;
	else
		cdbsize = CDB_GROUP0;

	/* Fill in the opcode, LBA, and transfer length for the chosen size. */
	switch (cdbsize) {
	case CDB_GROUP0:	/* 6-byte CDBs */
		cdb.scc_cmd = cmd;
		FORMG0ADDR(&cdb, start_block);
		FORMG0COUNT(&cdb, block_count);
		break;
	case CDB_GROUP1:	/* 10-byte CDBs */
		cdb.scc_cmd = cmd | SCMD_GROUP1;
		FORMG1ADDR(&cdb, start_block);
		FORMG1COUNT(&cdb, block_count);
		break;
	case CDB_GROUP4:	/* 16-byte CDBs */
		cdb.scc_cmd = cmd | SCMD_GROUP4;
		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
		FORMG4COUNT(&cdb, block_count);
		break;
	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
	default:
		/* All others reserved */
		return (EINVAL);
	}

	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
	SD_FILL_SCSI1_LUN_CDB(un, &cdb);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
	ucmd_buf.uscsi_bufaddr	= bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;
	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		/* Map a reservation conflict to EACCES for the caller. */
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");

	return (status);
}
21336 
21337 
21338 /*
21339  *    Function: sd_send_scsi_LOG_SENSE
21340  *
21341  * Description: Issue a scsi LOG_SENSE command with the given parameters.
21342  *
 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
 *                      structure for this target.
 *		bufaddr - buffer to receive the LOG SENSE data
 *		buflen  - length of caller's buffer
 *		page_code - log page to request
 *		page_control - page control field for CDB byte 2
 *		param_ptr - parameter pointer (first parameter to return)
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
 *			to bypass the normal command waitq.
21345  *
21346  * Return Code: 0   - Success
21347  *		errno return code from sd_ssc_send()
21348  *
21349  *     Context: Can sleep. Does not return until command is completed.
21350  */
21351 
static int
sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr, uint16_t buflen,
    uchar_t page_code, uchar_t page_control, uint16_t param_ptr, int path_flag)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * Build the LOG SENSE CDB: byte 2 carries the page control field
	 * in bits 6-7 and the page code in bits 0-5; bytes 5-6 carry the
	 * parameter pointer MSB first.
	 */
	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
	FORMG1COUNT(&cdb, buflen);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		break;
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Reservation conflicts map to EACCES. */
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_ILLEGAL_REQUEST) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
				/*
				 * ASC 0x24: INVALID FIELD IN CDB
				 */
				switch (page_code) {
				case START_STOP_CYCLE_PAGE:
					/*
					 * The start stop cycle counter is
					 * implemented as page 0x31 in earlier
					 * generation disks. In new generation
					 * disks the start stop cycle counter is
					 * implemented as page 0xE. To properly
					 * handle this case if an attempt for
					 * log page 0xE is made and fails we
					 * will try again using page 0x31.
					 *
					 * Network storage BU committed to
					 * maintain the page 0x31 for this
					 * purpose and will not have any other
					 * page implemented with page code 0x31
					 * until all disks transition to the
					 * standard page.
					 *
					 * Patch the page code into the CDB
					 * already referenced by ucmd_buf and
					 * re-issue the same command; SD_MUTEX
					 * protects the cached page number.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_start_stop_cycle_page =
					    START_STOP_CYCLE_VU_PAGE;
					cdb.cdb_opaque[2] =
					    (char)(page_control << 6) |
					    un->un_start_stop_cycle_page;
					mutex_exit(SD_MUTEX(un));
					/*
					 * Discard the assessment of the first
					 * (expected) failure before retrying.
					 */
					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
					status = sd_ssc_send(
					    ssc, &ucmd_buf, FKIOCTL,
					    UIO_SYSSPACE, path_flag);

					break;
				case TEMPERATURE_PAGE:
					/* Temperature page unsupported. */
					status = ENOTTY;
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");

	return (status);
}
21465 
21466 
21467 /*
21468  *    Function: sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
21469  *
21470  * Description: Issue the scsi GET EVENT STATUS NOTIFICATION command.
21471  *
21472  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21473  *                      structure for this target.
 *		bufaddr - buffer to receive the event status data
 *		buflen  - length of caller's buffer
 *		class_req - event class request (notification class field)
21477  *
21478  * Return Code: 0   - Success
21479  *		errno return code from sd_ssc_send()
21480  *
21481  *     Context: Can sleep. Does not return until command is completed.
21482  */
21483 
21484 static int
21485 sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc, uchar_t *bufaddr,
21486     size_t buflen, uchar_t class_req)
21487 {
21488 	union scsi_cdb		cdb;
21489 	struct uscsi_cmd	ucmd_buf;
21490 	int			status;
21491 	struct sd_lun		*un;
21492 
21493 	ASSERT(ssc != NULL);
21494 	un = ssc->ssc_un;
21495 	ASSERT(un != NULL);
21496 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21497 	ASSERT(bufaddr != NULL);
21498 
21499 	SD_TRACE(SD_LOG_IO, un,
21500 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: entry: un:0x%p\n", un);
21501 
21502 	bzero(&cdb, sizeof (cdb));
21503 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21504 	bzero(bufaddr, buflen);
21505 
21506 	cdb.scc_cmd = SCMD_GET_EVENT_STATUS_NOTIFICATION;
21507 	cdb.cdb_opaque[1] = 1; /* polled */
21508 	cdb.cdb_opaque[4] = class_req;
21509 	FORMG1COUNT(&cdb, buflen);
21510 
21511 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21512 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21513 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21514 	ucmd_buf.uscsi_buflen	= buflen;
21515 	ucmd_buf.uscsi_rqbuf	= NULL;
21516 	ucmd_buf.uscsi_rqlen	= 0;
21517 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
21518 	ucmd_buf.uscsi_timeout	= 60;
21519 
21520 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21521 	    UIO_SYSSPACE, SD_PATH_DIRECT);
21522 
21523 	/*
21524 	 * Only handle status == 0, the upper-level caller
21525 	 * will put different assessment based on the context.
21526 	 */
21527 	if (status == 0) {
21528 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21529 
21530 		if (ucmd_buf.uscsi_resid != 0) {
21531 			status = EIO;
21532 		}
21533 	}
21534 
21535 	SD_TRACE(SD_LOG_IO, un,
21536 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: exit\n");
21537 
21538 	return (status);
21539 }
21540 
21541 
21542 static boolean_t
21543 sd_gesn_media_data_valid(uchar_t *data)
21544 {
21545 	uint16_t			len;
21546 
21547 	len = (data[1] << 8) | data[0];
21548 	return ((len >= 6) &&
21549 	    ((data[2] & SD_GESN_HEADER_NEA) == 0) &&
21550 	    ((data[2] & SD_GESN_HEADER_CLASS) == SD_GESN_MEDIA_CLASS) &&
21551 	    ((data[3] & (1 << SD_GESN_MEDIA_CLASS)) != 0));
21552 }
21553 
21554 
21555 /*
21556  *    Function: sdioctl
21557  *
21558  * Description: Driver's ioctl(9e) entry point function.
21559  *
21560  *   Arguments: dev     - device number
21561  *		cmd     - ioctl operation to be performed
21562  *		arg     - user argument, contains data to be set or reference
21563  *			  parameter for get
21564  *		flag    - bit flag, indicating open settings, 32/64 bit type
21565  *		cred_p  - user credential pointer
21566  *		rval_p  - calling process return value (OPT)
21567  *
21568  * Return Code: EINVAL
21569  *		ENOTTY
21570  *		ENXIO
21571  *		EIO
21572  *		EFAULT
21573  *		ENOTSUP
21574  *		EPERM
21575  *
21576  *     Context: Called from the device switch at normal priority.
21577  */
21578 
21579 static int
21580 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
21581 {
21582 	struct sd_lun	*un = NULL;
21583 	int		err = 0;
21584 	int		i = 0;
21585 	cred_t		*cr;
21586 	int		tmprval = EINVAL;
21587 	boolean_t	is_valid;
21588 	sd_ssc_t	*ssc;
21589 
21590 	/*
21591 	 * All device accesses go thru sdstrategy where we check on suspend
21592 	 * status
21593 	 */
21594 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21595 		return (ENXIO);
21596 	}
21597 
21598 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21599 
21600 	/* Initialize sd_ssc_t for internal uscsi commands */
21601 	ssc = sd_ssc_init(un);
21602 
21603 	is_valid = SD_IS_VALID_LABEL(un);
21604 
21605 	/*
21606 	 * Moved this wait from sd_uscsi_strategy to here for
21607 	 * reasons of deadlock prevention. Internal driver commands,
21608 	 * specifically those to change a devices power level, result
21609 	 * in a call to sd_uscsi_strategy.
21610 	 */
21611 	mutex_enter(SD_MUTEX(un));
21612 	while ((un->un_state == SD_STATE_SUSPENDED) ||
21613 	    (un->un_state == SD_STATE_PM_CHANGING)) {
21614 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21615 	}
21616 	/*
21617 	 * Twiddling the counter here protects commands from now
21618 	 * through to the top of sd_uscsi_strategy. Without the
21619 	 * counter inc. a power down, for example, could get in
21620 	 * after the above check for state is made and before
21621 	 * execution gets to the top of sd_uscsi_strategy.
21622 	 * That would cause problems.
21623 	 */
21624 	un->un_ncmds_in_driver++;
21625 
21626 	if (!is_valid &&
21627 	    (flag & (FNDELAY | FNONBLOCK))) {
21628 		switch (cmd) {
21629 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
21630 		case DKIOCGVTOC:
21631 		case DKIOCGEXTVTOC:
21632 		case DKIOCGAPART:
21633 		case DKIOCPARTINFO:
21634 		case DKIOCEXTPARTINFO:
21635 		case DKIOCSGEOM:
21636 		case DKIOCSAPART:
21637 		case DKIOCGETEFI:
21638 		case DKIOCPARTITION:
21639 		case DKIOCSVTOC:
21640 		case DKIOCSEXTVTOC:
21641 		case DKIOCSETEFI:
21642 		case DKIOCGMBOOT:
21643 		case DKIOCSMBOOT:
21644 		case DKIOCG_PHYGEOM:
21645 		case DKIOCG_VIRTGEOM:
21646 #if defined(__x86)
21647 		case DKIOCSETEXTPART:
21648 #endif
21649 			/* let cmlb handle it */
21650 			goto skip_ready_valid;
21651 
21652 		case CDROMPAUSE:
21653 		case CDROMRESUME:
21654 		case CDROMPLAYMSF:
21655 		case CDROMPLAYTRKIND:
21656 		case CDROMREADTOCHDR:
21657 		case CDROMREADTOCENTRY:
21658 		case CDROMSTOP:
21659 		case CDROMSTART:
21660 		case CDROMVOLCTRL:
21661 		case CDROMSUBCHNL:
21662 		case CDROMREADMODE2:
21663 		case CDROMREADMODE1:
21664 		case CDROMREADOFFSET:
21665 		case CDROMSBLKMODE:
21666 		case CDROMGBLKMODE:
21667 		case CDROMGDRVSPEED:
21668 		case CDROMSDRVSPEED:
21669 		case CDROMCDDA:
21670 		case CDROMCDXA:
21671 		case CDROMSUBCODE:
21672 			if (!ISCD(un)) {
21673 				un->un_ncmds_in_driver--;
21674 				ASSERT(un->un_ncmds_in_driver >= 0);
21675 				mutex_exit(SD_MUTEX(un));
21676 				err = ENOTTY;
21677 				goto done_without_assess;
21678 			}
21679 			break;
21680 		case FDEJECT:
21681 		case DKIOCEJECT:
21682 		case CDROMEJECT:
21683 			if (!un->un_f_eject_media_supported) {
21684 				un->un_ncmds_in_driver--;
21685 				ASSERT(un->un_ncmds_in_driver >= 0);
21686 				mutex_exit(SD_MUTEX(un));
21687 				err = ENOTTY;
21688 				goto done_without_assess;
21689 			}
21690 			break;
21691 		case DKIOCFLUSHWRITECACHE:
21692 			mutex_exit(SD_MUTEX(un));
21693 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
21694 			if (err != 0) {
21695 				mutex_enter(SD_MUTEX(un));
21696 				un->un_ncmds_in_driver--;
21697 				ASSERT(un->un_ncmds_in_driver >= 0);
21698 				mutex_exit(SD_MUTEX(un));
21699 				err = EIO;
21700 				goto done_quick_assess;
21701 			}
21702 			mutex_enter(SD_MUTEX(un));
21703 			/* FALLTHROUGH */
21704 		case DKIOCREMOVABLE:
21705 		case DKIOCHOTPLUGGABLE:
21706 		case DKIOCINFO:
21707 		case DKIOCGMEDIAINFO:
21708 		case DKIOCGMEDIAINFOEXT:
21709 		case DKIOCSOLIDSTATE:
21710 		case DKIOC_CANFREE:
21711 		case MHIOCENFAILFAST:
21712 		case MHIOCSTATUS:
21713 		case MHIOCTKOWN:
21714 		case MHIOCRELEASE:
21715 		case MHIOCGRP_INKEYS:
21716 		case MHIOCGRP_INRESV:
21717 		case MHIOCGRP_REGISTER:
21718 		case MHIOCGRP_CLEAR:
21719 		case MHIOCGRP_RESERVE:
21720 		case MHIOCGRP_PREEMPTANDABORT:
21721 		case MHIOCGRP_REGISTERANDIGNOREKEY:
21722 		case CDROMCLOSETRAY:
21723 		case USCSICMD:
21724 		case USCSIMAXXFER:
21725 			goto skip_ready_valid;
21726 		default:
21727 			break;
21728 		}
21729 
21730 		mutex_exit(SD_MUTEX(un));
21731 		err = sd_ready_and_valid(ssc, SDPART(dev));
21732 		mutex_enter(SD_MUTEX(un));
21733 
21734 		if (err != SD_READY_VALID) {
21735 			switch (cmd) {
21736 			case DKIOCSTATE:
21737 			case CDROMGDRVSPEED:
21738 			case CDROMSDRVSPEED:
21739 			case FDEJECT:	/* for eject command */
21740 			case DKIOCEJECT:
21741 			case CDROMEJECT:
21742 			case DKIOCREMOVABLE:
21743 			case DKIOCHOTPLUGGABLE:
21744 				break;
21745 			default:
21746 				if (un->un_f_has_removable_media) {
21747 					err = ENXIO;
21748 				} else {
21749 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
21750 					if (err == SD_RESERVED_BY_OTHERS) {
21751 						err = EACCES;
21752 					} else {
21753 						err = EIO;
21754 					}
21755 				}
21756 				un->un_ncmds_in_driver--;
21757 				ASSERT(un->un_ncmds_in_driver >= 0);
21758 				mutex_exit(SD_MUTEX(un));
21759 
21760 				goto done_without_assess;
21761 			}
21762 		}
21763 	}
21764 
21765 skip_ready_valid:
21766 	mutex_exit(SD_MUTEX(un));
21767 
21768 	switch (cmd) {
21769 	case DKIOCINFO:
21770 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21771 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21772 		break;
21773 
21774 	case DKIOCGMEDIAINFO:
21775 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21776 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21777 		break;
21778 
21779 	case DKIOCGMEDIAINFOEXT:
21780 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFOEXT\n");
21781 		err = sd_get_media_info_ext(dev, (caddr_t)arg, flag);
21782 		break;
21783 
21784 	case DKIOCGGEOM:
21785 	case DKIOCGVTOC:
21786 	case DKIOCGEXTVTOC:
21787 	case DKIOCGAPART:
21788 	case DKIOCPARTINFO:
21789 	case DKIOCEXTPARTINFO:
21790 	case DKIOCSGEOM:
21791 	case DKIOCSAPART:
21792 	case DKIOCGETEFI:
21793 	case DKIOCPARTITION:
21794 	case DKIOCSVTOC:
21795 	case DKIOCSEXTVTOC:
21796 	case DKIOCSETEFI:
21797 	case DKIOCGMBOOT:
21798 	case DKIOCSMBOOT:
21799 	case DKIOCG_PHYGEOM:
21800 	case DKIOCG_VIRTGEOM:
21801 #if defined(__x86)
21802 	case DKIOCSETEXTPART:
21803 #endif
21804 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
21805 
21806 		/* TUR should spin up */
21807 
21808 		if (un->un_f_has_removable_media)
21809 			err = sd_send_scsi_TEST_UNIT_READY(ssc,
21810 			    SD_CHECK_FOR_MEDIA);
21811 
21812 		else
21813 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
21814 
21815 		if (err != 0)
21816 			goto done_with_assess;
21817 
21818 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
21819 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
21820 
21821 		if ((err == 0) &&
21822 		    ((cmd == DKIOCSETEFI) ||
21823 		    ((un->un_f_pkstats_enabled) &&
21824 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC ||
21825 		    cmd == DKIOCSEXTVTOC)))) {
21826 
21827 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
21828 			    (void *)SD_PATH_DIRECT);
21829 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
21830 				sd_set_pstats(un);
21831 				SD_TRACE(SD_LOG_IO_PARTITION, un,
21832 				    "sd_ioctl: un:0x%p pstats created and "
21833 				    "set\n", un);
21834 			}
21835 		}
21836 
21837 		if ((cmd == DKIOCSVTOC || cmd == DKIOCSEXTVTOC) ||
21838 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
21839 
21840 			mutex_enter(SD_MUTEX(un));
21841 			if (un->un_f_devid_supported &&
21842 			    (un->un_f_opt_fab_devid == TRUE)) {
21843 				if (un->un_devid == NULL) {
21844 					sd_register_devid(ssc, SD_DEVINFO(un),
21845 					    SD_TARGET_IS_UNRESERVED);
21846 				} else {
21847 					/*
21848 					 * The device id for this disk
21849 					 * has been fabricated. The
21850 					 * device id must be preserved
21851 					 * by writing it back out to
21852 					 * disk.
21853 					 */
21854 					if (sd_write_deviceid(ssc) != 0) {
21855 						ddi_devid_free(un->un_devid);
21856 						un->un_devid = NULL;
21857 					}
21858 				}
21859 			}
21860 			mutex_exit(SD_MUTEX(un));
21861 		}
21862 
21863 		break;
21864 
21865 	case DKIOCLOCK:
21866 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21867 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
21868 		    SD_PATH_STANDARD);
21869 		goto done_with_assess;
21870 
21871 	case DKIOCUNLOCK:
21872 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21873 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
21874 		    SD_PATH_STANDARD);
21875 		goto done_with_assess;
21876 
21877 	case DKIOCSTATE: {
21878 		enum dkio_state		state;
21879 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21880 
21881 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21882 			err = EFAULT;
21883 		} else {
21884 			err = sd_check_media(dev, state);
21885 			if (err == 0) {
21886 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21887 				    sizeof (int), flag) != 0)
21888 					err = EFAULT;
21889 			}
21890 		}
21891 		break;
21892 	}
21893 
21894 	case DKIOCREMOVABLE:
21895 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21896 		i = un->un_f_has_removable_media ? 1 : 0;
21897 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21898 			err = EFAULT;
21899 		} else {
21900 			err = 0;
21901 		}
21902 		break;
21903 
21904 	case DKIOCSOLIDSTATE:
21905 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSOLIDSTATE\n");
21906 		i = un->un_f_is_solid_state ? 1 : 0;
21907 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21908 			err = EFAULT;
21909 		} else {
21910 			err = 0;
21911 		}
21912 		break;
21913 
21914 	case DKIOCHOTPLUGGABLE:
21915 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21916 		i = un->un_f_is_hotpluggable ? 1 : 0;
21917 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21918 			err = EFAULT;
21919 		} else {
21920 			err = 0;
21921 		}
21922 		break;
21923 
21924 	case DKIOCREADONLY:
21925 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREADONLY\n");
21926 		i = 0;
21927 		if ((ISCD(un) && !un->un_f_mmc_writable_media) ||
21928 		    (sr_check_wp(dev) != 0)) {
21929 			i = 1;
21930 		}
21931 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21932 			err = EFAULT;
21933 		} else {
21934 			err = 0;
21935 		}
21936 		break;
21937 
21938 	case DKIOCGTEMPERATURE:
21939 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21940 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21941 		break;
21942 
21943 	case MHIOCENFAILFAST:
21944 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21945 		if ((err = drv_priv(cred_p)) == 0) {
21946 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21947 		}
21948 		break;
21949 
21950 	case MHIOCTKOWN:
21951 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21952 		if ((err = drv_priv(cred_p)) == 0) {
21953 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21954 		}
21955 		break;
21956 
21957 	case MHIOCRELEASE:
21958 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21959 		if ((err = drv_priv(cred_p)) == 0) {
21960 			err = sd_mhdioc_release(dev);
21961 		}
21962 		break;
21963 
21964 	case MHIOCSTATUS:
21965 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21966 		if ((err = drv_priv(cred_p)) == 0) {
21967 			switch (sd_send_scsi_TEST_UNIT_READY(ssc, 0)) {
21968 			case 0:
21969 				err = 0;
21970 				break;
21971 			case EACCES:
21972 				*rval_p = 1;
21973 				err = 0;
21974 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
21975 				break;
21976 			default:
21977 				err = EIO;
21978 				goto done_with_assess;
21979 			}
21980 		}
21981 		break;
21982 
21983 	case MHIOCQRESERVE:
21984 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21985 		if ((err = drv_priv(cred_p)) == 0) {
21986 			err = sd_reserve_release(dev, SD_RESERVE);
21987 		}
21988 		break;
21989 
21990 	case MHIOCREREGISTERDEVID:
21991 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21992 		if (drv_priv(cred_p) == EPERM) {
21993 			err = EPERM;
21994 		} else if (!un->un_f_devid_supported) {
21995 			err = ENOTTY;
21996 		} else {
21997 			err = sd_mhdioc_register_devid(dev);
21998 		}
21999 		break;
22000 
22001 	case MHIOCGRP_INKEYS:
22002 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
22003 		if (((err = drv_priv(cred_p)) != EPERM) &&
22004 		    arg != (intptr_t)NULL) {
22005 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22006 				err = ENOTSUP;
22007 			} else {
22008 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
22009 				    flag);
22010 			}
22011 		}
22012 		break;
22013 
22014 	case MHIOCGRP_INRESV:
22015 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
22016 		if (((err = drv_priv(cred_p)) != EPERM) &&
22017 		    arg != (intptr_t)NULL) {
22018 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22019 				err = ENOTSUP;
22020 			} else {
22021 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
22022 			}
22023 		}
22024 		break;
22025 
22026 	case MHIOCGRP_REGISTER:
22027 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
22028 		if ((err = drv_priv(cred_p)) != EPERM) {
22029 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22030 				err = ENOTSUP;
22031 			} else if (arg != (intptr_t)NULL) {
22032 				mhioc_register_t reg;
22033 				if (ddi_copyin((void *)arg, &reg,
22034 				    sizeof (mhioc_register_t), flag) != 0) {
22035 					err = EFAULT;
22036 				} else {
22037 					err =
22038 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22039 					    ssc, SD_SCSI3_REGISTER,
22040 					    (uchar_t *)&reg);
22041 					if (err != 0)
22042 						goto done_with_assess;
22043 				}
22044 			}
22045 		}
22046 		break;
22047 
22048 	case MHIOCGRP_CLEAR:
22049 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_CLEAR\n");
22050 		if ((err = drv_priv(cred_p)) != EPERM) {
22051 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22052 				err = ENOTSUP;
22053 			} else if (arg != (intptr_t)NULL) {
22054 				mhioc_register_t reg;
22055 				if (ddi_copyin((void *)arg, &reg,
22056 				    sizeof (mhioc_register_t), flag) != 0) {
22057 					err = EFAULT;
22058 				} else {
22059 					err =
22060 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22061 					    ssc, SD_SCSI3_CLEAR,
22062 					    (uchar_t *)&reg);
22063 					if (err != 0)
22064 						goto done_with_assess;
22065 				}
22066 			}
22067 		}
22068 		break;
22069 
22070 	case MHIOCGRP_RESERVE:
22071 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
22072 		if ((err = drv_priv(cred_p)) != EPERM) {
22073 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22074 				err = ENOTSUP;
22075 			} else if (arg != (intptr_t)NULL) {
22076 				mhioc_resv_desc_t resv_desc;
22077 				if (ddi_copyin((void *)arg, &resv_desc,
22078 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
22079 					err = EFAULT;
22080 				} else {
22081 					err =
22082 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22083 					    ssc, SD_SCSI3_RESERVE,
22084 					    (uchar_t *)&resv_desc);
22085 					if (err != 0)
22086 						goto done_with_assess;
22087 				}
22088 			}
22089 		}
22090 		break;
22091 
22092 	case MHIOCGRP_PREEMPTANDABORT:
22093 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
22094 		if ((err = drv_priv(cred_p)) != EPERM) {
22095 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22096 				err = ENOTSUP;
22097 			} else if (arg != (intptr_t)NULL) {
22098 				mhioc_preemptandabort_t preempt_abort;
22099 				if (ddi_copyin((void *)arg, &preempt_abort,
22100 				    sizeof (mhioc_preemptandabort_t),
22101 				    flag) != 0) {
22102 					err = EFAULT;
22103 				} else {
22104 					err =
22105 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22106 					    ssc, SD_SCSI3_PREEMPTANDABORT,
22107 					    (uchar_t *)&preempt_abort);
22108 					if (err != 0)
22109 						goto done_with_assess;
22110 				}
22111 			}
22112 		}
22113 		break;
22114 
22115 	case MHIOCGRP_REGISTERANDIGNOREKEY:
22116 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
22117 		if ((err = drv_priv(cred_p)) != EPERM) {
22118 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22119 				err = ENOTSUP;
22120 			} else if (arg != (intptr_t)NULL) {
22121 				mhioc_registerandignorekey_t r_and_i;
22122 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
22123 				    sizeof (mhioc_registerandignorekey_t),
22124 				    flag) != 0) {
22125 					err = EFAULT;
22126 				} else {
22127 					err =
22128 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22129 					    ssc, SD_SCSI3_REGISTERANDIGNOREKEY,
22130 					    (uchar_t *)&r_and_i);
22131 					if (err != 0)
22132 						goto done_with_assess;
22133 				}
22134 			}
22135 		}
22136 		break;
22137 
22138 	case USCSICMD:
22139 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
22140 		cr = ddi_get_cred();
22141 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
22142 			err = EPERM;
22143 		} else {
22144 			enum uio_seg	uioseg;
22145 
22146 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
22147 			    UIO_USERSPACE;
22148 			if (un->un_f_format_in_progress == TRUE) {
22149 				err = EAGAIN;
22150 				break;
22151 			}
22152 
22153 			err = sd_ssc_send(ssc,
22154 			    (struct uscsi_cmd *)arg,
22155 			    flag, uioseg, SD_PATH_STANDARD);
22156 			if (err != 0)
22157 				goto done_with_assess;
22158 			else
22159 				sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22160 		}
22161 		break;
22162 
22163 	case USCSIMAXXFER:
22164 		SD_TRACE(SD_LOG_IOCTL, un, "USCSIMAXXFER\n");
22165 		cr = ddi_get_cred();
22166 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
22167 			err = EPERM;
22168 		} else {
22169 			const uscsi_xfer_t xfer = un->un_max_xfer_size;
22170 
22171 			if (ddi_copyout(&xfer, (void *)arg, sizeof (xfer),
22172 			    flag) != 0) {
22173 				err = EFAULT;
22174 			} else {
22175 				err = 0;
22176 			}
22177 		}
22178 		break;
22179 
22180 	case CDROMPAUSE:
22181 	case CDROMRESUME:
22182 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
22183 		if (!ISCD(un)) {
22184 			err = ENOTTY;
22185 		} else {
22186 			err = sr_pause_resume(dev, cmd);
22187 		}
22188 		break;
22189 
22190 	case CDROMPLAYMSF:
22191 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
22192 		if (!ISCD(un)) {
22193 			err = ENOTTY;
22194 		} else {
22195 			err = sr_play_msf(dev, (caddr_t)arg, flag);
22196 		}
22197 		break;
22198 
22199 	case CDROMPLAYTRKIND:
22200 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
22201 #if defined(__x86)
22202 		/*
22203 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
22204 		 */
22205 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
22206 #else
22207 		if (!ISCD(un)) {
22208 #endif
22209 			err = ENOTTY;
22210 		} else {
22211 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
22212 		}
22213 		break;
22214 
22215 	case CDROMREADTOCHDR:
22216 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
22217 		if (!ISCD(un)) {
22218 			err = ENOTTY;
22219 		} else {
22220 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
22221 		}
22222 		break;
22223 
22224 	case CDROMREADTOCENTRY:
22225 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
22226 		if (!ISCD(un)) {
22227 			err = ENOTTY;
22228 		} else {
22229 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
22230 		}
22231 		break;
22232 
22233 	case CDROMSTOP:
22234 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
22235 		if (!ISCD(un)) {
22236 			err = ENOTTY;
22237 		} else {
22238 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22239 			    SD_TARGET_STOP, SD_PATH_STANDARD);
22240 			goto done_with_assess;
22241 		}
22242 		break;
22243 
22244 	case CDROMSTART:
22245 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
22246 		if (!ISCD(un)) {
22247 			err = ENOTTY;
22248 		} else {
22249 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22250 			    SD_TARGET_START, SD_PATH_STANDARD);
22251 			goto done_with_assess;
22252 		}
22253 		break;
22254 
22255 	case CDROMCLOSETRAY:
22256 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
22257 		if (!ISCD(un)) {
22258 			err = ENOTTY;
22259 		} else {
22260 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22261 			    SD_TARGET_CLOSE, SD_PATH_STANDARD);
22262 			goto done_with_assess;
22263 		}
22264 		break;
22265 
22266 	case FDEJECT:	/* for eject command */
22267 	case DKIOCEJECT:
22268 	case CDROMEJECT:
22269 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
22270 		if (!un->un_f_eject_media_supported) {
22271 			err = ENOTTY;
22272 		} else {
22273 			err = sr_eject(dev);
22274 		}
22275 		break;
22276 
22277 	case CDROMVOLCTRL:
22278 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
22279 		if (!ISCD(un)) {
22280 			err = ENOTTY;
22281 		} else {
22282 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
22283 		}
22284 		break;
22285 
22286 	case CDROMSUBCHNL:
22287 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
22288 		if (!ISCD(un)) {
22289 			err = ENOTTY;
22290 		} else {
22291 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
22292 		}
22293 		break;
22294 
22295 	case CDROMREADMODE2:
22296 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
22297 		if (!ISCD(un)) {
22298 			err = ENOTTY;
22299 		} else if (un->un_f_cfg_is_atapi == TRUE) {
22300 			/*
22301 			 * If the drive supports READ CD, use that instead of
22302 			 * switching the LBA size via a MODE SELECT
22303 			 * Block Descriptor
22304 			 */
22305 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
22306 		} else {
22307 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
22308 		}
22309 		break;
22310 
22311 	case CDROMREADMODE1:
22312 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
22313 		if (!ISCD(un)) {
22314 			err = ENOTTY;
22315 		} else {
22316 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
22317 		}
22318 		break;
22319 
22320 	case CDROMREADOFFSET:
22321 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
22322 		if (!ISCD(un)) {
22323 			err = ENOTTY;
22324 		} else {
22325 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
22326 			    flag);
22327 		}
22328 		break;
22329 
22330 	case CDROMSBLKMODE:
22331 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
22332 		/*
22333 		 * There is no means of changing block size in case of atapi
22334 		 * drives, thus return ENOTTY if drive type is atapi
22335 		 */
22336 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
22337 			err = ENOTTY;
22338 		} else if (un->un_f_mmc_cap == TRUE) {
22339 
22340 			/*
22341 			 * MMC Devices do not support changing the
22342 			 * logical block size
22343 			 *
22344 			 * Note: EINVAL is being returned instead of ENOTTY to
22345 			 * maintain consistancy with the original mmc
22346 			 * driver update.
22347 			 */
22348 			err = EINVAL;
22349 		} else {
22350 			mutex_enter(SD_MUTEX(un));
22351 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
22352 			    (un->un_ncmds_in_transport > 0)) {
22353 				mutex_exit(SD_MUTEX(un));
22354 				err = EINVAL;
22355 			} else {
22356 				mutex_exit(SD_MUTEX(un));
22357 				err = sr_change_blkmode(dev, cmd, arg, flag);
22358 			}
22359 		}
22360 		break;
22361 
22362 	case CDROMGBLKMODE:
22363 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
22364 		if (!ISCD(un)) {
22365 			err = ENOTTY;
22366 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
22367 		    (un->un_f_blockcount_is_valid != FALSE)) {
22368 			/*
22369 			 * Drive is an ATAPI drive so return target block
22370 			 * size for ATAPI drives since we cannot change the
22371 			 * blocksize on ATAPI drives. Used primarily to detect
22372 			 * if an ATAPI cdrom is present.
22373 			 */
22374 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
22375 			    sizeof (int), flag) != 0) {
22376 				err = EFAULT;
22377 			} else {
22378 				err = 0;
22379 			}
22380 
22381 		} else {
22382 			/*
22383 			 * Drive supports changing block sizes via a Mode
22384 			 * Select.
22385 			 */
22386 			err = sr_change_blkmode(dev, cmd, arg, flag);
22387 		}
22388 		break;
22389 
22390 	case CDROMGDRVSPEED:
22391 	case CDROMSDRVSPEED:
22392 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
22393 		if (!ISCD(un)) {
22394 			err = ENOTTY;
22395 		} else if (un->un_f_mmc_cap == TRUE) {
22396 			/*
22397 			 * Note: In the future the driver implementation
22398 			 * for getting and
22399 			 * setting cd speed should entail:
22400 			 * 1) If non-mmc try the Toshiba mode page
22401 			 *    (sr_change_speed)
22402 			 * 2) If mmc but no support for Real Time Streaming try
22403 			 *    the SET CD SPEED (0xBB) command
22404 			 *   (sr_atapi_change_speed)
22405 			 * 3) If mmc and support for Real Time Streaming
22406 			 *    try the GET PERFORMANCE and SET STREAMING
22407 			 *    commands (not yet implemented, 4380808)
22408 			 */
22409 			/*
22410 			 * As per recent MMC spec, CD-ROM speed is variable
22411 			 * and changes with LBA. Since there is no such
22412 			 * things as drive speed now, fail this ioctl.
22413 			 *
22414 			 * Note: EINVAL is returned for consistancy of original
22415 			 * implementation which included support for getting
22416 			 * the drive speed of mmc devices but not setting
22417 			 * the drive speed. Thus EINVAL would be returned
22418 			 * if a set request was made for an mmc device.
22419 			 * We no longer support get or set speed for
22420 			 * mmc but need to remain consistent with regard
22421 			 * to the error code returned.
22422 			 */
22423 			err = EINVAL;
22424 		} else if (un->un_f_cfg_is_atapi == TRUE) {
22425 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
22426 		} else {
22427 			err = sr_change_speed(dev, cmd, arg, flag);
22428 		}
22429 		break;
22430 
22431 	case CDROMCDDA:
22432 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
22433 		if (!ISCD(un)) {
22434 			err = ENOTTY;
22435 		} else {
22436 			err = sr_read_cdda(dev, (void *)arg, flag);
22437 		}
22438 		break;
22439 
22440 	case CDROMCDXA:
22441 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
22442 		if (!ISCD(un)) {
22443 			err = ENOTTY;
22444 		} else {
22445 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
22446 		}
22447 		break;
22448 
22449 	case CDROMSUBCODE:
22450 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
22451 		if (!ISCD(un)) {
22452 			err = ENOTTY;
22453 		} else {
22454 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
22455 		}
22456 		break;
22457 
22458 
22459 #ifdef SDDEBUG
22460 /* RESET/ABORTS testing ioctls */
22461 	case DKIOCRESET: {
22462 		int	reset_level;
22463 
22464 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
22465 			err = EFAULT;
22466 		} else {
22467 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
22468 			    "reset_level = 0x%lx\n", reset_level);
22469 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
22470 				err = 0;
22471 			} else {
22472 				err = EIO;
22473 			}
22474 		}
22475 		break;
22476 	}
22477 
22478 	case DKIOCABORT:
22479 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
22480 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
22481 			err = 0;
22482 		} else {
22483 			err = EIO;
22484 		}
22485 		break;
22486 #endif
22487 
22488 #ifdef SD_FAULT_INJECTION
22489 /* SDIOC FaultInjection testing ioctls */
22490 	case SDIOCSTART:
22491 	case SDIOCSTOP:
22492 	case SDIOCINSERTPKT:
22493 	case SDIOCINSERTXB:
22494 	case SDIOCINSERTUN:
22495 	case SDIOCINSERTARQ:
22496 	case SDIOCPUSH:
22497 	case SDIOCRETRIEVE:
22498 	case SDIOCRUN:
22499 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
22500 		    "SDIOC detected cmd:0x%X:\n", cmd);
22501 		/* call error generator */
22502 		sd_faultinjection_ioctl(cmd, arg, un);
22503 		err = 0;
22504 		break;
22505 
22506 #endif /* SD_FAULT_INJECTION */
22507 
22508 	case DKIOCFLUSHWRITECACHE:
22509 		{
22510 			struct dk_callback *dkc = (struct dk_callback *)arg;
22511 
22512 			mutex_enter(SD_MUTEX(un));
22513 			if (!un->un_f_sync_cache_supported ||
22514 			    !un->un_f_write_cache_enabled) {
22515 				err = un->un_f_sync_cache_supported ?
22516 				    0 : ENOTSUP;
22517 				mutex_exit(SD_MUTEX(un));
22518 				if ((flag & FKIOCTL) && dkc != NULL &&
22519 				    dkc->dkc_callback != NULL) {
22520 					(*dkc->dkc_callback)(dkc->dkc_cookie,
22521 					    err);
22522 					/*
22523 					 * Did callback and reported error.
22524 					 * Since we did a callback, ioctl
22525 					 * should return 0.
22526 					 */
22527 					err = 0;
22528 				}
22529 				break;
22530 			}
22531 			mutex_exit(SD_MUTEX(un));
22532 
22533 			if ((flag & FKIOCTL) && dkc != NULL &&
22534 			    dkc->dkc_callback != NULL) {
22535 				/* async SYNC CACHE request */
22536 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22537 			} else {
22538 				/* synchronous SYNC CACHE request */
22539 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22540 			}
22541 		}
22542 		break;
22543 
22544 	case DKIOCFREE:
22545 		{
22546 			dkioc_free_list_t *dfl = (dkioc_free_list_t *)arg;
22547 
22548 			/* bad ioctls shouldn't panic */
22549 			if (dfl == NULL) {
22550 				/* check kernel callers strictly in debug */
22551 				ASSERT0(flag & FKIOCTL);
22552 				err = SET_ERROR(EINVAL);
22553 				break;
22554 			}
22555 			/* synchronous UNMAP request */
22556 			err = sd_send_scsi_UNMAP(dev, ssc, dfl, flag);
22557 		}
22558 		break;
22559 
22560 	case DKIOC_CANFREE:
22561 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC_CANFREE\n");
22562 		i = (un->un_thin_flags & SD_THIN_PROV_ENABLED) ? 1 : 0;
22563 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22564 			err = EFAULT;
22565 		} else {
22566 			err = 0;
22567 		}
22568 		break;
22569 
22570 	case DKIOCGETWCE: {
22571 
22572 		int wce;
22573 
22574 		if ((err = sd_get_write_cache_enabled(ssc, &wce)) != 0) {
22575 			break;
22576 		}
22577 
22578 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22579 			err = EFAULT;
22580 		}
22581 		break;
22582 	}
22583 
22584 	case DKIOCSETWCE: {
22585 
22586 		int wce, sync_supported;
22587 		int cur_wce = 0;
22588 
22589 		if (!un->un_f_cache_mode_changeable) {
22590 			err = EINVAL;
22591 			break;
22592 		}
22593 
22594 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22595 			err = EFAULT;
22596 			break;
22597 		}
22598 
22599 		/*
22600 		 * Synchronize multiple threads trying to enable
22601 		 * or disable the cache via the un_f_wcc_cv
22602 		 * condition variable.
22603 		 */
22604 		mutex_enter(SD_MUTEX(un));
22605 
22606 		/*
22607 		 * Don't allow the cache to be enabled if the
22608 		 * config file has it disabled.
22609 		 */
22610 		if (un->un_f_opt_disable_cache && wce) {
22611 			mutex_exit(SD_MUTEX(un));
22612 			err = EINVAL;
22613 			break;
22614 		}
22615 
22616 		/*
22617 		 * Wait for write cache change in progress
22618 		 * bit to be clear before proceeding.
22619 		 */
22620 		while (un->un_f_wcc_inprog)
22621 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22622 
22623 		un->un_f_wcc_inprog = 1;
22624 
22625 		mutex_exit(SD_MUTEX(un));
22626 
22627 		/*
22628 		 * Get the current write cache state
22629 		 */
22630 		if ((err = sd_get_write_cache_enabled(ssc, &cur_wce)) != 0) {
22631 			mutex_enter(SD_MUTEX(un));
22632 			un->un_f_wcc_inprog = 0;
22633 			cv_broadcast(&un->un_wcc_cv);
22634 			mutex_exit(SD_MUTEX(un));
22635 			break;
22636 		}
22637 
22638 		mutex_enter(SD_MUTEX(un));
22639 		un->un_f_write_cache_enabled = (cur_wce != 0);
22640 
22641 		if (un->un_f_write_cache_enabled && wce == 0) {
22642 			/*
22643 			 * Disable the write cache.  Don't clear
22644 			 * un_f_write_cache_enabled until after
22645 			 * the mode select and flush are complete.
22646 			 */
22647 			sync_supported = un->un_f_sync_cache_supported;
22648 
22649 			/*
22650 			 * If cache flush is suppressed, we assume that the
22651 			 * controller firmware will take care of managing the
22652 			 * write cache for us: no need to explicitly
22653 			 * disable it.
22654 			 */
22655 			if (!un->un_f_suppress_cache_flush) {
22656 				mutex_exit(SD_MUTEX(un));
22657 				if ((err = sd_cache_control(ssc,
22658 				    SD_CACHE_NOCHANGE,
22659 				    SD_CACHE_DISABLE)) == 0 &&
22660 				    sync_supported) {
22661 					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
22662 					    NULL);
22663 				}
22664 			} else {
22665 				mutex_exit(SD_MUTEX(un));
22666 			}
22667 
22668 			mutex_enter(SD_MUTEX(un));
22669 			if (err == 0) {
22670 				un->un_f_write_cache_enabled = 0;
22671 			}
22672 
22673 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22674 			/*
22675 			 * Set un_f_write_cache_enabled first, so there is
22676 			 * no window where the cache is enabled, but the
22677 			 * bit says it isn't.
22678 			 */
22679 			un->un_f_write_cache_enabled = 1;
22680 
22681 			/*
22682 			 * If cache flush is suppressed, we assume that the
22683 			 * controller firmware will take care of managing the
22684 			 * write cache for us: no need to explicitly
22685 			 * enable it.
22686 			 */
22687 			if (!un->un_f_suppress_cache_flush) {
22688 				mutex_exit(SD_MUTEX(un));
22689 				err = sd_cache_control(ssc, SD_CACHE_NOCHANGE,
22690 				    SD_CACHE_ENABLE);
22691 			} else {
22692 				mutex_exit(SD_MUTEX(un));
22693 			}
22694 
22695 			mutex_enter(SD_MUTEX(un));
22696 
22697 			if (err) {
22698 				un->un_f_write_cache_enabled = 0;
22699 			}
22700 		}
22701 
22702 		un->un_f_wcc_inprog = 0;
22703 		cv_broadcast(&un->un_wcc_cv);
22704 		mutex_exit(SD_MUTEX(un));
22705 		break;
22706 	}
22707 
22708 	default:
22709 		err = ENOTTY;
22710 		break;
22711 	}
22712 	mutex_enter(SD_MUTEX(un));
22713 	un->un_ncmds_in_driver--;
22714 	ASSERT(un->un_ncmds_in_driver >= 0);
22715 	mutex_exit(SD_MUTEX(un));
22716 
22717 
22718 done_without_assess:
22719 	sd_ssc_fini(ssc);
22720 
22721 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22722 	return (err);
22723 
22724 done_with_assess:
22725 	mutex_enter(SD_MUTEX(un));
22726 	un->un_ncmds_in_driver--;
22727 	ASSERT(un->un_ncmds_in_driver >= 0);
22728 	mutex_exit(SD_MUTEX(un));
22729 
22730 done_quick_assess:
22731 	if (err != 0)
22732 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22733 	/* Uninitialize sd_ssc_t pointer */
22734 	sd_ssc_fini(ssc);
22735 
22736 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22737 	return (err);
22738 }
22739 
22740 
22741 /*
22742  *    Function: sd_dkio_ctrl_info
22743  *
22744  * Description: This routine is the driver entry point for handling controller
22745  *		information ioctl requests (DKIOCINFO).
22746  *
22747  *   Arguments: dev  - the device number
22748  *		arg  - pointer to user provided dk_cinfo structure
22749  *		       specifying the controller type and attributes.
22750  *		flag - this argument is a pass through to ddi_copyxxx()
22751  *		       directly from the mode argument of ioctl().
22752  *
22753  * Return Code: 0
22754  *		EFAULT
22755  *		ENXIO
22756  */
22757 
22758 static int
22759 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22760 {
22761 	struct sd_lun	*un = NULL;
22762 	struct dk_cinfo	*info;
22763 	dev_info_t	*pdip;
22764 	int		lun, tgt;
22765 
22766 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22767 		return (ENXIO);
22768 	}
22769 
22770 	info = (struct dk_cinfo *)
22771 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22772 
22773 	switch (un->un_ctype) {
22774 	case CTYPE_CDROM:
22775 		info->dki_ctype = DKC_CDROM;
22776 		break;
22777 	default:
22778 		info->dki_ctype = DKC_SCSI_CCS;
22779 		break;
22780 	}
22781 	pdip = ddi_get_parent(SD_DEVINFO(un));
22782 	info->dki_cnum = ddi_get_instance(pdip);
22783 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22784 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22785 	} else {
22786 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22787 		    DK_DEVLEN - 1);
22788 	}
22789 
22790 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22791 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22792 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22793 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22794 
22795 	/* Unit Information */
22796 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22797 	info->dki_slave = ((tgt << 3) | lun);
22798 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22799 	    DK_DEVLEN - 1);
22800 	info->dki_flags = DKI_FMTVOL;
22801 	info->dki_partition = SDPART(dev);
22802 
22803 	/* Max Transfer size of this device in blocks */
22804 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22805 	info->dki_addr = 0;
22806 	info->dki_space = 0;
22807 	info->dki_prio = 0;
22808 	info->dki_vec = 0;
22809 
22810 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22811 		kmem_free(info, sizeof (struct dk_cinfo));
22812 		return (EFAULT);
22813 	} else {
22814 		kmem_free(info, sizeof (struct dk_cinfo));
22815 		return (0);
22816 	}
22817 }
22818 
22819 /*
22820  *    Function: sd_get_media_info_com
22821  *
22822  * Description: This routine returns the information required to populate
22823  *		the fields for the dk_minfo/dk_minfo_ext structures.
22824  *
22825  *   Arguments: dev		- the device number
22826  *		dki_media_type	- media_type
22827  *		dki_lbsize	- logical block size
22828  *		dki_capacity	- capacity in blocks
22829  *		dki_pbsize	- physical block size (if requested)
22830  *
22831  * Return Code: 0
22832  *		EACCESS
22833  *		EFAULT
22834  *		ENXIO
22835  *		EIO
22836  */
static int
sd_get_media_info_com(dev_t dev, uint_t *dki_media_type, uint_t *dki_lbsize,
    diskaddr_t *dki_capacity, uint_t *dki_pbsize)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	com;
	struct scsi_inquiry	*sinq;
	u_longlong_t		media_capacity;
	uint64_t		capacity;
	uint_t			lbasize;
	uint_t			pbsize;
	uchar_t			*out_data;
	uchar_t			*rqbuf;
	int			rval = 0;
	int			rtn;
	sd_ssc_t		*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info_com: entry\n");

	/* out_data receives the GET CONFIGURATION header for MMC devices */
	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
	ssc = sd_ssc_init(un);

	/* Issue a TUR to determine if the drive is ready with media present */
	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
	if (rval == ENXIO) {
		goto done;
	} else if (rval != 0) {
		/*
		 * Non-fatal TUR failure: record the assessment for FMA and
		 * continue; the capacity reads below will set rval anew.
		 */
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}

	/* Now get configuration data */
	if (ISCD(un)) {
		*dki_media_type = DK_CDROM;

		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
		if (un->un_f_mmc_cap == TRUE) {
			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
			    SD_PATH_STANDARD);

			if (rtn) {
				/*
				 * We ignore all failures for CD and need to
				 * put the assessment before processing code
				 * to avoid missing assessment for FMA.
				 */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				/*
				 * Failed for other than an illegal request
				 * or command not supported
				 */
				if ((com.uscsi_status == STATUS_CHECK) &&
				    (com.uscsi_rqstatus == STATUS_GOOD)) {
					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
					    (rqbuf[12] != 0x20)) {
						rval = EIO;
						goto no_assessment;
					}
				}
			} else {
				/*
				 * The GET CONFIGURATION command succeeded
				 * so set the media type according to the
				 * returned data (current-profile field at
				 * bytes 6-7 of the feature header)
				 */
				*dki_media_type = out_data[6];
				*dki_media_type <<= 8;
				*dki_media_type |= out_data[7];
			}
		}
	} else {
		/*
		 * The profile list is not available, so we attempt to identify
		 * the media type based on the inquiry data
		 */
		sinq = un->un_sd->sd_inq;
		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
			/* This is a direct access device  or optical disk */
			*dki_media_type = DK_FIXED_DISK;

			/* Special-case Iomega ZIP/JAZ removable drives */
			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
					*dki_media_type = DK_ZIP;
				} else if (
				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
					*dki_media_type = DK_JAZ;
				}
			}
		} else {
			/*
			 * Not a CD, direct access or optical disk so return
			 * unknown media
			 */
			*dki_media_type = DK_UNKNOWN;
		}
	}

	/*
	 * Now read the capacity so we can provide the lbasize,
	 * pbsize and capacity.
	 */
	if (dki_pbsize && un->un_f_descr_format_supported) {
		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
		    &pbsize, SD_PATH_DIRECT);

		/*
		 * Override the physical blocksize if the instance already
		 * has a larger value.
		 */
		pbsize = MAX(pbsize, un->un_phy_blocksize);
	}

	/*
	 * Fall back to READ CAPACITY (10) when the 16-byte variant was not
	 * attempted (no pbsize wanted, or descriptor format unsupported) or
	 * when it failed.
	 */
	if (dki_pbsize == NULL || rval != 0 ||
	    !un->un_f_descr_format_supported) {
		rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
		    SD_PATH_DIRECT);

		switch (rval) {
		case 0:
			if (un->un_f_enable_rmw &&
			    un->un_phy_blocksize != 0) {
				pbsize = un->un_phy_blocksize;
			} else {
				pbsize = lbasize;
			}
			media_capacity = capacity;

			/*
			 * sd_send_scsi_READ_CAPACITY() reports capacity in
			 * un->un_sys_blocksize chunks. So we need to convert
			 * it into cap.lbsize chunks.
			 */
			if (un->un_f_has_removable_media) {
				media_capacity *= un->un_sys_blocksize;
				media_capacity /= lbasize;
			}
			break;
		case EACCES:
			rval = EACCES;
			goto done;
		default:
			rval = EIO;
			goto done;
		}
	} else {
		/*
		 * READ CAPACITY (16) succeeded; sanity-check the reported
		 * block sizes. NOTE(review): the `ISP2(x % DEV_BSIZE)'
		 * expressions look intended to reject sizes that are not
		 * power-of-two multiples of DEV_BSIZE -- confirm.
		 */
		if (un->un_f_enable_rmw &&
		    !ISP2(pbsize % DEV_BSIZE)) {
			pbsize = SSD_SECSIZE;
		} else if (!ISP2(lbasize % DEV_BSIZE) ||
		    !ISP2(pbsize % DEV_BSIZE)) {
			pbsize = lbasize = DEV_BSIZE;
		}
		media_capacity = capacity;
	}

	/*
	 * If lun is expanded dynamically, update the un structure.
	 */
	mutex_enter(SD_MUTEX(un));
	if ((un->un_f_blockcount_is_valid == TRUE) &&
	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
	    (capacity > un->un_blockcount)) {
		un->un_f_expnevent = B_FALSE;
		sd_update_block_info(un, lbasize, capacity);
	}
	mutex_exit(SD_MUTEX(un));

	*dki_lbsize = lbasize;
	*dki_capacity = media_capacity;
	if (dki_pbsize)
		*dki_pbsize = pbsize;

done:
	/* Report a final FMA assessment before tearing down the ssc */
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
no_assessment:
	/* Reached directly when an assessment was already made above */
	sd_ssc_fini(ssc);
	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);
	return (rval);
}
23030 
23031 /*
23032  *    Function: sd_get_media_info
23033  *
23034  * Description: This routine is the driver entry point for handling ioctl
23035  *		requests for the media type or command set profile used by the
23036  *		drive to operate on the media (DKIOCGMEDIAINFO).
23037  *
23038  *   Arguments: dev	- the device number
23039  *		arg	- pointer to user provided dk_minfo structure
23040  *			  specifying the media type, logical block size and
23041  *			  drive capacity.
23042  *		flag	- this argument is a pass through to ddi_copyxxx()
23043  *			  directly from the mode argument of ioctl().
23044  *
23045  * Return Code: returns the value from sd_get_media_info_com
23046  */
23047 static int
23048 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
23049 {
23050 	struct dk_minfo		mi;
23051 	int			rval;
23052 
23053 	rval = sd_get_media_info_com(dev, &mi.dki_media_type,
23054 	    &mi.dki_lbsize, &mi.dki_capacity, NULL);
23055 
23056 	if (rval)
23057 		return (rval);
23058 	if (ddi_copyout(&mi, arg, sizeof (struct dk_minfo), flag))
23059 		rval = EFAULT;
23060 	return (rval);
23061 }
23062 
23063 /*
23064  *    Function: sd_get_media_info_ext
23065  *
23066  * Description: This routine is the driver entry point for handling ioctl
23067  *		requests for the media type or command set profile used by the
23068  *		drive to operate on the media (DKIOCGMEDIAINFOEXT). The
23069  *		difference this ioctl and DKIOCGMEDIAINFO is the return value
23070  *		of this ioctl contains both logical block size and physical
23071  *		block size.
23072  *
23073  *
23074  *   Arguments: dev	- the device number
23075  *		arg	- pointer to user provided dk_minfo_ext structure
23076  *			  specifying the media type, logical block size,
23077  *			  physical block size and disk capacity.
23078  *		flag	- this argument is a pass through to ddi_copyxxx()
23079  *			  directly from the mode argument of ioctl().
23080  *
23081  * Return Code: returns the value from sd_get_media_info_com
23082  */
23083 static int
23084 sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag)
23085 {
23086 	struct dk_minfo_ext	mie;
23087 	int			rval = 0;
23088 	size_t			len;
23089 
23090 	rval = sd_get_media_info_com(dev, &mie.dki_media_type,
23091 	    &mie.dki_lbsize, &mie.dki_capacity, &mie.dki_pbsize);
23092 
23093 	if (rval)
23094 		return (rval);
23095 
23096 	switch (ddi_model_convert_from(flag & FMODELS)) {
23097 	case DDI_MODEL_ILP32:
23098 		len = sizeof (struct dk_minfo_ext32);
23099 		break;
23100 	default:
23101 		len = sizeof (struct dk_minfo_ext);
23102 		break;
23103 	}
23104 
23105 	if (ddi_copyout(&mie, arg, len, flag))
23106 		rval = EFAULT;
23107 	return (rval);
23108 
23109 }
23110 
23111 /*
23112  *    Function: sd_watch_request_submit
23113  *
23114  * Description: Call scsi_watch_request_submit or scsi_mmc_watch_request_submit
23115  *		depending on which is supported by device.
23116  */
23117 static opaque_t
23118 sd_watch_request_submit(struct sd_lun *un)
23119 {
23120 	dev_t			dev;
23121 
23122 	/* All submissions are unified to use same device number */
23123 	dev = sd_make_device(SD_DEVINFO(un));
23124 
23125 	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
23126 		return (scsi_mmc_watch_request_submit(SD_SCSI_DEVP(un),
23127 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23128 		    (caddr_t)dev));
23129 	} else {
23130 		return (scsi_watch_request_submit(SD_SCSI_DEVP(un),
23131 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23132 		    (caddr_t)dev));
23133 	}
23134 }
23135 
23136 
23137 /*
23138  *    Function: sd_check_media
23139  *
23140  * Description: This utility routine implements the functionality for the
23141  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
23142  *		driver state changes from that specified by the user
23143  *		(inserted or ejected). For example, if the user specifies
23144  *		DKIO_EJECTED and the current media state is inserted this
23145  *		routine will immediately return DKIO_INSERTED. However, if the
23146  *		current media state is not inserted the user thread will be
23147  *		blocked until the drive state changes. If DKIO_NONE is specified
23148  *		the user thread will block until a drive state change occurs.
23149  *
23150  *   Arguments: dev  - the device number
23151  *		state  - user pointer to a dkio_state, updated with the current
23152  *			drive state at return.
23153  *
23154  * Return Code: ENXIO
23155  *		EIO
23156  *		EAGAIN
23157  *		EINTR
23158  */
23159 
static int
sd_check_media(dev_t dev, enum dkio_state state)
{
	struct sd_lun		*un = NULL;
	enum dkio_state		prev_state;
	opaque_t		token = NULL;
	int			rval = 0;
	sd_ssc_t		*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");

	ssc = sd_ssc_init(un);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
	    "state=%x, mediastate=%x\n", state, un->un_mediastate);

	prev_state = un->un_mediastate;

	/* is there anything to do? */
	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
		/*
		 * submit the request to the scsi_watch service;
		 * scsi_media_watch_cb() does the real work
		 */
		mutex_exit(SD_MUTEX(un));

		/*
		 * This change handles the case where a scsi watch request is
		 * added to a device that is powered down. To accomplish this
		 * we power up the device before adding the scsi watch request,
		 * since the scsi watch sends a TUR directly to the device
		 * which the device cannot handle if it is powered down.
		 */
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			/* Power-up failed; bail out with rval still 0 */
			mutex_enter(SD_MUTEX(un));
			goto done;
		}

		token = sd_watch_request_submit(un);

		sd_pm_exit(un);

		mutex_enter(SD_MUTEX(un));
		if (token == NULL) {
			rval = EAGAIN;
			goto done;
		}

		/*
		 * This is a special case IOCTL that doesn't return
		 * until the media state changes. Routine sdpower
		 * knows about and handles this so don't count it
		 * as an active cmd in the driver, which would
		 * keep the device busy to the pm framework.
		 * If the count isn't decremented the device can't
		 * be powered down.
		 */
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);

		/*
		 * if a prior request had been made, this will be the same
		 * token, as scsi_watch was designed that way.
		 */
		un->un_swr_token = token;
		un->un_specified_mediastate = state;

		/*
		 * now wait for media change
		 * we will not be signalled unless mediastate == state but it is
		 * still better to test for this condition, since there is a
		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
		 */
		SD_TRACE(SD_LOG_COMMON, un,
		    "sd_check_media: waiting for media state change\n");
		while (un->un_mediastate == state) {
			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
				/*
				 * Interrupted by a signal; restore the
				 * command count before returning EINTR.
				 */
				SD_TRACE(SD_LOG_COMMON, un,
				    "sd_check_media: waiting for media state "
				    "was interrupted\n");
				un->un_ncmds_in_driver++;
				rval = EINTR;
				goto done;
			}
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_check_media: received signal, state=%x\n",
			    un->un_mediastate);
		}
		/*
		 * Inc the counter to indicate the device once again
		 * has an active outstanding cmd.
		 */
		un->un_ncmds_in_driver++;
	}

	/* invalidate geometry */
	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
		sr_ejected(un);
	}

	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
		uint64_t	capacity;
		uint_t		lbasize;

		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
		mutex_exit(SD_MUTEX(un));
		/*
		 * Since the following routines use SD_PATH_DIRECT, we must
		 * call PM directly before the upcoming disk accesses. This
		 * may cause the disk to be power/spin up.
		 */

		if (sd_pm_entry(un) == DDI_SUCCESS) {
			rval = sd_send_scsi_READ_CAPACITY(ssc,
			    &capacity, &lbasize, SD_PATH_DIRECT);
			if (rval != 0) {
				sd_pm_exit(un);
				if (rval == EIO)
					sd_ssc_assessment(ssc,
					    SD_FMT_STATUS_CHECK);
				else
					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				mutex_enter(SD_MUTEX(un));
				goto done;
			}
		} else {
			rval = EIO;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}
		mutex_enter(SD_MUTEX(un));

		sd_update_block_info(un, lbasize, capacity);

		/*
		 *  Check if the media in the device is writable or not
		 */
		if (ISCD(un)) {
			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
		}

		mutex_exit(SD_MUTEX(un));
		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
		if ((cmlb_validate(un->un_cmlbhandle, 0,
		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_check_media: un:0x%p pstats created and "
			    "set\n", un);
		}

		/* Lock the door now that new media is in and validated */
		rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);

		sd_pm_exit(un);

		if (rval != 0) {
			if (rval == EIO)
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			else
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		mutex_enter(SD_MUTEX(un));
	}
done:
	/* Common exit: always entered with SD_MUTEX held */
	sd_ssc_fini(ssc);
	/*
	 * NOTE(review): clears the watch-thread-stopped flag on every
	 * exit path, even when no watch request was submitted -- confirm
	 * this is intentional.
	 */
	un->un_f_watcht_stopped = FALSE;
	if (token != NULL && un->un_swr_token != NULL) {
		/*
		 * Use of this local token and the mutex ensures that we avoid
		 * some race conditions associated with terminating the
		 * scsi watch.
		 */
		token = un->un_swr_token;
		mutex_exit(SD_MUTEX(un));
		(void) scsi_watch_request_terminate(token,
		    SCSI_WATCH_TERMINATE_WAIT);
		if (scsi_watch_get_ref_count(token) == 0) {
			mutex_enter(SD_MUTEX(un));
			un->un_swr_token = (opaque_t)NULL;
		} else {
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Update the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_f_blockcount_is_valid == TRUE)
	 */
	if (un->un_errstats) {
		struct sd_errstats	*stp = NULL;

		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}
	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
	return (rval);
}
23372 
23373 
23374 /*
23375  *    Function: sd_delayed_cv_broadcast
23376  *
23377  * Description: Delayed cv_broadcast to allow for target to recover from media
23378  *		insertion.
23379  *
23380  *   Arguments: arg - driver soft state (unit) structure
23381  */
23382 
23383 static void
23384 sd_delayed_cv_broadcast(void *arg)
23385 {
23386 	struct sd_lun *un = arg;
23387 
23388 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
23389 
23390 	mutex_enter(SD_MUTEX(un));
23391 	un->un_dcvb_timeid = NULL;
23392 	cv_broadcast(&un->un_state_cv);
23393 	mutex_exit(SD_MUTEX(un));
23394 }
23395 
23396 
23397 /*
23398  *    Function: sd_media_watch_cb
23399  *
23400  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
23401  *		routine processes the TUR sense data and updates the driver
23402  *		state if a transition has occurred. The user thread
23403  *		(sd_check_media) is then signalled.
23404  *
23405  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
23406  *			among multiple watches that share this callback function
23407  *		resultp - scsi watch facility result packet containing scsi
23408  *			  packet, status byte and sense data
23409  *
23410  * Return Code: 0 for success, -1 for failure
23411  */
23412 
static int
sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp = resultp->statusp;
	uint8_t				*sensep = (uint8_t *)resultp->sensep;
	enum dkio_state			state = DKIO_NONE;
	dev_t				dev = (dev_t)arg;
	uchar_t				actual_sense_length;
	uint8_t				skey, asc, ascq;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (-1);
	}
	actual_sense_length = resultp->actual_sense_length;

	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
	    *((char *)statusp), (void *)sensep, actual_sense_length);

	/*
	 * The device has gone away entirely. Report DKIO_DEV_GONE and wake
	 * any thread blocked in DKIOCSTATE immediately (no recovery delay).
	 */
	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
		un->un_mediastate = DKIO_DEV_GONE;
		cv_broadcast(&un->un_state_cv);
		mutex_exit(SD_MUTEX(un));

		return (0);
	}

	/*
	 * MMC devices polled via GET EVENT STATUS NOTIFICATION: derive the
	 * media state from the returned media event descriptor rather than
	 * from TUR sense data.
	 */
	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
		if (sd_gesn_media_data_valid(resultp->mmc_data)) {
			if ((resultp->mmc_data[5] &
			    SD_GESN_MEDIA_EVENT_STATUS_PRESENT) != 0) {
				state = DKIO_INSERTED;
			} else {
				state = DKIO_EJECTED;
			}
			/* An eject was requested; record the event. */
			if ((resultp->mmc_data[4] & SD_GESN_MEDIA_EVENT_CODE) ==
			    SD_GESN_MEDIA_EVENT_EJECTREQUEST) {
				sd_log_eject_request_event(un, KM_NOSLEEP);
			}
		}
	} else if (sensep != NULL) {
		/*
		 * If there was a check condition then sensep points to valid
		 * sense data. If status was not a check condition but a
		 * reservation or busy status then the new state is DKIO_NONE.
		 */
		skey = scsi_sense_key(sensep);
		asc = scsi_sense_asc(sensep);
		ascq = scsi_sense_ascq(sensep);

		SD_INFO(SD_LOG_COMMON, un,
		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
		    skey, asc, ascq);
		/* This routine only uses up to 13 bytes of sense data. */
		if (actual_sense_length >= 13) {
			if (skey == KEY_UNIT_ATTENTION) {
				/* ASC 0x28: not ready to ready transition */
				if (asc == 0x28) {
					state = DKIO_INSERTED;
				}
			} else if (skey == KEY_NOT_READY) {
				/*
				 * Sense data of 02/06/00 means that the
				 * drive could not read the media (No
				 * reference position found). In this case
				 * to prevent a hang on the DKIOCSTATE IOCTL
				 * we set the media state to DKIO_INSERTED.
				 */
				if (asc == 0x06 && ascq == 0x00)
					state = DKIO_INSERTED;

				/*
				 * Sense data of 02/04/02 means the host
				 * should send a start command. Explicitly
				 * leave the media state as is (inserted),
				 * since the media is present and the host
				 * has stopped the device for PM reasons.
				 * The next true read/write to this media
				 * will bring the device to the right state
				 * for media access. Only 02/3a/xx (medium
				 * not present) maps to DKIO_EJECTED here.
				 */
				if (asc == 0x3a) {
					state = DKIO_EJECTED;
				} else {
					/*
					 * If the drive is busy with an
					 * operation or long write, keep the
					 * media in an inserted state.
					 */

					if ((asc == 0x04) &&
					    ((ascq == 0x02) ||
					    (ascq == 0x07) ||
					    (ascq == 0x08))) {
						state = DKIO_INSERTED;
					}
				}
			} else if (skey == KEY_NO_SENSE) {
				if ((asc == 0x00) && (ascq == 0x00)) {
					/*
					 * Sense Data 00/00/00 does not provide
					 * any information about the state of
					 * the media. Ignore it.
					 */
					mutex_exit(SD_MUTEX(un));
					return (0);
				}
			}
		}
	} else if ((*((char *)statusp) == STATUS_GOOD) &&
	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
		/* Clean TUR completion with no sense data: media present. */
		state = DKIO_INSERTED;
	}

	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: state=%x, specified=%x\n",
	    state, un->un_specified_mediastate);

	/*
	 * now signal the waiting thread if this is *not* the specified state;
	 * delay the signal if the state is DKIO_INSERTED to allow the target
	 * to recover
	 */
	if (state != un->un_specified_mediastate) {
		un->un_mediastate = state;
		if (state == DKIO_INSERTED) {
			/*
			 * delay the signal to give the drive a chance
			 * to do what it apparently needs to do
			 */
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: delayed cv_broadcast\n");
			/* Only schedule if a delayed broadcast isn't pending */
			if (un->un_dcvb_timeid == NULL) {
				un->un_dcvb_timeid =
				    timeout(sd_delayed_cv_broadcast, un,
				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
			}
		} else {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: immediate cv_broadcast\n");
			cv_broadcast(&un->un_state_cv);
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
23561 
23562 
23563 /*
23564  *    Function: sd_dkio_get_temp
23565  *
23566  * Description: This routine is the driver entry point for handling ioctl
23567  *		requests to get the disk temperature.
23568  *
23569  *   Arguments: dev  - the device number
23570  *		arg  - pointer to user provided dk_temperature structure.
23571  *		flag - this argument is a pass through to ddi_copyxxx()
23572  *		       directly from the mode argument of ioctl().
23573  *
23574  * Return Code: 0
23575  *		EFAULT
23576  *		ENXIO
23577  *		EAGAIN
23578  */
23579 
static int
sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct dk_temperature	*dktemp = NULL;
	uchar_t			*temperature_page;
	int			rval = 0;
	int			path_flag = SD_PATH_STANDARD;
	sd_ssc_t		*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ssc = sd_ssc_init(un);
	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);

	/* copyin the disk temp argument to get the user flags */
	if (ddi_copyin((void *)arg, dktemp,
	    sizeof (struct dk_temperature), flag) != 0) {
		rval = EFAULT;
		goto done;
	}

	/* Initialize the temperature to invalid. */
	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;

	/*
	 * Note: Investigate removing the "bypass pm" semantic.
	 * Can we just bypass PM always?
	 */
	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
		/*
		 * path_flag doubles as the "we called sd_pm_entry()"
		 * marker for the cleanup code at done1 below.
		 */
		path_flag = SD_PATH_DIRECT;
		ASSERT(!mutex_owned(&un->un_pm_mutex));
		mutex_enter(&un->un_pm_mutex);
		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * If DKT_BYPASS_PM is set, and the drive happens to be
			 * in low power mode, we can not wake it up, Need to
			 * return EAGAIN.
			 */
			mutex_exit(&un->un_pm_mutex);
			rval = EAGAIN;
			goto done;
		} else {
			/*
			 * Indicate to PM the device is busy. This is required
			 * to avoid a race - i.e. the ioctl is issuing a
			 * command and the pm framework brings down the device
			 * to low power mode (possible power cut-off on some
			 * platforms).
			 */
			mutex_exit(&un->un_pm_mutex);
			if (sd_pm_entry(un) != DDI_SUCCESS) {
				rval = EAGAIN;
				goto done;
			}
		}
	}

	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);

	/* Fetch the temperature log page from the device. */
	rval = sd_send_scsi_LOG_SENSE(ssc, temperature_page,
	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag);
	if (rval != 0)
		goto done2;

	/*
	 * For the current temperature verify that the parameter length is 0x02
	 * and the parameter code is 0x00
	 */
	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
	    (temperature_page[5] == 0x00)) {
		if (temperature_page[9] == 0xFF) {
			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
		}
	}

	/*
	 * For the reference temperature verify that the parameter
	 * length is 0x02 and the parameter code is 0x01
	 */
	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
	    (temperature_page[11] == 0x01)) {
		if (temperature_page[15] == 0xFF) {
			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
		}
	}

	/* Do the copyout regardless of the temperature commands status. */
	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
	    flag) != 0) {
		rval = EFAULT;
		/* EFAULT is not a SCSI failure; skip the FMA assessment. */
		goto done1;
	}

done2:
	/* Translate a failed LOG SENSE into an FMA assessment. */
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
done1:
	/* SD_PATH_DIRECT here implies sd_pm_entry() succeeded above. */
	if (path_flag == SD_PATH_DIRECT) {
		sd_pm_exit(un);
	}

	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
done:
	sd_ssc_fini(ssc);
	if (dktemp != NULL) {
		kmem_free(dktemp, sizeof (struct dk_temperature));
	}

	return (rval);
}
23702 
23703 
23704 /*
23705  *    Function: sd_log_page_supported
23706  *
23707  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
23708  *		supported log pages.
23709  *
23710  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
23711  *                      structure for this target.
23712  *		log_page -
23713  *
23714  * Return Code: -1 - on error (log sense is optional and may not be supported).
23715  *		0  - log page not found.
23716  *		1  - log page found.
23717  */
23718 
23719 static int
23720 sd_log_page_supported(sd_ssc_t *ssc, int log_page)
23721 {
23722 	uchar_t *log_page_data;
23723 	int	i;
23724 	int	match = 0;
23725 	int	log_size;
23726 	int	status = 0;
23727 	struct sd_lun	*un;
23728 
23729 	ASSERT(ssc != NULL);
23730 	un = ssc->ssc_un;
23731 	ASSERT(un != NULL);
23732 
23733 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
23734 
23735 	status = sd_send_scsi_LOG_SENSE(ssc, log_page_data, 0xFF, 0, 0x01, 0,
23736 	    SD_PATH_DIRECT);
23737 
23738 	if (status != 0) {
23739 		if (status == EIO) {
23740 			/*
23741 			 * Some disks do not support log sense, we
23742 			 * should ignore this kind of error(sense key is
23743 			 * 0x5 - illegal request).
23744 			 */
23745 			uint8_t *sensep;
23746 			int senlen;
23747 
23748 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
23749 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
23750 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
23751 
23752 			if (senlen > 0 &&
23753 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
23754 				sd_ssc_assessment(ssc,
23755 				    SD_FMT_IGNORE_COMPROMISE);
23756 			} else {
23757 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23758 			}
23759 		} else {
23760 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23761 		}
23762 
23763 		SD_ERROR(SD_LOG_COMMON, un,
23764 		    "sd_log_page_supported: failed log page retrieval\n");
23765 		kmem_free(log_page_data, 0xFF);
23766 		return (-1);
23767 	}
23768 
23769 	log_size = log_page_data[3];
23770 
23771 	/*
23772 	 * The list of supported log pages start from the fourth byte. Check
23773 	 * until we run out of log pages or a match is found.
23774 	 */
23775 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
23776 		if (log_page_data[i] == log_page) {
23777 			match++;
23778 		}
23779 	}
23780 	kmem_free(log_page_data, 0xFF);
23781 	return (match);
23782 }
23783 
23784 
23785 /*
23786  *    Function: sd_mhdioc_failfast
23787  *
23788  * Description: This routine is the driver entry point for handling ioctl
23789  *		requests to enable/disable the multihost failfast option.
23790  *		(MHIOCENFAILFAST)
23791  *
23792  *   Arguments: dev	- the device number
23793  *		arg	- user specified probing interval.
23794  *		flag	- this argument is a pass through to ddi_copyxxx()
23795  *			  directly from the mode argument of ioctl().
23796  *
23797  * Return Code: 0
23798  *		EFAULT
23799  *		ENXIO
23800  */
23801 
23802 static int
23803 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
23804 {
23805 	struct sd_lun	*un = NULL;
23806 	int		mh_time;
23807 	int		rval = 0;
23808 
23809 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23810 		return (ENXIO);
23811 	}
23812 
23813 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
23814 		return (EFAULT);
23815 
23816 	if (mh_time) {
23817 		mutex_enter(SD_MUTEX(un));
23818 		un->un_resvd_status |= SD_FAILFAST;
23819 		mutex_exit(SD_MUTEX(un));
23820 		/*
23821 		 * If mh_time is INT_MAX, then this ioctl is being used for
23822 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
23823 		 */
23824 		if (mh_time != INT_MAX) {
23825 			rval = sd_check_mhd(dev, mh_time);
23826 		}
23827 	} else {
23828 		(void) sd_check_mhd(dev, 0);
23829 		mutex_enter(SD_MUTEX(un));
23830 		un->un_resvd_status &= ~SD_FAILFAST;
23831 		mutex_exit(SD_MUTEX(un));
23832 	}
23833 	return (rval);
23834 }
23835 
23836 
23837 /*
23838  *    Function: sd_mhdioc_takeown
23839  *
23840  * Description: This routine is the driver entry point for handling ioctl
23841  *		requests to forcefully acquire exclusive access rights to the
23842  *		multihost disk (MHIOCTKOWN).
23843  *
23844  *   Arguments: dev	- the device number
23845  *		arg	- user provided structure specifying the delay
23846  *			  parameters in milliseconds
23847  *		flag	- this argument is a pass through to ddi_copyxxx()
23848  *			  directly from the mode argument of ioctl().
23849  *
23850  * Return Code: 0
23851  *		EFAULT
23852  *		ENXIO
23853  */
23854 
static int
sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct mhioctkown	*tkown = NULL;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* A NULL arg is allowed; default delay parameters are used then. */
	if (arg != NULL) {
		tkown = (struct mhioctkown *)
		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
		if (rval != 0) {
			rval = EFAULT;
			goto error;
		}
	}

	rval = sd_take_ownership(dev, tkown);
	mutex_enter(SD_MUTEX(un));
	if (rval == 0) {
		un->un_resvd_status |= SD_RESERVE;
		/*
		 * reinstate_resv_delay is supplied in milliseconds but
		 * kept internally in microseconds (hence the * 1000).
		 */
		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
			sd_reinstate_resv_delay =
			    tkown->reinstate_resv_delay * 1000;
		} else {
			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
		}
		/*
		 * Give the scsi_watch routine interval set by
		 * the MHIOCENFAILFAST ioctl precedence here.
		 */
		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
			mutex_exit(SD_MUTEX(un));
			/* Start a watch to detect a lost reservation. */
			(void) sd_check_mhd(dev,
			    sd_reinstate_resv_delay / 1000);
			SD_TRACE(SD_LOG_IOCTL_MHD, un,
			    "sd_mhdioc_takeown : %d\n",
			    sd_reinstate_resv_delay);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		/* Register for reset notification on this device. */
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/* Takeover failed; make sure SD_RESERVE is not left set. */
		un->un_resvd_status &= ~SD_RESERVE;
		mutex_exit(SD_MUTEX(un));
	}

error:
	if (tkown != NULL) {
		kmem_free(tkown, sizeof (struct mhioctkown));
	}
	return (rval);
}
23913 
23914 
23915 /*
23916  *    Function: sd_mhdioc_release
23917  *
23918  * Description: This routine is the driver entry point for handling ioctl
23919  *		requests to release exclusive access rights to the multihost
23920  *		disk (MHIOCRELEASE).
23921  *
23922  *   Arguments: dev	- the device number
23923  *
23924  * Return Code: 0
23925  *		ENXIO
23926  */
23927 
static int
sd_mhdioc_release(dev_t dev)
{
	struct sd_lun		*un = NULL;
	timeout_id_t		resvd_timeid_save;
	int			resvd_status_save;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	mutex_enter(SD_MUTEX(un));
	/* Save the status so it can be restored if the release fails. */
	resvd_status_save = un->un_resvd_status;
	un->un_resvd_status &=
	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
	if (un->un_resvd_timeid) {
		/* Cancel the pending reservation-reinstate timeout. */
		resvd_timeid_save = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(resvd_timeid_save);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * destroy any pending timeout thread that may be attempting to
	 * reinstate reservation on this device.
	 */
	sd_rmv_resv_reclaim_req(dev);

	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Terminate the watch unless failfast is still enabled;
		 * the failfast watch must keep running.
		 */
		if ((un->un_mhd_token) &&
		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, 0);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		/* Cancel the reset notification set up at takeown time. */
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/*
		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
		 */
		mutex_enter(SD_MUTEX(un));
		un->un_resvd_status = resvd_status_save;
		mutex_exit(SD_MUTEX(un));
	}
	return (rval);
}
23980 
23981 
23982 /*
23983  *    Function: sd_mhdioc_register_devid
23984  *
23985  * Description: This routine is the driver entry point for handling ioctl
23986  *		requests to register the device id (MHIOCREREGISTERDEVID).
23987  *
23988  *		Note: The implementation for this ioctl has been updated to
23989  *		be consistent with the original PSARC case (1999/357)
23990  *		(4375899, 4241671, 4220005)
23991  *
23992  *   Arguments: dev	- the device number
23993  *
23994  * Return Code: 0
23995  *		ENXIO
23996  */
23997 
23998 static int
23999 sd_mhdioc_register_devid(dev_t dev)
24000 {
24001 	struct sd_lun	*un = NULL;
24002 	int		rval = 0;
24003 	sd_ssc_t	*ssc;
24004 
24005 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24006 		return (ENXIO);
24007 	}
24008 
24009 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24010 
24011 	mutex_enter(SD_MUTEX(un));
24012 
24013 	/* If a devid already exists, de-register it */
24014 	if (un->un_devid != NULL) {
24015 		ddi_devid_unregister(SD_DEVINFO(un));
24016 		/*
24017 		 * After unregister devid, needs to free devid memory
24018 		 */
24019 		ddi_devid_free(un->un_devid);
24020 		un->un_devid = NULL;
24021 	}
24022 
24023 	/* Check for reservation conflict */
24024 	mutex_exit(SD_MUTEX(un));
24025 	ssc = sd_ssc_init(un);
24026 	rval = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
24027 	mutex_enter(SD_MUTEX(un));
24028 
24029 	switch (rval) {
24030 	case 0:
24031 		sd_register_devid(ssc, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24032 		break;
24033 	case EACCES:
24034 		break;
24035 	default:
24036 		rval = EIO;
24037 	}
24038 
24039 	mutex_exit(SD_MUTEX(un));
24040 	if (rval != 0) {
24041 		if (rval == EIO)
24042 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24043 		else
24044 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24045 	}
24046 	sd_ssc_fini(ssc);
24047 	return (rval);
24048 }
24049 
24050 
24051 /*
24052  *    Function: sd_mhdioc_inkeys
24053  *
24054  * Description: This routine is the driver entry point for handling ioctl
24055  *		requests to issue the SCSI-3 Persistent In Read Keys command
24056  *		to the device (MHIOCGRP_INKEYS).
24057  *
24058  *   Arguments: dev	- the device number
24059  *		arg	- user provided in_keys structure
24060  *		flag	- this argument is a pass through to ddi_copyxxx()
24061  *			  directly from the mode argument of ioctl().
24062  *
24063  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24064  *		ENXIO
24065  *		EFAULT
24066  */
24067 
static int
sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un;
	mhioc_inkeys_t		inkeys;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		/*
		 * 32-bit caller on a 64-bit kernel: convert the 32-bit
		 * structure to the native form, run the command, then
		 * copy the generation count back out in 32-bit form.
		 */
		struct mhioc_inkeys32	inkeys32;

		if (ddi_copyin(arg, &inkeys32,
		    sizeof (struct mhioc_inkeys32), flag) != 0) {
			return (EFAULT);
		}
		/* Widen the 32-bit key list pointer to the native form. */
		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		inkeys32.generation = inkeys.generation;
		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}
	case DDI_MODEL_NONE:
		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}
	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
	if (rval != 0) {
		return (rval);
	}
	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}

#endif /* _MULTI_DATAMODEL */

	return (rval);
}
24133 
24134 
24135 /*
24136  *    Function: sd_mhdioc_inresv
24137  *
24138  * Description: This routine is the driver entry point for handling ioctl
24139  *		requests to issue the SCSI-3 Persistent In Read Reservations
24140  *		command to the device (MHIOCGRP_INKEYS).
24141  *
24142  *   Arguments: dev	- the device number
24143  *		arg	- user provided in_resv structure
24144  *		flag	- this argument is a pass through to ddi_copyxxx()
24145  *			  directly from the mode argument of ioctl().
24146  *
24147  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
24148  *		ENXIO
24149  *		EFAULT
24150  */
24151 
static int
sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un;
	mhioc_inresvs_t		inresvs;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL

	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		/*
		 * 32-bit caller on a 64-bit kernel: convert the 32-bit
		 * structure to the native form, run the command, then
		 * copy the generation count back out in 32-bit form.
		 */
		struct mhioc_inresvs32	inresvs32;

		if (ddi_copyin(arg, &inresvs32,
		    sizeof (struct mhioc_inresvs32), flag) != 0) {
			return (EFAULT);
		}
		/* Widen the 32-bit descriptor list pointer to native form. */
		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
		if ((rval = sd_persistent_reservation_in_read_resv(un,
		    &inresvs, flag)) != 0) {
			return (rval);
		}
		inresvs32.generation = inresvs.generation;
		if (ddi_copyout(&inresvs32, arg,
		    sizeof (struct mhioc_inresvs32), flag) != 0) {
			return (EFAULT);
		}
		break;
	}
	case DDI_MODEL_NONE:
		if (ddi_copyin(arg, &inresvs,
		    sizeof (mhioc_inresvs_t), flag) != 0) {
			return (EFAULT);
		}
		if ((rval = sd_persistent_reservation_in_read_resv(un,
		    &inresvs, flag)) != 0) {
			return (rval);
		}
		if (ddi_copyout(&inresvs, arg,
		    sizeof (mhioc_inresvs_t), flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
		return (EFAULT);
	}
	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
	if (rval != 0) {
		return (rval);
	}
	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
		return (EFAULT);
	}

#endif /* ! _MULTI_DATAMODEL */

	return (rval);
}
24218 
24219 
24220 /*
24221  * The following routines support the clustering functionality described below
24222  * and implement lost reservation reclaim functionality.
24223  *
24224  * Clustering
24225  * ----------
24226  * The clustering code uses two different, independent forms of SCSI
24227  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
24228  * Persistent Group Reservations. For any particular disk, it will use either
24229  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
24230  *
24231  * SCSI-2
24232  * The cluster software takes ownership of a multi-hosted disk by issuing the
24233  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
24234  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
24235  * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
24236  * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
24237  * driver. The meaning of failfast is that if the driver (on this host) ever
24238  * encounters the scsi error return code RESERVATION_CONFLICT from the device,
24239  * it should immediately panic the host. The motivation for this ioctl is that
24240  * if this host does encounter reservation conflict, the underlying cause is
24241  * that some other host of the cluster has decided that this host is no longer
24242  * in the cluster and has seized control of the disks for itself. Since this
24243  * host is no longer in the cluster, it ought to panic itself. The
24244  * MHIOCENFAILFAST ioctl does two things:
24245  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
24246  *      error to panic the host
24247  *      (b) it sets up a periodic timer to test whether this host still has
24248  *      "access" (in that no other host has reserved the device):  if the
24249  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
24250  *      purpose of that periodic timer is to handle scenarios where the host is
24251  *      otherwise temporarily quiescent, temporarily doing no real i/o.
24252  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
24253  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
24254  * the device itself.
24255  *
24256  * SCSI-3 PGR
24257  * A direct semantic implementation of the SCSI-3 Persistent Reservation
24258  * facility is supported through the shared multihost disk ioctls
24259  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
24260  * MHIOCGRP_PREEMPTANDABORT, MHIOCGRP_CLEAR)
24261  *
24262  * Reservation Reclaim:
24263  * --------------------
24264  * To support the lost reservation reclaim operations this driver creates a
24265  * single thread to handle reinstating reservations on all devices that have
24266  * lost reservations sd_resv_reclaim_requests are logged for all devices that
 * have LOST RESERVATIONS when the scsi watch facility calls back
 * sd_mhd_watch_cb
24268  * and the reservation reclaim thread loops through the requests to regain the
24269  * lost reservations.
24270  */
24271 
24272 /*
24273  *    Function: sd_check_mhd()
24274  *
24275  * Description: This function sets up and submits a scsi watch request or
24276  *		terminates an existing watch request. This routine is used in
24277  *		support of reservation reclaim.
24278  *
24279  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
24280  *			 among multiple watches that share the callback function
 *		interval - the number of milliseconds specifying the watch
 *			   interval for issuing TEST UNIT READY commands. If
24283  *			   set to 0 the watch should be terminated. If the
24284  *			   interval is set to 0 and if the device is required
24285  *			   to hold reservation while disabling failfast, the
24286  *			   watch is restarted with an interval of
24287  *			   reinstate_resv_delay.
24288  *
24289  * Return Code: 0	   - Successful submit/terminate of scsi watch request
24290  *		ENXIO      - Indicates an invalid device was specified
24291  *		EAGAIN     - Unable to submit the scsi watch request
24292  */
24293 
static int
sd_check_mhd(dev_t dev, int interval)
{
	struct sd_lun	*un;
	opaque_t	token;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* is this a watch termination request? */
	if (interval == 0) {
		mutex_enter(SD_MUTEX(un));
		/* if there is an existing watch task then terminate it */
		if (un->un_mhd_token) {
			token = un->un_mhd_token;
			un->un_mhd_token = NULL;
			mutex_exit(SD_MUTEX(un));
			/* Drop the mutex across the (blocking) terminate. */
			(void) scsi_watch_request_terminate(token,
			    SCSI_WATCH_TERMINATE_ALL_WAIT);
			mutex_enter(SD_MUTEX(un));
		} else {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Note: If we return here we don't check for the
			 * failfast case. This is the original legacy
			 * implementation but perhaps we should be checking
			 * the failfast case.
			 */
			return (0);
		}
		/*
		 * If the device is required to hold reservation while
		 * disabling failfast, we need to restart the scsi_watch
		 * routine with an interval of reinstate_resv_delay.
		 */
		if (un->un_resvd_status & SD_RESERVE) {
			/* reinstate_resv_delay is in usecs; watch wants ms */
			interval = sd_reinstate_resv_delay / 1000;
		} else {
			/* no failfast so bail */
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * adjust minimum time interval to 1 second,
	 * and convert from msecs to usecs
	 * (interval is in milliseconds on entry)
	 */
	if (interval > 0 && interval < 1000) {
		interval = 1000;
	}
	interval *= 1000;

	/*
	 * submit the request to the scsi_watch service
	 */
	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
	if (token == NULL) {
		return (EAGAIN);
	}

	/*
	 * save token for termination later on
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_mhd_token = token;
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24366 
24367 
24368 /*
24369  *    Function: sd_mhd_watch_cb()
24370  *
24371  * Description: This function is the call back function used by the scsi watch
24372  *		facility. The scsi watch facility sends the "Test Unit Ready"
24373  *		and processes the status. If applicable (i.e. a "Unit Attention"
24374  *		status and automatic "Request Sense" not used) the scsi watch
24375  *		facility will send a "Request Sense" and retrieve the sense data
24376  *		to be passed to this callback function. In either case the
24377  *		automatic "Request Sense" or the facility submitting one, this
24378  *		callback is passed the status and sense data.
24379  *
24380  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24381  *			among multiple watches that share this callback function
24382  *		resultp - scsi watch facility result packet containing scsi
24383  *			  packet, status byte and sense data
24384  *
24385  * Return Code: 0 - continue the watch task
24386  *		non-zero - terminate the watch task
24387  */
24388 
static int
sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp;
	uint8_t				*sensep;
	struct scsi_pkt			*pkt;
	uchar_t				actual_sense_length;
	dev_t				dev = (dev_t)arg;

	ASSERT(resultp != NULL);
	statusp			= resultp->statusp;
	sensep			= (uint8_t *)resultp->sensep;
	pkt			= resultp->pkt;
	actual_sense_length	= resultp->actual_sense_length;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		/*
		 * Soft state is gone; a non-zero return terminates this
		 * watch task (see the function's return code contract).
		 */
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));

	/* Begin processing of the status and/or sense data */
	if (pkt->pkt_reason != CMD_CMPLT) {
		/* Handle the incomplete packet */
		sd_mhd_watch_incomplete(un, pkt);
		return (0);
	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
		if (*((unsigned char *)statusp)
		    == STATUS_RESERVATION_CONFLICT) {
			/*
			 * Handle a reservation conflict by panicking if
			 * configured for failfast or by logging the conflict
			 * and updating the reservation status
			 */
			mutex_enter(SD_MUTEX(un));
			if ((un->un_resvd_status & SD_FAILFAST) &&
			    (sd_failfast_enable)) {
				sd_panic_for_res_conflict(un);
				/*NOTREACHED*/
			}
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_cb: Reservation Conflict\n");
			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
			mutex_exit(SD_MUTEX(un));
		}
	}

	/*
	 * Examine any sense data for a power-on/reset indication.  Note
	 * the lock discipline: every path that falls through to the
	 * SD_RESERVE check below does so with SD_MUTEX held.
	 */
	if (sensep != NULL) {
		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
			mutex_enter(SD_MUTEX(un));
			if ((scsi_sense_asc(sensep) ==
			    SD_SCSI_RESET_SENSE_CODE) &&
			    (un->un_resvd_status & SD_RESERVE)) {
				/*
				 * The additional sense code indicates a power
				 * on or bus device reset has occurred; update
				 * the reservation status.
				 */
				un->un_resvd_status |=
				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_mhd_watch_cb: Lost Reservation\n");
			}
		} else {
			/*
			 * Sense data shorter than expected; skip this poll
			 * entirely (the mutex is not held on this path).
			 */
			return (0);
		}
	} else {
		mutex_enter(SD_MUTEX(un));
	}

	if ((un->un_resvd_status & SD_RESERVE) &&
	    (un->un_resvd_status & SD_LOST_RESERVE)) {
		if (un->un_resvd_status & SD_WANT_RESERVE) {
			/*
			 * A reset occurred in between the last probe and this
			 * one so if a timeout is pending cancel it.
			 */
			if (un->un_resvd_timeid) {
				timeout_id_t temp_id = un->un_resvd_timeid;
				un->un_resvd_timeid = NULL;
				/* drop the mutex; untimeout() may block */
				mutex_exit(SD_MUTEX(un));
				(void) untimeout(temp_id);
				mutex_enter(SD_MUTEX(un));
			}
			un->un_resvd_status &= ~SD_WANT_RESERVE;
		}
		if (un->un_resvd_timeid == 0) {
			/* Schedule a timeout to handle the lost reservation */
			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
			    (void *)dev,
			    drv_usectohz(sd_reinstate_resv_delay));
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24488 
24489 
24490 /*
24491  *    Function: sd_mhd_watch_incomplete()
24492  *
24493  * Description: This function is used to find out why a scsi pkt sent by the
24494  *		scsi watch facility was not completed. Under some scenarios this
24495  *		routine will return. Otherwise it will send a bus reset to see
24496  *		if the drive is still online.
24497  *
24498  *   Arguments: un  - driver soft state (unit) structure
24499  *		pkt - incomplete scsi pkt
24500  */
24501 
static void
sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
{
	int	be_chatty;	/* log transport failures unless FLAG_SILENT */
	int	perr;		/* pkt saw a parity error */

	ASSERT(pkt != NULL);
	ASSERT(un != NULL);
	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
	perr		= (pkt->pkt_statistics & STAT_PERR);

	mutex_enter(SD_MUTEX(un));
	if (un->un_state == SD_STATE_DUMPING) {
		/* system is crash-dumping to this device; stay out of the way */
		mutex_exit(SD_MUTEX(un));
		return;
	}

	switch (pkt->pkt_reason) {
	case CMD_UNX_BUS_FREE:
		/*
		 * If we had a parity error that caused the target to drop BSY*,
		 * don't be chatty about it.
		 */
		if (perr && be_chatty) {
			be_chatty = 0;
		}
		break;
	case CMD_TAG_REJECT:
		/*
		 * The SCSI-2 spec states that a tag reject will be sent by the
		 * target if tagged queuing is not supported. A tag reject may
		 * also be sent during certain initialization periods or to
		 * control internal resources. For the latter case the target
		 * may also return Queue Full.
		 *
		 * If this driver receives a tag reject from a target that is
		 * going through an init period or controlling internal
		 * resources tagged queuing will be disabled. This is a less
		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queueing is not supported
		 */
		pkt->pkt_flags = 0;
		un->un_tagflags = 0;

		if (un->un_f_opt_queueing == TRUE) {
			un->un_throttle = min(un->un_throttle, 3);
		} else {
			un->un_throttle = 1;
		}
		/* drop the mutex; scsi_ifsetcap() may call into the HBA */
		mutex_exit(SD_MUTEX(un));
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
		mutex_enter(SD_MUTEX(un));
		break;
	case CMD_INCOMPLETE:
		/*
		 * The transport stopped with an abnormal state, fallthrough and
		 * reset the target and/or bus unless selection did not complete
		 * (indicated by STATE_GOT_BUS) in which case we don't want to
		 * go through a target/bus reset
		 */
		if (pkt->pkt_state == STATE_GOT_BUS) {
			break;
		}
		/*FALLTHROUGH*/

	case CMD_TIMEOUT:
	default:
		/*
		 * The lun may still be running the command, so a lun reset
		 * should be attempted. If the lun reset fails or cannot be
		 * issued, than try a target reset. Lastly try a bus reset.
		 *
		 * Skip the reset escalation entirely if the transport has
		 * already performed a reset or aborted the command.
		 */
		if ((pkt->pkt_statistics &
		    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) == 0) {
			int reset_retval = 0;
			/*
			 * NOTE(review): un_f_allow_bus_device_reset and
			 * un_f_lun_reset_enabled are read below after the
			 * mutex is dropped; presumably these flags are
			 * stable after attach -- confirm.
			 */
			mutex_exit(SD_MUTEX(un));
			if (un->un_f_allow_bus_device_reset == TRUE) {
				if (un->un_f_lun_reset_enabled == TRUE) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			}
			mutex_enter(SD_MUTEX(un));
		}
		break;
	}

	/* A device/bus reset has occurred; update the reservation status. */
	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: Lost Reservation\n");
		}
	}

	/*
	 * The disk has been turned off; Update the device state.
	 *
	 * Note: Should we be offlining the disk here?
	 */
	if (pkt->pkt_state == STATE_GOT_BUS) {
		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
		    "Disk not responding to selection\n");
		if (un->un_state != SD_STATE_OFFLINE) {
			New_state(un, SD_STATE_OFFLINE);
		}
	} else if (be_chatty) {
		/*
		 * suppress messages if they are all the same pkt reason;
		 * with TQ, many (up to 256) are returned with the same
		 * pkt_reason
		 */
		if (pkt->pkt_reason != un->un_last_pkt_reason) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: "
			    "SCSI transport failed: reason '%s'\n",
			    scsi_rname(pkt->pkt_reason));
		}
	}
	un->un_last_pkt_reason = pkt->pkt_reason;
	mutex_exit(SD_MUTEX(un));
}
24636 
24637 
24638 /*
24639  *    Function: sd_sname()
24640  *
24641  * Description: This is a simple little routine to return a string containing
24642  *		a printable description of command status byte for use in
24643  *		logging.
24644  *
24645  *   Arguments: status - pointer to a status byte
24646  *
24647  * Return Code: char * - string containing status description.
24648  */
24649 
24650 static char *
24651 sd_sname(uchar_t status)
24652 {
24653 	switch (status & STATUS_MASK) {
24654 	case STATUS_GOOD:
24655 		return ("good status");
24656 	case STATUS_CHECK:
24657 		return ("check condition");
24658 	case STATUS_MET:
24659 		return ("condition met");
24660 	case STATUS_BUSY:
24661 		return ("busy");
24662 	case STATUS_INTERMEDIATE:
24663 		return ("intermediate");
24664 	case STATUS_INTERMEDIATE_MET:
24665 		return ("intermediate - condition met");
24666 	case STATUS_RESERVATION_CONFLICT:
24667 		return ("reservation_conflict");
24668 	case STATUS_TERMINATED:
24669 		return ("command terminated");
24670 	case STATUS_QFULL:
24671 		return ("queue full");
24672 	default:
24673 		return ("<unknown status>");
24674 	}
24675 }
24676 
24677 
24678 /*
24679  *    Function: sd_mhd_resvd_recover()
24680  *
24681  * Description: This function adds a reservation entry to the
24682  *		sd_resv_reclaim_request list and signals the reservation
24683  *		reclaim thread that there is work pending. If the reservation
24684  *		reclaim thread has not been previously created this function
24685  *		will kick it off.
24686  *
24687  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24688  *			among multiple watches that share this callback function
24689  *
24690  *     Context: This routine is called by timeout() and is run in interrupt
24691  *		context. It must not sleep or call other functions which may
24692  *		sleep.
24693  */
24694 
static void
sd_mhd_resvd_recover(void *arg)
{
	dev_t			dev = (dev_t)arg;
	struct sd_lun		*un;
	struct sd_thr_request	*sd_treq = NULL;
	struct sd_thr_request	*sd_cur = NULL;
	struct sd_thr_request	*sd_prev = NULL;
	int			already_there = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return;
	}

	mutex_enter(SD_MUTEX(un));
	un->un_resvd_timeid = NULL;
	if (un->un_resvd_status & SD_WANT_RESERVE) {
		/*
		 * There was a reset so don't issue the reserve, allow the
		 * sd_mhd_watch_cb callback function to notice this and
		 * reschedule the timeout for reservation.
		 */
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Add this device to the sd_resv_reclaim_request list and the
	 * sd_resv_reclaim_thread should take care of the rest.
	 *
	 * Note: We can't sleep in this context so if the memory allocation
	 * fails allow the sd_mhd_watch_cb callback function to notice this and
	 * reschedule the timeout for reservation.  (4378460)
	 */
	sd_treq = (struct sd_thr_request *)
	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
	if (sd_treq == NULL) {
		return;
	}

	sd_treq->sd_thr_req_next = NULL;
	sd_treq->dev = dev;
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		sd_tr.srq_thr_req_head = sd_treq;
	} else {
		/*
		 * Walk the pending list; only append the new request if
		 * this device is not already queued.
		 */
		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
			if (sd_cur->dev == dev) {
				/*
				 * already in Queue so don't log
				 * another request for the device
				 */
				already_there = 1;
				break;
			}
			sd_prev = sd_cur;
		}
		if (!already_there) {
			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
			    "logging request for %lx\n", dev);
			/* sd_prev is the tail of the list here */
			sd_prev->sd_thr_req_next = sd_treq;
		} else {
			kmem_free(sd_treq, sizeof (struct sd_thr_request));
		}
	}

	/*
	 * Create a kernel thread to do the reservation reclaim and free up this
	 * thread. We cannot block this thread while we go away to do the
	 * reservation reclaim
	 */
	if (sd_tr.srq_resv_reclaim_thread == NULL)
		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
		    sd_resv_reclaim_thread, NULL,
		    0, &p0, TS_RUN, v.v_maxsyspri - 2);

	/* Tell the reservation reclaim thread that it has work to do */
	cv_signal(&sd_tr.srq_resv_reclaim_cv);
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
24777 
24778 /*
24779  *    Function: sd_resv_reclaim_thread()
24780  *
24781  * Description: This function implements the reservation reclaim operations
24782  *
24783  *   Arguments: none.  The thread walks the global sd_resv_reclaim_request
24784  *		list (sd_tr); it is created with a NULL argument.
24785  */
24786 
static void
sd_resv_reclaim_thread()
{
	struct sd_lun		*un;
	struct sd_thr_request	*sd_mhreq;

	/*
	 * Wait for work.  NOTE(review): the cv_wait() is under an "if",
	 * not a "while"; a wakeup with an empty queue simply causes the
	 * thread to clean up and exit below, and sd_mhd_resvd_recover()
	 * will create a new thread the next time work arrives.
	 */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		cv_wait(&sd_tr.srq_resv_reclaim_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	}

	/* Loop while we have work */
	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
		un = ddi_get_soft_state(sd_state,
		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
		if (un == NULL) {
			/*
			 * softstate structure is NULL so just
			 * dequeue the request and continue
			 */
			sd_tr.srq_thr_req_head =
			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
			kmem_free(sd_tr.srq_thr_cur_req,
			    sizeof (struct sd_thr_request));
			continue;
		}

		/* dequeue the request */
		sd_mhreq = sd_tr.srq_thr_cur_req;
		sd_tr.srq_thr_req_head =
		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
		/*
		 * Drop the global mutex while issuing the (blocking)
		 * reserve; srq_thr_cur_req stays set so that
		 * sd_rmv_resv_reclaim_req() can see the in-progress
		 * request and wait on srq_inprocess_cv.
		 */
		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);

		/*
		 * Reclaim reservation only if SD_RESERVE is still set. There
		 * may have been a call to MHIOCRELEASE before we got here.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			/*
			 * Note: The SD_LOST_RESERVE flag is cleared before
			 * reclaiming the reservation. If this is done after the
			 * call to sd_reserve_release a reservation loss in the
			 * window between pkt completion of reserve cmd and
			 * mutex_enter below may not be recognized
			 */
			un->un_resvd_status &= ~SD_LOST_RESERVE;
			mutex_exit(SD_MUTEX(un));

			if (sd_reserve_release(sd_mhreq->dev,
			    SD_RESERVE) == 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: "
				    "Reservation Recovered\n");
			} else {
				/* reclaim failed; re-mark as lost for retry */
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_LOST_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: Failed "
				    "Reservation Recovery\n");
			}
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
		/*
		 * wakeup the destroy thread if anyone is waiting on
		 * us to complete.
		 */
		cv_signal(&sd_tr.srq_inprocess_cv);
		SD_TRACE(SD_LOG_IOCTL_MHD, un,
		    "sd_resv_reclaim_thread: cv_signalling current request \n");
	}

	/*
	 * cleanup the sd_tr structure now that this thread will not exist
	 */
	ASSERT(sd_tr.srq_thr_req_head == NULL);
	ASSERT(sd_tr.srq_thr_cur_req == NULL);
	sd_tr.srq_resv_reclaim_thread = NULL;
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
	thread_exit();
}
24879 
24880 
24881 /*
24882  *    Function: sd_rmv_resv_reclaim_req()
24883  *
24884  * Description: This function removes any pending reservation reclaim requests
24885  *		for the specified device.
24886  *
24887  *   Arguments: dev - the device 'dev_t'
24888  */
24889 
static void
sd_rmv_resv_reclaim_req(dev_t dev)
{
	struct sd_thr_request *sd_mhreq;
	struct sd_thr_request *sd_prev;

	/* Remove a reservation reclaim request from the list */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
		/*
		 * We are attempting to reinstate reservation for
		 * this device. We wait for sd_reserve_release()
		 * to return before we return.
		 *
		 * The reclaim thread cv_signals srq_inprocess_cv after
		 * it finishes the current request.
		 */
		cv_wait(&sd_tr.srq_inprocess_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	} else {
		/* Special-case a match at the head of the list. */
		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
		if (sd_mhreq && sd_mhreq->dev == dev) {
			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
			return;
		}
		/*
		 * Otherwise scan for a match, tracking the predecessor so
		 * the node can be unlinked.  (The inner NULL check is
		 * redundant with the loop condition; legacy code.)
		 */
		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
			if (sd_mhreq && sd_mhreq->dev == dev) {
				break;
			}
			sd_prev = sd_mhreq;
		}
		if (sd_mhreq != NULL) {
			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		}
	}
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
24927 
24928 
24929 /*
24930  *    Function: sd_mhd_reset_notify_cb()
24931  *
24932  * Description: This is a call back function for scsi_reset_notify. This
24933  *		function updates the softstate reserved status and logs the
24934  *		reset. The driver scsi watch facility callback function
24935  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
24936  *		will reclaim the reservation.
24937  *
24938  *   Arguments: arg  - driver soft state (unit) structure
24939  */
24940 
24941 static void
24942 sd_mhd_reset_notify_cb(caddr_t arg)
24943 {
24944 	struct sd_lun *un = (struct sd_lun *)arg;
24945 
24946 	mutex_enter(SD_MUTEX(un));
24947 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
24948 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
24949 		SD_INFO(SD_LOG_IOCTL_MHD, un,
24950 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
24951 	}
24952 	mutex_exit(SD_MUTEX(un));
24953 }
24954 
24955 
24956 /*
24957  *    Function: sd_take_ownership()
24958  *
24959  * Description: This routine implements an algorithm to achieve a stable
24960  *		reservation on disks which don't implement priority reserve,
24961  *		and makes sure that other host lose re-reservation attempts.
24962  *		This algorithm contains of a loop that keeps issuing the RESERVE
24963  *		for some period of time (min_ownership_delay, default 6 seconds)
24964  *		During that loop, it looks to see if there has been a bus device
24965  *		reset or bus reset (both of which cause an existing reservation
24966  *		to be lost). If the reservation is lost issue RESERVE until a
24967  *		period of min_ownership_delay with no resets has gone by, or
24968  *		until max_ownership_delay has expired. This loop ensures that
24969  *		the host really did manage to reserve the device, in spite of
24970  *		resets. The looping for min_ownership_delay (default six
24971  *		seconds) is important to early generation clustering products,
24972  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
24973  *		MHIOCENFAILFAST periodic timer of two seconds. By having
24974  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
24975  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
24976  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
24977  *		have already noticed, via the MHIOCENFAILFAST polling, that it
24978  *		no longer "owns" the disk and will have panicked itself.  Thus,
24979  *		the host issuing the MHIOCTKOWN is assured (with timing
24980  *		dependencies) that by the time it actually starts to use the
24981  *		disk for real work, the old owner is no longer accessing it.
24982  *
24983  *		min_ownership_delay is the minimum amount of time for which the
24984  *		disk must be reserved continuously devoid of resets before the
24985  *		MHIOCTKOWN ioctl will return success.
24986  *
24987  *		max_ownership_delay indicates the amount of time by which the
24988  *		take ownership should succeed or timeout with an error.
24989  *
24990  *   Arguments: dev - the device 'dev_t'
24991  *		*p  - struct containing timing info.
24992  *
24993  * Return Code: 0 for success or error code
24994  */
24995 
static int
sd_take_ownership(dev_t dev, struct mhioctkown *p)
{
	struct sd_lun	*un;
	int		rval;
	int		err;
	int		reservation_count   = 0;
	int		min_ownership_delay =  6000000; /* in usec */
	int		max_ownership_delay = 30000000; /* in usec */
	clock_t		start_time;	/* starting time of this algorithm */
	clock_t		end_time;	/* time limit for giving up */
	clock_t		ownership_time;	/* time limit for stable ownership */
	clock_t		current_time;
	clock_t		previous_current_time;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Attempt a device reservation. A priority reservation is requested.
	 */
	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
	    != SD_SUCCESS) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: return(1)=%d\n", rval);
		return (rval);
	}

	/* Update the softstate reserved status to indicate the reservation */
	mutex_enter(SD_MUTEX(un));
	un->un_resvd_status |= SD_RESERVE;
	un->un_resvd_status &=
	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
	mutex_exit(SD_MUTEX(un));

	/* Caller-supplied delays (in msec) override the defaults. */
	if (p != NULL) {
		if (p->min_ownership_delay != 0) {
			min_ownership_delay = p->min_ownership_delay * 1000;
		}
		if (p->max_ownership_delay != 0) {
			max_ownership_delay = p->max_ownership_delay * 1000;
		}
	}
	SD_INFO(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: min, max delays: %d, %d\n",
	    min_ownership_delay, max_ownership_delay);

	/*
	 * All times below are lbolt ticks; differences (rather than
	 * direct comparisons) are used so that lbolt wrap is handled.
	 */
	start_time = ddi_get_lbolt();
	current_time	= start_time;
	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
	end_time	= start_time + drv_usectohz(max_ownership_delay);

	while (current_time - end_time < 0) {
		delay(drv_usectohz(500000));

		/* On a reserve failure, retry once before giving up. */
		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
				mutex_enter(SD_MUTEX(un));
				rval = (un->un_resvd_status &
				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
				mutex_exit(SD_MUTEX(un));
				break;
			}
		}
		previous_current_time = current_time;
		current_time = ddi_get_lbolt();
		mutex_enter(SD_MUTEX(un));
		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
			/*
			 * The reservation was lost or had to be retried;
			 * restart the min_ownership_delay stability window.
			 */
			ownership_time = ddi_get_lbolt() +
			    drv_usectohz(min_ownership_delay);
			reservation_count = 0;
		} else {
			reservation_count++;
		}
		un->un_resvd_status |= SD_RESERVE;
		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
		mutex_exit(SD_MUTEX(un));

		SD_INFO(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: ticks for loop iteration=%ld, "
		    "reservation=%s\n", (current_time - previous_current_time),
		    reservation_count ? "ok" : "reclaimed");

		/*
		 * Success requires both the stability window to elapse and
		 * at least 4 consecutive successful reserves (~2 seconds
		 * at the 500ms poll interval above).
		 */
		if (current_time - ownership_time >= 0 &&
		    reservation_count >= 4) {
			rval = 0; /* Achieved a stable ownership */
			break;
		}
		if (current_time - end_time >= 0) {
			rval = EACCES; /* No ownership in max possible time */
			break;
		}
	}
	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: return(2)=%d\n", rval);
	return (rval);
}
25094 
25095 
25096 /*
25097  *    Function: sd_reserve_release()
25098  *
25099  * Description: This function builds and sends scsi RESERVE, RELEASE, and
25100  *		PRIORITY RESERVE commands based on a user specified command type
25101  *
25102  *   Arguments: dev - the device 'dev_t'
25103  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25104  *		      SD_RESERVE, SD_RELEASE
25105  *
25106  * Return Code: 0 or Error Code
25107  */
25108 
static int
sd_reserve_release(dev_t dev, int cmd)
{
	struct uscsi_cmd	*com = NULL;
	struct sd_lun		*un = NULL;
	char			cdb[CDB_GROUP0];
	int			rval;

	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
	    (cmd == SD_PRIORITY_RESERVE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* instantiate and initialize the command and cdb */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP0);
	com->uscsi_flags   = USCSI_SILENT;
	com->uscsi_timeout = un->un_reserve_release_time;
	com->uscsi_cdblen  = CDB_GROUP0;
	com->uscsi_cdb	   = cdb;
	/* SD_PRIORITY_RESERVE uses the same RESERVE cdb as SD_RESERVE */
	if (cmd == SD_RELEASE) {
		cdb[0] = SCMD_RELEASE;
	} else {
		cdb[0] = SCMD_RESERVE;
	}

	/* Send the command. */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_STANDARD);

	/*
	 * "break" a reservation that is held by another host, by issuing a
	 * reset if priority reserve is desired, and we could not get the
	 * device.
	 */
	if ((cmd == SD_PRIORITY_RESERVE) &&
	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
		/*
		 * First try to reset the LUN. If we cannot, then try a target
		 * reset, followed by a bus reset if the target reset fails.
		 */
		int reset_retval = 0;
		if (un->un_f_lun_reset_enabled == TRUE) {
			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		}
		if (reset_retval == 0) {
			/* The LUN reset either failed or was not issued */
			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
		}
		if ((reset_retval == 0) &&
		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
			/* every reset level failed; give up */
			rval = EIO;
			kmem_free(com, sizeof (*com));
			return (rval);
		}

		/* rebuild the command from scratch for the retry */
		bzero(com, sizeof (struct uscsi_cmd));
		com->uscsi_flags   = USCSI_SILENT;
		com->uscsi_cdb	   = cdb;
		com->uscsi_cdblen  = CDB_GROUP0;
		com->uscsi_timeout = 5;

		/*
		 * Reissue the last reserve command, this time without request
		 * sense.  Assume that it is just a regular reserve command.
		 */
		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
		    SD_PATH_STANDARD);
	}

	/* Return an error if still getting a reservation conflict. */
	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
		rval = EACCES;
	}

	kmem_free(com, sizeof (*com));
	return (rval);
}
25189 
25190 
25191 #define	SD_NDUMP_RETRIES	12
25192 /*
25193  *	System Crash Dump routine
25194  */
25195 
/*
 * System crash dump entry point for the sd target driver.
 *
 *   Arguments: dev   - device number of the dump target
 *		addr  - kernel virtual address of the data to write
 *		blkno - partition-relative starting block, in DEV_BSIZE units
 *			(enforced by the blknomask alignment check below)
 *		nblk  - number of DEV_BSIZE blocks to write
 *
 * Return Code: 0 on success; ENXIO, EINVAL or EIO on failure.
 *
 * All I/O is issued in polled (FLAG_NOINTR) mode via sd_scsi_poll(),
 * since the dump path cannot depend on normal interrupt delivery.
 */
static int
sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int		instance;
	int		partition;
	int		i;
	int		err;
	struct sd_lun	*un;
	struct scsi_pkt *wr_pktp;
	struct buf	*wr_bp;
	struct buf	wr_buf;
	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
	daddr_t		tgt_blkno;	/* rmw - blkno for target */
	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
	size_t		io_start_offset;
	int		doing_rmw = FALSE;
	int		rval;
	ssize_t		dma_resid;
	daddr_t		oblkno;
	diskaddr_t	nblks = 0;
	diskaddr_t	start_block;

	/* Refuse to dump to an unattached, unlabeled, or CD-ROM device. */
	instance = SDUNIT(dev);
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
		return (ENXIO);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))

	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");

	partition = SDPART(dev);
	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);

	if (!(NOT_DEVBSIZE(un))) {
		int secmask = 0;
		int blknomask = 0;

		/*
		 * The dump start and length must align to the target's
		 * block size, since no read-modify-write is done below
		 * in this (!NOT_DEVBSIZE) case.
		 */
		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
		secmask = un->un_tgt_blocksize - 1;

		if (blkno & blknomask) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump start block not modulo %d\n",
			    un->un_tgt_blocksize);
			return (EINVAL);
		}

		if ((nblk * DEV_BSIZE) & secmask) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump length not modulo %d\n",
			    un->un_tgt_blocksize);
			return (EINVAL);
		}

	}

	/* Validate blocks to dump at against partition size. */

	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);

	if (NOT_DEVBSIZE(un)) {
		if ((blkno + nblk) > nblks) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump range larger than partition: "
			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
			    blkno, nblk, nblks);
			return (EINVAL);
		}
	} else {
		/* Compare in target-block units when tgt blocksize differs. */
		if (((blkno / (un->un_tgt_blocksize / DEV_BSIZE)) +
		    (nblk / (un->un_tgt_blocksize / DEV_BSIZE))) > nblks) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump range larger than partition: "
			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
			    blkno, nblk, nblks);
			return (EINVAL);
		}
	}

	mutex_enter(&un->un_pm_mutex);
	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
		struct scsi_pkt *start_pktp;

		mutex_exit(&un->un_pm_mutex);

		/*
		 * use pm framework to power on HBA 1st
		 */
		(void) pm_raise_power(SD_DEVINFO(un), 0,
		    SD_PM_STATE_ACTIVE(un));

		/*
		 * Dump no longer uses sdpower to power on a device, it's
		 * in-line here so it can be done in polled mode.
		 */

		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");

		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);

		if (start_pktp == NULL) {
			/* We were not given a SCSI packet, fail. */
			return (EIO);
		}
		/* Hand-build a polled START STOP UNIT (start) command. */
		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
		start_pktp->pkt_flags = FLAG_NOINTR;

		mutex_enter(SD_MUTEX(un));
		SD_FILL_SCSI1_LUN(un, start_pktp);
		mutex_exit(SD_MUTEX(un));
		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.
		 */
		if (sd_scsi_poll(un, start_pktp) != 0) {
			scsi_destroy_pkt(start_pktp);
			return (EIO);
		}
		scsi_destroy_pkt(start_pktp);
		(void) sd_pm_state_change(un, SD_PM_STATE_ACTIVE(un),
		    SD_PM_STATE_CHANGE);
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	mutex_enter(SD_MUTEX(un));
	/* Disable throttling; dump I/O is single-threaded and polled. */
	un->un_throttle = 0;

	/*
	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in a
	 * good state so no bus reset is required.
	 * Clear sense data via Request Sense cmd.
	 * In sddump we don't care about allow_bus_device_reset anymore
	 */

	if ((un->un_state != SD_STATE_SUSPENDED) &&
	    (un->un_state != SD_STATE_DUMPING)) {

		New_state(un, SD_STATE_DUMPING);

		if (un->un_f_is_fibre == FALSE) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Attempt a bus reset for parallel scsi.
			 *
			 * Note: A bus reset is required because on some host
			 * systems (i.e. E420R) a bus device reset is
			 * insufficient to reset the state of the target.
			 *
			 * Note: Don't issue the reset for fibre-channel,
			 * because this tends to hang the bus (loop) for
			 * too long while everyone is logging out and in
			 * and the deadman timer for dumping will fire
			 * before the dump is complete.
			 */
			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
				mutex_enter(SD_MUTEX(un));
				Restore_state(un);
				mutex_exit(SD_MUTEX(un));
				return (EIO);
			}

			/* Delay to give the device some recovery time. */
			drv_usecwait(10000);

			if (sd_send_polled_RQS(un) == SD_FAILURE) {
				SD_INFO(SD_LOG_DUMP, un,
				    "sddump: sd_send_polled_RQS failed\n");
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Convert the partition-relative block number to a
	 * disk physical block number.
	 */
	if (NOT_DEVBSIZE(un)) {
		blkno += start_block;
	} else {
		blkno = blkno / (un->un_tgt_blocksize / DEV_BSIZE);
		blkno += start_block;
	}

	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);


	/*
	 * Check if the device has a non-512 block size.
	 */
	wr_bp = NULL;
	if (NOT_DEVBSIZE(un)) {
		tgt_byte_offset = blkno * un->un_sys_blocksize;
		tgt_byte_count = nblk * un->un_sys_blocksize;
		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
		    (tgt_byte_count % un->un_tgt_blocksize)) {
			doing_rmw = TRUE;
			/*
			 * Calculate the block number and number of block
			 * in terms of the media block size.
			 */
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk =
			    ((tgt_byte_offset + tgt_byte_count +
			    (un->un_tgt_blocksize - 1)) /
			    un->un_tgt_blocksize) - tgt_blkno;

			/*
			 * Invoke the routine which is going to do read part
			 * of read-modify-write.
			 * Note that this routine returns a pointer to
			 * a valid bp in wr_bp.
			 */
			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
			    &wr_bp);
			if (err) {
				mutex_exit(SD_MUTEX(un));
				return (err);
			}
			/*
			 * Offset is being calculated as -
			 * (original block # * system block size) -
			 * (new block # * target block size)
			 */
			io_start_offset =
			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));

			ASSERT(io_start_offset < un->un_tgt_blocksize);
			/*
			 * Do the modify portion of read modify write.
			 */
			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
			    (size_t)nblk * un->un_sys_blocksize);
		} else {
			doing_rmw = FALSE;
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
		}

		/* Convert blkno and nblk to target blocks */
		blkno = tgt_blkno;
		nblk = tgt_nblk;
	} else {
		/* No RMW needed: build a simple buf around the dump data. */
		wr_bp = &wr_buf;
		bzero(wr_bp, sizeof (struct buf));
		wr_bp->b_flags		= B_BUSY;
		wr_bp->b_un.b_addr	= addr;
		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
		wr_bp->b_resid		= 0;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Obtain a SCSI packet for the write command.
	 * It should be safe to call the allocator here without
	 * worrying about being locked for DVMA mapping because
	 * the address we're passed is already a DVMA mapping
	 *
	 * We are also not going to worry about semaphore ownership
	 * in the dump buffer. Dumping is single threaded at present.
	 */

	wr_pktp = NULL;

	dma_resid = wr_bp->b_bcount;
	oblkno = blkno;

	if (!(NOT_DEVBSIZE(un))) {
		nblk = nblk / (un->un_tgt_blocksize / DEV_BSIZE);
	}

	/*
	 * Outer loop: repeats only when the HBA performs partial DMA
	 * transfers (un_partial_dma_supported), continuing until the
	 * whole buffer has been transferred (dma_resid == 0).
	 */
	while (dma_resid != 0) {

	/* Retry packet allocation up to SD_NDUMP_RETRIES times. */
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
		wr_bp->b_flags &= ~B_ERROR;

		if (un->un_partial_dma_supported == 1) {
			blkno = oblkno +
			    ((wr_bp->b_bcount - dma_resid) /
			    un->un_tgt_blocksize);
			nblk = dma_resid / un->un_tgt_blocksize;

			if (wr_pktp) {
				/*
				 * Partial DMA transfers after initial transfer
				 */
				rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
				    blkno, nblk);
			} else {
				/* Initial transfer */
				rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
				    un->un_pkt_flags, NULL_FUNC, NULL,
				    blkno, nblk);
			}
		} else {
			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
			    0, NULL_FUNC, NULL, blkno, nblk);
		}

		if (rval == 0) {
			/* We were given a SCSI packet, continue. */
			break;
		}

		if (i == 0) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; "
				    "error code: 0x%x, retrying",
				    geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; retrying");
			}
		} else if (i != (SD_NDUMP_RETRIES - 1)) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; error code: "
				    "0x%x, retrying\n", geterror(wr_bp));
			}
		} else {
			/* Final retry exhausted: restore state and give up. */
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "error code: 0x%x, retries failed, "
				    "giving up.\n", geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "retries failed, giving up.\n");
			}
			mutex_enter(SD_MUTEX(un));
			Restore_state(un);
			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
				mutex_exit(SD_MUTEX(un));
				scsi_free_consistent_buf(wr_bp);
			} else {
				mutex_exit(SD_MUTEX(un));
			}
			return (EIO);
		}
		drv_usecwait(10000);
	}

	if (un->un_partial_dma_supported == 1) {
		/*
		 * save the resid from PARTIAL_DMA
		 */
		dma_resid = wr_pktp->pkt_resid;
		if (dma_resid != 0)
			nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
		wr_pktp->pkt_resid = 0;
	} else {
		dma_resid = 0;
	}

	/* SunBug 1222170 */
	wr_pktp->pkt_flags = FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");

		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
		    (wr_pktp->pkt_resid == 0)) {
			err = SD_SUCCESS;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone.
		 */
		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Error while dumping state...Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with CHECK, try # %d\n", i);
			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with BUSY, try # %d\n", i);

			/* Prefer a LUN reset; fall back to a target reset. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(wr_pktp), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, wr_pktp);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i == SD_NDUMP_RETRIES / 2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}
	}
	}

	scsi_destroy_pkt(wr_pktp);
	mutex_enter(SD_MUTEX(un));
	/* Free the RMW staging buffer if one was allocated above. */
	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
		mutex_exit(SD_MUTEX(un));
		scsi_free_consistent_buf(wr_bp);
	} else {
		mutex_exit(SD_MUTEX(un));
	}
	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
	return (err);
}
25649 
25650 /*
25651  *    Function: sd_scsi_poll()
25652  *
25653  * Description: This is a wrapper for the scsi_poll call.
25654  *
25655  *   Arguments: sd_lun - The unit structure
25656  *              scsi_pkt - The scsi packet being sent to the device.
25657  *
25658  * Return Code: 0 - Command completed successfully with good status
25659  *             -1 - Command failed.  This could indicate a check condition
25660  *                  or other status value requiring recovery action.
25661  *
25662  * NOTE: This code is only called off sddump().
25663  */
25664 
25665 static int
25666 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
25667 {
25668 	int status;
25669 
25670 	ASSERT(un != NULL);
25671 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25672 	ASSERT(pktp != NULL);
25673 
25674 	status = SD_SUCCESS;
25675 
25676 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
25677 		pktp->pkt_flags |= un->un_tagflags;
25678 		pktp->pkt_flags &= ~FLAG_NODISCON;
25679 	}
25680 
25681 	status = sd_ddi_scsi_poll(pktp);
25682 	/*
25683 	 * Scsi_poll returns 0 (success) if the command completes and the
25684 	 * status block is STATUS_GOOD.  We should only check errors if this
25685 	 * condition is not true.  Even then we should send our own request
25686 	 * sense packet only if we have a check condition and auto
25687 	 * request sense has not been performed by the hba.
25688 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
25689 	 */
25690 	if ((status != SD_SUCCESS) &&
25691 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
25692 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
25693 	    (pktp->pkt_reason != CMD_DEV_GONE))
25694 		(void) sd_send_polled_RQS(un);
25695 
25696 	return (status);
25697 }
25698 
25699 /*
25700  *    Function: sd_send_polled_RQS()
25701  *
25702  * Description: This sends the request sense command to a device.
25703  *
25704  *   Arguments: sd_lun - The unit structure
25705  *
25706  * Return Code: 0 - Command completed successfully with good status
25707  *             -1 - Command failed.
25708  *
25709  */
25710 
25711 static int
25712 sd_send_polled_RQS(struct sd_lun *un)
25713 {
25714 	int	ret_val;
25715 	struct	scsi_pkt	*rqs_pktp;
25716 	struct	buf		*rqs_bp;
25717 
25718 	ASSERT(un != NULL);
25719 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25720 
25721 	ret_val = SD_SUCCESS;
25722 
25723 	rqs_pktp = un->un_rqs_pktp;
25724 	rqs_bp	 = un->un_rqs_bp;
25725 
25726 	mutex_enter(SD_MUTEX(un));
25727 
25728 	if (un->un_sense_isbusy) {
25729 		ret_val = SD_FAILURE;
25730 		mutex_exit(SD_MUTEX(un));
25731 		return (ret_val);
25732 	}
25733 
25734 	/*
25735 	 * If the request sense buffer (and packet) is not in use,
25736 	 * let's set the un_sense_isbusy and send our packet
25737 	 */
25738 	un->un_sense_isbusy = 1;
25739 	rqs_pktp->pkt_resid = 0;
25740 	rqs_pktp->pkt_reason = 0;
25741 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
25742 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
25743 
25744 	mutex_exit(SD_MUTEX(un));
25745 
25746 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
25747 	    " 0x%p\n", rqs_bp->b_un.b_addr);
25748 
25749 	/*
25750 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
25751 	 * axle - it has a call into us!
25752 	 */
25753 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
25754 		SD_INFO(SD_LOG_COMMON, un,
25755 		    "sd_send_polled_RQS: RQS failed\n");
25756 	}
25757 
25758 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
25759 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
25760 
25761 	mutex_enter(SD_MUTEX(un));
25762 	un->un_sense_isbusy = 0;
25763 	mutex_exit(SD_MUTEX(un));
25764 
25765 	return (ret_val);
25766 }
25767 
25768 /*
25769  * Defines needed for localized version of the scsi_poll routine.
25770  */
25771 #define	CSEC		10000			/* usecs */
25772 #define	SEC_TO_CSEC	(1000000 / CSEC)
25773 
25774 /*
25775  *    Function: sd_ddi_scsi_poll()
25776  *
25777  * Description: Localized version of the scsi_poll routine.  The purpose is to
25778  *		send a scsi_pkt to a device as a polled command.  This version
25779  *		is to ensure more robust handling of transport errors.
25780  *		Specifically this routine cures not ready, coming ready
25781  *		transition for power up and reset.
25782  *
25783  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
25784  *
25785  * Return Code: 0 - Command completed successfully with good status
25786  *             -1 - Command failed.
25787  *
25788  * NOTE: This code is almost identical to scsi_poll, however before 6668774 can
25789  * be fixed (removing this code), we need to determine how to handle the
25790  * KEY_UNIT_ATTENTION condition below in conditions not as limited as sddump().
25791  *
25792  * NOTE: This code is only called off sddump().
25793  */
static int
sd_ddi_scsi_poll(struct scsi_pkt *pkt)
{
	int			rval = -1;	/* assume failure until proven */
	int			savef;
	long			savet;
	void			(*savec)();
	int			timeout;
	int			busy_count;
	int			poll_delay;
	int			rc;
	uint8_t			*sensep;
	struct scsi_arq_status	*arqstat;
	extern int		do_polled_io;

	ASSERT(pkt->pkt_scbp);

	/*
	 * save old flags..
	 */
	savef = pkt->pkt_flags;
	savec = pkt->pkt_comp;
	savet = pkt->pkt_time;

	pkt->pkt_flags |= FLAG_NOINTR;

	/*
	 * XXX there is nothing in the SCSA spec that states that we should not
	 * do a callback for polled cmds; however, removing this will break sd
	 * and probably other target drivers
	 */
	pkt->pkt_comp = NULL;

	/*
	 * we don't like a polled command without timeout.
	 * 60 seconds seems long enough.
	 */
	if (pkt->pkt_time == 0)
		pkt->pkt_time = SCSI_POLL_TIMEOUT;

	/*
	 * Send polled cmd.
	 *
	 * We do some error recovery for various errors.  Tran_busy,
	 * queue full, and non-dispatched commands are retried every 10 msec.
	 * as they are typically transient failures.  Busy status and Not
	 * Ready are retried every second as this status takes a while to
	 * change.
	 */
	/* timeout and busy_count are both in CSEC (10 msec) units. */
	timeout = pkt->pkt_time * SEC_TO_CSEC;

	for (busy_count = 0; busy_count < timeout; busy_count++) {
		/*
		 * Initialize pkt status variables.
		 */
		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;

		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
			if (rc != TRAN_BUSY) {
				/* Transport failed - give up. */
				break;
			} else {
				/* Transport busy - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */
			}
		} else {
			/*
			 * Transport accepted - check pkt status.
			 */
			rc = (*pkt->pkt_scbp) & STATUS_MASK;
			/*
			 * Sense data is only available when the command
			 * completed with CHECK CONDITION and the HBA has
			 * already performed auto request sense (ARQ).
			 */
			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_CHECK) &&
			    (pkt->pkt_state & STATE_ARQ_DONE)) {
				arqstat =
				    (struct scsi_arq_status *)(pkt->pkt_scbp);
				sensep = (uint8_t *)&arqstat->sts_sensedata;
			} else {
				sensep = NULL;
			}

			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_GOOD)) {
				/* No error - we're done */
				rval = 0;
				break;

			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
				/* Lost connection - give up */
				break;

			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
			    (pkt->pkt_state == 0)) {
				/* Pkt not dispatched - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_QFULL)) {
				/* Queue full - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_BUSY)) {
				/* Busy - try again. */
				poll_delay = 100 * CSEC;	/* 1 sec. */
				busy_count += (SEC_TO_CSEC - 1);

			} else if ((sensep != NULL) &&
			    (scsi_sense_key(sensep) == KEY_UNIT_ATTENTION)) {
				/*
				 * Unit Attention - try again.
				 * Pretend it took 1 sec.
				 * NOTE: 'continue' avoids poll_delay
				 */
				busy_count += (SEC_TO_CSEC - 1);
				continue;

			} else if ((sensep != NULL) &&
			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
			    (scsi_sense_asc(sensep) == 0x04) &&
			    (scsi_sense_ascq(sensep) == 0x01)) {
				/*
				 * Not ready -> ready - try again.
				 * 04h/01h: LUN IS IN PROCESS OF BECOMING READY
				 * ...same as STATUS_BUSY
				 */
				poll_delay = 100 * CSEC;	/* 1 sec. */
				busy_count += (SEC_TO_CSEC - 1);

			} else {
				/* BAD status - give up. */
				break;
			}
		}

		if (((curthread->t_flag & T_INTR_THREAD) == 0) &&
		    !do_polled_io) {
			delay(drv_usectohz(poll_delay));
		} else {
			/* we busy wait during cpr_dump or interrupt threads */
			drv_usecwait(poll_delay);
		}
	}

	/* Restore the caller's flags, completion callback and timeout. */
	pkt->pkt_flags = savef;
	pkt->pkt_comp = savec;
	pkt->pkt_time = savet;

	/* return on error */
	if (rval)
		return (rval);

	/*
	 * This is not a performance critical code path.
	 *
	 * As an accommodation for scsi_poll callers, to avoid ddi_dma_sync()
	 * issues associated with looking at DMA memory prior to
	 * scsi_pkt_destroy(), we scsi_sync_pkt() prior to return.
	 */
	scsi_sync_pkt(pkt);
	return (0);
}
25955 
25956 
25957 
25958 /*
25959  *    Function: sd_persistent_reservation_in_read_keys
25960  *
25961  * Description: This routine is the driver entry point for handling CD-ROM
25962  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
25963  *		by sending the SCSI-3 PRIN commands to the device.
25964  *		Processes the read keys command response by copying the
25965  *		reservation key information into the user provided buffer.
25966  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
25967  *
25968  *   Arguments: un   -  Pointer to soft state struct for the target.
25969  *		usrp -	user provided pointer to multihost Persistent In Read
25970  *			Keys structure (mhioc_inkeys_t)
25971  *		flag -	this argument is a pass through to ddi_copyxxx()
25972  *			directly from the mode argument of ioctl().
25973  *
25974  * Return Code: 0   - Success
25975  *		EACCES
25976  *		ENOTSUP
25977  *		errno return code from sd_send_scsi_cmd()
25978  *
25979  *     Context: Can sleep. Does not return until command is completed.
25980  */
25981 
static int
sd_persistent_reservation_in_read_keys(struct sd_lun *un,
    mhioc_inkeys_t *usrp, int flag)
{
#ifdef _MULTI_DATAMODEL
	struct mhioc_key_list32	li32;
#endif
	sd_prin_readkeys_t	*in;
	mhioc_inkeys_t		*ptr;
	mhioc_key_list_t	li;
	uchar_t			*data_bufp = NULL;
	int			data_len = 0;
	int			rval = 0;
	size_t			copysz = 0;
	sd_ssc_t		*ssc;

	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
		return (EINVAL);
	}
	bzero(&li, sizeof (mhioc_key_list_t));

	ssc = sd_ssc_init(un);

	/*
	 * Get the listsize from user
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		/* 32-bit caller: copy in the ILP32 layout and widen it. */
		copysz = sizeof (struct mhioc_key_list32);
		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyin: mhioc_key_list32_t\n");
			rval = EFAULT;
			goto done;
		}
		li.listsize = li32.listsize;
		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
		break;

	case DDI_MODEL_NONE:
		copysz = sizeof (mhioc_key_list_t);
		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyin: mhioc_key_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */
	copysz = sizeof (mhioc_key_list_t);
	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyin: mhioc_key_list_t\n");
		rval = EFAULT;
		goto done;
	}
#endif

	/*
	 * Size the PRIN buffer: the readkeys header (minus the keylist
	 * placeholder pointer) plus room for 'listsize' reservation keys.
	 */
	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
	data_bufp = kmem_zalloc(data_len, KM_SLEEP);

	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
	    data_len, data_bufp);
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto done;
	}
	/* PRIN data is big-endian on the wire; convert for the caller. */
	in = (sd_prin_readkeys_t *)data_bufp;
	ptr->generation = BE_32(in->generation);
	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;

	/*
	 * Return the min(listsize, listlen) keys
	 */
#ifdef _MULTI_DATAMODEL

	/* Note: copysz still holds the header size chosen during copyin. */
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		li32.listlen = li.listlen;
		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyout: mhioc_key_list32_t\n");
			rval = EFAULT;
			goto done;
		}
		break;

	case DDI_MODEL_NONE:
		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyout: mhioc_key_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyout: mhioc_key_list_t\n");
		rval = EFAULT;
		goto done;
	}

#endif /* _MULTI_DATAMODEL */

	/* Copy out at most as many keys as the user's list can hold. */
	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
	    li.listsize * MHIOC_RESV_KEY_SIZE);
	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyout: keylist\n");
		rval = EFAULT;
	}
done:
	sd_ssc_fini(ssc);
	kmem_free(data_bufp, data_len);
	return (rval);
}
26116 
26117 
26118 /*
26119  *    Function: sd_persistent_reservation_in_read_resv
26120  *
26121  * Description: This routine is the driver entry point for handling CD-ROM
26122  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26123  *		by sending the SCSI-3 PRIN commands to the device.
26124  *		Process the read persistent reservations command response by
26125  *		copying the reservation information into the user provided
26126  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26127  *
26128  *   Arguments: un   -  Pointer to soft state struct for the target.
26129  *		usrp -	user provided pointer to multihost Persistent In Read
26130  *			Keys structure (mhioc_inkeys_t)
26131  *		flag -	this argument is a pass through to ddi_copyxxx()
26132  *			directly from the mode argument of ioctl().
26133  *
26134  * Return Code: 0   - Success
26135  *		EACCES
26136  *		ENOTSUP
26137  *		errno return code from sd_send_scsi_cmd()
26138  *
26139  *     Context: Can sleep. Does not return until command is completed.
26140  */
26141 
static int
sd_persistent_reservation_in_read_resv(struct sd_lun *un,
    mhioc_inresvs_t *usrp, int flag)
{
#ifdef _MULTI_DATAMODEL
	/* ILP32 image of the descriptor-list header, for 32-bit callers */
	struct mhioc_resv_desc_list32 resvlist32;
#endif
	sd_prin_readresv_t	*in;
	mhioc_inresvs_t		*ptr;
	sd_readresv_desc_t	*readresv_ptr;
	mhioc_resv_desc_list_t	resvlist;
	mhioc_resv_desc_t	resvdesc;
	uchar_t			*data_bufp = NULL;	/* PRIN response buffer */
	int			data_len;
	int			rval = 0;
	int			i;
	size_t			copysz = 0;
	mhioc_resv_desc_t	*bufp;
	sd_ssc_t		*ssc;

	if ((ptr = usrp) == NULL) {
		return (EINVAL);
	}

	ssc = sd_ssc_init(un);

	/*
	 * Get the listsize from user
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		copysz = sizeof (struct mhioc_resv_desc_list32);
		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		/* Widen the 32-bit header into the native-sized copy */
		resvlist.listsize = resvlist32.listsize;
		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
		break;

	case DDI_MODEL_NONE:
		copysz = sizeof (mhioc_resv_desc_list_t);
		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	copysz = sizeof (mhioc_resv_desc_list_t);
	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_resv: "
		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
		rval = EFAULT;
		goto done;
	}
#endif /* ! _MULTI_DATAMODEL */

	/*
	 * Size the PRIN READ RESERVATION response: one descriptor slot per
	 * requested list entry plus the fixed response header (subtracting
	 * back the caddr_t placeholder embedded in sd_prin_readresv_t, which
	 * is not part of the on-the-wire data).
	 *
	 * NOTE(review): listsize is user-supplied; a very large value could
	 * overflow this int arithmetic — confirm upper layers bound it.
	 */
	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
	data_bufp = kmem_zalloc(data_len, KM_SLEEP);

	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_RESV,
	    data_len, data_bufp);
	if (rval != 0) {
		/* EIO carries FMA-relevant state; other errors are ignored */
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto done;
	}
	in = (sd_prin_readresv_t *)data_bufp;
	/* Device data is big-endian; convert header fields to host order */
	ptr->generation = BE_32(in->generation);
	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;

	/*
	 * Return the min(listsize, listlen) keys
	 */
#ifdef _MULTI_DATAMODEL

	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		resvlist32.listlen = resvlist.listlen;
		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;

	case DDI_MODEL_NONE:
		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_resv: "
		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
		rval = EFAULT;
		goto done;
	}

#endif /* ! _MULTI_DATAMODEL */

	/*
	 * Copy each returned reservation descriptor out to the user's list,
	 * converting the scope-specific address from big-endian on the way.
	 */
	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
	bufp = resvlist.list;
	copysz = sizeof (mhioc_resv_desc_t);
	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
	    i++, readresv_ptr++, bufp++) {

		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
		    MHIOC_RESV_KEY_SIZE);
		resvdesc.type  = readresv_ptr->type;
		resvdesc.scope = readresv_ptr->scope;
		resvdesc.scope_specific_addr =
		    BE_32(readresv_ptr->scope_specific_addr);

		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: resvlist\n");
			rval = EFAULT;
			goto done;
		}
	}
done:
	sd_ssc_fini(ssc);
	/* only if data_bufp is allocated, we need to free it */
	if (data_bufp) {
		kmem_free(data_bufp, data_len);
	}
	return (rval);
}
26294 
26295 
26296 /*
26297  *    Function: sr_change_blkmode()
26298  *
26299  * Description: This routine is the driver entry point for handling CD-ROM
26300  *		block mode ioctl requests. Support for returning and changing
26301  *		the current block size in use by the device is implemented. The
26302  *		LBA size is changed via a MODE SELECT Block Descriptor.
26303  *
26304  *		This routine issues a mode sense with an allocation length of
26305  *		12 bytes for the mode page header and a single block descriptor.
26306  *
26307  *   Arguments: dev - the device 'dev_t'
26308  *		cmd - the request type; one of CDROMGBLKMODE (get) or
26309  *		      CDROMSBLKMODE (set)
26310  *		data - current block size or requested block size
26311  *		flag - this argument is a pass through to ddi_copyxxx() directly
26312  *		       from the mode argument of ioctl().
26313  *
26314  * Return Code: the code returned by sd_send_scsi_cmd()
26315  *		EINVAL if invalid arguments are provided
26316  *		EFAULT if ddi_copyxxx() fails
26317  *		ENXIO if fail ddi_get_soft_state
26318  *		EIO if invalid mode sense block descriptor length
26319  *
26320  */
26321 
static int
sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un = NULL;
	struct mode_header		*sense_mhp, *select_mhp;
	struct block_descriptor		*sense_desc, *select_desc;
	int				current_bsize;
	int				rval = EINVAL;
	uchar_t				*sense = NULL;
	uchar_t				*select = NULL;
	sd_ssc_t			*ssc;

	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * The block length is changed via the Mode Select block descriptor, the
	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
	 * required as part of this routine. Therefore the mode sense allocation
	 * length is specified to be the length of a mode page header and a
	 * block descriptor.
	 */
	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);

	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header *)sense;
	if ((sense_mhp->bdesc_length == 0) ||
	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense returned invalid block"
		    " descriptor length\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (EIO);
	}
	/* Assemble the 24-bit logical block size from the block descriptor */
	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
	current_bsize = ((sense_desc->blksize_hi << 16) |
	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);

	/* Process command */
	switch (cmd) {
	case CDROMGBLKMODE:
		/* Return the block size obtained during the mode sense */
		if (ddi_copyout(&current_bsize, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSBLKMODE:
		/* Validate the requested block size */
		switch (data) {
		case CDROM_BLK_512:
		case CDROM_BLK_1024:
		case CDROM_BLK_2048:
		case CDROM_BLK_2056:
		case CDROM_BLK_2336:
		case CDROM_BLK_2340:
		case CDROM_BLK_2352:
		case CDROM_BLK_2368:
		case CDROM_BLK_2448:
		case CDROM_BLK_2646:
		case CDROM_BLK_2647:
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: "
			    "Block Size '%ld' Not Supported\n", data);
			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
			return (EINVAL);
		}

		/*
		 * The current block size matches the requested block size so
		 * there is no need to send the mode select to change the size
		 */
		if (current_bsize == data) {
			break;
		}

		/* Build the select data for the requested block size */
		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
		select_mhp = (struct mode_header *)select;
		select_desc =
		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
		/*
		 * The LBA size is changed via the block descriptor, so the
		 * descriptor is built according to the user data
		 */
		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
		/* Split the requested size into the descriptor's 3 bytes */
		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
		select_desc->blksize_lo  = (char)((data) & 0x000000ff);

		/* Send the mode select for the requested block size */
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);
		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: Mode Select Failed\n");
			/*
			 * The mode select failed for the requested block size,
			 * so reset the data for the original block size and
			 * send it to the target. The error is indicated by the
			 * return value for the failed mode select.
			 */
			select_desc->blksize_hi  = sense_desc->blksize_hi;
			select_desc->blksize_mid = sense_desc->blksize_mid;
			select_desc->blksize_lo  = sense_desc->blksize_lo;
			ssc = sd_ssc_init(un);
			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
			    SD_PATH_STANDARD);
			sd_ssc_fini(ssc);
		} else {
			/* Record the new block size in the soft state */
			ASSERT(!mutex_owned(SD_MUTEX(un)));
			mutex_enter(SD_MUTEX(un));
			sd_update_block_info(un, (uint32_t)data, 0);
			mutex_exit(SD_MUTEX(un));
		}
		break;
	default:
		/* should not reach here, but check anyway */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
		break;
	}

	if (select) {
		kmem_free(select, BUFLEN_CHG_BLK_MODE);
	}
	if (sense) {
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
	}
	return (rval);
}
26473 
26474 
26475 /*
26476  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
26477  * implement driver support for getting and setting the CD speed. The command
26478  * set used will be based on the device type. If the device has not been
26479  * identified as MMC the Toshiba vendor specific mode page will be used. If
26480  * the device is MMC but does not support the Real Time Streaming feature
26481  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
26482  * be used to read the speed.
26483  */
26484 
26485 /*
26486  *    Function: sr_change_speed()
26487  *
26488  * Description: This routine is the driver entry point for handling CD-ROM
26489  *		drive speed ioctl requests for devices supporting the Toshiba
26490  *		vendor specific drive speed mode page. Support for returning
26491  *		and changing the current drive speed in use by the device is
26492  *		implemented.
26493  *
26494  *   Arguments: dev - the device 'dev_t'
26495  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
26496  *		      CDROMSDRVSPEED (set)
26497  *		data - current drive speed or requested drive speed
26498  *		flag - this argument is a pass through to ddi_copyxxx() directly
26499  *		       from the mode argument of ioctl().
26500  *
26501  * Return Code: the code returned by sd_send_scsi_cmd()
26502  *		EINVAL if invalid arguments are provided
26503  *		EFAULT if ddi_copyxxx() fails
26504  *		ENXIO if fail ddi_get_soft_state
26505  *		EIO if invalid mode sense block descriptor length
26506  */
26507 
26508 static int
26509 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
26510 {
26511 	struct sd_lun			*un = NULL;
26512 	struct mode_header		*sense_mhp, *select_mhp;
26513 	struct mode_speed		*sense_page, *select_page;
26514 	int				current_speed;
26515 	int				rval = EINVAL;
26516 	int				bd_len;
26517 	uchar_t				*sense = NULL;
26518 	uchar_t				*select = NULL;
26519 	sd_ssc_t			*ssc;
26520 
26521 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
26522 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26523 		return (ENXIO);
26524 	}
26525 
26526 	/*
26527 	 * Note: The drive speed is being modified here according to a Toshiba
26528 	 * vendor specific mode page (0x31).
26529 	 */
26530 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26531 
26532 	ssc = sd_ssc_init(un);
26533 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
26534 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
26535 	    SD_PATH_STANDARD);
26536 	sd_ssc_fini(ssc);
26537 	if (rval != 0) {
26538 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26539 		    "sr_change_speed: Mode Sense Failed\n");
26540 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26541 		return (rval);
26542 	}
26543 	sense_mhp  = (struct mode_header *)sense;
26544 
26545 	/* Check the block descriptor len to handle only 1 block descriptor */
26546 	bd_len = sense_mhp->bdesc_length;
26547 	if (bd_len > MODE_BLK_DESC_LENGTH) {
26548 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26549 		    "sr_change_speed: Mode Sense returned invalid block "
26550 		    "descriptor length\n");
26551 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26552 		return (EIO);
26553 	}
26554 
26555 	sense_page = (struct mode_speed *)
26556 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
26557 	current_speed = sense_page->speed;
26558 
26559 	/* Process command */
26560 	switch (cmd) {
26561 	case CDROMGDRVSPEED:
26562 		/* Return the drive speed obtained during the mode sense */
26563 		if (current_speed == 0x2) {
26564 			current_speed = CDROM_TWELVE_SPEED;
26565 		}
26566 		if (ddi_copyout(&current_speed, (void *)data,
26567 		    sizeof (int), flag) != 0) {
26568 			rval = EFAULT;
26569 		}
26570 		break;
26571 	case CDROMSDRVSPEED:
26572 		/* Validate the requested drive speed */
26573 		switch ((uchar_t)data) {
26574 		case CDROM_TWELVE_SPEED:
26575 			data = 0x2;
26576 			/*FALLTHROUGH*/
26577 		case CDROM_NORMAL_SPEED:
26578 		case CDROM_DOUBLE_SPEED:
26579 		case CDROM_QUAD_SPEED:
26580 		case CDROM_MAXIMUM_SPEED:
26581 			break;
26582 		default:
26583 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26584 			    "sr_change_speed: "
26585 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
26586 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26587 			return (EINVAL);
26588 		}
26589 
26590 		/*
26591 		 * The current drive speed matches the requested drive speed so
26592 		 * there is no need to send the mode select to change the speed
26593 		 */
26594 		if (current_speed == data) {
26595 			break;
26596 		}
26597 
26598 		/* Build the select data for the requested drive speed */
26599 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26600 		select_mhp = (struct mode_header *)select;
26601 		select_mhp->bdesc_length = 0;
26602 		select_page =
26603 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26604 		select_page =
26605 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26606 		select_page->mode_page.code = CDROM_MODE_SPEED;
26607 		select_page->mode_page.length = 2;
26608 		select_page->speed = (uchar_t)data;
26609 
26610 		/* Send the mode select for the requested block size */
26611 		ssc = sd_ssc_init(un);
26612 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
26613 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26614 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26615 		sd_ssc_fini(ssc);
26616 		if (rval != 0) {
26617 			/*
26618 			 * The mode select failed for the requested drive speed,
26619 			 * so reset the data for the original drive speed and
26620 			 * send it to the target. The error is indicated by the
26621 			 * return value for the failed mode select.
26622 			 */
26623 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26624 			    "sr_drive_speed: Mode Select Failed\n");
26625 			select_page->speed = sense_page->speed;
26626 			ssc = sd_ssc_init(un);
26627 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
26628 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26629 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26630 			sd_ssc_fini(ssc);
26631 		}
26632 		break;
26633 	default:
26634 		/* should not reach here, but check anyway */
26635 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26636 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
26637 		rval = EINVAL;
26638 		break;
26639 	}
26640 
26641 	if (select) {
26642 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
26643 	}
26644 	if (sense) {
26645 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26646 	}
26647 
26648 	return (rval);
26649 }
26650 
26651 
26652 /*
26653  *    Function: sr_atapi_change_speed()
26654  *
26655  * Description: This routine is the driver entry point for handling CD-ROM
26656  *		drive speed ioctl requests for MMC devices that do not support
26657  *		the Real Time Streaming feature (0x107).
26658  *
26659  *		Note: This routine will use the SET SPEED command which may not
26660  *		be supported by all devices.
26661  *
26662  *   Arguments: dev- the device 'dev_t'
26663  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
26664  *		     CDROMSDRVSPEED (set)
26665  *		data- current drive speed or requested drive speed
26666  *		flag- this argument is a pass through to ddi_copyxxx() directly
26667  *		      from the mode argument of ioctl().
26668  *
26669  * Return Code: the code returned by sd_send_scsi_cmd()
26670  *		EINVAL if invalid arguments are provided
26671  *		EFAULT if ddi_copyxxx() fails
26672  *		ENXIO if fail ddi_get_soft_state
26673  *		EIO if invalid mode sense block descriptor length
26674  */
26675 
static int
sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com = NULL;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	uchar_t				*sense = NULL;
	char				cdb[CDB_GROUP5];
	int				bd_len;
	int				current_speed = 0;
	int				max_speed = 0;
	int				rval;
	sd_ssc_t			*ssc;

	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* Fetch the CD capabilities mode page (0x2A) via a group-1 sense */
	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);

	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
	    SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header_grp2 *)sense;
	/* group-2 headers carry the descriptor length as a 16-bit hi/lo pair */
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense returned invalid "
		    "block descriptor length\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (EIO);
	}

	/* Calculate the current and maximum drive speeds */
	/*
	 * Offsets are relative to the start of the caps page: bytes 8-9 hold
	 * the maximum speed, bytes 14-15 the current speed (presumably in
	 * KB/sec per MMC page 0x2A — see SD_SPEED_1X usage below).
	 */
	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
	current_speed = (sense_page[14] << 8) | sense_page[15];
	max_speed = (sense_page[8] << 8) | sense_page[9];

	/* Process the command */
	switch (cmd) {
	case CDROMGDRVSPEED:
		/* Convert KB/sec to the CDROM_*_SPEED multiplier for the user */
		current_speed /= SD_SPEED_1X;
		if (ddi_copyout(&current_speed, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSDRVSPEED:
		/* Convert the speed code to KB/sec */
		switch ((uchar_t)data) {
		case CDROM_NORMAL_SPEED:
			current_speed = SD_SPEED_1X;
			break;
		case CDROM_DOUBLE_SPEED:
			current_speed = 2 * SD_SPEED_1X;
			break;
		case CDROM_QUAD_SPEED:
			current_speed = 4 * SD_SPEED_1X;
			break;
		case CDROM_TWELVE_SPEED:
			current_speed = 12 * SD_SPEED_1X;
			break;
		case CDROM_MAXIMUM_SPEED:
			current_speed = 0xffff;
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_atapi_change_speed: invalid drive speed %d\n",
			    (uchar_t)data);
			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
			return (EINVAL);
		}

		/* Check the request against the drive's max speed. */
		if (current_speed != 0xffff) {
			if (current_speed > max_speed) {
				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
				return (EINVAL);
			}
		}

		/*
		 * Build and send the SET SPEED command
		 *
		 * Note: The SET SPEED (0xBB) command used in this routine is
		 * obsolete per the SCSI MMC spec but still supported in the
		 * MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
		 * therefore the command is still implemented in this routine.
		 */
		bzero(cdb, sizeof (cdb));
		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
		/* CDB bytes 2-3: requested read speed, big-endian */
		cdb[2] = (uchar_t)(current_speed >> 8);
		cdb[3] = (uchar_t)current_speed;
		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
		com->uscsi_cdb	   = (caddr_t)cdb;
		com->uscsi_cdblen  = CDB_GROUP5;
		com->uscsi_bufaddr = NULL;
		com->uscsi_buflen  = 0;
		com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT;
		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
	}

	if (sense) {
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
	}
	if (com) {
		kmem_free(com, sizeof (*com));
	}
	return (rval);
}
26803 
26804 
26805 /*
26806  *    Function: sr_pause_resume()
26807  *
26808  * Description: This routine is the driver entry point for handling CD-ROM
26809  *		pause/resume ioctl requests. This only affects the audio play
26810  *		operation.
26811  *
26812  *   Arguments: dev - the device 'dev_t'
26813  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
26814  *		      for setting the resume bit of the cdb.
26815  *
26816  * Return Code: the code returned by sd_send_scsi_cmd()
26817  *		EINVAL if invalid mode specified
26818  *
26819  */
26820 
26821 static int
26822 sr_pause_resume(dev_t dev, int cmd)
26823 {
26824 	struct sd_lun		*un;
26825 	struct uscsi_cmd	*com;
26826 	char			cdb[CDB_GROUP1];
26827 	int			rval;
26828 
26829 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26830 		return (ENXIO);
26831 	}
26832 
26833 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26834 	bzero(cdb, CDB_GROUP1);
26835 	cdb[0] = SCMD_PAUSE_RESUME;
26836 	switch (cmd) {
26837 	case CDROMRESUME:
26838 		cdb[8] = 1;
26839 		break;
26840 	case CDROMPAUSE:
26841 		cdb[8] = 0;
26842 		break;
26843 	default:
26844 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
26845 		    " Command '%x' Not Supported\n", cmd);
26846 		rval = EINVAL;
26847 		goto done;
26848 	}
26849 
26850 	com->uscsi_cdb    = cdb;
26851 	com->uscsi_cdblen = CDB_GROUP1;
26852 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
26853 
26854 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26855 	    SD_PATH_STANDARD);
26856 
26857 done:
26858 	kmem_free(com, sizeof (*com));
26859 	return (rval);
26860 }
26861 
26862 
26863 /*
26864  *    Function: sr_play_msf()
26865  *
26866  * Description: This routine is the driver entry point for handling CD-ROM
26867  *		ioctl requests to output the audio signals at the specified
26868  *		starting address and continue the audio play until the specified
26869  *		ending address (CDROMPLAYMSF) The address is in Minute Second
26870  *		Frame (MSF) format.
26871  *
26872  *   Arguments: dev	- the device 'dev_t'
26873  *		data	- pointer to user provided audio msf structure,
26874  *		          specifying start/end addresses.
26875  *		flag	- this argument is a pass through to ddi_copyxxx()
26876  *		          directly from the mode argument of ioctl().
26877  *
26878  * Return Code: the code returned by sd_send_scsi_cmd()
26879  *		EFAULT if ddi_copyxxx() fails
26880  *		ENXIO if fail ddi_get_soft_state
26881  *		EINVAL if data pointer is NULL
26882  */
26883 
26884 static int
26885 sr_play_msf(dev_t dev, caddr_t data, int flag)
26886 {
26887 	struct sd_lun		*un;
26888 	struct uscsi_cmd	*com;
26889 	struct cdrom_msf	msf_struct;
26890 	struct cdrom_msf	*msf = &msf_struct;
26891 	char			cdb[CDB_GROUP1];
26892 	int			rval;
26893 
26894 	if (data == NULL) {
26895 		return (EINVAL);
26896 	}
26897 
26898 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26899 		return (ENXIO);
26900 	}
26901 
26902 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
26903 		return (EFAULT);
26904 	}
26905 
26906 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26907 	bzero(cdb, CDB_GROUP1);
26908 	cdb[0] = SCMD_PLAYAUDIO_MSF;
26909 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
26910 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
26911 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
26912 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
26913 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
26914 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
26915 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
26916 	} else {
26917 		cdb[3] = msf->cdmsf_min0;
26918 		cdb[4] = msf->cdmsf_sec0;
26919 		cdb[5] = msf->cdmsf_frame0;
26920 		cdb[6] = msf->cdmsf_min1;
26921 		cdb[7] = msf->cdmsf_sec1;
26922 		cdb[8] = msf->cdmsf_frame1;
26923 	}
26924 	com->uscsi_cdb    = cdb;
26925 	com->uscsi_cdblen = CDB_GROUP1;
26926 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
26927 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26928 	    SD_PATH_STANDARD);
26929 	kmem_free(com, sizeof (*com));
26930 	return (rval);
26931 }
26932 
26933 
26934 /*
26935  *    Function: sr_play_trkind()
26936  *
26937  * Description: This routine is the driver entry point for handling CD-ROM
26938  *		ioctl requests to output the audio signals at the specified
26939  *		starting address and continue the audio play until the specified
26940  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
26941  *		format.
26942  *
26943  *   Arguments: dev	- the device 'dev_t'
26944  *		data	- pointer to user provided audio track/index structure,
26945  *		          specifying start/end addresses.
26946  *		flag	- this argument is a pass through to ddi_copyxxx()
26947  *		          directly from the mode argument of ioctl().
26948  *
26949  * Return Code: the code returned by sd_send_scsi_cmd()
26950  *		EFAULT if ddi_copyxxx() fails
26951  *		ENXIO if fail ddi_get_soft_state
26952  *		EINVAL if data pointer is NULL
26953  */
26954 
26955 static int
26956 sr_play_trkind(dev_t dev, caddr_t data, int flag)
26957 {
26958 	struct cdrom_ti		ti_struct;
26959 	struct cdrom_ti		*ti = &ti_struct;
26960 	struct uscsi_cmd	*com = NULL;
26961 	char			cdb[CDB_GROUP1];
26962 	int			rval;
26963 
26964 	if (data == NULL) {
26965 		return (EINVAL);
26966 	}
26967 
26968 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
26969 		return (EFAULT);
26970 	}
26971 
26972 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26973 	bzero(cdb, CDB_GROUP1);
26974 	cdb[0] = SCMD_PLAYAUDIO_TI;
26975 	cdb[4] = ti->cdti_trk0;
26976 	cdb[5] = ti->cdti_ind0;
26977 	cdb[7] = ti->cdti_trk1;
26978 	cdb[8] = ti->cdti_ind1;
26979 	com->uscsi_cdb    = cdb;
26980 	com->uscsi_cdblen = CDB_GROUP1;
26981 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
26982 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26983 	    SD_PATH_STANDARD);
26984 	kmem_free(com, sizeof (*com));
26985 	return (rval);
26986 }
26987 
26988 
26989 /*
26990  *    Function: sr_read_all_subcodes()
26991  *
26992  * Description: This routine is the driver entry point for handling CD-ROM
26993  *		ioctl requests to return raw subcode data while the target is
26994  *		playing audio (CDROMSUBCODE).
26995  *
26996  *   Arguments: dev	- the device 'dev_t'
26997  *		data	- pointer to user provided cdrom subcode structure,
26998  *		          specifying the transfer length and address.
26999  *		flag	- this argument is a pass through to ddi_copyxxx()
27000  *		          directly from the mode argument of ioctl().
27001  *
27002  * Return Code: the code returned by sd_send_scsi_cmd()
27003  *		EFAULT if ddi_copyxxx() fails
27004  *		ENXIO if fail ddi_get_soft_state
27005  *		EINVAL if data pointer is NULL
27006  */
27007 
static int
sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	*com = NULL;
	struct cdrom_subcode	*subcode = NULL;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_subcode32		cdrom_subcode32;
	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
#endif
	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);

	/* Copy in the user's request, widening 32-bit layouts as needed */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, subcode,
		    sizeof (struct cdrom_subcode), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: ddi_copyin Failed\n");
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((subcode->cdsc_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    subcode->cdsc_length, 0xFFFFFF);
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EINVAL);
	}

	/* Transfer size: cdsc_length blocks of raw sub-code data */
	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_mmc_cap == TRUE) {
		/*
		 * MMC-capable drive: use READ CD with an all-ones LBA
		 * (bytes 2-5), the 3-byte transfer length in bytes 6-8,
		 * and byte 10 set to 1 to select sub-channel data.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (char)0xff;
		cdb[3] = (char)0xff;
		cdb[4] = (char)0xff;
		cdb[5] = (char)0xff;
		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
		cdb[10] = 1;
	} else {
		/*
		 * Note: A vendor specific command (0xDF) is being used here to
		 * request a read of all subcodes.
		 */
		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
		/* this path takes a full 4-byte length in bytes 6-9 */
		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
	}
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	/* Data lands directly in the caller's buffer (UIO_USERSPACE below) */
	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(subcode, sizeof (struct cdrom_subcode));
	kmem_free(com, sizeof (*com));
	return (rval);
}
27113 
27114 
27115 /*
27116  *    Function: sr_read_subchannel()
27117  *
27118  * Description: This routine is the driver entry point for handling CD-ROM
27119  *		ioctl requests to return the Q sub-channel data of the CD
27120  *		current position block. (CDROMSUBCHNL) The data includes the
27121  *		track number, index number, absolute CD-ROM address (LBA or MSF
27122  *		format per the user) , track relative CD-ROM address (LBA or MSF
27123  *		format per the user), control data and audio status.
27124  *
27125  *   Arguments: dev	- the device 'dev_t'
27126  *		data	- pointer to user provided cdrom sub-channel structure
27127  *		flag	- this argument is a pass through to ddi_copyxxx()
27128  *		          directly from the mode argument of ioctl().
27129  *
27130  * Return Code: the code returned by sd_send_scsi_cmd()
27131  *		EFAULT if ddi_copyxxx() fails
27132  *		ENXIO if fail ddi_get_soft_state
27133  *		EINVAL if data pointer is NULL
27134  */
27135 
static int
sr_read_subchannel(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_subchnl	subchanel;
	struct cdrom_subchnl	*subchnl = &subchanel;
	char			cdb[CDB_GROUP1];
	caddr_t			buffer;	/* 16 byte READ SUB-CHANNEL response */
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	/* Fail if the unit is gone or has been taken offline. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Copy in the user's request (selects LBA vs. MSF address format). */
	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
		return (EFAULT);
	}

	/*
	 * Build a READ SUB-CHANNEL (0x42) command requesting the Q
	 * sub-channel current position data; the response is returned in a
	 * 16 byte block (4 byte header + 12 byte current position data).
	 */
	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_SUBCHANNEL;
	/* Set the MSF bit based on the user requested address format */
	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
	/*
	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
	 * returned
	 */
	cdb[2] = 0x40;
	/*
	 * Set byte 3 to specify the return data format. A value of 0x01
	 * indicates that the CD-ROM current position should be returned.
	 */
	cdb[3] = 0x01;
	/* Allocation length: the 16 byte response (0x10) */
	cdb[8] = 0x10;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen  = 16;
	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
	/* buffer is a kernel address, hence UIO_SYSSPACE */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_STANDARD);
	if (rval != 0) {
		kmem_free(buffer, 16);
		kmem_free(com, sizeof (*com));
		return (rval);
	}

	/* Process the returned Q sub-channel data */
	subchnl->cdsc_audiostatus = buffer[1];
	subchnl->cdsc_adr	= (buffer[5] & 0xF0) >> 4;
	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
	subchnl->cdsc_trk	= buffer[6];
	subchnl->cdsc_ind	= buffer[7];
	if (subchnl->cdsc_format & CDROM_LBA) {
		/*
		 * Absolute (bytes 8-11) and track-relative (bytes 12-15)
		 * addresses returned as big-endian 32-bit LBAs; the uchar_t
		 * casts prevent sign extension of the raw buffer bytes.
		 */
		subchnl->cdsc_absaddr.lba =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		subchnl->cdsc_reladdr.lba =
		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
		/* Drive reports MSF fields in BCD; convert to binary */
		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
	} else {
		/* MSF fields already in plain binary */
		subchnl->cdsc_absaddr.msf.minute = buffer[9];
		subchnl->cdsc_absaddr.msf.second = buffer[10];
		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
		subchnl->cdsc_reladdr.msf.minute = buffer[13];
		subchnl->cdsc_reladdr.msf.second = buffer[14];
		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
	}
	kmem_free(buffer, 16);
	kmem_free(com, sizeof (*com));
	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
	    != 0) {
		return (EFAULT);
	}
	return (rval);
}
27226 
27227 
27228 /*
27229  *    Function: sr_read_tocentry()
27230  *
27231  * Description: This routine is the driver entry point for handling CD-ROM
27232  *		ioctl requests to read from the Table of Contents (TOC)
27233  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27234  *		fields, the starting address (LBA or MSF format per the user)
27235  *		and the data mode if the user specified track is a data track.
27236  *
27237  *		Note: The READ HEADER (0x44) command used in this routine is
27238  *		obsolete per the SCSI MMC spec but still supported in the
 *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI
27240  *		therefore the command is still implemented in this routine.
27241  *
27242  *   Arguments: dev	- the device 'dev_t'
27243  *		data	- pointer to user provided toc entry structure,
27244  *			  specifying the track # and the address format
27245  *			  (LBA or MSF).
27246  *		flag	- this argument is a pass through to ddi_copyxxx()
27247  *		          directly from the mode argument of ioctl().
27248  *
27249  * Return Code: the code returned by sd_send_scsi_cmd()
27250  *		EFAULT if ddi_copyxxx() fails
27251  *		ENXIO if fail ddi_get_soft_state
27252  *		EINVAL if data pointer is NULL
27253  */
27254 
27255 static int
27256 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
27257 {
27258 	struct sd_lun		*un = NULL;
27259 	struct uscsi_cmd	*com;
27260 	struct cdrom_tocentry	toc_entry;
27261 	struct cdrom_tocentry	*entry = &toc_entry;
27262 	caddr_t			buffer;
27263 	int			rval;
27264 	char			cdb[CDB_GROUP1];
27265 
27266 	if (data == NULL) {
27267 		return (EINVAL);
27268 	}
27269 
27270 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27271 	    (un->un_state == SD_STATE_OFFLINE)) {
27272 		return (ENXIO);
27273 	}
27274 
27275 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
27276 		return (EFAULT);
27277 	}
27278 
27279 	/* Validate the requested track and address format */
27280 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
27281 		return (EINVAL);
27282 	}
27283 
27284 	if (entry->cdte_track == 0) {
27285 		return (EINVAL);
27286 	}
27287 
27288 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
27289 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27290 	bzero(cdb, CDB_GROUP1);
27291 
27292 	cdb[0] = SCMD_READ_TOC;
27293 	/* Set the MSF bit based on the user requested address format  */
27294 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
27295 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27296 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
27297 	} else {
27298 		cdb[6] = entry->cdte_track;
27299 	}
27300 
27301 	/*
27302 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
27303 	 * (4 byte TOC response header + 8 byte track descriptor)
27304 	 */
27305 	cdb[8] = 12;
27306 	com->uscsi_cdb	   = cdb;
27307 	com->uscsi_cdblen  = CDB_GROUP1;
27308 	com->uscsi_bufaddr = buffer;
27309 	com->uscsi_buflen  = 0x0C;
27310 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
27311 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27312 	    SD_PATH_STANDARD);
27313 	if (rval != 0) {
27314 		kmem_free(buffer, 12);
27315 		kmem_free(com, sizeof (*com));
27316 		return (rval);
27317 	}
27318 
27319 	/* Process the toc entry */
27320 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
27321 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
27322 	if (entry->cdte_format & CDROM_LBA) {
27323 		entry->cdte_addr.lba =
27324 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27325 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27326 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
27327 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
27328 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
27329 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
27330 		/*
27331 		 * Send a READ TOC command using the LBA address format to get
27332 		 * the LBA for the track requested so it can be used in the
27333 		 * READ HEADER request
27334 		 *
27335 		 * Note: The MSF bit of the READ HEADER command specifies the
27336 		 * output format. The block address specified in that command
27337 		 * must be in LBA format.
27338 		 */
27339 		cdb[1] = 0;
27340 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27341 		    SD_PATH_STANDARD);
27342 		if (rval != 0) {
27343 			kmem_free(buffer, 12);
27344 			kmem_free(com, sizeof (*com));
27345 			return (rval);
27346 		}
27347 	} else {
27348 		entry->cdte_addr.msf.minute	= buffer[9];
27349 		entry->cdte_addr.msf.second	= buffer[10];
27350 		entry->cdte_addr.msf.frame	= buffer[11];
27351 		/*
27352 		 * Send a READ TOC command using the LBA address format to get
27353 		 * the LBA for the track requested so it can be used in the
27354 		 * READ HEADER request
27355 		 *
27356 		 * Note: The MSF bit of the READ HEADER command specifies the
27357 		 * output format. The block address specified in that command
27358 		 * must be in LBA format.
27359 		 */
27360 		cdb[1] = 0;
27361 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27362 		    SD_PATH_STANDARD);
27363 		if (rval != 0) {
27364 			kmem_free(buffer, 12);
27365 			kmem_free(com, sizeof (*com));
27366 			return (rval);
27367 		}
27368 	}
27369 
27370 	/*
27371 	 * Build and send the READ HEADER command to determine the data mode of
27372 	 * the user specified track.
27373 	 */
27374 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
27375 	    (entry->cdte_track != CDROM_LEADOUT)) {
27376 		bzero(cdb, CDB_GROUP1);
27377 		cdb[0] = SCMD_READ_HEADER;
27378 		cdb[2] = buffer[8];
27379 		cdb[3] = buffer[9];
27380 		cdb[4] = buffer[10];
27381 		cdb[5] = buffer[11];
27382 		cdb[8] = 0x08;
27383 		com->uscsi_buflen = 0x08;
27384 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27385 		    SD_PATH_STANDARD);
27386 		if (rval == 0) {
27387 			entry->cdte_datamode = buffer[0];
27388 		} else {
27389 			/*
27390 			 * READ HEADER command failed, since this is
27391 			 * obsoleted in one spec, its better to return
27392 			 * -1 for an invlid track so that we can still
27393 			 * receive the rest of the TOC data.
27394 			 */
27395 			entry->cdte_datamode = (uchar_t)-1;
27396 		}
27397 	} else {
27398 		entry->cdte_datamode = (uchar_t)-1;
27399 	}
27400 
27401 	kmem_free(buffer, 12);
27402 	kmem_free(com, sizeof (*com));
27403 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
27404 		return (EFAULT);
27405 
27406 	return (rval);
27407 }
27408 
27409 
27410 /*
27411  *    Function: sr_read_tochdr()
27412  *
27413  * Description: This routine is the driver entry point for handling CD-ROM
27414  *		ioctl requests to read the Table of Contents (TOC) header
 *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
27416  *		and ending track numbers
27417  *
27418  *   Arguments: dev	- the device 'dev_t'
27419  *		data	- pointer to user provided toc header structure,
27420  *			  specifying the starting and ending track numbers.
27421  *		flag	- this argument is a pass through to ddi_copyxxx()
27422  *			  directly from the mode argument of ioctl().
27423  *
27424  * Return Code: the code returned by sd_send_scsi_cmd()
27425  *		EFAULT if ddi_copyxxx() fails
27426  *		ENXIO if fail ddi_get_soft_state
27427  *		EINVAL if data pointer is NULL
27428  */
27429 
27430 static int
27431 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
27432 {
27433 	struct sd_lun		*un;
27434 	struct uscsi_cmd	*com;
27435 	struct cdrom_tochdr	toc_header;
27436 	struct cdrom_tochdr	*hdr = &toc_header;
27437 	char			cdb[CDB_GROUP1];
27438 	int			rval;
27439 	caddr_t			buffer;
27440 
27441 	if (data == NULL) {
27442 		return (EINVAL);
27443 	}
27444 
27445 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27446 	    (un->un_state == SD_STATE_OFFLINE)) {
27447 		return (ENXIO);
27448 	}
27449 
27450 	buffer = kmem_zalloc(4, KM_SLEEP);
27451 	bzero(cdb, CDB_GROUP1);
27452 	cdb[0] = SCMD_READ_TOC;
27453 	/*
27454 	 * Specifying a track number of 0x00 in the READ TOC command indicates
27455 	 * that the TOC header should be returned
27456 	 */
27457 	cdb[6] = 0x00;
27458 	/*
27459 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
27460 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
27461 	 */
27462 	cdb[8] = 0x04;
27463 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27464 	com->uscsi_cdb	   = cdb;
27465 	com->uscsi_cdblen  = CDB_GROUP1;
27466 	com->uscsi_bufaddr = buffer;
27467 	com->uscsi_buflen  = 0x04;
27468 	com->uscsi_timeout = 300;
27469 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
27470 
27471 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27472 	    SD_PATH_STANDARD);
27473 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27474 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
27475 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
27476 	} else {
27477 		hdr->cdth_trk0 = buffer[2];
27478 		hdr->cdth_trk1 = buffer[3];
27479 	}
27480 	kmem_free(buffer, 4);
27481 	kmem_free(com, sizeof (*com));
27482 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
27483 		return (EFAULT);
27484 	}
27485 	return (rval);
27486 }
27487 
27488 
27489 /*
27490  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
27491  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
27492  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
27493  * digital audio and extended architecture digital audio. These modes are
27494  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
27495  * MMC specs.
27496  *
27497  * In addition to support for the various data formats these routines also
27498  * include support for devices that implement only the direct access READ
27499  * commands (0x08, 0x28), devices that implement the READ_CD commands
27500  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
27501  * READ CDXA commands (0xD8, 0xDB)
27502  */
27503 
27504 /*
27505  *    Function: sr_read_mode1()
27506  *
27507  * Description: This routine is the driver entry point for handling CD-ROM
27508  *		ioctl read mode1 requests (CDROMREADMODE1).
27509  *
27510  *   Arguments: dev	- the device 'dev_t'
27511  *		data	- pointer to user provided cd read structure specifying
27512  *			  the lba buffer address and length.
27513  *		flag	- this argument is a pass through to ddi_copyxxx()
27514  *			  directly from the mode argument of ioctl().
27515  *
27516  * Return Code: the code returned by sd_send_scsi_cmd()
27517  *		EFAULT if ddi_copyxxx() fails
27518  *		ENXIO if fail ddi_get_soft_state
27519  *		EINVAL if data pointer is NULL
27520  */
27521 
27522 static int
27523 sr_read_mode1(dev_t dev, caddr_t data, int flag)
27524 {
27525 	struct sd_lun		*un;
27526 	struct cdrom_read	mode1_struct;
27527 	struct cdrom_read	*mode1 = &mode1_struct;
27528 	int			rval;
27529 	sd_ssc_t		*ssc;
27530 
27531 #ifdef _MULTI_DATAMODEL
27532 	/* To support ILP32 applications in an LP64 world */
27533 	struct cdrom_read32	cdrom_read32;
27534 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27535 #endif /* _MULTI_DATAMODEL */
27536 
27537 	if (data == NULL) {
27538 		return (EINVAL);
27539 	}
27540 
27541 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27542 	    (un->un_state == SD_STATE_OFFLINE)) {
27543 		return (ENXIO);
27544 	}
27545 
27546 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27547 	    "sd_read_mode1: entry: un:0x%p\n", un);
27548 
27549 #ifdef _MULTI_DATAMODEL
27550 	switch (ddi_model_convert_from(flag & FMODELS)) {
27551 	case DDI_MODEL_ILP32:
27552 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
27553 			return (EFAULT);
27554 		}
27555 		/* Convert the ILP32 uscsi data from the application to LP64 */
27556 		cdrom_read32tocdrom_read(cdrd32, mode1);
27557 		break;
27558 	case DDI_MODEL_NONE:
27559 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
27560 			return (EFAULT);
27561 		}
27562 	}
27563 #else /* ! _MULTI_DATAMODEL */
27564 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
27565 		return (EFAULT);
27566 	}
27567 #endif /* _MULTI_DATAMODEL */
27568 
27569 	ssc = sd_ssc_init(un);
27570 	rval = sd_send_scsi_READ(ssc, mode1->cdread_bufaddr,
27571 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
27572 	sd_ssc_fini(ssc);
27573 
27574 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27575 	    "sd_read_mode1: exit: un:0x%p\n", un);
27576 
27577 	return (rval);
27578 }
27579 
27580 
27581 /*
27582  *    Function: sr_read_cd_mode2()
27583  *
27584  * Description: This routine is the driver entry point for handling CD-ROM
27585  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27586  *		support the READ CD (0xBE) command or the 1st generation
27587  *		READ CD (0xD4) command.
27588  *
27589  *   Arguments: dev	- the device 'dev_t'
27590  *		data	- pointer to user provided cd read structure specifying
27591  *			  the lba buffer address and length.
27592  *		flag	- this argument is a pass through to ddi_copyxxx()
27593  *			  directly from the mode argument of ioctl().
27594  *
27595  * Return Code: the code returned by sd_send_scsi_cmd()
27596  *		EFAULT if ddi_copyxxx() fails
27597  *		ENXIO if fail ddi_get_soft_state
27598  *		EINVAL if data pointer is NULL
27599  */
27600 
static int
sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_read	mode2_struct;
	struct cdrom_read	*mode2 = &mode2_struct;
	uchar_t			cdb[CDB_GROUP5];
	int			nblocks;
	int			rval;
#ifdef _MULTI_DATAMODEL
	/*  To support ILP32 applications in an LP64 world */
	struct cdrom_read32	cdrom_read32;
	struct cdrom_read32	*cdrd32 = &cdrom_read32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	/* Fail if the unit is gone or has been taken offline. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_read32tocdrom_read(cdrd32, mode2);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	bzero(cdb, sizeof (cdb));
	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
		/* Read command supported by 1st generation atapi drives */
		cdb[0] = SCMD_READ_CDD4;
	} else {
		/* Universal CD Access Command */
		cdb[0] = SCMD_READ_CD;
	}

	/*
	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
	 */
	cdb[1] = CDROM_SECTOR_TYPE_MODE2;

	/* set the start address (big-endian 32-bit LBA in bytes 2-5) */
	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);

	/* set the transfer length (whole 2336-byte sectors; 24-bit count) */
	nblocks = mode2->cdread_buflen / 2336;
	cdb[6] = (uchar_t)(nblocks >> 16);
	cdb[7] = (uchar_t)(nblocks >> 8);
	cdb[8] = (uchar_t)nblocks;

	/* set the filter bits (request user data only) */
	cdb[9] = CDROM_READ_CD_USERDATA;

	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = (caddr_t)cdb;
	com->uscsi_cdblen = sizeof (cdb);
	com->uscsi_bufaddr = mode2->cdread_bufaddr;
	com->uscsi_buflen = mode2->cdread_buflen;
	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;

	/* cdread_bufaddr is a user address, hence UIO_USERSPACE */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(com, sizeof (*com));
	return (rval);
}
27689 
27690 
27691 /*
27692  *    Function: sr_read_mode2()
27693  *
27694  * Description: This routine is the driver entry point for handling CD-ROM
27695  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27696  *		do not support the READ CD (0xBE) command.
27697  *
27698  *   Arguments: dev	- the device 'dev_t'
27699  *		data	- pointer to user provided cd read structure specifying
27700  *			  the lba buffer address and length.
27701  *		flag	- this argument is a pass through to ddi_copyxxx()
27702  *			  directly from the mode argument of ioctl().
27703  *
27704  * Return Code: the code returned by sd_send_scsi_cmd()
27705  *		EFAULT if ddi_copyxxx() fails
27706  *		ENXIO if fail ddi_get_soft_state
27707  *		EINVAL if data pointer is NULL
27708  *		EIO if fail to reset block size
27709  *		EAGAIN if commands are in progress in the driver
27710  */
27711 
static int
sr_read_mode2(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_read	mode2_struct;
	struct cdrom_read	*mode2 = &mode2_struct;
	int			rval;
	uint32_t		restore_blksize;
	struct uscsi_cmd	*com;
	uchar_t			cdb[CDB_GROUP0];
	int			nblocks;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_read32	cdrom_read32;
	struct cdrom_read32	*cdrd32 = &cdrom_read32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	/* Fail if the unit is gone or has been taken offline. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/*
	 * Because this routine will update the device and driver block size
	 * being used we want to make sure there are no commands in progress.
	 * If commands are in progress the user will have to try again.
	 *
	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
	 * in sdioctl to protect commands from sdioctl through to the top of
	 * sd_uscsi_strategy. See sdioctl for details.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_ncmds_in_driver != 1) {
		mutex_exit(SD_MUTEX(un));
		return (EAGAIN);
	}
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: entry: un:0x%p\n", un);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_read32tocdrom_read(cdrd32, mode2);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/* Store the current target block size for restoration later */
	restore_blksize = un->un_tgt_blocksize;

	/* Change the device and soft state target block size to 2336 */
	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
		rval = EIO;
		goto done;
	}


	bzero(cdb, sizeof (cdb));

	/* set READ operation */
	cdb[0] = SCMD_READ;

	/* adjust lba for 2kbyte blocks from 512 byte blocks */
	mode2->cdread_lba >>= 2;

	/* set the start address (21-bit LBA in a GROUP0 CDB) */
	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);

	/* set the transfer length (whole 2336-byte sectors, 8-bit count) */
	nblocks = mode2->cdread_buflen / 2336;
	cdb[4] = (uchar_t)nblocks & 0xFF;

	/* build command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = (caddr_t)cdb;
	com->uscsi_cdblen = sizeof (cdb);
	com->uscsi_bufaddr = mode2->cdread_bufaddr;
	com->uscsi_buflen = mode2->cdread_buflen;
	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;

	/*
	 * Issue SCSI command with user space address for read buffer.
	 *
	 * This sends the command through main channel in the driver.
	 *
	 * Since this is accessed via an IOCTL call, we go through the
	 * standard path, so that if the device was powered down, then
	 * it would be 'awakened' to handle the command.
	 */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);

	kmem_free(com, sizeof (*com));

	/* Restore the device and soft state target block size */
	if (sr_sector_mode(dev, restore_blksize) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "can't do switch back to mode 1\n");
		/*
		 * If sd_send_scsi_READ succeeded we still need to report
		 * an error because we failed to reset the block size
		 */
		if (rval == 0) {
			rval = EIO;
		}
	}

done:
	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: exit: un:0x%p\n", un);

	return (rval);
}
27847 
27848 
27849 /*
27850  *    Function: sr_sector_mode()
27851  *
27852  * Description: This utility function is used by sr_read_mode2 to set the target
27853  *		block size based on the user specified size. This is a legacy
27854  *		implementation based upon a vendor specific mode page
27855  *
27856  *   Arguments: dev	- the device 'dev_t'
27857  *		data	- flag indicating if block size is being set to 2336 or
27858  *			  512.
27859  *
27860  * Return Code: the code returned by sd_send_scsi_cmd()
27861  *		EFAULT if ddi_copyxxx() fails
27862  *		ENXIO if fail ddi_get_soft_state
27863  *		EINVAL if data pointer is NULL
27864  */
27865 
27866 static int
27867 sr_sector_mode(dev_t dev, uint32_t blksize)
27868 {
27869 	struct sd_lun	*un;
27870 	uchar_t		*sense;
27871 	uchar_t		*select;
27872 	int		rval;
27873 	sd_ssc_t	*ssc;
27874 
27875 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27876 	    (un->un_state == SD_STATE_OFFLINE)) {
27877 		return (ENXIO);
27878 	}
27879 
27880 	sense = kmem_zalloc(20, KM_SLEEP);
27881 
27882 	/* Note: This is a vendor specific mode page (0x81) */
27883 	ssc = sd_ssc_init(un);
27884 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, 20, 0x81,
27885 	    SD_PATH_STANDARD);
27886 	sd_ssc_fini(ssc);
27887 	if (rval != 0) {
27888 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
27889 		    "sr_sector_mode: Mode Sense failed\n");
27890 		kmem_free(sense, 20);
27891 		return (rval);
27892 	}
27893 	select = kmem_zalloc(20, KM_SLEEP);
27894 	select[3] = 0x08;
27895 	select[10] = ((blksize >> 8) & 0xff);
27896 	select[11] = (blksize & 0xff);
27897 	select[12] = 0x01;
27898 	select[13] = 0x06;
27899 	select[14] = sense[14];
27900 	select[15] = sense[15];
27901 	if (blksize == SD_MODE2_BLKSIZE) {
27902 		select[14] |= 0x01;
27903 	}
27904 
27905 	ssc = sd_ssc_init(un);
27906 	rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select, 20,
27907 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27908 	sd_ssc_fini(ssc);
27909 	if (rval != 0) {
27910 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
27911 		    "sr_sector_mode: Mode Select failed\n");
27912 	} else {
27913 		/*
27914 		 * Only update the softstate block size if we successfully
27915 		 * changed the device block mode.
27916 		 */
27917 		mutex_enter(SD_MUTEX(un));
27918 		sd_update_block_info(un, blksize, 0);
27919 		mutex_exit(SD_MUTEX(un));
27920 	}
27921 	kmem_free(sense, 20);
27922 	kmem_free(select, 20);
27923 	return (rval);
27924 }
27925 
27926 
27927 /*
27928  *    Function: sr_read_cdda()
27929  *
27930  * Description: This routine is the driver entry point for handling CD-ROM
27931  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
27932  *		the target supports CDDA these requests are handled via a vendor
27933  *		specific command (0xD8) If the target does not support CDDA
27934  *		these requests are handled via the READ CD command (0xBE).
27935  *
27936  *   Arguments: dev	- the device 'dev_t'
27937  *		data	- pointer to user provided CD-DA structure specifying
27938  *			  the track starting address, transfer length, and
27939  *			  subcode options.
27940  *		flag	- this argument is a pass through to ddi_copyxxx()
27941  *			  directly from the mode argument of ioctl().
27942  *
27943  * Return Code: the code returned by sd_send_scsi_cmd()
27944  *		EFAULT if ddi_copyxxx() fails
27945  *		ENXIO if fail ddi_get_soft_state
27946  *		EINVAL if invalid arguments are provided
27947  *		ENOTTY
27948  */
27949 
static int
sr_read_cdda(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com;
	struct cdrom_cdda		*cdda;
	int				rval;
	size_t				buflen;
	char				cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdda32	cdrom_cdda32;
	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	/*
	 * Copy in the user's request, converting from the ILP32 layout
	 * when the ioctl comes from a 32-bit application.
	 */
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: ddi_copyin Failed\n");
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdda->cdda_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdda->cdda_length, 0xFFFFFF);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/*
	 * Size the transfer from the requested block count and the
	 * per-block payload implied by the subcode option (the constant
	 * names suggest 2352 audio bytes plus optional subcode bytes).
	 */
	switch (cdda->cdda_subcode) {
	case CDROM_DA_NO_SUBCODE:
		buflen = CDROM_BLK_2352 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBQ:
		buflen = CDROM_BLK_2368 * cdda->cdda_length;
		break;
	case CDROM_DA_ALL_SUBCODE:
		buflen = CDROM_BLK_2448 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBCODE_ONLY:
		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
		    cdda->cdda_subcode);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/* Build and send the command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_cfg_cdda == TRUE) {
		/*
		 * Device configured for the MMC READ CD command.  Bytes
		 * 2-5 carry the start address, bytes 6-8 the 24-bit
		 * block count, and byte 10 selects which subchannel
		 * data to return.  CDROM_DA_SUBCODE_ONLY has no READ CD
		 * encoding here and is rejected with ENOTTY.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[1] = 0x04;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
		cdb[9] = 0x10;
		switch (cdda->cdda_subcode) {
		case CDROM_DA_NO_SUBCODE :
			cdb[10] = 0x0;
			break;
		case CDROM_DA_SUBQ :
			cdb[10] = 0x2;
			break;
		case CDROM_DA_ALL_SUBCODE :
			cdb[10] = 0x1;
			break;
		case CDROM_DA_SUBCODE_ONLY :
			/* FALLTHROUGH */
		default :
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			kmem_free(com, sizeof (*com));
			return (ENOTTY);
		}
	} else {
		/*
		 * Vendor-specific READ CDDA command: the block count is
		 * a full 32-bit field (bytes 6-9) and byte 10 carries
		 * the user's subcode option verbatim.
		 */
		cdb[0] = (char)SCMD_READ_CDDA;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
		cdb[10] = cdda->cdda_subcode;
	}

	/* The data transfer goes directly to the user's cdda_data buffer. */
	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
	com->uscsi_buflen = buflen;
	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);

	kmem_free(cdda, sizeof (struct cdrom_cdda));
	kmem_free(com, sizeof (*com));
	return (rval);
}
28097 
28098 
28099 /*
28100  *    Function: sr_read_cdxa()
28101  *
28102  * Description: This routine is the driver entry point for handling CD-ROM
28103  *		ioctl requests to return CD-XA (Extended Architecture) data.
28104  *		(CDROMCDXA).
28105  *
28106  *   Arguments: dev	- the device 'dev_t'
28107  *		data	- pointer to user provided CD-XA structure specifying
28108  *			  the data starting address, transfer length, and format
28109  *		flag	- this argument is a pass through to ddi_copyxxx()
28110  *			  directly from the mode argument of ioctl().
28111  *
28112  * Return Code: the code returned by sd_send_scsi_cmd()
28113  *		EFAULT if ddi_copyxxx() fails
28114  *		ENXIO if fail ddi_get_soft_state
28115  *		EINVAL if data pointer is NULL
28116  */
28117 
static int
sr_read_cdxa(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_cdxa	*cdxa;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];
	uchar_t			read_flags;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdxa32		cdrom_cdxa32;
	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	/*
	 * Copy in the user's request, converting from the ILP32 layout
	 * when the ioctl comes from a 32-bit application.
	 */
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		/*
		 * Convert the ILP32 uscsi data from the
		 * application to LP64 for internal use.
		 */
		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdxa->cdxa_length, 0xFFFFFF);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	/*
	 * Choose the per-block transfer size and the READ CD flag byte
	 * (used only on the MMC path below) from the requested format.
	 */
	switch (cdxa->cdxa_format) {
	case CDROM_XA_DATA:
		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
		read_flags = 0x10;
		break;
	case CDROM_XA_SECTOR_DATA:
		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
		read_flags = 0xf8;
		break;
	case CDROM_XA_DATA_W_ERROR:
		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
		read_flags = 0xfc;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
		    cdxa->cdxa_format);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);
	if (un->un_f_mmc_cap == TRUE) {
		/*
		 * MMC-capable device: use READ CD with a 24-bit block
		 * count (bytes 6-8) and the format-derived flag byte.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[9] = (char)read_flags;
	} else {
		/*
		 * Note: A vendor specific command (0xDB) is being used here to
		 * request a read of all subcodes.  The block count is a
		 * full 32-bit field (bytes 6-9) and byte 10 carries the
		 * requested format verbatim.
		 */
		cdb[0] = (char)SCMD_READ_CDXA;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[10] = cdxa->cdxa_format;
	}
	/* The data transfer goes directly to the user's cdxa_data buffer. */
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
	kmem_free(com, sizeof (*com));
	return (rval);
}
28244 
28245 
28246 /*
28247  *    Function: sr_eject()
28248  *
28249  * Description: This routine is the driver entry point for handling CD-ROM
28250  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28251  *
28252  *   Arguments: dev	- the device 'dev_t'
28253  *
28254  * Return Code: the code returned by sd_send_scsi_cmd()
28255  */
28256 
28257 static int
28258 sr_eject(dev_t dev)
28259 {
28260 	struct sd_lun	*un;
28261 	int		rval;
28262 	sd_ssc_t	*ssc;
28263 
28264 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28265 	    (un->un_state == SD_STATE_OFFLINE)) {
28266 		return (ENXIO);
28267 	}
28268 
28269 	/*
28270 	 * To prevent race conditions with the eject
28271 	 * command, keep track of an eject command as
28272 	 * it progresses. If we are already handling
28273 	 * an eject command in the driver for the given
28274 	 * unit and another request to eject is received
28275 	 * immediately return EAGAIN so we don't lose
28276 	 * the command if the current eject command fails.
28277 	 */
28278 	mutex_enter(SD_MUTEX(un));
28279 	if (un->un_f_ejecting == TRUE) {
28280 		mutex_exit(SD_MUTEX(un));
28281 		return (EAGAIN);
28282 	}
28283 	un->un_f_ejecting = TRUE;
28284 	mutex_exit(SD_MUTEX(un));
28285 
28286 	ssc = sd_ssc_init(un);
28287 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
28288 	    SD_PATH_STANDARD);
28289 	sd_ssc_fini(ssc);
28290 
28291 	if (rval != 0) {
28292 		mutex_enter(SD_MUTEX(un));
28293 		un->un_f_ejecting = FALSE;
28294 		mutex_exit(SD_MUTEX(un));
28295 		return (rval);
28296 	}
28297 
28298 	ssc = sd_ssc_init(un);
28299 	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
28300 	    SD_TARGET_EJECT, SD_PATH_STANDARD);
28301 	sd_ssc_fini(ssc);
28302 
28303 	if (rval == 0) {
28304 		mutex_enter(SD_MUTEX(un));
28305 		sr_ejected(un);
28306 		un->un_mediastate = DKIO_EJECTED;
28307 		un->un_f_ejecting = FALSE;
28308 		cv_broadcast(&un->un_state_cv);
28309 		mutex_exit(SD_MUTEX(un));
28310 	} else {
28311 		mutex_enter(SD_MUTEX(un));
28312 		un->un_f_ejecting = FALSE;
28313 		mutex_exit(SD_MUTEX(un));
28314 	}
28315 	return (rval);
28316 }
28317 
28318 
28319 /*
28320  *    Function: sr_ejected()
28321  *
28322  * Description: This routine updates the soft state structure to invalidate the
28323  *		geometry information after the media has been ejected or a
28324  *		media eject has been detected.
28325  *
28326  *   Arguments: un - driver soft state (unit) structure
28327  */
28328 
28329 static void
28330 sr_ejected(struct sd_lun *un)
28331 {
28332 	struct sd_errstats *stp;
28333 
28334 	ASSERT(un != NULL);
28335 	ASSERT(mutex_owned(SD_MUTEX(un)));
28336 
28337 	un->un_f_blockcount_is_valid	= FALSE;
28338 	un->un_f_tgt_blocksize_is_valid	= FALSE;
28339 	mutex_exit(SD_MUTEX(un));
28340 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
28341 	mutex_enter(SD_MUTEX(un));
28342 
28343 	if (un->un_errstats != NULL) {
28344 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
28345 		stp->sd_capacity.value.ui64 = 0;
28346 	}
28347 }
28348 
28349 
28350 /*
28351  *    Function: sr_check_wp()
28352  *
28353  * Description: This routine checks the write protection of a removable
28354  *      media disk and hotpluggable devices via the write protect bit of
28355  *      the Mode Page Header device specific field. Some devices choke
28356  *      on unsupported mode page. In order to workaround this issue,
28357  *      this routine has been implemented to use 0x3f mode page(request
28358  *      for all pages) for all device types.
28359  *
28360  *   Arguments: dev             - the device 'dev_t'
28361  *
28362  * Return Code: int indicating if the device is write protected (1) or not (0)
28363  *
28364  *     Context: Kernel thread.
28365  *
28366  */
28367 
28368 static int
28369 sr_check_wp(dev_t dev)
28370 {
28371 	struct sd_lun	*un;
28372 	uchar_t		device_specific;
28373 	uchar_t		*sense;
28374 	int		hdrlen;
28375 	int		rval = FALSE;
28376 	int		status;
28377 	sd_ssc_t	*ssc;
28378 
28379 	/*
28380 	 * Note: The return codes for this routine should be reworked to
28381 	 * properly handle the case of a NULL softstate.
28382 	 */
28383 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28384 		return (FALSE);
28385 	}
28386 
28387 	if (un->un_f_cfg_is_atapi == TRUE) {
28388 		/*
28389 		 * The mode page contents are not required; set the allocation
28390 		 * length for the mode page header only
28391 		 */
28392 		hdrlen = MODE_HEADER_LENGTH_GRP2;
28393 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
28394 		ssc = sd_ssc_init(un);
28395 		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense, hdrlen,
28396 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
28397 		sd_ssc_fini(ssc);
28398 		if (status != 0)
28399 			goto err_exit;
28400 		device_specific =
28401 		    ((struct mode_header_grp2 *)sense)->device_specific;
28402 	} else {
28403 		hdrlen = MODE_HEADER_LENGTH;
28404 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
28405 		ssc = sd_ssc_init(un);
28406 		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, hdrlen,
28407 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
28408 		sd_ssc_fini(ssc);
28409 		if (status != 0)
28410 			goto err_exit;
28411 		device_specific =
28412 		    ((struct mode_header *)sense)->device_specific;
28413 	}
28414 
28415 
28416 	/*
28417 	 * Write protect mode sense failed; not all disks
28418 	 * understand this query. Return FALSE assuming that
28419 	 * these devices are not writable.
28420 	 */
28421 	if (device_specific & WRITE_PROTECT) {
28422 		rval = TRUE;
28423 	}
28424 
28425 err_exit:
28426 	kmem_free(sense, hdrlen);
28427 	return (rval);
28428 }
28429 
28430 /*
28431  *    Function: sr_volume_ctrl()
28432  *
28433  * Description: This routine is the driver entry point for handling CD-ROM
28434  *		audio output volume ioctl requests. (CDROMVOLCTRL)
28435  *
28436  *   Arguments: dev	- the device 'dev_t'
28437  *		data	- pointer to user audio volume control structure
28438  *		flag	- this argument is a pass through to ddi_copyxxx()
28439  *			  directly from the mode argument of ioctl().
28440  *
28441  * Return Code: the code returned by sd_send_scsi_cmd()
28442  *		EFAULT if ddi_copyxxx() fails
28443  *		ENXIO if fail ddi_get_soft_state
28444  *		EINVAL if data pointer is NULL
28445  *
28446  */
28447 
static int
sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_volctrl    volume;
	struct cdrom_volctrl    *vol = &volume;
	uchar_t			*sense_page;
	uchar_t			*select_page;
	uchar_t			*sense;
	uchar_t			*select;
	int			sense_buflen;
	int			select_buflen;
	int			rval;
	sd_ssc_t		*ssc;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Copy in the user supplied volume settings. */
	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
		return (EFAULT);
	}

	/*
	 * First fetch the current audio control mode page so fields the
	 * user is not changing can be carried over into the mode select.
	 * ATAPI/MMC devices use the group 1 (10-byte) mode commands and
	 * the grp2 header; others use the group 0 (6-byte) forms.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		struct mode_header_grp2		*sense_mhp;
		struct mode_header_grp2		*select_mhp;
		int				bd_len;

		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH_GRP2 +
		    MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);

		if (rval != 0) {
			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp = (struct mode_header_grp2 *)sense;
		select_mhp = (struct mode_header_grp2 *)select;
		/*
		 * The audio page follows the header and any block
		 * descriptors; reject a descriptor length that would
		 * put the page beyond our buffer.
		 */
		bd_len = (sense_mhp->bdesc_length_hi << 8) |
		    sense_mhp->bdesc_length_lo;
		if (bd_len > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
		/* The select data is sent with zeroed header length fields. */
		select_mhp->length_msb = 0;
		select_mhp->length_lsb = 0;
		select_mhp->bdesc_length_hi = 0;
		select_mhp->bdesc_length_lo = 0;
	} else {
		struct mode_header		*sense_mhp, *select_mhp;

		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);

		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp  = (struct mode_header *)sense;
		select_mhp = (struct mode_header *)select;
		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
		/* The select data is sent with zeroed header length fields. */
		select_mhp->length = 0;
		select_mhp->bdesc_length = 0;
	}
	/*
	 * Note: An audio control data structure could be created and overlayed
	 * on the following in place of the array indexing method implemented.
	 */

	/* Build the select data for the user volume data */
	select_page[0] = MODEPAGE_AUDIO_CTRL;
	select_page[1] = 0xE;
	/* Set the immediate bit */
	select_page[2] = 0x04;
	/* Zero out reserved fields */
	select_page[3] = 0x00;
	select_page[4] = 0x00;
	/* Return sense data for fields not to be modified */
	select_page[5] = sense_page[5];
	select_page[6] = sense_page[6];
	select_page[7] = sense_page[7];
	/* Set the user specified volume levels for channel 0 and 1 */
	select_page[8] = 0x01;
	select_page[9] = vol->channel0;
	select_page[10] = 0x02;
	select_page[11] = vol->channel1;
	/* Channel 2 and 3 are currently unsupported so return the sense data */
	select_page[12] = sense_page[12];
	select_page[13] = sense_page[13];
	select_page[14] = sense_page[14];
	select_page[15] = sense_page[15];

	/* Send the assembled page back with MODE SELECT (not saved). */
	ssc = sd_ssc_init(un);
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	} else {
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	}
	sd_ssc_fini(ssc);

	kmem_free(sense, sense_buflen);
	kmem_free(select, select_buflen);
	return (rval);
}
28596 
28597 
28598 /*
28599  *    Function: sr_read_sony_session_offset()
28600  *
28601  * Description: This routine is the driver entry point for handling CD-ROM
28602  *		ioctl requests for session offset information. (CDROMREADOFFSET)
28603  *		The address of the first track in the last session of a
28604  *		multi-session CD-ROM is returned
28605  *
28606  *		Note: This routine uses a vendor specific key value in the
28607  *		command control field without implementing any vendor check here
28608  *		or in the ioctl routine.
28609  *
28610  *   Arguments: dev	- the device 'dev_t'
28611  *		data	- pointer to an int to hold the requested address
28612  *		flag	- this argument is a pass through to ddi_copyxxx()
28613  *			  directly from the mode argument of ioctl().
28614  *
28615  * Return Code: the code returned by sd_send_scsi_cmd()
28616  *		EFAULT if ddi_copyxxx() fails
28617  *		ENXIO if fail ddi_get_soft_state
28618  *		EINVAL if data pointer is NULL
28619  */
28620 
28621 static int
28622 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
28623 {
28624 	struct sd_lun		*un;
28625 	struct uscsi_cmd	*com;
28626 	caddr_t			buffer;
28627 	char			cdb[CDB_GROUP1];
28628 	int			session_offset = 0;
28629 	int			rval;
28630 
28631 	if (data == NULL) {
28632 		return (EINVAL);
28633 	}
28634 
28635 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28636 	    (un->un_state == SD_STATE_OFFLINE)) {
28637 		return (ENXIO);
28638 	}
28639 
28640 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
28641 	bzero(cdb, CDB_GROUP1);
28642 	cdb[0] = SCMD_READ_TOC;
28643 	/*
28644 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28645 	 * (4 byte TOC response header + 8 byte response data)
28646 	 */
28647 	cdb[8] = SONY_SESSION_OFFSET_LEN;
28648 	/* Byte 9 is the control byte. A vendor specific value is used */
28649 	cdb[9] = SONY_SESSION_OFFSET_KEY;
28650 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28651 	com->uscsi_cdb = cdb;
28652 	com->uscsi_cdblen = CDB_GROUP1;
28653 	com->uscsi_bufaddr = buffer;
28654 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
28655 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28656 
28657 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28658 	    SD_PATH_STANDARD);
28659 	if (rval != 0) {
28660 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
28661 		kmem_free(com, sizeof (*com));
28662 		return (rval);
28663 	}
28664 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
28665 		session_offset =
28666 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28667 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28668 		/*
28669 		 * Offset returned offset in current lbasize block's. Convert to
28670 		 * 2k block's to return to the user
28671 		 */
28672 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
28673 			session_offset >>= 2;
28674 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
28675 			session_offset >>= 1;
28676 		}
28677 	}
28678 
28679 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
28680 		rval = EFAULT;
28681 	}
28682 
28683 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
28684 	kmem_free(com, sizeof (*com));
28685 	return (rval);
28686 }
28687 
28688 
28689 /*
28690  *    Function: sd_wm_cache_constructor()
28691  *
28692  * Description: Cache Constructor for the wmap cache for the read/modify/write
28693  *		devices.
28694  *
28695  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
28696  *		un	- sd_lun structure for the device.
28697  *		flag	- the km flags passed to constructor
28698  *
28699  * Return Code: 0 on success.
28700  *		-1 on failure.
28701  */
28702 
28703 /*ARGSUSED*/
28704 static int
28705 sd_wm_cache_constructor(void *wm, void *un, int flags)
28706 {
28707 	bzero(wm, sizeof (struct sd_w_map));
28708 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
28709 	return (0);
28710 }
28711 
28712 
28713 /*
28714  *    Function: sd_wm_cache_destructor()
28715  *
28716  * Description: Cache destructor for the wmap cache for the read/modify/write
28717  *		devices.
28718  *
28719  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
28720  *		un	- sd_lun structure for the device.
28721  */
28722 /*ARGSUSED*/
28723 static void
28724 sd_wm_cache_destructor(void *wm, void *un)
28725 {
28726 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
28727 }
28728 
28729 
28730 /*
28731  *    Function: sd_range_lock()
28732  *
28733  * Description: Lock the range of blocks specified as parameter to ensure
28734  *		that read, modify write is atomic and no other i/o writes
28735  *		to the same location. The range is specified in terms
28736  *		of start and end blocks. Block numbers are the actual
28737  *		media block numbers and not system.
28738  *
28739  *   Arguments: un	- sd_lun structure for the device.
28740  *		startb - The starting block number
28741  *		endb - The end block number
28742  *		typ - type of i/o - simple/read_modify_write
28743  *
28744  * Return Code: wm  - pointer to the wmap structure.
28745  *
28746  *     Context: This routine can sleep.
28747  */
28748 
static struct sd_w_map *
sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
{
	struct sd_w_map *wmp = NULL;	/* map we will eventually own */
	struct sd_w_map *sl_wmp = NULL;	/* busy map we sleep on, if any */
	struct sd_w_map *tmp_wmp;
	wm_state state = SD_WM_CHK_LIST;


	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/*
	 * Small state machine, run entirely under SD_MUTEX except where
	 * noted: check the list for a conflicting busy range, then
	 * either lock our range or sleep until the conflict clears.
	 */
	while (state != SD_WM_DONE) {

		switch (state) {
		case SD_WM_CHK_LIST:
			/*
			 * This is the starting state. Check the wmap list
			 * to see if the range is currently available.
			 */
			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
				/*
				 * If this is a simple write and no rmw
				 * i/o is pending then try to lock the
				 * range as the range should be available.
				 */
				state = SD_WM_LOCK_RANGE;
			} else {
				tmp_wmp = sd_get_range(un, startb, endb);
				if (tmp_wmp != NULL) {
					if ((wmp != NULL) && ONLIST(un, wmp)) {
						/*
						 * Should not keep onlist wmps
						 * while waiting this macro
						 * will also do wmp = NULL;
						 */
						FREE_ONLIST_WMAP(un, wmp);
					}
					/*
					 * sl_wmp is the wmap on which wait
					 * is done, since the tmp_wmp points
					 * to the inuse wmap, set sl_wmp to
					 * tmp_wmp and change the state to sleep
					 */
					sl_wmp = tmp_wmp;
					state = SD_WM_WAIT_MAP;
				} else {
					state = SD_WM_LOCK_RANGE;
				}

			}
			break;

		case SD_WM_LOCK_RANGE:
			ASSERT(un->un_wm_cache);
			/*
			 * The range need to be locked, try to get a wmap.
			 * First attempt it with NO_SLEEP, want to avoid a sleep
			 * if possible as we will have to release the sd mutex
			 * if we have to sleep.
			 */
			if (wmp == NULL)
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_NOSLEEP);
			if (wmp == NULL) {
				mutex_exit(SD_MUTEX(un));
				_NOTE(DATA_READABLE_WITHOUT_LOCK
				    (sd_lun::un_wm_cache))
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_SLEEP);
				mutex_enter(SD_MUTEX(un));
				/*
				 * we released the mutex so recheck and go to
				 * check list state.
				 */
				state = SD_WM_CHK_LIST;
			} else {
				/*
				 * We exit out of state machine since we
				 * have the wmap. Do the housekeeping first.
				 * place the wmap on the wmap list if it is not
				 * on it already and then set the state to done.
				 */
				wmp->wm_start = startb;
				wmp->wm_end = endb;
				wmp->wm_flags = typ | SD_WM_BUSY;
				if (typ & SD_WTYPE_RMW) {
					un->un_rmw_count++;
				}
				/*
				 * If not already on the list then link
				 */
				if (!ONLIST(un, wmp)) {
					wmp->wm_next = un->un_wm;
					wmp->wm_prev = NULL;
					if (wmp->wm_next)
						wmp->wm_next->wm_prev = wmp;
					un->un_wm = wmp;
				}
				state = SD_WM_DONE;
			}
			break;

		case SD_WM_WAIT_MAP:
			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
			/*
			 * Wait is done on sl_wmp, which is set in the
			 * check_list state.  The wanted count guards the
			 * map from being freed while we sleep on it.
			 */
			sl_wmp->wm_wanted_count++;
			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
			sl_wmp->wm_wanted_count--;
			/*
			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if no one is
			 * waiting for it.
			 */
			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
			if (sl_wmp->wm_wanted_count == 0) {
				if (wmp != NULL) {
					CHK_N_FREEWMP(un, wmp);
				}
				wmp = sl_wmp;
			}
			sl_wmp = NULL;
			/*
			 * After waking up, need to recheck for availability of
			 * range.
			 */
			state = SD_WM_CHK_LIST;
			break;

		default:
			panic("sd_range_lock: "
			    "Unknown state %d in sd_range_lock", state);
			/*NOTREACHED*/
		} /* switch(state) */

	} /* while(state != SD_WM_DONE) */

	mutex_exit(SD_MUTEX(un));

	ASSERT(wmp != NULL);

	return (wmp);
}
28897 
28898 
28899 /*
28900  *    Function: sd_get_range()
28901  *
 * Description: Find if there is any overlapping I/O to this one
28903  *		Returns the write-map of 1st such I/O, NULL otherwise.
28904  *
28905  *   Arguments: un	- sd_lun structure for the device.
28906  *		startb - The starting block number
28907  *		endb - The end block number
28908  *
28909  * Return Code: wm  - pointer to the wmap structure.
28910  */
28911 
28912 static struct sd_w_map *
28913 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
28914 {
28915 	struct sd_w_map *wmp;
28916 
28917 	ASSERT(un != NULL);
28918 
28919 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
28920 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
28921 			continue;
28922 		}
28923 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
28924 			break;
28925 		}
28926 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
28927 			break;
28928 		}
28929 	}
28930 
28931 	return (wmp);
28932 }
28933 
28934 
28935 /*
28936  *    Function: sd_free_inlist_wmap()
28937  *
28938  * Description: Unlink and free a write map struct.
28939  *
28940  *   Arguments: un      - sd_lun structure for the device.
28941  *		wmp	- sd_w_map which needs to be unlinked.
28942  */
28943 
28944 static void
28945 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
28946 {
28947 	ASSERT(un != NULL);
28948 
28949 	if (un->un_wm == wmp) {
28950 		un->un_wm = wmp->wm_next;
28951 	} else {
28952 		wmp->wm_prev->wm_next = wmp->wm_next;
28953 	}
28954 
28955 	if (wmp->wm_next) {
28956 		wmp->wm_next->wm_prev = wmp->wm_prev;
28957 	}
28958 
28959 	wmp->wm_next = wmp->wm_prev = NULL;
28960 
28961 	kmem_cache_free(un->un_wm_cache, wmp);
28962 }
28963 
28964 
28965 /*
28966  *    Function: sd_range_unlock()
28967  *
28968  * Description: Unlock the range locked by wm.
28969  *		Free write map if nobody else is waiting on it.
28970  *
28971  *   Arguments: un      - sd_lun structure for the device.
28972  *              wmp     - sd_w_map which needs to be unlinked.
28973  */
28974 
28975 static void
28976 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
28977 {
28978 	ASSERT(un != NULL);
28979 	ASSERT(wm != NULL);
28980 	ASSERT(!mutex_owned(SD_MUTEX(un)));
28981 
28982 	mutex_enter(SD_MUTEX(un));
28983 
28984 	if (wm->wm_flags & SD_WTYPE_RMW) {
28985 		un->un_rmw_count--;
28986 	}
28987 
28988 	if (wm->wm_wanted_count) {
28989 		wm->wm_flags = 0;
28990 		/*
28991 		 * Broadcast that the wmap is available now.
28992 		 */
28993 		cv_broadcast(&wm->wm_avail);
28994 	} else {
28995 		/*
28996 		 * If no one is waiting on the map, it should be free'ed.
28997 		 */
28998 		sd_free_inlist_wmap(un, wm);
28999 	}
29000 
29001 	mutex_exit(SD_MUTEX(un));
29002 }
29003 
29004 
29005 /*
29006  *    Function: sd_read_modify_write_task
29007  *
29008  * Description: Called from a taskq thread to initiate the write phase of
29009  *		a read-modify-write request.  This is used for targets where
29010  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29011  *
29012  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29013  *
29014  *     Context: Called under taskq thread context.
29015  */
29016 
29017 static void
29018 sd_read_modify_write_task(void *arg)
29019 {
29020 	struct sd_mapblocksize_info	*bsp;
29021 	struct buf	*bp;
29022 	struct sd_xbuf	*xp;
29023 	struct sd_lun	*un;
29024 
29025 	bp = arg;	/* The bp is given in arg */
29026 	ASSERT(bp != NULL);
29027 
29028 	/* Get the pointer to the layer-private data struct */
29029 	xp = SD_GET_XBUF(bp);
29030 	ASSERT(xp != NULL);
29031 	bsp = xp->xb_private;
29032 	ASSERT(bsp != NULL);
29033 
29034 	un = SD_GET_UN(bp);
29035 	ASSERT(un != NULL);
29036 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29037 
29038 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29039 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29040 
29041 	/*
29042 	 * This is the write phase of a read-modify-write request, called
29043 	 * under the context of a taskq thread in response to the completion
29044 	 * of the read portion of the rmw request completing under interrupt
29045 	 * context. The write request must be sent from here down the iostart
29046 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29047 	 * we use the layer index saved in the layer-private data area.
29048 	 */
29049 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29050 
29051 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29052 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29053 }
29054 
29055 
29056 /*
29057  *    Function: sddump_do_read_of_rmw()
29058  *
29059  * Description: This routine will be called from sddump, If sddump is called
29060  *		with an I/O which not aligned on device blocksize boundary
29061  *		then the write has to be converted to read-modify-write.
29062  *		Do the read part here in order to keep sddump simple.
29063  *		Note - That the sd_mutex is held across the call to this
29064  *		routine.
29065  *
29066  *   Arguments: un	- sd_lun
29067  *		blkno	- block number in terms of media block size.
29068  *		nblk	- number of blocks.
29069  *		bpp	- pointer to pointer to the buf structure. On return
29070  *			from this function, *bpp points to the valid buffer
29071  *			to which the write has to be done.
29072  *
29073  * Return Code: 0 for success or errno-type return code
29074  */
29075 
29076 static int
29077 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29078     struct buf **bpp)
29079 {
29080 	int err;
29081 	int i;
29082 	int rval;
29083 	struct buf *bp;
29084 	struct scsi_pkt *pkt = NULL;
29085 	uint32_t target_blocksize;
29086 
29087 	ASSERT(un != NULL);
29088 	ASSERT(mutex_owned(SD_MUTEX(un)));
29089 
29090 	target_blocksize = un->un_tgt_blocksize;
29091 
29092 	mutex_exit(SD_MUTEX(un));
29093 
29094 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29095 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29096 	if (bp == NULL) {
29097 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29098 		    "no resources for dumping; giving up");
29099 		err = ENOMEM;
29100 		goto done;
29101 	}
29102 
29103 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29104 	    blkno, nblk);
29105 	if (rval != 0) {
29106 		scsi_free_consistent_buf(bp);
29107 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29108 		    "no resources for dumping; giving up");
29109 		err = ENOMEM;
29110 		goto done;
29111 	}
29112 
29113 	pkt->pkt_flags |= FLAG_NOINTR;
29114 
29115 	err = EIO;
29116 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29117 
29118 		/*
29119 		 * Scsi_poll returns 0 (success) if the command completes and
29120 		 * the status block is STATUS_GOOD.  We should only check
29121 		 * errors if this condition is not true.  Even then we should
29122 		 * send our own request sense packet only if we have a check
29123 		 * condition and auto request sense has not been performed by
29124 		 * the hba.
29125 		 */
29126 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29127 
29128 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29129 			err = 0;
29130 			break;
29131 		}
29132 
29133 		/*
29134 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
29135 		 * no need to read RQS data.
29136 		 */
29137 		if (pkt->pkt_reason == CMD_DEV_GONE) {
29138 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29139 			    "Error while dumping state with rmw..."
29140 			    "Device is gone\n");
29141 			break;
29142 		}
29143 
29144 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29145 			SD_INFO(SD_LOG_DUMP, un,
29146 			    "sddump: read failed with CHECK, try # %d\n", i);
29147 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29148 				(void) sd_send_polled_RQS(un);
29149 			}
29150 
29151 			continue;
29152 		}
29153 
29154 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
29155 			int reset_retval = 0;
29156 
29157 			SD_INFO(SD_LOG_DUMP, un,
29158 			    "sddump: read failed with BUSY, try # %d\n", i);
29159 
29160 			if (un->un_f_lun_reset_enabled == TRUE) {
29161 				reset_retval = scsi_reset(SD_ADDRESS(un),
29162 				    RESET_LUN);
29163 			}
29164 			if (reset_retval == 0) {
29165 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
29166 			}
29167 			(void) sd_send_polled_RQS(un);
29168 
29169 		} else {
29170 			SD_INFO(SD_LOG_DUMP, un,
29171 			    "sddump: read failed with 0x%x, try # %d\n",
29172 			    SD_GET_PKT_STATUS(pkt), i);
29173 			mutex_enter(SD_MUTEX(un));
29174 			sd_reset_target(un, pkt);
29175 			mutex_exit(SD_MUTEX(un));
29176 		}
29177 
29178 		/*
29179 		 * If we are not getting anywhere with lun/target resets,
29180 		 * let's reset the bus.
29181 		 */
29182 		if (i > SD_NDUMP_RETRIES / 2) {
29183 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
29184 			(void) sd_send_polled_RQS(un);
29185 		}
29186 
29187 	}
29188 	scsi_destroy_pkt(pkt);
29189 
29190 	if (err != 0) {
29191 		scsi_free_consistent_buf(bp);
29192 		*bpp = NULL;
29193 	} else {
29194 		*bpp = bp;
29195 	}
29196 
29197 done:
29198 	mutex_enter(SD_MUTEX(un));
29199 	return (err);
29200 }
29201 
29202 
29203 /*
29204  *    Function: sd_failfast_flushq
29205  *
29206  * Description: Take all bp's on the wait queue that have B_FAILFAST set
29207  *		in b_flags and move them onto the failfast queue, then kick
29208  *		off a thread to return all bp's on the failfast queue to
29209  *		their owners with an error set.
29210  *
29211  *   Arguments: un - pointer to the soft state struct for the instance.
29212  *
29213  *     Context: may execute in interrupt context.
29214  */
29215 
29216 static void
29217 sd_failfast_flushq(struct sd_lun *un)
29218 {
29219 	struct buf *bp;
29220 	struct buf *next_waitq_bp;
29221 	struct buf *prev_waitq_bp = NULL;
29222 
29223 	ASSERT(un != NULL);
29224 	ASSERT(mutex_owned(SD_MUTEX(un)));
29225 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
29226 	ASSERT(un->un_failfast_bp == NULL);
29227 
29228 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29229 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
29230 
29231 	/*
29232 	 * Check if we should flush all bufs when entering failfast state, or
29233 	 * just those with B_FAILFAST set.
29234 	 */
29235 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
29236 		/*
29237 		 * Move *all* bp's on the wait queue to the failfast flush
29238 		 * queue, including those that do NOT have B_FAILFAST set.
29239 		 */
29240 		if (un->un_failfast_headp == NULL) {
29241 			ASSERT(un->un_failfast_tailp == NULL);
29242 			un->un_failfast_headp = un->un_waitq_headp;
29243 		} else {
29244 			ASSERT(un->un_failfast_tailp != NULL);
29245 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
29246 		}
29247 
29248 		un->un_failfast_tailp = un->un_waitq_tailp;
29249 
29250 		/* update kstat for each bp moved out of the waitq */
29251 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
29252 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29253 		}
29254 
29255 		/* empty the waitq */
29256 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
29257 
29258 	} else {
29259 		/*
29260 		 * Go thru the wait queue, pick off all entries with
29261 		 * B_FAILFAST set, and move these onto the failfast queue.
29262 		 */
29263 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
29264 			/*
29265 			 * Save the pointer to the next bp on the wait queue,
29266 			 * so we get to it on the next iteration of this loop.
29267 			 */
29268 			next_waitq_bp = bp->av_forw;
29269 
29270 			/*
29271 			 * If this bp from the wait queue does NOT have
29272 			 * B_FAILFAST set, just move on to the next element
29273 			 * in the wait queue. Note, this is the only place
29274 			 * where it is correct to set prev_waitq_bp.
29275 			 */
29276 			if ((bp->b_flags & B_FAILFAST) == 0) {
29277 				prev_waitq_bp = bp;
29278 				continue;
29279 			}
29280 
29281 			/*
29282 			 * Remove the bp from the wait queue.
29283 			 */
29284 			if (bp == un->un_waitq_headp) {
29285 				/* The bp is the first element of the waitq. */
29286 				un->un_waitq_headp = next_waitq_bp;
29287 				if (un->un_waitq_headp == NULL) {
29288 					/* The wait queue is now empty */
29289 					un->un_waitq_tailp = NULL;
29290 				}
29291 			} else {
29292 				/*
29293 				 * The bp is either somewhere in the middle
29294 				 * or at the end of the wait queue.
29295 				 */
29296 				ASSERT(un->un_waitq_headp != NULL);
29297 				ASSERT(prev_waitq_bp != NULL);
29298 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
29299 				    == 0);
29300 				if (bp == un->un_waitq_tailp) {
29301 					/* bp is the last entry on the waitq. */
29302 					ASSERT(next_waitq_bp == NULL);
29303 					un->un_waitq_tailp = prev_waitq_bp;
29304 				}
29305 				prev_waitq_bp->av_forw = next_waitq_bp;
29306 			}
29307 			bp->av_forw = NULL;
29308 
29309 			/*
29310 			 * update kstat since the bp is moved out of
29311 			 * the waitq
29312 			 */
29313 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29314 
29315 			/*
29316 			 * Now put the bp onto the failfast queue.
29317 			 */
29318 			if (un->un_failfast_headp == NULL) {
29319 				/* failfast queue is currently empty */
29320 				ASSERT(un->un_failfast_tailp == NULL);
29321 				un->un_failfast_headp =
29322 				    un->un_failfast_tailp = bp;
29323 			} else {
29324 				/* Add the bp to the end of the failfast q */
29325 				ASSERT(un->un_failfast_tailp != NULL);
29326 				ASSERT(un->un_failfast_tailp->b_flags &
29327 				    B_FAILFAST);
29328 				un->un_failfast_tailp->av_forw = bp;
29329 				un->un_failfast_tailp = bp;
29330 			}
29331 		}
29332 	}
29333 
29334 	/*
29335 	 * Now return all bp's on the failfast queue to their owners.
29336 	 */
29337 	while ((bp = un->un_failfast_headp) != NULL) {
29338 
29339 		un->un_failfast_headp = bp->av_forw;
29340 		if (un->un_failfast_headp == NULL) {
29341 			un->un_failfast_tailp = NULL;
29342 		}
29343 
29344 		/*
29345 		 * We want to return the bp with a failure error code, but
29346 		 * we do not want a call to sd_start_cmds() to occur here,
29347 		 * so use sd_return_failed_command_no_restart() instead of
29348 		 * sd_return_failed_command().
29349 		 */
29350 		sd_return_failed_command_no_restart(un, bp, EIO);
29351 	}
29352 
29353 	/* Flush the xbuf queues if required. */
29354 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
29355 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
29356 	}
29357 
29358 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29359 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
29360 }
29361 
29362 
29363 /*
29364  *    Function: sd_failfast_flushq_callback
29365  *
29366  * Description: Return TRUE if the given bp meets the criteria for failfast
29367  *		flushing. Used with ddi_xbuf_flushq(9F).
29368  *
29369  *   Arguments: bp - ptr to buf struct to be examined.
29370  *
29371  *     Context: Any
29372  */
29373 
29374 static int
29375 sd_failfast_flushq_callback(struct buf *bp)
29376 {
29377 	/*
29378 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
29379 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
29380 	 */
29381 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
29382 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
29383 }
29384 
29385 
29386 
29387 /*
29388  * Function: sd_setup_next_xfer
29389  *
29390  * Description: Prepare next I/O operation using DMA_PARTIAL
29391  *
29392  */
29393 
29394 static int
29395 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
29396     struct scsi_pkt *pkt, struct sd_xbuf *xp)
29397 {
29398 	ssize_t	num_blks_not_xfered;
29399 	daddr_t	strt_blk_num;
29400 	ssize_t	bytes_not_xfered;
29401 	int	rval;
29402 
29403 	ASSERT(pkt->pkt_resid == 0);
29404 
29405 	/*
29406 	 * Calculate next block number and amount to be transferred.
29407 	 *
29408 	 * How much data NOT transfered to the HBA yet.
29409 	 */
29410 	bytes_not_xfered = xp->xb_dma_resid;
29411 
29412 	/*
29413 	 * figure how many blocks NOT transfered to the HBA yet.
29414 	 */
29415 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
29416 
29417 	/*
29418 	 * set starting block number to the end of what WAS transfered.
29419 	 */
29420 	strt_blk_num = xp->xb_blkno +
29421 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
29422 
29423 	/*
29424 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
29425 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
29426 	 * the disk mutex here.
29427 	 */
29428 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
29429 	    strt_blk_num, num_blks_not_xfered);
29430 
29431 	if (rval == 0) {
29432 
29433 		/*
29434 		 * Success.
29435 		 *
29436 		 * Adjust things if there are still more blocks to be
29437 		 * transfered.
29438 		 */
29439 		xp->xb_dma_resid = pkt->pkt_resid;
29440 		pkt->pkt_resid = 0;
29441 
29442 		return (1);
29443 	}
29444 
29445 	/*
29446 	 * There's really only one possible return value from
29447 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
29448 	 * returns NULL.
29449 	 */
29450 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
29451 
29452 	bp->b_resid = bp->b_bcount;
29453 	bp->b_flags |= B_ERROR;
29454 
29455 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29456 	    "Error setting up next portion of DMA transfer\n");
29457 
29458 	return (0);
29459 }
29460 
29461 /*
29462  *    Function: sd_panic_for_res_conflict
29463  *
29464  * Description: Call panic with a string formatted with "Reservation Conflict"
29465  *		and a human readable identifier indicating the SD instance
29466  *		that experienced the reservation conflict.
29467  *
29468  *   Arguments: un - pointer to the soft state struct for the instance.
29469  *
29470  *     Context: may execute in interrupt context.
29471  */
29472 
29473 #define	SD_RESV_CONFLICT_FMT_LEN 40
29474 void
29475 sd_panic_for_res_conflict(struct sd_lun *un)
29476 {
29477 	char panic_str[SD_RESV_CONFLICT_FMT_LEN + MAXPATHLEN];
29478 	char path_str[MAXPATHLEN];
29479 
29480 	(void) snprintf(panic_str, sizeof (panic_str),
29481 	    "Reservation Conflict\nDisk: %s",
29482 	    ddi_pathname(SD_DEVINFO(un), path_str));
29483 
29484 	panic(panic_str);
29485 }
29486 
29487 /*
29488  * Note: The following sd_faultinjection_ioctl( ) routines implement
29489  * driver support for handling fault injection for error analysis
29490  * causing faults in multiple layers of the driver.
29491  *
29492  */
29493 
29494 #ifdef SD_FAULT_INJECTION
29495 static uint_t   sd_fault_injection_on = 0;
29496 
29497 /*
29498  *    Function: sd_faultinjection_ioctl()
29499  *
29500  * Description: This routine is the driver entry point for handling
29501  *              faultinjection ioctls to inject errors into the
29502  *              layer model
29503  *
29504  *   Arguments: cmd	- the ioctl cmd received
29505  *		arg	- the arguments from user and returns
29506  */
29507 
29508 static void
29509 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un)
29510 {
29511 	uint_t i = 0;
29512 	uint_t rval;
29513 
29514 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
29515 
29516 	mutex_enter(SD_MUTEX(un));
29517 
29518 	switch (cmd) {
29519 	case SDIOCRUN:
29520 		/* Allow pushed faults to be injected */
29521 		SD_INFO(SD_LOG_SDTEST, un,
29522 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
29523 
29524 		sd_fault_injection_on = 1;
29525 
29526 		SD_INFO(SD_LOG_IOERR, un,
29527 		    "sd_faultinjection_ioctl: run finished\n");
29528 		break;
29529 
29530 	case SDIOCSTART:
29531 		/* Start Injection Session */
29532 		SD_INFO(SD_LOG_SDTEST, un,
29533 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
29534 
29535 		sd_fault_injection_on = 0;
29536 		un->sd_injection_mask = 0xFFFFFFFF;
29537 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29538 			un->sd_fi_fifo_pkt[i] = NULL;
29539 			un->sd_fi_fifo_xb[i] = NULL;
29540 			un->sd_fi_fifo_un[i] = NULL;
29541 			un->sd_fi_fifo_arq[i] = NULL;
29542 		}
29543 		un->sd_fi_fifo_start = 0;
29544 		un->sd_fi_fifo_end = 0;
29545 
29546 		mutex_enter(&(un->un_fi_mutex));
29547 		un->sd_fi_log[0] = '\0';
29548 		un->sd_fi_buf_len = 0;
29549 		mutex_exit(&(un->un_fi_mutex));
29550 
29551 		SD_INFO(SD_LOG_IOERR, un,
29552 		    "sd_faultinjection_ioctl: start finished\n");
29553 		break;
29554 
29555 	case SDIOCSTOP:
29556 		/* Stop Injection Session */
29557 		SD_INFO(SD_LOG_SDTEST, un,
29558 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
29559 		sd_fault_injection_on = 0;
29560 		un->sd_injection_mask = 0x0;
29561 
29562 		/* Empty stray or unuseds structs from fifo */
29563 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29564 			if (un->sd_fi_fifo_pkt[i] != NULL) {
29565 				kmem_free(un->sd_fi_fifo_pkt[i],
29566 				    sizeof (struct sd_fi_pkt));
29567 			}
29568 			if (un->sd_fi_fifo_xb[i] != NULL) {
29569 				kmem_free(un->sd_fi_fifo_xb[i],
29570 				    sizeof (struct sd_fi_xb));
29571 			}
29572 			if (un->sd_fi_fifo_un[i] != NULL) {
29573 				kmem_free(un->sd_fi_fifo_un[i],
29574 				    sizeof (struct sd_fi_un));
29575 			}
29576 			if (un->sd_fi_fifo_arq[i] != NULL) {
29577 				kmem_free(un->sd_fi_fifo_arq[i],
29578 				    sizeof (struct sd_fi_arq));
29579 			}
29580 			un->sd_fi_fifo_pkt[i] = NULL;
29581 			un->sd_fi_fifo_un[i] = NULL;
29582 			un->sd_fi_fifo_xb[i] = NULL;
29583 			un->sd_fi_fifo_arq[i] = NULL;
29584 		}
29585 		un->sd_fi_fifo_start = 0;
29586 		un->sd_fi_fifo_end = 0;
29587 
29588 		SD_INFO(SD_LOG_IOERR, un,
29589 		    "sd_faultinjection_ioctl: stop finished\n");
29590 		break;
29591 
29592 	case SDIOCINSERTPKT:
29593 		/* Store a packet struct to be pushed onto fifo */
29594 		SD_INFO(SD_LOG_SDTEST, un,
29595 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
29596 
29597 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29598 
29599 		sd_fault_injection_on = 0;
29600 
29601 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
29602 		if (un->sd_fi_fifo_pkt[i] != NULL) {
29603 			kmem_free(un->sd_fi_fifo_pkt[i],
29604 			    sizeof (struct sd_fi_pkt));
29605 		}
29606 		if (arg != (uintptr_t)NULL) {
29607 			un->sd_fi_fifo_pkt[i] =
29608 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
29609 			if (un->sd_fi_fifo_pkt[i] == NULL) {
29610 				/* Alloc failed don't store anything */
29611 				break;
29612 			}
29613 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
29614 			    sizeof (struct sd_fi_pkt), 0);
29615 			if (rval == -1) {
29616 				kmem_free(un->sd_fi_fifo_pkt[i],
29617 				    sizeof (struct sd_fi_pkt));
29618 				un->sd_fi_fifo_pkt[i] = NULL;
29619 			}
29620 		} else {
29621 			SD_INFO(SD_LOG_IOERR, un,
29622 			    "sd_faultinjection_ioctl: pkt null\n");
29623 		}
29624 		break;
29625 
29626 	case SDIOCINSERTXB:
29627 		/* Store a xb struct to be pushed onto fifo */
29628 		SD_INFO(SD_LOG_SDTEST, un,
29629 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
29630 
29631 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29632 
29633 		sd_fault_injection_on = 0;
29634 
29635 		if (un->sd_fi_fifo_xb[i] != NULL) {
29636 			kmem_free(un->sd_fi_fifo_xb[i],
29637 			    sizeof (struct sd_fi_xb));
29638 			un->sd_fi_fifo_xb[i] = NULL;
29639 		}
29640 		if (arg != (uintptr_t)NULL) {
29641 			un->sd_fi_fifo_xb[i] =
29642 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
29643 			if (un->sd_fi_fifo_xb[i] == NULL) {
29644 				/* Alloc failed don't store anything */
29645 				break;
29646 			}
29647 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
29648 			    sizeof (struct sd_fi_xb), 0);
29649 
29650 			if (rval == -1) {
29651 				kmem_free(un->sd_fi_fifo_xb[i],
29652 				    sizeof (struct sd_fi_xb));
29653 				un->sd_fi_fifo_xb[i] = NULL;
29654 			}
29655 		} else {
29656 			SD_INFO(SD_LOG_IOERR, un,
29657 			    "sd_faultinjection_ioctl: xb null\n");
29658 		}
29659 		break;
29660 
29661 	case SDIOCINSERTUN:
29662 		/* Store a un struct to be pushed onto fifo */
29663 		SD_INFO(SD_LOG_SDTEST, un,
29664 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
29665 
29666 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29667 
29668 		sd_fault_injection_on = 0;
29669 
29670 		if (un->sd_fi_fifo_un[i] != NULL) {
29671 			kmem_free(un->sd_fi_fifo_un[i],
29672 			    sizeof (struct sd_fi_un));
29673 			un->sd_fi_fifo_un[i] = NULL;
29674 		}
29675 		if (arg != (uintptr_t)NULL) {
29676 			un->sd_fi_fifo_un[i] =
29677 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
29678 			if (un->sd_fi_fifo_un[i] == NULL) {
29679 				/* Alloc failed don't store anything */
29680 				break;
29681 			}
29682 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
29683 			    sizeof (struct sd_fi_un), 0);
29684 			if (rval == -1) {
29685 				kmem_free(un->sd_fi_fifo_un[i],
29686 				    sizeof (struct sd_fi_un));
29687 				un->sd_fi_fifo_un[i] = NULL;
29688 			}
29689 
29690 		} else {
29691 			SD_INFO(SD_LOG_IOERR, un,
29692 			    "sd_faultinjection_ioctl: un null\n");
29693 		}
29694 
29695 		break;
29696 
29697 	case SDIOCINSERTARQ:
29698 		/* Store a arq struct to be pushed onto fifo */
29699 		SD_INFO(SD_LOG_SDTEST, un,
29700 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
29701 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29702 
29703 		sd_fault_injection_on = 0;
29704 
29705 		if (un->sd_fi_fifo_arq[i] != NULL) {
29706 			kmem_free(un->sd_fi_fifo_arq[i],
29707 			    sizeof (struct sd_fi_arq));
29708 			un->sd_fi_fifo_arq[i] = NULL;
29709 		}
29710 		if (arg != (uintptr_t)NULL) {
29711 			un->sd_fi_fifo_arq[i] =
29712 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
29713 			if (un->sd_fi_fifo_arq[i] == NULL) {
29714 				/* Alloc failed don't store anything */
29715 				break;
29716 			}
29717 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
29718 			    sizeof (struct sd_fi_arq), 0);
29719 			if (rval == -1) {
29720 				kmem_free(un->sd_fi_fifo_arq[i],
29721 				    sizeof (struct sd_fi_arq));
29722 				un->sd_fi_fifo_arq[i] = NULL;
29723 			}
29724 
29725 		} else {
29726 			SD_INFO(SD_LOG_IOERR, un,
29727 			    "sd_faultinjection_ioctl: arq null\n");
29728 		}
29729 
29730 		break;
29731 
29732 	case SDIOCPUSH:
29733 		/* Push stored xb, pkt, un, and arq onto fifo */
29734 		sd_fault_injection_on = 0;
29735 
29736 		if (arg != (uintptr_t)NULL) {
29737 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
29738 			if (rval != -1 &&
29739 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29740 				un->sd_fi_fifo_end += i;
29741 			}
29742 		} else {
29743 			SD_INFO(SD_LOG_IOERR, un,
29744 			    "sd_faultinjection_ioctl: push arg null\n");
29745 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29746 				un->sd_fi_fifo_end++;
29747 			}
29748 		}
29749 		SD_INFO(SD_LOG_IOERR, un,
29750 		    "sd_faultinjection_ioctl: push to end=%d\n",
29751 		    un->sd_fi_fifo_end);
29752 		break;
29753 
29754 	case SDIOCRETRIEVE:
29755 		/* Return buffer of log from Injection session */
29756 		SD_INFO(SD_LOG_SDTEST, un,
29757 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
29758 
29759 		sd_fault_injection_on = 0;
29760 
29761 		mutex_enter(&(un->un_fi_mutex));
29762 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
29763 		    un->sd_fi_buf_len+1, 0);
29764 		mutex_exit(&(un->un_fi_mutex));
29765 
29766 		if (rval == -1) {
29767 			/*
29768 			 * arg is possibly invalid setting
29769 			 * it to NULL for return
29770 			 */
29771 			arg = (uintptr_t)NULL;
29772 		}
29773 		break;
29774 	}
29775 
29776 	mutex_exit(SD_MUTEX(un));
29777 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
29778 }
29779 
29780 
29781 /*
29782  *    Function: sd_injection_log()
29783  *
29784  * Description: This routine adds buff to the already existing injection log
29785  *              for retrieval via faultinjection_ioctl for use in fault
29786  *              detection and recovery
29787  *
29788  *   Arguments: buf - the string to add to the log
29789  */
29790 
29791 static void
29792 sd_injection_log(char *buf, struct sd_lun *un)
29793 {
29794 	uint_t len;
29795 
29796 	ASSERT(un != NULL);
29797 	ASSERT(buf != NULL);
29798 
29799 	mutex_enter(&(un->un_fi_mutex));
29800 
29801 	len = min(strlen(buf), 255);
29802 	/* Add logged value to Injection log to be returned later */
29803 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
29804 		uint_t	offset = strlen((char *)un->sd_fi_log);
29805 		char *destp = (char *)un->sd_fi_log + offset;
29806 		int i;
29807 		for (i = 0; i < len; i++) {
29808 			*destp++ = *buf++;
29809 		}
29810 		un->sd_fi_buf_len += len;
29811 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
29812 	}
29813 
29814 	mutex_exit(&(un->un_fi_mutex));
29815 }
29816 
29817 
29818 /*
29819  *    Function: sd_faultinjection()
29820  *
29821  * Description: This routine takes the pkt and changes its
29822  *		content based on error injection scenerio.
29823  *
29824  *   Arguments: pktp	- packet to be changed
29825  */
29826 
29827 static void
29828 sd_faultinjection(struct scsi_pkt *pktp)
29829 {
29830 	uint_t i;
29831 	struct sd_fi_pkt *fi_pkt;
29832 	struct sd_fi_xb *fi_xb;
29833 	struct sd_fi_un *fi_un;
29834 	struct sd_fi_arq *fi_arq;
29835 	struct buf *bp;
29836 	struct sd_xbuf *xb;
29837 	struct sd_lun *un;
29838 
29839 	ASSERT(pktp != NULL);
29840 
29841 	/* pull bp xb and un from pktp */
29842 	bp = (struct buf *)pktp->pkt_private;
29843 	xb = SD_GET_XBUF(bp);
29844 	un = SD_GET_UN(bp);
29845 
29846 	ASSERT(un != NULL);
29847 
29848 	mutex_enter(SD_MUTEX(un));
29849 
29850 	SD_TRACE(SD_LOG_SDTEST, un,
29851 	    "sd_faultinjection: entry Injection from sdintr\n");
29852 
29853 	/* if injection is off return */
29854 	if (sd_fault_injection_on == 0 ||
29855 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
29856 		mutex_exit(SD_MUTEX(un));
29857 		return;
29858 	}
29859 
29860 	SD_INFO(SD_LOG_SDTEST, un,
29861 	    "sd_faultinjection: is working for copying\n");
29862 
29863 	/* take next set off fifo */
29864 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
29865 
29866 	fi_pkt = un->sd_fi_fifo_pkt[i];
29867 	fi_xb = un->sd_fi_fifo_xb[i];
29868 	fi_un = un->sd_fi_fifo_un[i];
29869 	fi_arq = un->sd_fi_fifo_arq[i];
29870 
29871 
29872 	/* set variables accordingly */
29873 	/* set pkt if it was on fifo */
29874 	if (fi_pkt != NULL) {
29875 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
29876 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
29877 		if (fi_pkt->pkt_cdbp != 0xff)
29878 			SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
29879 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
29880 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
29881 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
29882 
29883 	}
29884 	/* set xb if it was on fifo */
29885 	if (fi_xb != NULL) {
29886 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
29887 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
29888 		if (fi_xb->xb_retry_count != 0)
29889 			SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
29890 		SD_CONDSET(xb, xb, xb_victim_retry_count,
29891 		    "xb_victim_retry_count");
29892 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
29893 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
29894 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
29895 
29896 		/* copy in block data from sense */
29897 		/*
29898 		 * if (fi_xb->xb_sense_data[0] != -1) {
29899 		 *	bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
29900 		 *	SENSE_LENGTH);
29901 		 * }
29902 		 */
29903 		bcopy(fi_xb->xb_sense_data, xb->xb_sense_data, SENSE_LENGTH);
29904 
29905 		/* copy in extended sense codes */
29906 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29907 		    xb, es_code, "es_code");
29908 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29909 		    xb, es_key, "es_key");
29910 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29911 		    xb, es_add_code, "es_add_code");
29912 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29913 		    xb, es_qual_code, "es_qual_code");
29914 		struct scsi_extended_sense *esp;
29915 		esp = (struct scsi_extended_sense *)xb->xb_sense_data;
29916 		esp->es_class = CLASS_EXTENDED_SENSE;
29917 	}
29918 
29919 	/* set un if it was on fifo */
29920 	if (fi_un != NULL) {
29921 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
29922 		SD_CONDSET(un, un, un_ctype, "un_ctype");
29923 		SD_CONDSET(un, un, un_reset_retry_count,
29924 		    "un_reset_retry_count");
29925 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
29926 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
29927 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
29928 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
29929 		    "un_f_allow_bus_device_reset");
29930 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
29931 
29932 	}
29933 
29934 	/* copy in auto request sense if it was on fifo */
29935 	if (fi_arq != NULL) {
29936 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
29937 	}
29938 
29939 	/* free structs */
29940 	if (un->sd_fi_fifo_pkt[i] != NULL) {
29941 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
29942 	}
29943 	if (un->sd_fi_fifo_xb[i] != NULL) {
29944 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
29945 	}
29946 	if (un->sd_fi_fifo_un[i] != NULL) {
29947 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
29948 	}
29949 	if (un->sd_fi_fifo_arq[i] != NULL) {
29950 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
29951 	}
29952 
29953 	/*
29954 	 * kmem_free does not gurantee to set to NULL
29955 	 * since we uses these to determine if we set
29956 	 * values or not lets confirm they are always
29957 	 * NULL after free
29958 	 */
29959 	un->sd_fi_fifo_pkt[i] = NULL;
29960 	un->sd_fi_fifo_un[i] = NULL;
29961 	un->sd_fi_fifo_xb[i] = NULL;
29962 	un->sd_fi_fifo_arq[i] = NULL;
29963 
29964 	un->sd_fi_fifo_start++;
29965 
29966 	mutex_exit(SD_MUTEX(un));
29967 
29968 	SD_INFO(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
29969 }
29970 
29971 #endif /* SD_FAULT_INJECTION */
29972 
29973 /*
29974  * This routine is invoked in sd_unit_attach(). Before calling it, the
29975  * properties in conf file should be processed already, and "hotpluggable"
29976  * property was processed also.
29977  *
29978  * The sd driver distinguishes 3 different type of devices: removable media,
29979  * non-removable media, and hotpluggable. Below the differences are defined:
29980  *
29981  * 1. Device ID
29982  *
29983  *     The device ID of a device is used to identify this device. Refer to
29984  *     ddi_devid_register(9F).
29985  *
29986  *     For a non-removable media disk device which can provide 0x80 or 0x83
29987  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
29988  *     device ID is created to identify this device. For other non-removable
29989  *     media devices, a default device ID is created only if this device has
 *     at least 2 alternate cylinders. Otherwise, this device has no devid.
29991  *
29992  *     -------------------------------------------------------
29993  *     removable media   hotpluggable  | Can Have Device ID
29994  *     -------------------------------------------------------
29995  *         false             false     |     Yes
29996  *         false             true      |     Yes
29997  *         true                x       |     No
29998  *     ------------------------------------------------------
29999  *
30000  *
30001  * 2. SCSI group 4 commands
30002  *
30003  *     In SCSI specs, only some commands in group 4 command set can use
30004  *     8-byte addresses that can be used to access >2TB storage spaces.
30005  *     Other commands have no such capability. Without supporting group4,
30006  *     it is impossible to make full use of storage spaces of a disk with
30007  *     capacity larger than 2TB.
30008  *
30009  *     -----------------------------------------------
30010  *     removable media   hotpluggable   LP64  |  Group
30011  *     -----------------------------------------------
30012  *           false          false       false |   1
30013  *           false          false       true  |   4
30014  *           false          true        false |   1
30015  *           false          true        true  |   4
30016  *           true             x           x   |   5
30017  *     -----------------------------------------------
30018  *
30019  *
30020  * 3. Check for VTOC Label
30021  *
30022  *     If a direct-access disk has no EFI label, sd will check if it has a
30023  *     valid VTOC label. Now, sd also does that check for removable media
30024  *     and hotpluggable devices.
30025  *
30026  *     --------------------------------------------------------------
30027  *     Direct-Access   removable media    hotpluggable |  Check Label
30028  *     -------------------------------------------------------------
30029  *         false          false           false        |   No
30030  *         false          false           true         |   No
30031  *         false          true            false        |   Yes
30032  *         false          true            true         |   Yes
30033  *         true            x                x          |   Yes
30034  *     --------------------------------------------------------------
30035  *
30036  *
30037  * 4. Building default VTOC label
30038  *
30039  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
30040  *     If those devices have no valid VTOC label, sd(4D) will attempt to
30041  *     create default VTOC for them. Currently sd creates default VTOC label
30042  *     for all devices on x86 platform (VTOC_16), but only for removable
30043  *     media devices on SPARC (VTOC_8).
30044  *
30045  *     -----------------------------------------------------------
30046  *       removable media hotpluggable platform   |   Default Label
30047  *     -----------------------------------------------------------
30048  *             false          false    sparc     |     No
30049  *             false          true      x86      |     Yes
30050  *             false          true     sparc     |     Yes
30051  *             true             x        x       |     Yes
30052  *     ----------------------------------------------------------
30053  *
30054  *
30055  * 5. Supported blocksizes of target devices
30056  *
30057  *     Sd supports non-512-byte blocksize for removable media devices only.
30058  *     For other devices, only 512-byte blocksize is supported. This may be
30059  *     changed in near future because some RAID devices require non-512-byte
30060  *     blocksize
30061  *
30062  *     -----------------------------------------------------------
30063  *     removable media    hotpluggable    | non-512-byte blocksize
30064  *     -----------------------------------------------------------
30065  *           false          false         |   No
30066  *           false          true          |   No
30067  *           true             x           |   Yes
30068  *     -----------------------------------------------------------
30069  *
30070  *
30071  * 6. Automatic mount & unmount
30072  *
30073  *     sd(4D) driver provides DKIOCREMOVABLE ioctl. This ioctl is used to query
 *     if a device is a removable media device. It returns 1 for removable media
30075  *     devices, and 0 for others.
30076  *
30077  *     The automatic mounting subsystem should distinguish between the types
30078  *     of devices and apply automounting policies to each.
30079  *
30080  *
30081  * 7. fdisk partition management
30082  *
30083  *     Fdisk is traditional partition method on x86 platform. sd(4D) driver
30084  *     just supports fdisk partitions on x86 platform. On sparc platform, sd
30085  *     doesn't support fdisk partitions at all. Note: pcfs(4FS) can recognize
30086  *     fdisk partitions on both x86 and SPARC platform.
30087  *
30088  *     -----------------------------------------------------------
30089  *       platform   removable media  USB/1394  |  fdisk supported
30090  *     -----------------------------------------------------------
30091  *        x86         X               X        |       true
30092  *     ------------------------------------------------------------
30093  *        sparc       X               X        |       false
30094  *     ------------------------------------------------------------
30095  *
30096  *
30097  * 8. MBOOT/MBR
30098  *
30099  *     Although sd(4D) doesn't support fdisk on SPARC platform, it does support
30100  *     read/write mboot for removable media devices on sparc platform.
30101  *
30102  *     -----------------------------------------------------------
30103  *       platform   removable media  USB/1394  |  mboot supported
30104  *     -----------------------------------------------------------
30105  *        x86         X               X        |       true
30106  *     ------------------------------------------------------------
30107  *        sparc      false           false     |       false
30108  *        sparc      false           true      |       true
30109  *        sparc      true            false     |       true
30110  *        sparc      true            true      |       true
30111  *     ------------------------------------------------------------
30112  *
30113  *
30114  * 9.  error handling during opening device
30115  *
30116  *     If failed to open a disk device, an errno is returned. For some kinds
30117  *     of errors, different errno is returned depending on if this device is
30118  *     a removable media device. This brings USB/1394 hard disks in line with
30119  *     expected hard disk behavior. It is not expected that this breaks any
30120  *     application.
30121  *
30122  *     ------------------------------------------------------
30123  *       removable media    hotpluggable   |  errno
30124  *     ------------------------------------------------------
30125  *             false          false        |   EIO
30126  *             false          true         |   EIO
30127  *             true             x          |   ENXIO
30128  *     ------------------------------------------------------
30129  *
30130  *
30131  * 11. ioctls: DKIOCEJECT, CDROMEJECT
30132  *
30133  *     These IOCTLs are applicable only to removable media devices.
30134  *
30135  *     -----------------------------------------------------------
30136  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30137  *     -----------------------------------------------------------
30138  *             false          false        |     No
30139  *             false          true         |     No
30140  *             true            x           |     Yes
30141  *     -----------------------------------------------------------
30142  *
30143  *
30144  * 12. Kstats for partitions
30145  *
30146  *     sd creates partition kstat for non-removable media devices. USB and
30147  *     Firewire hard disks now have partition kstats
30148  *
30149  *      ------------------------------------------------------
30150  *       removable media    hotpluggable   |   kstat
30151  *      ------------------------------------------------------
30152  *             false          false        |    Yes
30153  *             false          true         |    Yes
30154  *             true             x          |    No
30155  *       ------------------------------------------------------
30156  *
30157  *
30158  * 13. Removable media & hotpluggable properties
30159  *
30160  *     Sd driver creates a "removable-media" property for removable media
30161  *     devices. Parent nexus drivers create a "hotpluggable" property if
30162  *     it supports hotplugging.
30163  *
30164  *     ---------------------------------------------------------------------
 *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
30166  *     ---------------------------------------------------------------------
30167  *       false            false       |    No                   No
30168  *       false            true        |    No                   Yes
30169  *       true             false       |    Yes                  No
30170  *       true             true        |    Yes                  Yes
30171  *     ---------------------------------------------------------------------
30172  *
30173  *
30174  * 14. Power Management
30175  *
30176  *     sd only power manages removable media devices or devices that support
30177  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
30178  *
30179  *     A parent nexus that supports hotplugging can also set "pm-capable"
30180  *     if the disk can be power managed.
30181  *
30182  *     ------------------------------------------------------------
30183  *       removable media hotpluggable pm-capable  |   power manage
30184  *     ------------------------------------------------------------
30185  *             false          false     false     |     No
30186  *             false          false     true      |     Yes
30187  *             false          true      false     |     No
30188  *             false          true      true      |     Yes
30189  *             true             x        x        |     Yes
30190  *     ------------------------------------------------------------
30191  *
30192  *      USB and firewire hard disks can now be power managed independently
30193  *      of the framebuffer
30194  *
30195  *
30196  * 15. Support for USB disks with capacity larger than 1TB
30197  *
30198  *     Currently, sd doesn't permit a fixed disk device with capacity
30199  *     larger than 1TB to be used in a 32-bit operating system environment.
30200  *     However, sd doesn't do that for removable media devices. Instead, it
30201  *     assumes that removable media devices cannot have a capacity larger
30202  *     than 1TB. Therefore, using those devices on 32-bit system is partially
30203  *     supported, which can cause some unexpected results.
30204  *
30205  *     ---------------------------------------------------------------------
30206  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
30207  *     ---------------------------------------------------------------------
30208  *             false          false  |   true         |     no
30209  *             false          true   |   true         |     no
30210  *             true           false  |   true         |     Yes
30211  *             true           true   |   true         |     Yes
30212  *     ---------------------------------------------------------------------
30213  *
30214  *
30215  * 16. Check write-protection at open time
30216  *
30217  *     When a removable media device is being opened for writing without NDELAY
30218  *     flag, sd will check if this device is writable. If attempting to open
30219  *     without NDELAY flag a write-protected device, this operation will abort.
30220  *
30221  *     ------------------------------------------------------------
30222  *       removable media    USB/1394   |   WP Check
30223  *     ------------------------------------------------------------
30224  *             false          false    |     No
30225  *             false          true     |     No
30226  *             true           false    |     Yes
30227  *             true           true     |     Yes
30228  *     ------------------------------------------------------------
30229  *
30230  *
30231  * 17. syslog when corrupted VTOC is encountered
30232  *
 *      Currently, if an invalid VTOC is encountered, sd only prints a
 *      syslog message for fixed SCSI disks.
30235  *     ------------------------------------------------------------
30236  *       removable media    USB/1394   |   print syslog
30237  *     ------------------------------------------------------------
30238  *             false          false    |     Yes
30239  *             false          true     |     No
30240  *             true           false    |     No
30241  *             true           true     |     No
30242  *     ------------------------------------------------------------
30243  */
30244 static void
30245 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
30246 {
30247 	int	pm_cap;
30248 
30249 	ASSERT(un->un_sd);
30250 	ASSERT(un->un_sd->sd_inq);
30251 
30252 	/*
30253 	 * Enable SYNC CACHE support for all devices.
30254 	 */
30255 	un->un_f_sync_cache_supported = TRUE;
30256 
30257 	/*
30258 	 * Set the sync cache required flag to false.
30259 	 * This would ensure that there is no SYNC CACHE
30260 	 * sent when there are no writes
30261 	 */
30262 	un->un_f_sync_cache_required = FALSE;
30263 
30264 	if (un->un_sd->sd_inq->inq_rmb) {
30265 		/*
30266 		 * The media of this device is removable. And for this kind
30267 		 * of devices, it is possible to change medium after opening
30268 		 * devices. Thus we should support this operation.
30269 		 */
30270 		un->un_f_has_removable_media = TRUE;
30271 
30272 		/*
30273 		 * support non-512-byte blocksize of removable media devices
30274 		 */
30275 		un->un_f_non_devbsize_supported = TRUE;
30276 
30277 		/*
30278 		 * Assume that all removable media devices support DOOR_LOCK
30279 		 */
30280 		un->un_f_doorlock_supported = TRUE;
30281 
30282 		/*
30283 		 * For a removable media device, it is possible to be opened
30284 		 * with NDELAY flag when there is no media in drive, in this
30285 		 * case we don't care if device is writable. But if without
30286 		 * NDELAY flag, we need to check if media is write-protected.
30287 		 */
30288 		un->un_f_chk_wp_open = TRUE;
30289 
30290 		/*
30291 		 * need to start a SCSI watch thread to monitor media state,
30292 		 * when media is being inserted or ejected, notify syseventd.
30293 		 */
30294 		un->un_f_monitor_media_state = TRUE;
30295 
30296 		/*
30297 		 * Some devices don't support START_STOP_UNIT command.
30298 		 * Therefore, we'd better check if a device supports it
30299 		 * before sending it.
30300 		 */
30301 		un->un_f_check_start_stop = TRUE;
30302 
30303 		/*
30304 		 * support eject media ioctl:
30305 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
30306 		 */
30307 		un->un_f_eject_media_supported = TRUE;
30308 
30309 		/*
30310 		 * Because many removable-media devices don't support
30311 		 * LOG_SENSE, we couldn't use this command to check if
30312 		 * a removable media device support power-management.
30313 		 * We assume that they support power-management via
30314 		 * START_STOP_UNIT command and can be spun up and down
30315 		 * without limitations.
30316 		 */
30317 		un->un_f_pm_supported = TRUE;
30318 
30319 		/*
30320 		 * Need to create a zero length (Boolean) property
30321 		 * removable-media for the removable media devices.
30322 		 * Note that the return value of the property is not being
30323 		 * checked, since if unable to create the property
30324 		 * then do not want the attach to fail altogether. Consistent
30325 		 * with other property creation in attach.
30326 		 */
30327 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
30328 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
30329 
30330 	} else {
30331 		/*
30332 		 * create device ID for device
30333 		 */
30334 		un->un_f_devid_supported = TRUE;
30335 
30336 		/*
30337 		 * Spin up non-removable-media devices once it is attached
30338 		 */
30339 		un->un_f_attach_spinup = TRUE;
30340 
30341 		/*
30342 		 * According to SCSI specification, Sense data has two kinds of
30343 		 * format: fixed format, and descriptor format. At present, we
30344 		 * don't support descriptor format sense data for removable
30345 		 * media.
30346 		 */
30347 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
30348 			un->un_f_descr_format_supported = TRUE;
30349 		}
30350 
30351 		/*
30352 		 * kstats are created only for non-removable media devices.
30353 		 *
30354 		 * Set this in sd.conf to 0 in order to disable kstats.  The
30355 		 * default is 1, so they are enabled by default.
30356 		 */
30357 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
30358 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
30359 		    "enable-partition-kstats", 1));
30360 
30361 		/*
30362 		 * Check if HBA has set the "pm-capable" property.
30363 		 * If "pm-capable" exists and is non-zero then we can
30364 		 * power manage the device without checking the start/stop
30365 		 * cycle count log sense page.
30366 		 *
30367 		 * If "pm-capable" exists and is set to be false (0),
30368 		 * then we should not power manage the device.
30369 		 *
30370 		 * If "pm-capable" doesn't exist then pm_cap will
30371 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
30372 		 * sd will check the start/stop cycle count log sense page
30373 		 * and power manage the device if the cycle count limit has
30374 		 * not been exceeded.
30375 		 */
30376 		pm_cap = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
30377 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
30378 		if (SD_PM_CAPABLE_IS_UNDEFINED(pm_cap)) {
30379 			un->un_f_log_sense_supported = TRUE;
30380 			if (!un->un_f_power_condition_disabled &&
30381 			    SD_INQUIRY(un)->inq_ansi == 6) {
30382 				un->un_f_power_condition_supported = TRUE;
30383 			}
30384 		} else {
30385 			/*
30386 			 * pm-capable property exists.
30387 			 *
30388 			 * Convert "TRUE" values for pm_cap to
30389 			 * SD_PM_CAPABLE_IS_TRUE to make it easier to check
30390 			 * later. "TRUE" values are any values defined in
30391 			 * inquiry.h.
30392 			 */
30393 			if (SD_PM_CAPABLE_IS_FALSE(pm_cap)) {
30394 				un->un_f_log_sense_supported = FALSE;
30395 			} else {
30396 				/* SD_PM_CAPABLE_IS_TRUE case */
30397 				un->un_f_pm_supported = TRUE;
30398 				if (!un->un_f_power_condition_disabled &&
30399 				    SD_PM_CAPABLE_IS_SPC_4(pm_cap)) {
30400 					un->un_f_power_condition_supported =
30401 					    TRUE;
30402 				}
30403 				if (SD_PM_CAP_LOG_SUPPORTED(pm_cap)) {
30404 					un->un_f_log_sense_supported = TRUE;
30405 					un->un_f_pm_log_sense_smart =
30406 					    SD_PM_CAP_SMART_LOG(pm_cap);
30407 				}
30408 			}
30409 
30410 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
30411 			    "sd_unit_attach: un:0x%p pm-capable "
30412 			    "property set to %d.\n", un, un->un_f_pm_supported);
30413 		}
30414 	}
30415 
30416 	if (un->un_f_is_hotpluggable) {
30417 
30418 		/*
30419 		 * Have to watch hotpluggable devices as well, since
30420 		 * that's the only way for userland applications to
30421 		 * detect hot removal while device is busy/mounted.
30422 		 */
30423 		un->un_f_monitor_media_state = TRUE;
30424 
30425 		un->un_f_check_start_stop = TRUE;
30426 
30427 	}
30428 }
30429 
30430 /*
30431  * sd_tg_rdwr:
30432  * Provides rdwr access for cmlb via sd_tgops. The start_block is
30433  * in sys block size, req_length in bytes.
30434  *
30435  */
30436 static int
30437 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
30438     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
30439 {
30440 	struct sd_lun *un;
30441 	int path_flag = (int)(uintptr_t)tg_cookie;
30442 	char *dkl = NULL;
30443 	diskaddr_t real_addr = start_block;
30444 	diskaddr_t first_byte, end_block;
30445 
30446 	size_t	buffer_size = reqlength;
30447 	int rval = 0;
30448 	diskaddr_t	cap;
30449 	uint32_t	lbasize;
30450 	sd_ssc_t	*ssc;
30451 
30452 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
30453 	if (un == NULL)
30454 		return (ENXIO);
30455 
30456 	if (cmd != TG_READ && cmd != TG_WRITE)
30457 		return (EINVAL);
30458 
30459 	ssc = sd_ssc_init(un);
30460 	mutex_enter(SD_MUTEX(un));
30461 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
30462 		mutex_exit(SD_MUTEX(un));
30463 		rval = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
30464 		    &lbasize, path_flag);
30465 		if (rval != 0)
30466 			goto done1;
30467 		mutex_enter(SD_MUTEX(un));
30468 		sd_update_block_info(un, lbasize, cap);
30469 		if ((un->un_f_tgt_blocksize_is_valid == FALSE)) {
30470 			mutex_exit(SD_MUTEX(un));
30471 			rval = EIO;
30472 			goto done;
30473 		}
30474 	}
30475 
30476 	if (NOT_DEVBSIZE(un)) {
30477 		/*
30478 		 * sys_blocksize != tgt_blocksize, need to re-adjust
30479 		 * blkno and save the index to beginning of dk_label
30480 		 */
30481 		first_byte  = SD_SYSBLOCKS2BYTES(start_block);
30482 		real_addr = first_byte / un->un_tgt_blocksize;
30483 
30484 		end_block = (first_byte + reqlength +
30485 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
30486 
30487 		/* round up buffer size to multiple of target block size */
30488 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
30489 
30490 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
30491 		    "label_addr: 0x%x allocation size: 0x%x\n",
30492 		    real_addr, buffer_size);
30493 
30494 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
30495 		    (reqlength % un->un_tgt_blocksize) != 0)
30496 			/* the request is not aligned */
30497 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
30498 	}
30499 
30500 	/*
30501 	 * The MMC standard allows READ CAPACITY to be
30502 	 * inaccurate by a bounded amount (in the interest of
30503 	 * response latency).  As a result, failed READs are
30504 	 * commonplace (due to the reading of metadata and not
30505 	 * data). Depending on the per-Vendor/drive Sense data,
30506 	 * the failed READ can cause many (unnecessary) retries.
30507 	 */
30508 
30509 	if (ISCD(un) && (cmd == TG_READ) &&
30510 	    (un->un_f_blockcount_is_valid == TRUE) &&
30511 	    ((start_block == (un->un_blockcount - 1)) ||
30512 	    (start_block == (un->un_blockcount - 2)))) {
30513 			path_flag = SD_PATH_DIRECT_PRIORITY;
30514 	}
30515 
30516 	mutex_exit(SD_MUTEX(un));
30517 	if (cmd == TG_READ) {
30518 		rval = sd_send_scsi_READ(ssc, (dkl != NULL) ? dkl : bufaddr,
30519 		    buffer_size, real_addr, path_flag);
30520 		if (dkl != NULL)
30521 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
30522 			    real_addr), bufaddr, reqlength);
30523 	} else {
30524 		if (dkl) {
30525 			rval = sd_send_scsi_READ(ssc, dkl, buffer_size,
30526 			    real_addr, path_flag);
30527 			if (rval) {
30528 				goto done1;
30529 			}
30530 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
30531 			    real_addr), reqlength);
30532 		}
30533 		rval = sd_send_scsi_WRITE(ssc, (dkl != NULL) ? dkl : bufaddr,
30534 		    buffer_size, real_addr, path_flag);
30535 	}
30536 
30537 done1:
30538 	if (dkl != NULL)
30539 		kmem_free(dkl, buffer_size);
30540 
30541 	if (rval != 0) {
30542 		if (rval == EIO)
30543 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
30544 		else
30545 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
30546 	}
30547 done:
30548 	sd_ssc_fini(ssc);
30549 	return (rval);
30550 }
30551 
30552 
30553 static int
30554 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
30555 {
30556 
30557 	struct sd_lun *un;
30558 	diskaddr_t	cap;
30559 	uint32_t	lbasize;
30560 	int		path_flag = (int)(uintptr_t)tg_cookie;
30561 	int		ret = 0;
30562 
30563 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
30564 	if (un == NULL)
30565 		return (ENXIO);
30566 
30567 	switch (cmd) {
30568 	case TG_GETPHYGEOM:
30569 	case TG_GETVIRTGEOM:
30570 	case TG_GETCAPACITY:
30571 	case TG_GETBLOCKSIZE:
30572 		mutex_enter(SD_MUTEX(un));
30573 
30574 		if ((un->un_f_blockcount_is_valid == TRUE) &&
30575 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
30576 			cap = un->un_blockcount;
30577 			lbasize = un->un_tgt_blocksize;
30578 			mutex_exit(SD_MUTEX(un));
30579 		} else {
30580 			sd_ssc_t	*ssc;
30581 			mutex_exit(SD_MUTEX(un));
30582 			ssc = sd_ssc_init(un);
30583 			ret = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
30584 			    &lbasize, path_flag);
30585 			if (ret != 0) {
30586 				if (ret == EIO)
30587 					sd_ssc_assessment(ssc,
30588 					    SD_FMT_STATUS_CHECK);
30589 				else
30590 					sd_ssc_assessment(ssc,
30591 					    SD_FMT_IGNORE);
30592 				sd_ssc_fini(ssc);
30593 				return (ret);
30594 			}
30595 			sd_ssc_fini(ssc);
30596 			mutex_enter(SD_MUTEX(un));
30597 			sd_update_block_info(un, lbasize, cap);
30598 			if ((un->un_f_blockcount_is_valid == FALSE) ||
30599 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
30600 				mutex_exit(SD_MUTEX(un));
30601 				return (EIO);
30602 			}
30603 			mutex_exit(SD_MUTEX(un));
30604 		}
30605 
30606 		if (cmd == TG_GETCAPACITY) {
30607 			*(diskaddr_t *)arg = cap;
30608 			return (0);
30609 		}
30610 
30611 		if (cmd == TG_GETBLOCKSIZE) {
30612 			*(uint32_t *)arg = lbasize;
30613 			return (0);
30614 		}
30615 
30616 		if (cmd == TG_GETPHYGEOM)
30617 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
30618 			    cap, lbasize, path_flag);
30619 		else
30620 			/* TG_GETVIRTGEOM */
30621 			ret = sd_get_virtual_geometry(un,
30622 			    (cmlb_geom_t *)arg, cap, lbasize);
30623 
30624 		return (ret);
30625 
30626 	case TG_GETATTR:
30627 		mutex_enter(SD_MUTEX(un));
30628 		((tg_attribute_t *)arg)->media_is_writable =
30629 		    un->un_f_mmc_writable_media;
30630 		((tg_attribute_t *)arg)->media_is_solid_state =
30631 		    un->un_f_is_solid_state;
30632 		((tg_attribute_t *)arg)->media_is_rotational =
30633 		    un->un_f_is_rotational;
30634 		mutex_exit(SD_MUTEX(un));
30635 		return (0);
30636 	default:
30637 		return (ENOTTY);
30638 
30639 	}
30640 }
30641 
30642 /*
30643  *    Function: sd_ssc_ereport_post
30644  *
30645  * Description: Will be called when SD driver need to post an ereport.
30646  *
30647  *    Context: Kernel thread or interrupt context.
30648  */
30649 
/*
 * Expands to the FM ereport "devid" name/value pair, substituting the
 * literal string "unknown" when the devid string pointer (d) is NULL.
 */
#define	DEVID_IF_KNOWN(d) "devid", DATA_TYPE_STRING, (d) ? (d) : "unknown"
30651 
30652 static void
30653 sd_ssc_ereport_post(sd_ssc_t *ssc, enum sd_driver_assessment drv_assess)
30654 {
30655 	int uscsi_path_instance = 0;
30656 	uchar_t	uscsi_pkt_reason;
30657 	uint32_t uscsi_pkt_state;
30658 	uint32_t uscsi_pkt_statistics;
30659 	uint64_t uscsi_ena;
30660 	uchar_t op_code;
30661 	uint8_t *sensep;
30662 	union scsi_cdb *cdbp;
30663 	uint_t cdblen = 0;
30664 	uint_t senlen = 0;
30665 	struct sd_lun *un;
30666 	dev_info_t *dip;
30667 	char *devid;
30668 	int ssc_invalid_flags = SSC_FLAGS_INVALID_PKT_REASON |
30669 	    SSC_FLAGS_INVALID_STATUS |
30670 	    SSC_FLAGS_INVALID_SENSE |
30671 	    SSC_FLAGS_INVALID_DATA;
30672 	char assessment[16];
30673 
30674 	ASSERT(ssc != NULL);
30675 	ASSERT(ssc->ssc_uscsi_cmd != NULL);
30676 	ASSERT(ssc->ssc_uscsi_info != NULL);
30677 
30678 	un = ssc->ssc_un;
30679 	ASSERT(un != NULL);
30680 
30681 	dip = un->un_sd->sd_dev;
30682 
30683 	/*
30684 	 * Get the devid:
30685 	 *	devid will only be passed to non-transport error reports.
30686 	 */
30687 	devid = DEVI(dip)->devi_devid_str;
30688 
30689 	/*
30690 	 * If we are syncing or dumping, the command will not be executed
30691 	 * so we bypass this situation.
30692 	 */
30693 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
30694 	    (un->un_state == SD_STATE_DUMPING))
30695 		return;
30696 
30697 	uscsi_pkt_reason = ssc->ssc_uscsi_info->ui_pkt_reason;
30698 	uscsi_path_instance = ssc->ssc_uscsi_cmd->uscsi_path_instance;
30699 	uscsi_pkt_state = ssc->ssc_uscsi_info->ui_pkt_state;
30700 	uscsi_pkt_statistics = ssc->ssc_uscsi_info->ui_pkt_statistics;
30701 	uscsi_ena = ssc->ssc_uscsi_info->ui_ena;
30702 
30703 	sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
30704 	cdbp = (union scsi_cdb *)ssc->ssc_uscsi_cmd->uscsi_cdb;
30705 
30706 	/* In rare cases, EG:DOORLOCK, the cdb could be NULL */
30707 	if (cdbp == NULL) {
30708 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30709 		    "sd_ssc_ereport_post meet empty cdb\n");
30710 		return;
30711 	}
30712 
30713 	op_code = cdbp->scc_cmd;
30714 
30715 	cdblen = (int)ssc->ssc_uscsi_cmd->uscsi_cdblen;
30716 	senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
30717 	    ssc->ssc_uscsi_cmd->uscsi_rqresid);
30718 
30719 	if (senlen > 0)
30720 		ASSERT(sensep != NULL);
30721 
30722 	/*
30723 	 * Initialize drv_assess to corresponding values.
30724 	 * SD_FM_DRV_FATAL will be mapped to "fail" or "fatal" depending
30725 	 * on the sense-key returned back.
30726 	 */
30727 	switch (drv_assess) {
30728 		case SD_FM_DRV_RECOVERY:
30729 			(void) sprintf(assessment, "%s", "recovered");
30730 			break;
30731 		case SD_FM_DRV_RETRY:
30732 			(void) sprintf(assessment, "%s", "retry");
30733 			break;
30734 		case SD_FM_DRV_NOTICE:
30735 			(void) sprintf(assessment, "%s", "info");
30736 			break;
30737 		case SD_FM_DRV_FATAL:
30738 		default:
30739 			(void) sprintf(assessment, "%s", "unknown");
30740 	}
30741 	/*
30742 	 * If drv_assess == SD_FM_DRV_RECOVERY, this should be a recovered
30743 	 * command, we will post ereport.io.scsi.cmd.disk.recovered.
30744 	 * driver-assessment will always be "recovered" here.
30745 	 */
30746 	if (drv_assess == SD_FM_DRV_RECOVERY) {
30747 		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
30748 		    "cmd.disk.recovered", uscsi_ena, devid, NULL,
30749 		    DDI_NOSLEEP, NULL,
30750 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30751 		    DEVID_IF_KNOWN(devid),
30752 		    "driver-assessment", DATA_TYPE_STRING, assessment,
30753 		    "op-code", DATA_TYPE_UINT8, op_code,
30754 		    "cdb", DATA_TYPE_UINT8_ARRAY,
30755 		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30756 		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30757 		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30758 		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
30759 		    NULL);
30760 		return;
30761 	}
30762 
30763 	/*
30764 	 * If there is un-expected/un-decodable data, we should post
30765 	 * ereport.io.scsi.cmd.disk.dev.uderr.
30766 	 * driver-assessment will be set based on parameter drv_assess.
30767 	 * SSC_FLAGS_INVALID_SENSE - invalid sense data sent back.
30768 	 * SSC_FLAGS_INVALID_PKT_REASON - invalid pkt-reason encountered.
30769 	 * SSC_FLAGS_INVALID_STATUS - invalid stat-code encountered.
30770 	 * SSC_FLAGS_INVALID_DATA - invalid data sent back.
30771 	 */
30772 	if (ssc->ssc_flags & ssc_invalid_flags) {
30773 		if (ssc->ssc_flags & SSC_FLAGS_INVALID_SENSE) {
30774 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30775 			    NULL, "cmd.disk.dev.uderr", uscsi_ena, devid,
30776 			    NULL, DDI_NOSLEEP, NULL,
30777 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30778 			    DEVID_IF_KNOWN(devid),
30779 			    "driver-assessment", DATA_TYPE_STRING,
30780 			    drv_assess == SD_FM_DRV_FATAL ?
30781 			    "fail" : assessment,
30782 			    "op-code", DATA_TYPE_UINT8, op_code,
30783 			    "cdb", DATA_TYPE_UINT8_ARRAY,
30784 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30785 			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30786 			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30787 			    "pkt-stats", DATA_TYPE_UINT32,
30788 			    uscsi_pkt_statistics,
30789 			    "stat-code", DATA_TYPE_UINT8,
30790 			    ssc->ssc_uscsi_cmd->uscsi_status,
30791 			    "un-decode-info", DATA_TYPE_STRING,
30792 			    ssc->ssc_info,
30793 			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
30794 			    senlen, sensep,
30795 			    NULL);
30796 		} else {
30797 			/*
30798 			 * For other type of invalid data, the
30799 			 * un-decode-value field would be empty because the
30800 			 * un-decodable content could be seen from upper
30801 			 * level payload or inside un-decode-info.
30802 			 */
30803 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30804 			    NULL,
30805 			    "cmd.disk.dev.uderr", uscsi_ena, devid,
30806 			    NULL, DDI_NOSLEEP, NULL,
30807 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30808 			    DEVID_IF_KNOWN(devid),
30809 			    "driver-assessment", DATA_TYPE_STRING,
30810 			    drv_assess == SD_FM_DRV_FATAL ?
30811 			    "fail" : assessment,
30812 			    "op-code", DATA_TYPE_UINT8, op_code,
30813 			    "cdb", DATA_TYPE_UINT8_ARRAY,
30814 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30815 			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30816 			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30817 			    "pkt-stats", DATA_TYPE_UINT32,
30818 			    uscsi_pkt_statistics,
30819 			    "stat-code", DATA_TYPE_UINT8,
30820 			    ssc->ssc_uscsi_cmd->uscsi_status,
30821 			    "un-decode-info", DATA_TYPE_STRING,
30822 			    ssc->ssc_info,
30823 			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
30824 			    0, NULL,
30825 			    NULL);
30826 		}
30827 		ssc->ssc_flags &= ~ssc_invalid_flags;
30828 		return;
30829 	}
30830 
30831 	if (uscsi_pkt_reason != CMD_CMPLT ||
30832 	    (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)) {
30833 		/*
30834 		 * pkt-reason != CMD_CMPLT or SSC_FLAGS_TRAN_ABORT was
30835 		 * set inside sd_start_cmds due to errors(bad packet or
30836 		 * fatal transport error), we should take it as a
30837 		 * transport error, so we post ereport.io.scsi.cmd.disk.tran.
30838 		 * driver-assessment will be set based on drv_assess.
30839 		 * We will set devid to NULL because it is a transport
30840 		 * error.
30841 		 */
30842 		if (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)
30843 			ssc->ssc_flags &= ~SSC_FLAGS_TRAN_ABORT;
30844 
30845 		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
30846 		    "cmd.disk.tran", uscsi_ena, NULL, NULL, DDI_NOSLEEP, NULL,
30847 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30848 		    DEVID_IF_KNOWN(devid),
30849 		    "driver-assessment", DATA_TYPE_STRING,
30850 		    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
30851 		    "op-code", DATA_TYPE_UINT8, op_code,
30852 		    "cdb", DATA_TYPE_UINT8_ARRAY,
30853 		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30854 		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30855 		    "pkt-state", DATA_TYPE_UINT8, uscsi_pkt_state,
30856 		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
30857 		    NULL);
30858 	} else {
30859 		/*
30860 		 * If we got here, we have a completed command, and we need
30861 		 * to further investigate the sense data to see what kind
30862 		 * of ereport we should post.
30863 		 * No ereport is needed if sense-key is KEY_RECOVERABLE_ERROR
30864 		 * and asc/ascq is "ATA PASS-THROUGH INFORMATION AVAILABLE".
30865 		 * Post ereport.io.scsi.cmd.disk.dev.rqs.merr if sense-key is
30866 		 * KEY_MEDIUM_ERROR.
30867 		 * Post ereport.io.scsi.cmd.disk.dev.rqs.derr otherwise.
30868 		 * driver-assessment will be set based on the parameter
30869 		 * drv_assess.
30870 		 */
30871 		if (senlen > 0) {
30872 			/*
30873 			 * Here we have sense data available.
30874 			 */
30875 			uint8_t sense_key = scsi_sense_key(sensep);
30876 			uint8_t sense_asc = scsi_sense_asc(sensep);
30877 			uint8_t sense_ascq = scsi_sense_ascq(sensep);
30878 
30879 			if (sense_key == KEY_RECOVERABLE_ERROR &&
30880 			    sense_asc == 0x00 && sense_ascq == 0x1d)
30881 				return;
30882 
30883 			if (sense_key == KEY_MEDIUM_ERROR) {
30884 				/*
30885 				 * driver-assessment should be "fatal" if
30886 				 * drv_assess is SD_FM_DRV_FATAL.
30887 				 */
30888 				scsi_fm_ereport_post(un->un_sd,
30889 				    uscsi_path_instance, NULL,
30890 				    "cmd.disk.dev.rqs.merr",
30891 				    uscsi_ena, devid, NULL, DDI_NOSLEEP, NULL,
30892 				    FM_VERSION, DATA_TYPE_UINT8,
30893 				    FM_EREPORT_VERS0,
30894 				    DEVID_IF_KNOWN(devid),
30895 				    "driver-assessment",
30896 				    DATA_TYPE_STRING,
30897 				    drv_assess == SD_FM_DRV_FATAL ?
30898 				    "fatal" : assessment,
30899 				    "op-code",
30900 				    DATA_TYPE_UINT8, op_code,
30901 				    "cdb",
30902 				    DATA_TYPE_UINT8_ARRAY, cdblen,
30903 				    ssc->ssc_uscsi_cmd->uscsi_cdb,
30904 				    "pkt-reason",
30905 				    DATA_TYPE_UINT8, uscsi_pkt_reason,
30906 				    "pkt-state",
30907 				    DATA_TYPE_UINT8, uscsi_pkt_state,
30908 				    "pkt-stats",
30909 				    DATA_TYPE_UINT32,
30910 				    uscsi_pkt_statistics,
30911 				    "stat-code",
30912 				    DATA_TYPE_UINT8,
30913 				    ssc->ssc_uscsi_cmd->uscsi_status,
30914 				    "key",
30915 				    DATA_TYPE_UINT8,
30916 				    scsi_sense_key(sensep),
30917 				    "asc",
30918 				    DATA_TYPE_UINT8,
30919 				    scsi_sense_asc(sensep),
30920 				    "ascq",
30921 				    DATA_TYPE_UINT8,
30922 				    scsi_sense_ascq(sensep),
30923 				    "sense-data",
30924 				    DATA_TYPE_UINT8_ARRAY,
30925 				    senlen, sensep,
30926 				    "lba",
30927 				    DATA_TYPE_UINT64,
30928 				    ssc->ssc_uscsi_info->ui_lba,
30929 				    NULL);
30930 			} else {
30931 				/*
30932 				 * if sense-key == 0x4(hardware
30933 				 * error), driver-assessment should
30934 				 * be "fatal" if drv_assess is
30935 				 * SD_FM_DRV_FATAL.
30936 				 */
30937 				scsi_fm_ereport_post(un->un_sd,
30938 				    uscsi_path_instance, NULL,
30939 				    "cmd.disk.dev.rqs.derr",
30940 				    uscsi_ena, devid,
30941 				    NULL, DDI_NOSLEEP, NULL,
30942 				    FM_VERSION,
30943 				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30944 				    DEVID_IF_KNOWN(devid),
30945 				    "driver-assessment",
30946 				    DATA_TYPE_STRING,
30947 				    drv_assess == SD_FM_DRV_FATAL ?
30948 				    (sense_key == 0x4 ?
30949 				    "fatal" : "fail") : assessment,
30950 				    "op-code",
30951 				    DATA_TYPE_UINT8, op_code,
30952 				    "cdb",
30953 				    DATA_TYPE_UINT8_ARRAY, cdblen,
30954 				    ssc->ssc_uscsi_cmd->uscsi_cdb,
30955 				    "pkt-reason",
30956 				    DATA_TYPE_UINT8, uscsi_pkt_reason,
30957 				    "pkt-state",
30958 				    DATA_TYPE_UINT8, uscsi_pkt_state,
30959 				    "pkt-stats",
30960 				    DATA_TYPE_UINT32,
30961 				    uscsi_pkt_statistics,
30962 				    "stat-code",
30963 				    DATA_TYPE_UINT8,
30964 				    ssc->ssc_uscsi_cmd->uscsi_status,
30965 				    "key",
30966 				    DATA_TYPE_UINT8,
30967 				    scsi_sense_key(sensep),
30968 				    "asc",
30969 				    DATA_TYPE_UINT8,
30970 				    scsi_sense_asc(sensep),
30971 				    "ascq",
30972 				    DATA_TYPE_UINT8,
30973 				    scsi_sense_ascq(sensep),
30974 				    "sense-data",
30975 				    DATA_TYPE_UINT8_ARRAY,
30976 				    senlen, sensep,
30977 				    NULL);
30978 			}
30979 		} else {
30980 			/*
30981 			 * For stat_code == STATUS_GOOD, this is not a
30982 			 * hardware error.
30983 			 */
30984 			if (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD)
30985 				return;
30986 
30987 			/*
30988 			 * Post ereport.io.scsi.cmd.disk.dev.serr if we got the
30989 			 * stat-code but with sense data unavailable.
30990 			 * driver-assessment will be set based on parameter
30991 			 * drv_assess.
30992 			 */
30993 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30994 			    NULL,
30995 			    "cmd.disk.dev.serr", uscsi_ena,
30996 			    devid, NULL, DDI_NOSLEEP, NULL,
30997 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30998 			    DEVID_IF_KNOWN(devid),
30999 			    "driver-assessment", DATA_TYPE_STRING,
31000 			    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
31001 			    "op-code", DATA_TYPE_UINT8, op_code,
31002 			    "cdb",
31003 			    DATA_TYPE_UINT8_ARRAY,
31004 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31005 			    "pkt-reason",
31006 			    DATA_TYPE_UINT8, uscsi_pkt_reason,
31007 			    "pkt-state",
31008 			    DATA_TYPE_UINT8, uscsi_pkt_state,
31009 			    "pkt-stats",
31010 			    DATA_TYPE_UINT32, uscsi_pkt_statistics,
31011 			    "stat-code",
31012 			    DATA_TYPE_UINT8,
31013 			    ssc->ssc_uscsi_cmd->uscsi_status,
31014 			    NULL);
31015 		}
31016 	}
31017 }
31018 
31019 /*
31020  *     Function: sd_ssc_extract_info
31021  *
31022  * Description: Extract information available to help generate ereport.
31023  *
31024  *     Context: Kernel thread or interrupt context.
31025  */
31026 static void
31027 sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un, struct scsi_pkt *pktp,
31028     struct buf *bp, struct sd_xbuf *xp)
31029 {
31030 	size_t senlen = 0;
31031 	union scsi_cdb *cdbp;
31032 	int path_instance;
31033 	/*
31034 	 * Need scsi_cdb_size array to determine the cdb length.
31035 	 */
31036 	extern uchar_t	scsi_cdb_size[];
31037 
31038 	ASSERT(un != NULL);
31039 	ASSERT(pktp != NULL);
31040 	ASSERT(bp != NULL);
31041 	ASSERT(xp != NULL);
31042 	ASSERT(ssc != NULL);
31043 	ASSERT(mutex_owned(SD_MUTEX(un)));
31044 
31045 	/*
31046 	 * Transfer the cdb buffer pointer here.
31047 	 */
31048 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
31049 
31050 	ssc->ssc_uscsi_cmd->uscsi_cdblen = scsi_cdb_size[GETGROUP(cdbp)];
31051 	ssc->ssc_uscsi_cmd->uscsi_cdb = (caddr_t)cdbp;
31052 
31053 	/*
31054 	 * Transfer the sense data buffer pointer if sense data is available,
31055 	 * calculate the sense data length first.
31056 	 */
31057 	if ((xp->xb_sense_state & STATE_XARQ_DONE) ||
31058 	    (xp->xb_sense_state & STATE_ARQ_DONE)) {
31059 		/*
31060 		 * For arq case, we will enter here.
31061 		 */
31062 		if (xp->xb_sense_state & STATE_XARQ_DONE) {
31063 			senlen = MAX_SENSE_LENGTH - xp->xb_sense_resid;
31064 		} else {
31065 			senlen = SENSE_LENGTH;
31066 		}
31067 	} else {
31068 		/*
31069 		 * For non-arq case, we will enter this branch.
31070 		 */
31071 		if (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK &&
31072 		    (xp->xb_sense_state & STATE_XFERRED_DATA)) {
31073 			senlen = SENSE_LENGTH - xp->xb_sense_resid;
31074 		}
31075 
31076 	}
31077 
31078 	ssc->ssc_uscsi_cmd->uscsi_rqlen = (senlen & 0xff);
31079 	ssc->ssc_uscsi_cmd->uscsi_rqresid = 0;
31080 	ssc->ssc_uscsi_cmd->uscsi_rqbuf = (caddr_t)xp->xb_sense_data;
31081 
31082 	ssc->ssc_uscsi_cmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
31083 
31084 	/*
31085 	 * Only transfer path_instance when scsi_pkt was properly allocated.
31086 	 */
31087 	path_instance = pktp->pkt_path_instance;
31088 	if (scsi_pkt_allocated_correctly(pktp) && path_instance)
31089 		ssc->ssc_uscsi_cmd->uscsi_path_instance = path_instance;
31090 	else
31091 		ssc->ssc_uscsi_cmd->uscsi_path_instance = 0;
31092 
31093 	/*
31094 	 * Copy in the other fields we may need when posting ereport.
31095 	 */
31096 	ssc->ssc_uscsi_info->ui_pkt_reason = pktp->pkt_reason;
31097 	ssc->ssc_uscsi_info->ui_pkt_state = pktp->pkt_state;
31098 	ssc->ssc_uscsi_info->ui_pkt_statistics = pktp->pkt_statistics;
31099 	ssc->ssc_uscsi_info->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
31100 
31101 	/*
31102 	 * For partially read/write command, we will not create ena
31103 	 * in case of a successful command be reconized as recovered.
31104 	 */
31105 	if ((pktp->pkt_reason == CMD_CMPLT) &&
31106 	    (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD) &&
31107 	    (senlen == 0)) {
31108 		return;
31109 	}
31110 
31111 	/*
31112 	 * To associate ereports of a single command execution flow, we
31113 	 * need a shared ena for a specific command.
31114 	 */
31115 	if (xp->xb_ena == 0)
31116 		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
31117 	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
31118 }
31119 
31120 
31121 /*
31122  *     Function: sd_check_bdc_vpd
31123  *
31124  * Description: Query the optional INQUIRY VPD page 0xb1. If the device
31125  *              supports VPD page 0xb1, sd examines the MEDIUM ROTATION
31126  *              RATE.
31127  *
31128  *		Set the following based on RPM value:
31129  *		= 0	device is not solid state, non-rotational
31130  *		= 1	device is solid state, non-rotational
31131  *		> 1	device is not solid state, rotational
31132  *
31133  *     Context: Kernel thread or interrupt context.
31134  */
31135 
31136 static void
31137 sd_check_bdc_vpd(sd_ssc_t *ssc)
31138 {
31139 	int		rval		= 0;
31140 	uchar_t		*inqb1		= NULL;
31141 	size_t		inqb1_len	= MAX_INQUIRY_SIZE;
31142 	size_t		inqb1_resid	= 0;
31143 	struct sd_lun	*un;
31144 
31145 	ASSERT(ssc != NULL);
31146 	un = ssc->ssc_un;
31147 	ASSERT(un != NULL);
31148 	ASSERT(!mutex_owned(SD_MUTEX(un)));
31149 
31150 	mutex_enter(SD_MUTEX(un));
31151 	un->un_f_is_rotational = TRUE;
31152 	un->un_f_is_solid_state = FALSE;
31153 
31154 	if (ISCD(un)) {
31155 		mutex_exit(SD_MUTEX(un));
31156 		return;
31157 	}
31158 
31159 	if (sd_check_vpd_page_support(ssc) == 0 &&
31160 	    un->un_vpd_page_mask & SD_VPD_DEV_CHARACTER_PG) {
31161 		mutex_exit(SD_MUTEX(un));
31162 		/* collect page b1 data */
31163 		inqb1 = kmem_zalloc(inqb1_len, KM_SLEEP);
31164 
31165 		rval = sd_send_scsi_INQUIRY(ssc, inqb1, inqb1_len,
31166 		    0x01, 0xB1, &inqb1_resid);
31167 
31168 		if (rval == 0 && (inqb1_len - inqb1_resid > 5)) {
31169 			SD_TRACE(SD_LOG_COMMON, un,
31170 			    "sd_check_bdc_vpd: \
31171 			    successfully get VPD page: %x \
31172 			    PAGE LENGTH: %x BYTE 4: %x \
31173 			    BYTE 5: %x", inqb1[1], inqb1[3], inqb1[4],
31174 			    inqb1[5]);
31175 
31176 			mutex_enter(SD_MUTEX(un));
31177 			/*
31178 			 * Check the MEDIUM ROTATION RATE.
31179 			 */
31180 			if (inqb1[4] == 0) {
31181 				if (inqb1[5] == 0) {
31182 					un->un_f_is_rotational = FALSE;
31183 				} else if (inqb1[5] == 1) {
31184 					un->un_f_is_rotational = FALSE;
31185 					un->un_f_is_solid_state = TRUE;
31186 					/*
31187 					 * Solid state drives don't need
31188 					 * disksort.
31189 					 */
31190 					un->un_f_disksort_disabled = TRUE;
31191 				}
31192 			}
31193 			mutex_exit(SD_MUTEX(un));
31194 		} else if (rval != 0) {
31195 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
31196 		}
31197 
31198 		kmem_free(inqb1, inqb1_len);
31199 	} else {
31200 		mutex_exit(SD_MUTEX(un));
31201 	}
31202 }
31203 
31204 /*
31205  *	Function: sd_check_emulation_mode
31206  *
31207  *   Description: Check whether the SSD is at emulation mode
31208  *		  by issuing READ_CAPACITY_16 to see whether
31209  *		  we can get physical block size of the drive.
31210  *
31211  *	 Context: Kernel thread or interrupt context.
31212  */
31213 
31214 static void
31215 sd_check_emulation_mode(sd_ssc_t *ssc)
31216 {
31217 	int		rval = 0;
31218 	uint64_t	capacity;
31219 	uint_t		lbasize;
31220 	uint_t		pbsize;
31221 	int		i;
31222 	int		devid_len;
31223 	struct sd_lun	*un;
31224 
31225 	ASSERT(ssc != NULL);
31226 	un = ssc->ssc_un;
31227 	ASSERT(un != NULL);
31228 	ASSERT(!mutex_owned(SD_MUTEX(un)));
31229 
31230 	mutex_enter(SD_MUTEX(un));
31231 	if (ISCD(un)) {
31232 		mutex_exit(SD_MUTEX(un));
31233 		return;
31234 	}
31235 
31236 	if (un->un_f_descr_format_supported) {
31237 		mutex_exit(SD_MUTEX(un));
31238 		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
31239 		    &pbsize, SD_PATH_DIRECT);
31240 		mutex_enter(SD_MUTEX(un));
31241 
31242 		if (rval != 0) {
31243 			un->un_phy_blocksize = DEV_BSIZE;
31244 		} else {
31245 			if (!ISP2(pbsize % DEV_BSIZE) || pbsize == 0) {
31246 				un->un_phy_blocksize = DEV_BSIZE;
31247 			} else if (pbsize > un->un_phy_blocksize) {
31248 				/*
31249 				 * Don't reset the physical blocksize
31250 				 * unless we've detected a larger value.
31251 				 */
31252 				un->un_phy_blocksize = pbsize;
31253 			}
31254 		}
31255 	}
31256 
31257 	for (i = 0; i < sd_flash_dev_table_size; i++) {
31258 		devid_len = (int)strlen(sd_flash_dev_table[i]);
31259 		if (sd_sdconf_id_match(un, sd_flash_dev_table[i], devid_len)
31260 		    == SD_SUCCESS) {
31261 			un->un_phy_blocksize = SSD_SECSIZE;
31262 			if (un->un_f_is_solid_state &&
31263 			    un->un_phy_blocksize != un->un_tgt_blocksize)
31264 				un->un_f_enable_rmw = TRUE;
31265 		}
31266 	}
31267 
31268 	mutex_exit(SD_MUTEX(un));
31269 }
31270