xref: /illumos-gate/usr/src/uts/common/io/scsi/impl/scsi_watch.c (revision bb0ade0978a02d3fe0b0165cd4725fdcb593fbfb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * generic scsi device watch
30  */
31 
/*
 * Debug support.
 *
 * SWDEBUG is enabled for DEBUG and lint builds.  With SWDEBUG defined,
 * swdebug is a patchable variable: SW_DEBUG messages are emitted when it
 * is exactly 1, SW_DEBUG2 messages when it is greater than 1.  Without
 * SWDEBUG every macro collapses to a constant-false test so the calls
 * still parse but never run.
 *
 * SW_DEBUG and SW_DEBUG2 expand to a bare "if (...) scsi_log", so they must
 * be used as complete statements followed by an argument list and must not
 * be placed directly in front of an "else".
 */
#if DEBUG || lint
#define	SWDEBUG
#endif

#ifdef SWDEBUG
static int swdebug = 0;
#define	DEBUGGING	((scsi_options & SCSI_DEBUG_TGT) && swdebug > 1)
#define	SW_DEBUG	if (swdebug == 1) scsi_log
#define	SW_DEBUG2	if (swdebug > 1) scsi_log
#else	/* SWDEBUG */
#define	swdebug		(0)
#define	DEBUGGING	(0)
#define	SW_DEBUG	if (0) scsi_log
#define	SW_DEBUG2	if (0) scsi_log
#endif
50 
51 
52 
53 /*
54  * Includes, Declarations and Local Data
55  */
56 
57 #include <sys/note.h>
58 #include <sys/scsi/scsi.h>
59 #include <sys/var.h>
60 #include <sys/proc.h>
61 #include <sys/thread.h>
62 #include <sys/callb.h>
63 
/*
 * Macro for filling in the lun value for scsi-1 support.
 *
 * When the device address has a non-zero lun and the inquiry data reports
 * inq_ansi == 0x1, the lun is copied into the scc_lun field of the CDB
 * that pkt points at.  Otherwise the CDB is left untouched.
 *
 * The body is wrapped in do { } while (0) and both arguments are
 * parenthesized so the macro behaves as a single statement (safe in an
 * unbraced if/else) and accepts arbitrary pointer expressions.
 */
#define	FILL_SCSI1_LUN(devp, pkt) \
	do { \
		if (((devp)->sd_address.a_lun > 0) && \
		    ((devp)->sd_inq->inq_ansi == 0x1)) { \
			((union scsi_cdb *)(pkt)->pkt_cdbp)->scc_lun = \
			    (devp)->sd_address.a_lun; \
		} \
		/*CONSTCOND*/ \
	} while (0)
73 
74 char *sw_label = "scsi-watch";
75 
76 static int scsi_watch_io_time = SCSI_WATCH_IO_TIME;
77 
78 /*
79  * all info resides in the scsi watch structure
80  *
81  * the monitoring is performed by one separate thread which works
82  * from a linked list of scsi_watch_request packets
83  */
84 static struct scsi_watch {
85 	kthread_t		*sw_thread;	/* the watch thread	*/
86 	kmutex_t		sw_mutex;	/* mutex protecting list */
87 						/* and this structure */
88 	kcondvar_t		sw_cv;		/* cv for waking up thread */
89 	struct scsi_watch_request *sw_head;	/* head of linked list	*/
90 						/* of request structures */
91 	uchar_t			sw_state;	/* for suspend-resume */
92 	uchar_t			sw_flags;	/* to start at head of list */
93 						/* for watch thread */
94 	struct scsi_watch_request *swr_current; /* the command waiting to be */
95 						/* processed by the watch */
96 						/* thread which is being */
97 						/* blocked */
98 } sw;
99 
100 #if !defined(lint)
101 _NOTE(MUTEX_PROTECTS_DATA(scsi_watch::sw_mutex, scsi_watch))
102 #endif
103 
/*
 * Values for sw.sw_state.  scsi_watch_suspend() moves the state to
 * SW_SUSPENDED once every request is suspended; scsi_watch_resume()
 * returns it to SW_RUNNING once every request is watching again.
 */
#define	SW_RUNNING		0
#define	SW_SUSPEND_REQUESTED	1
#define	SW_SUSPENDED		2

/*
 * Bits for sw.sw_flags.  SW_START_HEAD is set when a request was removed
 * from the list so the watch thread restarts its scan at the list head.
 */
#define	SW_START_HEAD		0x1
115 
116 struct scsi_watch_request {
117 	struct scsi_watch_request *swr_next;	/* linked request list	*/
118 	struct scsi_watch_request *swr_prev;
119 	clock_t			swr_interval;	/* interval between TURs */
120 	clock_t			swr_timeout;	/* count down		*/
121 	uchar_t			swr_busy;	/* TUR in progress	*/
122 	uchar_t			swr_what;	/* watch or stop	*/
123 	uchar_t			swr_sense_length; /* required sense length */
124 	struct scsi_pkt		*swr_pkt;	/* TUR pkt itself	*/
125 	struct scsi_pkt		*swr_rqpkt;	/* request sense pkt	*/
126 	struct buf		*swr_rqbp;	/* bp for request sense data */
127 	int			(*swr_callback)(); /* callback to driver */
128 	caddr_t			swr_callback_arg;
129 	kcondvar_t		swr_terminate_cv; /* cv to wait on to cleanup */
130 						/* request synchronously */
131 	int			swr_ref;	/*  refer count to the swr */
132 	uchar_t			suspend_destroy; /* flag for free later */
133 };
134 
135 /*
136  * values for swr flags
137  */
138 #define	SUSPEND_DESTROY		1
139 
140 #if !defined(lint)
141 _NOTE(SCHEME_PROTECTS_DATA("unshared data", scsi_watch_request))
142 #endif
143 
144 /*
145  * values for sw_what
146  */
147 #define	SWR_WATCH		0	/* device watch */
148 #define	SWR_STOP		1	/* stop monitoring and destroy swr */
149 #define	SWR_SUSPEND_REQUESTED	2	/* req. pending suspend */
150 #define	SWR_SUSPENDED		3	/* req. is suspended */
151 
/* forward declarations of the file-local routines */
static void scsi_watch_thread(void);
static void scsi_watch_request_intr(struct scsi_pkt *pkt);
static void scsi_watch_request_destroy(struct scsi_watch_request *swr);
155 
156 /*
157  * setup, called from _init(), the thread is created when we need it
158  * and exits when there is nothing to do anymore and everything has been
159  * cleaned up (ie. resources deallocated)
160  */
161 void
162 scsi_watch_init()
163 {
164 /* NO OTHER THREADS ARE RUNNING */
165 	mutex_init(&sw.sw_mutex, NULL, MUTEX_DRIVER, NULL);
166 	cv_init(&sw.sw_cv, NULL, CV_DRIVER, NULL);
167 	sw.sw_state = SW_RUNNING;
168 	sw.sw_flags = 0;
169 	sw.swr_current = NULL;
170 }
171 
172 /*
173  * cleaning up, called from _fini()
174  */
175 void
176 scsi_watch_fini()
177 {
178 /* NO OTHER THREADS ARE RUNNING */
179 	/*
180 	 * hope and pray that the thread has exited
181 	 */
182 	ASSERT(sw.sw_thread == 0);
183 	mutex_destroy(&sw.sw_mutex);
184 	cv_destroy(&sw.sw_cv);
185 }
186 
187 /*
188  * allocate an swr (scsi watch request structure) and initialize pkts
189  */
190 #define	ROUTE		&devp->sd_address
191 
192 opaque_t
193 scsi_watch_request_submit(
194 	struct scsi_device	*devp,
195 	int			interval,
196 	int			sense_length,
197 	int			(*callback)(),	/* callback function */
198 	caddr_t			cb_arg)		/* device number */
199 {
200 	register struct scsi_watch_request	*swr = NULL;
201 	register struct scsi_watch_request	*sswr, *p;
202 	struct buf				*bp = NULL;
203 	struct scsi_pkt				*rqpkt = NULL;
204 	struct scsi_pkt				*pkt = NULL;
205 	uchar_t					dtype;
206 
207 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
208 	    "scsi_watch_request_submit: Entering ...\n");
209 
210 	mutex_enter(&sw.sw_mutex);
211 	if (sw.sw_thread == 0) {
212 		register kthread_t	*t;
213 
214 		t = thread_create((caddr_t)NULL, 0, scsi_watch_thread,
215 		    NULL, 0, &p0, TS_RUN, v.v_maxsyspri - 2);
216 		sw.sw_thread = t;
217 	}
218 
219 	for (p = sw.sw_head; p != NULL; p = p->swr_next) {
220 		if ((p->swr_callback_arg == cb_arg) &&
221 		    (p->swr_callback == callback))
222 			break;
223 	}
224 
225 	/* update time interval for an existing request */
226 	if (p) {
227 		if (p->swr_what != SWR_STOP) {
228 			p->swr_timeout = p->swr_interval
229 			    = drv_usectohz(interval);
230 			p->swr_what = SWR_WATCH;
231 			p->swr_ref++;
232 			cv_signal(&sw.sw_cv);
233 			mutex_exit(&sw.sw_mutex);
234 			return ((opaque_t)p);
235 		}
236 	}
237 	mutex_exit(&sw.sw_mutex);
238 
239 	/*
240 	 * allocate space for scsi_watch_request
241 	 */
242 	swr = kmem_zalloc(sizeof (struct scsi_watch_request), KM_SLEEP);
243 
244 	/*
245 	 * allocate request sense bp and pkt and make cmd
246 	 * we shouldn't really need it if ARQ is enabled but it is useful
247 	 * if the ARQ failed.
248 	 */
249 	bp = scsi_alloc_consistent_buf(ROUTE, NULL,
250 	    sense_length, B_READ, SLEEP_FUNC, NULL);
251 
252 	rqpkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL,
253 	    bp, CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
254 
255 	(void) scsi_setup_cdb((union scsi_cdb *)rqpkt->pkt_cdbp,
256 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
257 	FILL_SCSI1_LUN(devp, rqpkt);
258 	rqpkt->pkt_private = (opaque_t)swr;
259 	rqpkt->pkt_time = scsi_watch_io_time;
260 	rqpkt->pkt_comp = scsi_watch_request_intr;
261 	rqpkt->pkt_flags |= FLAG_HEAD;
262 
263 	/*
264 	 * Create TUR pkt or a zero byte WRITE(10) based on the
265 	 * disk-type for reservation state.
266 	 * For inq_dtype of SBC (DIRECT, dtype == 0)
267 	 * OR for RBC devices (dtype is 0xE) AND for
268 	 * ANSI version of SPC/SPC-2/SPC-3 (inq_ansi == 3-5).
269 	 */
270 
271 	dtype = devp->sd_inq->inq_dtype & DTYPE_MASK;
272 	if (((dtype == 0) || (dtype == 0xE)) &&
273 	    (devp->sd_inq->inq_ansi > 2)) {
274 		pkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL, NULL,
275 		    CDB_GROUP1, sizeof (struct scsi_arq_status),
276 		    0, 0, SLEEP_FUNC, NULL);
277 
278 		(void) scsi_setup_cdb((union scsi_cdb *)pkt->pkt_cdbp,
279 		    SCMD_WRITE_G1, 0, 0, 0);
280 	} else {
281 		pkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL, NULL,
282 		    CDB_GROUP0, sizeof (struct scsi_arq_status),
283 		    0, 0, SLEEP_FUNC, NULL);
284 
285 		(void) scsi_setup_cdb((union scsi_cdb *)pkt->pkt_cdbp,
286 		    SCMD_TEST_UNIT_READY, 0, 0, 0);
287 		FILL_SCSI1_LUN(devp, pkt);
288 	}
289 
290 	pkt->pkt_private = (opaque_t)swr;
291 	pkt->pkt_time = scsi_watch_io_time;
292 	pkt->pkt_comp = scsi_watch_request_intr;
293 	if (scsi_ifgetcap(&pkt->pkt_address, "tagged-qing", 1) == 1) {
294 		pkt->pkt_flags |= FLAG_STAG;
295 	}
296 
297 	/*
298 	 * set the allocated resources in swr
299 	 */
300 	swr->swr_rqbp = bp;
301 	swr->swr_rqpkt = rqpkt;
302 	swr->swr_pkt = pkt;
303 	swr->swr_timeout = swr->swr_interval = drv_usectohz(interval);
304 	swr->swr_callback = callback;
305 	swr->swr_callback_arg = cb_arg;
306 	swr->swr_what = SWR_WATCH;
307 	swr->swr_sense_length = (uchar_t)sense_length;
308 	swr->swr_ref = 1;
309 	cv_init(&swr->swr_terminate_cv, NULL, CV_DRIVER, NULL);
310 
311 	/*
312 	 * add to the list and wake up the thread
313 	 */
314 	mutex_enter(&sw.sw_mutex);
315 	swr->swr_next = sw.sw_head;
316 	swr->swr_prev = NULL;
317 	if (sw.sw_head) {
318 		sw.sw_head->swr_prev = swr;
319 	}
320 	sw.sw_head = swr;
321 
322 	/*
323 	 * reset all timeouts, so all requests are in sync again
324 	 * XXX there is a small window where the watch thread releases
325 	 * the mutex so that could upset the resyncing
326 	 */
327 	sswr = swr;
328 	while (sswr) {
329 		sswr->swr_timeout = swr->swr_interval;
330 		sswr = sswr->swr_next;
331 	}
332 	cv_signal(&sw.sw_cv);
333 	mutex_exit(&sw.sw_mutex);
334 	return ((opaque_t)swr);
335 }
336 
337 
338 /*
339  * called by (eg. pwr management) to resume the scsi_watch_thread
340  */
341 void
342 scsi_watch_resume(opaque_t token)
343 {
344 	struct scsi_watch_request *swr = (struct scsi_watch_request *)NULL;
345 	/*
346 	 * Change the state to SW_RUNNING and wake up the scsi_watch_thread
347 	 */
348 	SW_DEBUG(0, sw_label, SCSI_DEBUG, "scsi_watch_resume:\n");
349 	mutex_enter(&sw.sw_mutex);
350 
351 	if (!sw.sw_head)
352 		goto exit;
353 
354 	/* search for token */
355 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
356 		if (swr == (struct scsi_watch_request *)token)
357 			break;
358 	}
359 
360 	/* if we can't find this value, then we just do nothing */
361 	if (swr == (struct scsi_watch_request *)NULL)
362 		goto exit;
363 
364 	swr->swr_what = SWR_WATCH;
365 
366 
367 	/* see if all swr's are awake, then start the thread again */
368 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
369 		if (swr->swr_what != SWR_WATCH)
370 			goto exit;
371 	}
372 
373 	sw.sw_state = SW_RUNNING;
374 	cv_signal(&sw.sw_cv);
375 
376 exit:
377 	mutex_exit(&sw.sw_mutex);
378 }
379 
380 
381 /*
382  * called by clients (eg. pwr management) to suspend the scsi_watch_thread
383  */
384 void
385 scsi_watch_suspend(opaque_t token)
386 {
387 	struct scsi_watch_request *swr = (struct scsi_watch_request *)NULL;
388 	clock_t	now;
389 	clock_t halfsec_delay = drv_usectohz(500000);
390 
391 	SW_DEBUG(0, sw_label, SCSI_DEBUG, "scsi_watch_suspend:\n");
392 
393 	mutex_enter(&sw.sw_mutex);
394 
395 	if (!sw.sw_head)
396 		goto exit;
397 
398 	/* search for token */
399 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
400 		if (swr == (struct scsi_watch_request *)token)
401 			break;
402 	}
403 
404 	/* if we can't find this value, then we just do nothing */
405 	if (swr == (struct scsi_watch_request *)NULL)
406 		goto exit;
407 
408 
409 	for (;;) {
410 		if (swr->swr_busy) {
411 			/*
412 			 * XXX: Assumes that this thread can rerun
413 			 * till all outstanding cmds are complete
414 			 */
415 			swr->swr_what = SWR_SUSPEND_REQUESTED;
416 			now = ddi_get_lbolt();
417 			(void) cv_timedwait(&sw.sw_cv, &sw.sw_mutex,
418 			    now + halfsec_delay);
419 		} else {
420 			swr->swr_what = SWR_SUSPENDED;
421 			break;
422 		}
423 	}
424 
425 	/* see if all swr's are suspended, then suspend the thread */
426 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
427 		if (swr->swr_what != SWR_SUSPENDED)
428 			goto exit;
429 	}
430 
431 	sw.sw_state = SW_SUSPENDED;
432 
433 exit:
434 	mutex_exit(&sw.sw_mutex);
435 }
436 
437 /*
438  * destroy swr, called for watch thread
439  */
440 static void
441 scsi_watch_request_destroy(struct scsi_watch_request *swr)
442 {
443 	ASSERT(MUTEX_HELD(&sw.sw_mutex));
444 	ASSERT(swr->swr_busy == 0);
445 
446 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
447 	    "scsi_watch_request_destroy: Entering ...\n");
448 	if (swr->swr_ref != 0)
449 		return;
450 
451 	/*
452 	 * remove swr from linked list and destroy pkts
453 	 */
454 	if (swr->swr_prev) {
455 		swr->swr_prev->swr_next = swr->swr_next;
456 	}
457 	if (swr->swr_next) {
458 		swr->swr_next->swr_prev = swr->swr_prev;
459 	}
460 	if (sw.sw_head == swr) {
461 		sw.sw_head = swr->swr_next;
462 	}
463 	if (sw.swr_current == swr) {
464 		swr->suspend_destroy = SUSPEND_DESTROY;
465 		sw.swr_current = NULL;
466 	}
467 
468 	scsi_destroy_pkt(swr->swr_rqpkt);
469 	scsi_free_consistent_buf(swr->swr_rqbp);
470 	scsi_destroy_pkt(swr->swr_pkt);
471 	cv_signal(&swr->swr_terminate_cv);
472 }
473 
474 /*
475  * scsi_watch_request_terminate()
476  * called by requestor to terminate any pending watch request.
477  * if the request is currently "busy", and the caller cannot wait, failure
478  * is returned. O/w the request is cleaned up immediately.
479  */
480 int
481 scsi_watch_request_terminate(opaque_t token, int flags)
482 {
483 	struct scsi_watch_request *swr =
484 	    (struct scsi_watch_request *)token;
485 	struct scsi_watch_request *sswr;
486 
487 	int count = 0;
488 	int free_flag = 0;
489 
490 	/*
491 	 * We try to clean up this request if we can. We also inform
492 	 * the watch thread that we mucked around the list so it has
493 	 * to start reading from head of list again.
494 	 */
495 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
496 	    "scsi_watch_request_terminate: Entering(0x%p) ...\n",
497 	    (void *)swr);
498 	mutex_enter(&sw.sw_mutex);
499 
500 	/*
501 	 * check if it is still in the list
502 	 */
503 	sswr = sw.sw_head;
504 	while (sswr) {
505 		if (sswr == swr) {
506 			swr->swr_ref--;
507 			count = swr->swr_ref;
508 
509 			if (swr->swr_busy) {
510 				if (flags == SCSI_WATCH_TERMINATE_NOWAIT) {
511 					mutex_exit(&sw.sw_mutex);
512 					return (SCSI_WATCH_TERMINATE_FAIL);
513 				}
514 				if (count != 0 && flags !=
515 				    SCSI_WATCH_TERMINATE_ALL_WAIT) {
516 					mutex_exit(&sw.sw_mutex);
517 					return (SCSI_WATCH_TERMINATE_SUCCESS);
518 				}
519 				if (SCSI_WATCH_TERMINATE_ALL_WAIT == flags) {
520 					swr->swr_ref = 0;
521 					count = 0;
522 				}
523 				swr->swr_what = SWR_STOP;
524 				cv_wait(&swr->swr_terminate_cv, &sw.sw_mutex);
525 				free_flag = 1;
526 				goto done;
527 			} else {
528 				if (SCSI_WATCH_TERMINATE_NOWAIT == flags ||
529 				    SCSI_WATCH_TERMINATE_ALL_WAIT == flags) {
530 					swr->swr_ref = 0;
531 					count = 0;
532 				}
533 				scsi_watch_request_destroy(swr);
534 				if (0 == count) {
535 					sw.sw_flags |= SW_START_HEAD;
536 					free_flag = 1;
537 				}
538 				goto done;
539 			}
540 		}
541 		sswr = sswr->swr_next;
542 	}
543 done:
544 	mutex_exit(&sw.sw_mutex);
545 	if (!sswr) {
546 		return (SCSI_WATCH_TERMINATE_FAIL);
547 	}
548 	if (1 == free_flag &&
549 	    sswr->suspend_destroy != SUSPEND_DESTROY) {
550 		cv_destroy(&swr->swr_terminate_cv);
551 		kmem_free((caddr_t)swr, sizeof (struct scsi_watch_request));
552 	}
553 
554 	return (SCSI_WATCH_TERMINATE_SUCCESS);
555 }
556 
557 
558 /*
559  * The routines scsi_watch_thread & scsi_watch_request_intr are
560  * on different threads.
561  * If there is no work to be done by the lower level driver
562  * then swr->swr_busy will not be set.
563  * In this case we will call CALLB_CPR_SAFE_BEGIN before
564  * calling cv_timedwait.
565  * In the other case where there is work to be done by
566  * the lower level driver then the flag swr->swr_busy will
567  * be set.
568  * We cannot call CALLB_CPR_SAFE_BEGIN at this point the reason
569  * is the intr thread can interfere with our operations. So
570  * we do a cv_timedwait here. Now at the completion of the
571  * lower level driver's work we will call CALLB_CPR_SAFE_BEGIN
572  * in scsi_watch_request_intr.
573  * In all the cases we will call CALLB_CPR_SAFE_END only if
574  * we already called a CALLB_CPR_SAFE_BEGIN and this is flagged
575  * by sw_cpr_flag.
576  * Warlock has a problem when we use different locks
577  * on the same type of structure in different contexts.
578  * We use callb_cpr_t in both scsi_watch and esp_callback threads.
579  * we use different mutexe's in different threads. And
580  * this is not acceptable to warlock. To avoid this
581  * problem we use the same name for the mutex in
582  * both scsi_watch & esp_callback. when __lock_lint is not defined
583  * esp_callback uses the mutex on the stack and in scsi_watch
584  * a static variable. But when __lock_lint is defined
585  * we make a mutex which is global in esp_callback and
586  * a external mutex for scsi_watch.
587  */
588 static int sw_cmd_count = 0;
589 static int sw_cpr_flag = 0;
590 static callb_cpr_t cpr_info;
591 #ifndef __lock_lint
592 static kmutex_t cpr_mutex;
593 #else
594 extern kmutex_t cpr_mutex;
595 #endif
596 
597 #if !defined(lint)
598 _NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cpr_info))
599 _NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, sw_cmd_count))
600 #endif
601 /*
602  * the scsi watch thread:
603  * it either wakes up if there is work to do or if the cv_timeait
604  * timed out
605  * normally, it wakes up every <delay> seconds and checks the list.
606  * the interval is not very accurate if the cv was signalled but that
607  * really doesn't matter much
608  * it is more important that we fire off all TURs simulataneously so
609  * we don't have to wake up frequently
610  */
611 static void
612 scsi_watch_thread()
613 {
614 	struct scsi_watch_request	*swr, *next;
615 	clock_t				now;
616 	clock_t				last_delay = 0;
617 	clock_t				next_delay = 0;
618 	clock_t				onesec = drv_usectohz(1000000);
619 	clock_t				exit_delay = 60 * onesec;
620 
621 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
622 	    "scsi_watch_thread: Entering ...\n");
623 
624 #if !defined(lint)
625 	_NOTE(NO_COMPETING_THREADS_NOW);
626 #endif
627 	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
628 	CALLB_CPR_INIT(&cpr_info,
629 	    &cpr_mutex, callb_generic_cpr, "scsi_watch");
630 	sw_cpr_flag = 0;
631 #if !defined(lint)
632 	/*LINTED*/
633 	_NOTE(COMPETING_THREADS_NOW);
634 #endif
635 	/*
636 	 * grab the mutex and wait for work
637 	 */
638 	mutex_enter(&sw.sw_mutex);
639 	if (sw.sw_head == NULL) {
640 		cv_wait(&sw.sw_cv, &sw.sw_mutex);
641 	}
642 
643 	/*
644 	 * now loop forever for work; if queue is empty exit
645 	 */
646 	for (;;) {
647 head:
648 		swr = sw.sw_head;
649 		while (swr) {
650 
651 			/*
652 			 * If state is not running, wait for scsi_watch_resume
653 			 * to signal restart, but before going into cv_wait
654 			 * need to let the PM framework know that it is safe
655 			 * to stop this thread for CPR
656 			 */
657 			if (sw.sw_state != SW_RUNNING) {
658 				SW_DEBUG(0, sw_label, SCSI_DEBUG,
659 				    "scsi_watch_thread suspended\n");
660 				mutex_enter(&cpr_mutex);
661 				if (!sw_cmd_count) {
662 					CALLB_CPR_SAFE_BEGIN(&cpr_info);
663 					sw_cpr_flag = 1;
664 				}
665 				mutex_exit(&cpr_mutex);
666 				sw.swr_current = swr;
667 				cv_wait(&sw.sw_cv, &sw.sw_mutex);
668 
669 
670 				/*
671 				 * Need to let the PM framework know that it
672 				 * is no longer safe to stop the thread for
673 				 * CPR.
674 				 */
675 				mutex_exit(&sw.sw_mutex);
676 				mutex_enter(&cpr_mutex);
677 				if (sw_cpr_flag == 1) {
678 					CALLB_CPR_SAFE_END(
679 					    &cpr_info, &cpr_mutex);
680 					sw_cpr_flag = 0;
681 				}
682 				mutex_exit(&cpr_mutex);
683 				mutex_enter(&sw.sw_mutex);
684 				if (SUSPEND_DESTROY == swr->suspend_destroy) {
685 					cv_destroy(&swr->swr_terminate_cv);
686 					kmem_free((caddr_t)swr,
687 					    sizeof (struct scsi_watch_request));
688 					goto head;
689 				} else {
690 					sw.swr_current = NULL;
691 				}
692 			}
693 			if (next_delay == 0) {
694 				next_delay = swr->swr_timeout;
695 			} else {
696 				next_delay = min(swr->swr_timeout, next_delay);
697 			}
698 
699 			swr->swr_timeout -= last_delay;
700 			next = swr->swr_next;
701 
702 			SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
703 			    "scsi_watch_thread: "
704 			    "swr(0x%p),what=%x,timeout=%lx,"
705 			    "interval=%lx,delay=%lx\n",
706 			    (void *)swr, swr->swr_what, swr->swr_timeout,
707 			    swr->swr_interval, last_delay);
708 
709 			switch (swr->swr_what) {
710 			case SWR_SUSPENDED:
711 			case SWR_SUSPEND_REQUESTED:
712 				/* if we are suspended, don't do anything */
713 				break;
714 
715 			case SWR_STOP:
716 				if (swr->swr_busy == 0) {
717 					scsi_watch_request_destroy(swr);
718 				}
719 				break;
720 
721 			default:
722 				if (swr->swr_timeout <= 0 && !swr->swr_busy) {
723 					swr->swr_busy = 1;
724 
725 					/*
726 					 * submit the cmd and let the completion
727 					 * function handle the result
728 					 * release the mutex (good practice)
729 					 * this should be safe even if the list
730 					 * is changing
731 					 */
732 					mutex_exit(&sw.sw_mutex);
733 					mutex_enter(&cpr_mutex);
734 					sw_cmd_count++;
735 					mutex_exit(&cpr_mutex);
736 					SW_DEBUG((dev_info_t *)NULL,
737 					    sw_label, SCSI_DEBUG,
738 					    "scsi_watch_thread: "
739 					    "Starting TUR\n");
740 					if (scsi_transport(swr->swr_pkt) !=
741 					    TRAN_ACCEPT) {
742 
743 						/*
744 						 * try again later
745 						 */
746 						swr->swr_busy = 0;
747 						SW_DEBUG((dev_info_t *)NULL,
748 						    sw_label, SCSI_DEBUG,
749 						    "scsi_watch_thread: "
750 						    "Transport Failed\n");
751 						mutex_enter(&cpr_mutex);
752 						sw_cmd_count--;
753 						mutex_exit(&cpr_mutex);
754 					}
755 					mutex_enter(&sw.sw_mutex);
756 					swr->swr_timeout = swr->swr_interval;
757 				}
758 				break;
759 			}
760 			swr = next;
761 			if (sw.sw_flags & SW_START_HEAD) {
762 				sw.sw_flags &= ~SW_START_HEAD;
763 				goto head;
764 			}
765 		}
766 
767 		/*
768 		 * delay using cv_timedwait; we return when
769 		 * signalled or timed out
770 		 */
771 		if (sw.sw_head != NULL) {
772 			if (next_delay <= 0) {
773 				next_delay = onesec;
774 			}
775 		} else {
776 			next_delay = exit_delay;
777 		}
778 		now = ddi_get_lbolt();
779 
780 		mutex_enter(&cpr_mutex);
781 		if (!sw_cmd_count) {
782 			CALLB_CPR_SAFE_BEGIN(&cpr_info);
783 			sw_cpr_flag = 1;
784 		}
785 		mutex_exit(&cpr_mutex);
786 		/*
787 		 * if we return from cv_timedwait because we were
788 		 * signalled, the delay is not accurate but that doesn't
789 		 * really matter
790 		 */
791 		(void) cv_timedwait(&sw.sw_cv, &sw.sw_mutex, now + next_delay);
792 		mutex_exit(&sw.sw_mutex);
793 		mutex_enter(&cpr_mutex);
794 		if (sw_cpr_flag == 1) {
795 			CALLB_CPR_SAFE_END(&cpr_info, &cpr_mutex);
796 			sw_cpr_flag = 0;
797 		}
798 		mutex_exit(&cpr_mutex);
799 		mutex_enter(&sw.sw_mutex);
800 		last_delay = next_delay;
801 		next_delay = 0;
802 
803 		/*
804 		 * is there still work to do?
805 		 */
806 		if (sw.sw_head == NULL) {
807 			break;
808 		}
809 	}
810 
811 	/*
812 	 * no more work to do, reset sw_thread and exit
813 	 */
814 	sw.sw_thread = 0;
815 	mutex_exit(&sw.sw_mutex);
816 #ifndef __lock_lint
817 	mutex_enter(&cpr_mutex);
818 	CALLB_CPR_EXIT(&cpr_info);
819 #endif
820 	mutex_destroy(&cpr_mutex);
821 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
822 	    "scsi_watch_thread: Exiting ...\n");
823 }
824 
825 /*
826  * callback completion function for scsi watch pkt
827  */
828 #define	SCBP(pkt)	((struct scsi_status *)(pkt)->pkt_scbp)
829 #define	SCBP_C(pkt)	((*(pkt)->pkt_scbp) & STATUS_MASK)
830 
831 static void
832 scsi_watch_request_intr(struct scsi_pkt *pkt)
833 {
834 	struct scsi_watch_result	result;
835 	struct scsi_watch_request	*swr =
836 	    (struct scsi_watch_request *)pkt->pkt_private;
837 	struct scsi_status		*rqstatusp;
838 	struct scsi_extended_sense	*rqsensep = NULL;
839 	int				amt = 0;
840 
841 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
842 	    "scsi_watch_intr: Entering ...\n");
843 
844 	/*
845 	 * first check if it is the TUR or RQS pkt
846 	 */
847 	if (pkt == swr->swr_pkt) {
848 		if (SCBP_C(pkt) != STATUS_GOOD &&
849 		    SCBP_C(pkt) != STATUS_RESERVATION_CONFLICT) {
850 			if (SCBP(pkt)->sts_chk &&
851 			    ((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
852 
853 				/*
854 				 * submit the request sense pkt
855 				 */
856 				SW_DEBUG((dev_info_t *)NULL,
857 				    sw_label, SCSI_DEBUG,
858 				    "scsi_watch_intr: "
859 				    "Submitting a Request Sense "
860 				    "Packet\n");
861 				if (scsi_transport(swr->swr_rqpkt) !=
862 				    TRAN_ACCEPT) {
863 
864 					/*
865 					 * just give up and try again later
866 					 */
867 					SW_DEBUG((dev_info_t *)NULL,
868 					    sw_label, SCSI_DEBUG,
869 					    "scsi_watch_intr: "
870 					    "Request Sense "
871 					    "Transport Failed\n");
872 					goto done;
873 				}
874 
875 				/*
876 				 * wait for rqsense to complete
877 				 */
878 				return;
879 
880 			} else	if (SCBP(pkt)->sts_chk) {
881 
882 				/*
883 				 * check the autorequest sense data
884 				 */
885 				struct scsi_arq_status	*arqstat =
886 				    (struct scsi_arq_status *)pkt->pkt_scbp;
887 
888 				rqstatusp = &arqstat->sts_rqpkt_status;
889 				rqsensep = &arqstat->sts_sensedata;
890 				amt = swr->swr_sense_length -
891 				    arqstat->sts_rqpkt_resid;
892 				SW_DEBUG((dev_info_t *)NULL,
893 				    sw_label, SCSI_DEBUG,
894 				    "scsi_watch_intr: "
895 				    "Auto Request Sense, amt=%x\n", amt);
896 			}
897 		}
898 
899 	} else if (pkt == swr->swr_rqpkt) {
900 
901 		/*
902 		 * check the request sense data
903 		 */
904 		rqstatusp = (struct scsi_status *)pkt->pkt_scbp;
905 		rqsensep = (struct scsi_extended_sense *)
906 		    swr->swr_rqbp->b_un.b_addr;
907 		amt = swr->swr_sense_length - pkt->pkt_resid;
908 		SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
909 		    "scsi_watch_intr: "
910 		    "Request Sense Completed, amt=%x\n", amt);
911 	} else {
912 
913 		/*
914 		 * should not reach here!!!
915 		 */
916 		scsi_log((dev_info_t *)NULL, sw_label, CE_PANIC,
917 		    "scsi_watch_intr: Bad Packet(0x%p)", (void *)pkt);
918 	}
919 
920 	if (rqsensep) {
921 
922 		/*
923 		 * check rqsense status and data
924 		 */
925 		if (rqstatusp->sts_busy || rqstatusp->sts_chk) {
926 
927 			/*
928 			 * try again later
929 			 */
930 			SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
931 			    "scsi_watch_intr: "
932 			    "Auto Request Sense Failed - "
933 			    "Busy or Check Condition\n");
934 			goto done;
935 		}
936 
937 		SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
938 		    "scsi_watch_intr: "
939 		    "es_key=%x, adq=%x, amt=%x\n",
940 		    rqsensep->es_key, rqsensep->es_add_code, amt);
941 	}
942 
943 	/*
944 	 * callback to target driver to do the real work
945 	 */
946 	result.statusp = SCBP(swr->swr_pkt);
947 	result.sensep = rqsensep;
948 	result.actual_sense_length = (uchar_t)amt;
949 	result.pkt = swr->swr_pkt;
950 
951 	if ((*swr->swr_callback)(swr->swr_callback_arg, &result)) {
952 		swr->swr_what = SWR_STOP;
953 	}
954 
955 done:
956 	swr->swr_busy = 0;
957 	mutex_enter(&cpr_mutex);
958 	sw_cmd_count --;
959 	if (!sw_cmd_count) {
960 		CALLB_CPR_SAFE_BEGIN(&cpr_info);
961 		sw_cpr_flag = 1;
962 	}
963 	mutex_exit(&cpr_mutex);
964 }
965