xref: /illumos-gate/usr/src/uts/common/io/scsi/impl/scsi_watch.c (revision b793cf1f804f52789df526036d96d1be7d3efc9d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * generic scsi device watch
28  */
29 
/*
 * SWDEBUG is switched on for DEBUG kernels and for lint passes.
 */
#if DEBUG || lint
#define	SWDEBUG
#endif

/*
 * debug goodies
 *
 * swdebug selects the verbosity: SW_DEBUG messages are emitted only when
 * swdebug is exactly 1, SW_DEBUG2 messages only when it is greater than 1.
 * Both macros expand to a bare "if (...) scsi_log", so they must be used
 * as a complete statement and never directly in front of an "else".
 *
 * DEBUGGING is keyed off this file's own swdebug knob; it previously
 * named sddebug, which is not defined anywhere in this file.
 */
#ifdef SWDEBUG
static int swdebug = 0;
#define	DEBUGGING	((scsi_options & SCSI_DEBUG_TGT) && swdebug > 1)
#define	SW_DEBUG	if (swdebug == 1) scsi_log
#define	SW_DEBUG2	if (swdebug > 1) scsi_log
#else	/* SWDEBUG */
#define	swdebug		(0)
#define	DEBUGGING	(0)
#define	SW_DEBUG	if (0) scsi_log
#define	SW_DEBUG2	if (0) scsi_log
#endif
48 
49 
50 
51 /*
52  * Includes, Declarations and Local Data
53  */
54 
55 #include <sys/note.h>
56 #include <sys/scsi/scsi.h>
57 #include <sys/var.h>
58 #include <sys/proc.h>
59 #include <sys/thread.h>
60 #include <sys/callb.h>
61 
/*
 * macro for filling in lun value for scsi-1 support
 *
 * When the device address carries a non-zero lun and the inquiry data
 * reports inq_ansi == 0x1, the lun is copied into the scc_lun field of
 * the packet's CDB; otherwise the CDB is left untouched.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in front of an "else"), and both arguments are
 * parenthesized so that expressions such as "&dev" may be passed.
 * Note that devp is evaluated more than once.
 */
#define	FILL_SCSI1_LUN(devp, pkt) \
	do { \
		if (((devp)->sd_address.a_lun > 0) && \
		    ((devp)->sd_inq->inq_ansi == 0x1)) { \
			((union scsi_cdb *)(pkt)->pkt_cdbp)->scc_lun = \
			    (devp)->sd_address.a_lun; \
		} \
	} while (0)
71 
/* label handed to scsi_log() with every message logged from this file */
char *sw_label = "scsi-watch";

/* stored into pkt_time of both the watch pkt and the request sense pkt */
static int scsi_watch_io_time = SCSI_WATCH_IO_TIME;
75 
/*
 * all info resides in the scsi watch structure
 *
 * the monitoring is performed by one separate thread which works
 * from a linked list of scsi_watch_request packets
 *
 * There is exactly one instance of this structure, "sw".
 */
static struct scsi_watch {
	kthread_t		*sw_thread;	/* the watch thread	*/
						/* (0 when not running) */
	kmutex_t		sw_mutex;	/* mutex protecting list */
						/* and this structure */
	kcondvar_t		sw_cv;		/* cv for waking up thread */
	struct scsi_watch_request *sw_head;	/* head of linked list	*/
						/* of request structures */
	uchar_t			sw_state;	/* for suspend-resume */
						/* (SW_* state values) */
	uchar_t			sw_flags;	/* to start at head of list */
						/* for watch thread */
	struct scsi_watch_request *swr_current; /* the command waiting to be */
						/* processed by the watch */
						/* thread which is being */
						/* blocked */
} sw;

#if !defined(lint)
_NOTE(MUTEX_PROTECTS_DATA(scsi_watch::sw_mutex, scsi_watch))
#endif

/*
 * Values for sw_state
 *
 * NOTE(review): only SW_RUNNING and SW_SUSPENDED are ever assigned in this
 * file; SW_SUSPEND_REQUESTED appears to be unused here.
 */
#define	SW_RUNNING		0
#define	SW_SUSPEND_REQUESTED	1
#define	SW_SUSPENDED		2

/*
 * values for sw_flags
 */
#define	SW_START_HEAD		0x1	/* list changed: rescan from sw_head */
113 
/*
 * One watch request (swr).  Requests are kept on a doubly linked list
 * rooted at sw.sw_head and are handed back to clients as opaque tokens.
 */
struct scsi_watch_request {
	struct scsi_watch_request *swr_next;	/* linked request list	*/
	struct scsi_watch_request *swr_prev;
	clock_t			swr_interval;	/* interval between TURs */
						/* (in clock ticks) */
	clock_t			swr_timeout;	/* count down		*/
	uchar_t			swr_busy;	/* TUR in progress	*/
	uchar_t			swr_what;	/* watch or stop	*/
						/* (SWR_* values) */
	uchar_t			swr_sense_length; /* required sense length */
	struct scsi_pkt		*swr_pkt;	/* TUR pkt itself	*/
	struct scsi_pkt		*swr_rqpkt;	/* request sense pkt	*/
	struct buf		*swr_rqbp;	/* bp for request sense data */
	int			(*swr_callback)(); /* callback to driver */
	caddr_t			swr_callback_arg;
	kcondvar_t		swr_terminate_cv; /* cv to wait on to cleanup */
						/* request synchronously */
	int			swr_ref;	/* reference count on the swr */
	uchar_t			suspend_destroy; /* flag for free later */
};

/*
 * values for suspend_destroy
 */
#define	SUSPEND_DESTROY		1	/* watch thread frees the swr */

#if !defined(lint)
_NOTE(SCHEME_PROTECTS_DATA("unshared data", scsi_watch_request))
#endif

/*
 * values for swr_what
 */
#define	SWR_WATCH		0	/* device watch */
#define	SWR_STOP		1	/* stop monitoring and destroy swr */
#define	SWR_SUSPEND_REQUESTED	2	/* req. pending suspend */
#define	SWR_SUSPENDED		3	/* req. is suspended */

/*
 * forward declarations of the file-local routines
 */
static void scsi_watch_request_destroy(struct scsi_watch_request *swr);
static void scsi_watch_thread(void);
static void scsi_watch_request_intr(struct scsi_pkt *pkt);
153 
154 /*
155  * setup, called from _init(), the thread is created when we need it
156  * and exits when there is nothing to do anymore and everything has been
157  * cleaned up (ie. resources deallocated)
158  */
159 void
160 scsi_watch_init()
161 {
162 /* NO OTHER THREADS ARE RUNNING */
163 	mutex_init(&sw.sw_mutex, NULL, MUTEX_DRIVER, NULL);
164 	cv_init(&sw.sw_cv, NULL, CV_DRIVER, NULL);
165 	sw.sw_state = SW_RUNNING;
166 	sw.sw_flags = 0;
167 	sw.swr_current = NULL;
168 }
169 
170 /*
171  * cleaning up, called from _fini()
172  */
173 void
174 scsi_watch_fini()
175 {
176 /* NO OTHER THREADS ARE RUNNING */
177 	/*
178 	 * hope and pray that the thread has exited
179 	 */
180 	ASSERT(sw.sw_thread == 0);
181 	mutex_destroy(&sw.sw_mutex);
182 	cv_destroy(&sw.sw_cv);
183 }
184 
185 /*
186  * allocate an swr (scsi watch request structure) and initialize pkts
187  */
188 #define	ROUTE		&devp->sd_address
189 
190 opaque_t
191 scsi_watch_request_submit(
192 	struct scsi_device	*devp,
193 	int			interval,
194 	int			sense_length,
195 	int			(*callback)(),	/* callback function */
196 	caddr_t			cb_arg)		/* device number */
197 {
198 	register struct scsi_watch_request	*swr = NULL;
199 	register struct scsi_watch_request	*sswr, *p;
200 	struct buf				*bp = NULL;
201 	struct scsi_pkt				*rqpkt = NULL;
202 	struct scsi_pkt				*pkt = NULL;
203 	uchar_t					dtype;
204 
205 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
206 	    "scsi_watch_request_submit: Entering ...\n");
207 
208 	mutex_enter(&sw.sw_mutex);
209 	if (sw.sw_thread == 0) {
210 		register kthread_t	*t;
211 
212 		t = thread_create((caddr_t)NULL, 0, scsi_watch_thread,
213 		    NULL, 0, &p0, TS_RUN, v.v_maxsyspri - 2);
214 		sw.sw_thread = t;
215 	}
216 
217 	for (p = sw.sw_head; p != NULL; p = p->swr_next) {
218 		if ((p->swr_callback_arg == cb_arg) &&
219 		    (p->swr_callback == callback))
220 			break;
221 	}
222 
223 	/* update time interval for an existing request */
224 	if (p) {
225 		if (p->swr_what != SWR_STOP) {
226 			p->swr_timeout = p->swr_interval
227 			    = drv_usectohz(interval);
228 			p->swr_what = SWR_WATCH;
229 			p->swr_ref++;
230 			cv_signal(&sw.sw_cv);
231 			mutex_exit(&sw.sw_mutex);
232 			return ((opaque_t)p);
233 		}
234 	}
235 	mutex_exit(&sw.sw_mutex);
236 
237 	/*
238 	 * allocate space for scsi_watch_request
239 	 */
240 	swr = kmem_zalloc(sizeof (struct scsi_watch_request), KM_SLEEP);
241 
242 	/*
243 	 * allocate request sense bp and pkt and make cmd
244 	 * we shouldn't really need it if ARQ is enabled but it is useful
245 	 * if the ARQ failed.
246 	 */
247 	bp = scsi_alloc_consistent_buf(ROUTE, NULL,
248 	    sense_length, B_READ, SLEEP_FUNC, NULL);
249 
250 	rqpkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL,
251 	    bp, CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
252 
253 	(void) scsi_setup_cdb((union scsi_cdb *)rqpkt->pkt_cdbp,
254 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
255 	FILL_SCSI1_LUN(devp, rqpkt);
256 	rqpkt->pkt_private = (opaque_t)swr;
257 	rqpkt->pkt_time = scsi_watch_io_time;
258 	rqpkt->pkt_comp = scsi_watch_request_intr;
259 	rqpkt->pkt_flags |= FLAG_HEAD;
260 
261 	/*
262 	 * Create TUR pkt or a zero byte WRITE(10) based on the
263 	 * disk-type for reservation state.
264 	 * For inq_dtype of SBC (DIRECT, dtype == 0)
265 	 * OR for RBC devices (dtype is 0xE) AND for
266 	 * ANSI version of SPC/SPC-2/SPC-3 (inq_ansi == 3-5).
267 	 */
268 
269 	dtype = devp->sd_inq->inq_dtype & DTYPE_MASK;
270 	if (((dtype == 0) || (dtype == 0xE)) &&
271 	    (devp->sd_inq->inq_ansi > 2)) {
272 		pkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL, NULL,
273 		    CDB_GROUP1, sizeof (struct scsi_arq_status),
274 		    0, 0, SLEEP_FUNC, NULL);
275 
276 		(void) scsi_setup_cdb((union scsi_cdb *)pkt->pkt_cdbp,
277 		    SCMD_WRITE_G1, 0, 0, 0);
278 	} else {
279 		pkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL, NULL,
280 		    CDB_GROUP0, sizeof (struct scsi_arq_status),
281 		    0, 0, SLEEP_FUNC, NULL);
282 
283 		(void) scsi_setup_cdb((union scsi_cdb *)pkt->pkt_cdbp,
284 		    SCMD_TEST_UNIT_READY, 0, 0, 0);
285 		FILL_SCSI1_LUN(devp, pkt);
286 	}
287 
288 	pkt->pkt_private = (opaque_t)swr;
289 	pkt->pkt_time = scsi_watch_io_time;
290 	pkt->pkt_comp = scsi_watch_request_intr;
291 	if (scsi_ifgetcap(&pkt->pkt_address, "tagged-qing", 1) == 1) {
292 		pkt->pkt_flags |= FLAG_STAG;
293 	}
294 
295 	/*
296 	 * set the allocated resources in swr
297 	 */
298 	swr->swr_rqbp = bp;
299 	swr->swr_rqpkt = rqpkt;
300 	swr->swr_pkt = pkt;
301 	swr->swr_timeout = swr->swr_interval = drv_usectohz(interval);
302 	swr->swr_callback = callback;
303 	swr->swr_callback_arg = cb_arg;
304 	swr->swr_what = SWR_WATCH;
305 	swr->swr_sense_length = (uchar_t)sense_length;
306 	swr->swr_ref = 1;
307 	cv_init(&swr->swr_terminate_cv, NULL, CV_DRIVER, NULL);
308 
309 	/*
310 	 * add to the list and wake up the thread
311 	 */
312 	mutex_enter(&sw.sw_mutex);
313 	swr->swr_next = sw.sw_head;
314 	swr->swr_prev = NULL;
315 	if (sw.sw_head) {
316 		sw.sw_head->swr_prev = swr;
317 	}
318 	sw.sw_head = swr;
319 
320 	/*
321 	 * reset all timeouts, so all requests are in sync again
322 	 * XXX there is a small window where the watch thread releases
323 	 * the mutex so that could upset the resyncing
324 	 */
325 	sswr = swr;
326 	while (sswr) {
327 		sswr->swr_timeout = swr->swr_interval;
328 		sswr = sswr->swr_next;
329 	}
330 	cv_signal(&sw.sw_cv);
331 	mutex_exit(&sw.sw_mutex);
332 	return ((opaque_t)swr);
333 }
334 
335 
336 /*
337  * called by (eg. pwr management) to resume the scsi_watch_thread
338  */
339 void
340 scsi_watch_resume(opaque_t token)
341 {
342 	struct scsi_watch_request *swr = (struct scsi_watch_request *)NULL;
343 	/*
344 	 * Change the state to SW_RUNNING and wake up the scsi_watch_thread
345 	 */
346 	SW_DEBUG(0, sw_label, SCSI_DEBUG, "scsi_watch_resume:\n");
347 	mutex_enter(&sw.sw_mutex);
348 
349 	if (!sw.sw_head)
350 		goto exit;
351 
352 	/* search for token */
353 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
354 		if (swr == (struct scsi_watch_request *)token)
355 			break;
356 	}
357 
358 	/* if we can't find this value, then we just do nothing */
359 	if (swr == (struct scsi_watch_request *)NULL)
360 		goto exit;
361 
362 	swr->swr_what = SWR_WATCH;
363 
364 
365 	/* see if all swr's are awake, then start the thread again */
366 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
367 		if (swr->swr_what != SWR_WATCH)
368 			goto exit;
369 	}
370 
371 	sw.sw_state = SW_RUNNING;
372 	cv_signal(&sw.sw_cv);
373 
374 exit:
375 	mutex_exit(&sw.sw_mutex);
376 }
377 
378 
379 /*
380  * called by clients (eg. pwr management) to suspend the scsi_watch_thread
381  */
382 void
383 scsi_watch_suspend(opaque_t token)
384 {
385 	struct scsi_watch_request *swr = (struct scsi_watch_request *)NULL;
386 	clock_t	now;
387 	clock_t halfsec_delay = drv_usectohz(500000);
388 
389 	SW_DEBUG(0, sw_label, SCSI_DEBUG, "scsi_watch_suspend:\n");
390 
391 	mutex_enter(&sw.sw_mutex);
392 
393 	if (!sw.sw_head)
394 		goto exit;
395 
396 	/* search for token */
397 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
398 		if (swr == (struct scsi_watch_request *)token)
399 			break;
400 	}
401 
402 	/* if we can't find this value, then we just do nothing */
403 	if (swr == (struct scsi_watch_request *)NULL)
404 		goto exit;
405 
406 
407 	for (;;) {
408 		if (swr->swr_busy) {
409 			/*
410 			 * XXX: Assumes that this thread can rerun
411 			 * till all outstanding cmds are complete
412 			 */
413 			swr->swr_what = SWR_SUSPEND_REQUESTED;
414 			now = ddi_get_lbolt();
415 			(void) cv_timedwait(&sw.sw_cv, &sw.sw_mutex,
416 			    now + halfsec_delay);
417 		} else {
418 			swr->swr_what = SWR_SUSPENDED;
419 			break;
420 		}
421 	}
422 
423 	/* see if all swr's are suspended, then suspend the thread */
424 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
425 		if (swr->swr_what != SWR_SUSPENDED)
426 			goto exit;
427 	}
428 
429 	sw.sw_state = SW_SUSPENDED;
430 
431 exit:
432 	mutex_exit(&sw.sw_mutex);
433 }
434 
435 /*
436  * destroy swr, called for watch thread
437  */
438 static void
439 scsi_watch_request_destroy(struct scsi_watch_request *swr)
440 {
441 	ASSERT(MUTEX_HELD(&sw.sw_mutex));
442 	ASSERT(swr->swr_busy == 0);
443 
444 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
445 	    "scsi_watch_request_destroy: Entering ...\n");
446 	if (swr->swr_ref != 0)
447 		return;
448 
449 	/*
450 	 * remove swr from linked list and destroy pkts
451 	 */
452 	if (swr->swr_prev) {
453 		swr->swr_prev->swr_next = swr->swr_next;
454 	}
455 	if (swr->swr_next) {
456 		swr->swr_next->swr_prev = swr->swr_prev;
457 	}
458 	if (sw.sw_head == swr) {
459 		sw.sw_head = swr->swr_next;
460 	}
461 	if (sw.swr_current == swr) {
462 		swr->suspend_destroy = SUSPEND_DESTROY;
463 		sw.swr_current = NULL;
464 	}
465 
466 	scsi_destroy_pkt(swr->swr_rqpkt);
467 	scsi_free_consistent_buf(swr->swr_rqbp);
468 	scsi_destroy_pkt(swr->swr_pkt);
469 	cv_signal(&swr->swr_terminate_cv);
470 }
471 
/*
 * scsi_watch_request_terminate()
 * called by requestor to terminate any pending watch request.
 * if the request is currently "busy", and the caller cannot wait, failure
 * is returned. O/w the request is cleaned up immediately.
 *
 * token	value returned by scsi_watch_request_submit()
 * flags	compared against SCSI_WATCH_TERMINATE_NOWAIT and
 *		SCSI_WATCH_TERMINATE_ALL_WAIT; any other value drops one
 *		reference and waits only if it was the last one
 *
 * Returns SCSI_WATCH_TERMINATE_FAIL when the token is not on the list or
 * when the request is busy and flags is SCSI_WATCH_TERMINATE_NOWAIT;
 * SCSI_WATCH_TERMINATE_SUCCESS otherwise.
 *
 * The reference count is decremented as soon as the token is found.
 * NOWAIT (when not busy) and ALL_WAIT force it to zero.
 *
 * NOTE(review): the reference is also dropped on the busy/NOWAIT path
 * that returns FAIL - confirm callers expect that.
 */
int
scsi_watch_request_terminate(opaque_t token, int flags)
{
	struct scsi_watch_request *swr =
	    (struct scsi_watch_request *)token;
	struct scsi_watch_request *sswr;

	int count = 0;		/* references left after our decrement */
	int free_flag = 0;	/* set when the swr may be freed below */

	/*
	 * We try to clean up this request if we can. We also inform
	 * the watch thread that we mucked around the list so it has
	 * to start reading from head of list again.
	 */
	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
	    "scsi_watch_request_terminate: Entering(0x%p) ...\n",
	    (void *)swr);
	mutex_enter(&sw.sw_mutex);

	/*
	 * check if it is still in the list
	 */
	sswr = sw.sw_head;
	while (sswr) {
		if (sswr == swr) {
			swr->swr_ref--;
			count = swr->swr_ref;

			if (swr->swr_busy) {
				/* a command is in flight */
				if (flags == SCSI_WATCH_TERMINATE_NOWAIT) {
					mutex_exit(&sw.sw_mutex);
					return (SCSI_WATCH_TERMINATE_FAIL);
				}
				/* other holders remain: just drop our ref */
				if (count != 0 && flags !=
				    SCSI_WATCH_TERMINATE_ALL_WAIT) {
					mutex_exit(&sw.sw_mutex);
					return (SCSI_WATCH_TERMINATE_SUCCESS);
				}
				if (SCSI_WATCH_TERMINATE_ALL_WAIT == flags) {
					swr->swr_ref = 0;
					count = 0;
				}
				/*
				 * ask the watch thread to destroy the
				 * request and wait for its signal on
				 * swr_terminate_cv
				 */
				swr->swr_what = SWR_STOP;
				cv_wait(&swr->swr_terminate_cv, &sw.sw_mutex);
				free_flag = 1;
				goto done;
			} else {
				/* idle request: tear it down right here */
				if (SCSI_WATCH_TERMINATE_NOWAIT == flags ||
				    SCSI_WATCH_TERMINATE_ALL_WAIT == flags) {
					swr->swr_ref = 0;
					count = 0;
				}
				/* no-op unless swr_ref has reached zero */
				scsi_watch_request_destroy(swr);
				if (0 == count) {
					sw.sw_flags |= SW_START_HEAD;
					free_flag = 1;
				}
				goto done;
			}
		}
		sswr = sswr->swr_next;
	}
done:
	mutex_exit(&sw.sw_mutex);
	/* sswr is NULL only when the loop ran off the end of the list */
	if (!sswr) {
		return (SCSI_WATCH_TERMINATE_FAIL);
	}
	/*
	 * Free the swr here unless it was flagged SUSPEND_DESTROY, in
	 * which case the watch thread frees it.
	 * NOTE(review): suspend_destroy is read after sw_mutex has been
	 * released - confirm this cannot race with the watch thread's free.
	 */
	if (1 == free_flag &&
	    sswr->suspend_destroy != SUSPEND_DESTROY) {
		cv_destroy(&swr->swr_terminate_cv);
		kmem_free((caddr_t)swr, sizeof (struct scsi_watch_request));
	}

	return (SCSI_WATCH_TERMINATE_SUCCESS);
}
554 
555 
/*
 * The routines scsi_watch_thread & scsi_watch_request_intr are
 * on different threads.
 * If there is no work to be done by the lower level driver
 * then swr->swr_busy will not be set.
 * In this case we will call CALLB_CPR_SAFE_BEGIN before
 * calling cv_timedwait.
 * In the other case, where there is work to be done by
 * the lower level driver, the flag swr->swr_busy will
 * be set.
 * We cannot call CALLB_CPR_SAFE_BEGIN at this point; the reason
 * is that the intr thread can interfere with our operations. So
 * we do a cv_timedwait here. Then, at the completion of the
 * lower level driver's work, we will call CALLB_CPR_SAFE_BEGIN
 * in scsi_watch_request_intr.
 * In all the cases we will call CALLB_CPR_SAFE_END only if
 * we already called a CALLB_CPR_SAFE_BEGIN, and this is flagged
 * by sw_cpr_flag.
 *
 * Warlock has a problem when we use different locks
 * on the same type of structure in different contexts.
 * We use callb_cpr_t in both scsi_watch and esp_callback threads,
 * and we use different mutexes in different threads. This
 * is not acceptable to warlock. To avoid this
 * problem we use the same name for the mutex in
 * both scsi_watch & esp_callback. When __lock_lint is not defined,
 * esp_callback uses the mutex on the stack and scsi_watch uses
 * a static variable. But when __lock_lint is defined,
 * we make a mutex which is global in esp_callback and
 * an external mutex for scsi_watch.
 */
static int sw_cmd_count = 0;	/* watch commands currently in flight */
static int sw_cpr_flag = 0;	/* 1 after CALLB_CPR_SAFE_BEGIN was called */
static callb_cpr_t cpr_info;
#ifndef __lock_lint
static kmutex_t cpr_mutex;
#else
extern kmutex_t cpr_mutex;
#endif

#if !defined(lint)
_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cpr_info))
_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, sw_cmd_count))
#endif
/*
 * the scsi watch thread:
 * it either wakes up if there is work to do or if the cv_timedwait
 * timed out
 * normally, it wakes up every <delay> seconds and checks the list.
 * the interval is not very accurate if the cv was signalled but that
 * really doesn't matter much
 * it is more important that we fire off all TURs simultaneously so
 * we don't have to wake up frequently
 *
 * Each pass walks the request list, ages every request's timeout by the
 * length of the previous sleep, submits the watch pkt of each request
 * whose timeout has expired, and destroys idle requests marked SWR_STOP.
 * It then sleeps until signalled or until the smallest timeout seen
 * (one second if that is not positive, sixty seconds when the list is
 * empty) has passed.  The thread exits when the list is still empty
 * after such a sleep.
 */
static void
scsi_watch_thread()
{
	struct scsi_watch_request	*swr, *next;
	clock_t				now;
	clock_t				last_delay = 0;	/* previous sleep */
	clock_t				next_delay = 0;	/* upcoming sleep */
	clock_t				onesec = drv_usectohz(1000000);
	clock_t				exit_delay = 60 * onesec;

	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
	    "scsi_watch_thread: Entering ...\n");

#if !defined(lint)
	_NOTE(NO_COMPETING_THREADS_NOW);
#endif
	/* register this thread with the CPR (suspend/resume) framework */
	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&cpr_info,
	    &cpr_mutex, callb_generic_cpr, "scsi_watch");
	sw_cpr_flag = 0;
#if !defined(lint)
	/*LINTED*/
	_NOTE(COMPETING_THREADS_NOW);
#endif
	/*
	 * grab the mutex and wait for work
	 */
	mutex_enter(&sw.sw_mutex);
	if (sw.sw_head == NULL) {
		cv_wait(&sw.sw_cv, &sw.sw_mutex);
	}

	/*
	 * now loop forever for work; if queue is empty exit
	 */
	for (;;) {
head:
		swr = sw.sw_head;
		while (swr) {

			/*
			 * If state is not running, wait for scsi_watch_resume
			 * to signal restart, but before going into cv_wait
			 * need to let the PM framework know that it is safe
			 * to stop this thread for CPR
			 */
			if (sw.sw_state != SW_RUNNING) {
				SW_DEBUG(0, sw_label, SCSI_DEBUG,
				    "scsi_watch_thread suspended\n");
				mutex_enter(&cpr_mutex);
				if (!sw_cmd_count) {
					CALLB_CPR_SAFE_BEGIN(&cpr_info);
					sw_cpr_flag = 1;
				}
				mutex_exit(&cpr_mutex);
				/* remember where we are parked */
				sw.swr_current = swr;
				cv_wait(&sw.sw_cv, &sw.sw_mutex);


				/*
				 * Need to let the PM framework know that it
				 * is no longer safe to stop the thread for
				 * CPR.
				 */
				mutex_exit(&sw.sw_mutex);
				mutex_enter(&cpr_mutex);
				if (sw_cpr_flag == 1) {
					CALLB_CPR_SAFE_END(
					    &cpr_info, &cpr_mutex);
					sw_cpr_flag = 0;
				}
				mutex_exit(&cpr_mutex);
				mutex_enter(&sw.sw_mutex);
				/*
				 * the request we were parked on was destroyed
				 * while we slept: free it here and rescan
				 * from the head of the list
				 */
				if (SUSPEND_DESTROY == swr->suspend_destroy) {
					cv_destroy(&swr->swr_terminate_cv);
					kmem_free((caddr_t)swr,
					    sizeof (struct scsi_watch_request));
					goto head;
				} else {
					sw.swr_current = NULL;
				}
			}
			/* track the smallest (pre-aging) timeout of the pass */
			if (next_delay == 0) {
				next_delay = swr->swr_timeout;
			} else {
				next_delay = min(swr->swr_timeout, next_delay);
			}

			/* age the request by the time we just slept */
			swr->swr_timeout -= last_delay;
			/* fetch the successor before swr may be unlinked */
			next = swr->swr_next;

			SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
			    "scsi_watch_thread: "
			    "swr(0x%p),what=%x,timeout=%lx,"
			    "interval=%lx,delay=%lx\n",
			    (void *)swr, swr->swr_what, swr->swr_timeout,
			    swr->swr_interval, last_delay);

			switch (swr->swr_what) {
			case SWR_SUSPENDED:
			case SWR_SUSPEND_REQUESTED:
				/* if we are suspended, don't do anything */
				break;

			case SWR_STOP:
				/* a busy request is retried on a later pass */
				if (swr->swr_busy == 0) {
					scsi_watch_request_destroy(swr);
				}
				break;

			default:
				if (swr->swr_timeout <= 0 && !swr->swr_busy) {
					swr->swr_busy = 1;

					/*
					 * submit the cmd and let the completion
					 * function handle the result
					 * release the mutex (good practice)
					 * this should be safe even if the list
					 * is changing
					 */
					mutex_exit(&sw.sw_mutex);
					mutex_enter(&cpr_mutex);
					sw_cmd_count++;
					mutex_exit(&cpr_mutex);
					SW_DEBUG((dev_info_t *)NULL,
					    sw_label, SCSI_DEBUG,
					    "scsi_watch_thread: "
					    "Starting TUR\n");
					if (scsi_transport(swr->swr_pkt) !=
					    TRAN_ACCEPT) {

						/*
						 * try again later
						 */
						swr->swr_busy = 0;
						SW_DEBUG((dev_info_t *)NULL,
						    sw_label, SCSI_DEBUG,
						    "scsi_watch_thread: "
						    "Transport Failed\n");
						mutex_enter(&cpr_mutex);
						sw_cmd_count--;
						mutex_exit(&cpr_mutex);
					}
					mutex_enter(&sw.sw_mutex);
					/* rearm the count down */
					swr->swr_timeout = swr->swr_interval;
				}
				break;
			}
			swr = next;
			/* the list was changed behind our back: start over */
			if (sw.sw_flags & SW_START_HEAD) {
				sw.sw_flags &= ~SW_START_HEAD;
				goto head;
			}
		}

		/*
		 * delay using cv_timedwait; we return when
		 * signalled or timed out
		 */
		if (sw.sw_head != NULL) {
			if (next_delay <= 0) {
				next_delay = onesec;
			}
		} else {
			next_delay = exit_delay;
		}
		now = ddi_get_lbolt();

		/* safe to be stopped for CPR only if nothing is in flight */
		mutex_enter(&cpr_mutex);
		if (!sw_cmd_count) {
			CALLB_CPR_SAFE_BEGIN(&cpr_info);
			sw_cpr_flag = 1;
		}
		mutex_exit(&cpr_mutex);
		/*
		 * if we return from cv_timedwait because we were
		 * signalled, the delay is not accurate but that doesn't
		 * really matter
		 */
		(void) cv_timedwait(&sw.sw_cv, &sw.sw_mutex, now + next_delay);
		/* sw_mutex is dropped before cpr_mutex is taken */
		mutex_exit(&sw.sw_mutex);
		mutex_enter(&cpr_mutex);
		if (sw_cpr_flag == 1) {
			CALLB_CPR_SAFE_END(&cpr_info, &cpr_mutex);
			sw_cpr_flag = 0;
		}
		mutex_exit(&cpr_mutex);
		mutex_enter(&sw.sw_mutex);
		last_delay = next_delay;
		next_delay = 0;

		/*
		 * is there still work to do?
		 */
		if (sw.sw_head == NULL) {
			break;
		}
	}

	/*
	 * no more work to do, reset sw_thread and exit
	 */
	sw.sw_thread = 0;
	mutex_exit(&sw.sw_mutex);
#ifndef __lock_lint
	/* cpr_mutex is not released here after CALLB_CPR_EXIT */
	mutex_enter(&cpr_mutex);
	CALLB_CPR_EXIT(&cpr_info);
#endif
	mutex_destroy(&cpr_mutex);
	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
	    "scsi_watch_thread: Exiting ...\n");
}
822 
/*
 * callback completion function for scsi watch pkt
 *
 * Installed as pkt_comp on both the watch pkt and the request sense pkt
 * of a request; pkt_private leads back to the owning swr.
 *
 * - watch pkt, status neither GOOD nor RESERVATION CONFLICT, check
 *   condition without auto request sense data: the request sense pkt is
 *   submitted and we return, staying busy until that pkt completes.
 * - watch pkt with auto request sense data, or completion of the request
 *   sense pkt: the sense status and data are picked up.
 * - unless the sense command itself came back busy or with a check
 *   condition, the client callback is invoked with the result; a non-zero
 *   return from the callback marks the request SWR_STOP.
 *
 * On every path except the early return, swr_busy is cleared and
 * sw_cmd_count is decremented.
 */
#define	SCBP(pkt)	((struct scsi_status *)(pkt)->pkt_scbp)
#define	SCBP_C(pkt)	((*(pkt)->pkt_scbp) & STATUS_MASK)

static void
scsi_watch_request_intr(struct scsi_pkt *pkt)
{
	struct scsi_watch_result	result;
	struct scsi_watch_request	*swr =
	    (struct scsi_watch_request *)pkt->pkt_private;
	/* rqstatusp is assigned only on the paths that also set rqsensep */
	struct scsi_status		*rqstatusp;
	struct scsi_extended_sense	*rqsensep = NULL;
	int				amt = 0;	/* sense bytes obtained */

	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
	    "scsi_watch_intr: Entering ...\n");

	/*
	 * first check if it is the TUR or RQS pkt
	 */
	if (pkt == swr->swr_pkt) {
		if (SCBP_C(pkt) != STATUS_GOOD &&
		    SCBP_C(pkt) != STATUS_RESERVATION_CONFLICT) {
			if (SCBP(pkt)->sts_chk &&
			    ((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {

				/*
				 * submit the request sense pkt
				 */
				SW_DEBUG((dev_info_t *)NULL,
				    sw_label, SCSI_DEBUG,
				    "scsi_watch_intr: "
				    "Submitting a Request Sense "
				    "Packet\n");
				if (scsi_transport(swr->swr_rqpkt) !=
				    TRAN_ACCEPT) {

					/*
					 * just give up and try again later
					 */
					SW_DEBUG((dev_info_t *)NULL,
					    sw_label, SCSI_DEBUG,
					    "scsi_watch_intr: "
					    "Request Sense "
					    "Transport Failed\n");
					goto done;
				}

				/*
				 * wait for rqsense to complete
				 */
				return;

			} else	if (SCBP(pkt)->sts_chk) {

				/*
				 * check the autorequest sense data
				 */
				struct scsi_arq_status	*arqstat =
				    (struct scsi_arq_status *)pkt->pkt_scbp;

				rqstatusp = &arqstat->sts_rqpkt_status;
				rqsensep = &arqstat->sts_sensedata;
				amt = swr->swr_sense_length -
				    arqstat->sts_rqpkt_resid;
				SW_DEBUG((dev_info_t *)NULL,
				    sw_label, SCSI_DEBUG,
				    "scsi_watch_intr: "
				    "Auto Request Sense, amt=%x\n", amt);
			}
		}

	} else if (pkt == swr->swr_rqpkt) {

		/*
		 * check the request sense data
		 */
		rqstatusp = (struct scsi_status *)pkt->pkt_scbp;
		rqsensep = (struct scsi_extended_sense *)
		    swr->swr_rqbp->b_un.b_addr;
		amt = swr->swr_sense_length - pkt->pkt_resid;
		SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
		    "scsi_watch_intr: "
		    "Request Sense Completed, amt=%x\n", amt);
	} else {

		/*
		 * should not reach here!!!
		 */
		scsi_log((dev_info_t *)NULL, sw_label, CE_PANIC,
		    "scsi_watch_intr: Bad Packet(0x%p)", (void *)pkt);
	}

	if (rqsensep) {

		/*
		 * check rqsense status and data
		 */
		if (rqstatusp->sts_busy || rqstatusp->sts_chk) {

			/*
			 * try again later
			 */
			SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
			    "scsi_watch_intr: "
			    "Auto Request Sense Failed - "
			    "Busy or Check Condition\n");
			goto done;
		}

		SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
		    "scsi_watch_intr: "
		    "es_key=%x, adq=%x, amt=%x\n",
		    rqsensep->es_key, rqsensep->es_add_code, amt);
	}

	/*
	 * callback to target driver to do the real work
	 * (statusp and pkt always refer to the watch pkt, even when it is
	 * the request sense pkt that has just completed)
	 */
	result.statusp = SCBP(swr->swr_pkt);
	result.sensep = rqsensep;
	result.actual_sense_length = (uchar_t)amt;
	result.pkt = swr->swr_pkt;

	if ((*swr->swr_callback)(swr->swr_callback_arg, &result)) {
		swr->swr_what = SWR_STOP;
	}

done:
	/* the command is finished: update the busy and CPR bookkeeping */
	swr->swr_busy = 0;
	mutex_enter(&cpr_mutex);
	sw_cmd_count --;
	if (!sw_cmd_count) {
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		sw_cpr_flag = 1;
	}
	mutex_exit(&cpr_mutex);
}
963 
964 /*
965  * scsi_watch_get_ref_count
966  * called by clients to query the reference count for a given token.
967  * return the number of reference count or 0 if the given token is
968  * not found.
969  */
970 int
971 scsi_watch_get_ref_count(opaque_t token)
972 {
973 	struct scsi_watch_request *swr =
974 	    (struct scsi_watch_request *)token;
975 	struct scsi_watch_request *sswr;
976 	int rval = 0;
977 
978 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
979 	    "scsi_watch_get_ref_count: Entering(0x%p) ...\n",
980 	    (void *)swr);
981 	mutex_enter(&sw.sw_mutex);
982 
983 	sswr = sw.sw_head;
984 	while (sswr) {
985 		if (sswr == swr) {
986 			rval = swr->swr_ref;
987 			mutex_exit(&sw.sw_mutex);
988 			return (rval);
989 		}
990 		sswr = sswr->swr_next;
991 	}
992 
993 	mutex_exit(&sw.sw_mutex);
994 	return (rval);
995 }
996