xref: /illumos-gate/usr/src/uts/common/io/scsi/impl/scsi_watch.c (revision 46b592853d0f4f11781b6b0a7533f267c6aee132)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * generic scsi device watch
28  */
29 
/*
 * SWDEBUG enables the debug logging macros below; it is turned on when
 * either DEBUG or lint evaluates non-zero.
 */
#if DEBUG || lint
#define	SWDEBUG
#endif

/*
 * debug goodies
 *
 * SW_DEBUG logs through scsi_log() only when swdebug is exactly 1 and
 * SW_DEBUG2 only when swdebug is greater than 1.  Both expand to a bare
 * "if (...) scsi_log", so they must only be used as complete statements
 * (never as the unbraced body of an if that has an else).
 *
 * DEBUGGING is keyed on this file's own swdebug knob.
 */
#ifdef SWDEBUG
static int swdebug = 0;
#define	DEBUGGING	((scsi_options & SCSI_DEBUG_TGT) && swdebug > 1)
#define	SW_DEBUG	if (swdebug == 1) scsi_log
#define	SW_DEBUG2	if (swdebug > 1) scsi_log
#else	/* SWDEBUG */
#define	swdebug		(0)
#define	DEBUGGING	(0)
#define	SW_DEBUG	if (0) scsi_log
#define	SW_DEBUG2	if (0) scsi_log
#endif
48 
49 
50 
51 /*
52  * Includes, Declarations and Local Data
53  */
54 
55 #include <sys/note.h>
56 #include <sys/scsi/scsi.h>
57 #include <sys/var.h>
58 #include <sys/proc.h>
59 #include <sys/thread.h>
60 #include <sys/callb.h>
61 
/*
 * macro for filling in lun value for scsi-1 support
 *
 * When the device's lun is greater than zero and its inquiry data reports
 * inq_ansi == 1, the lun is stored into the scc_lun field of the packet's
 * CDB.  Otherwise the CDB is left untouched.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in unbraced if/else); both arguments are parenthesized.
 * Note that devp is evaluated more than once.
 */
#define	FILL_SCSI1_LUN(devp, pkt) \
	do { \
		if (((devp)->sd_address.a_lun > 0) && \
		    ((devp)->sd_inq->inq_ansi == 0x1)) { \
			((union scsi_cdb *)(pkt)->pkt_cdbp)->scc_lun = \
			    (devp)->sd_address.a_lun; \
		} \
	} while (0)
71 
/* label string handed to scsi_log() for every message logged by this file */
char *sw_label = "scsi-watch";

/* value stored in pkt_time of the watch and request sense packets */
static int scsi_watch_io_time = SCSI_WATCH_IO_TIME;
75 
/*
 * all info resides in the scsi watch structure
 *
 * the monitoring is performed by one separate thread which works
 * from a linked list of scsi_watch_request packets
 *
 * There is a single instance, sw.  sw_thread is non-zero while a watch
 * thread exists; the thread clears it just before exiting.
 */
static struct scsi_watch {
	kthread_t		*sw_thread;	/* the watch thread	*/
	kmutex_t		sw_mutex;	/* mutex protecting list */
						/* and this structure */
	kcondvar_t		sw_cv;		/* cv for waking up thread */
	struct scsi_watch_request *sw_head;	/* head of linked list	*/
						/* of request structures */
	uchar_t			sw_state;	/* for suspend-resume; */
						/* one of the SW_* states */
	uchar_t			sw_flags;	/* SW_START_HEAD: tells the */
						/* watch thread to restart */
						/* at the head of the list */
	struct scsi_watch_request *swr_current; /* the request the watch */
						/* thread was about to */
						/* process when it blocked */
						/* because sw_state was not */
						/* SW_RUNNING */
} sw;
97 
98 #if !defined(lint)
99 _NOTE(MUTEX_PROTECTS_DATA(scsi_watch::sw_mutex, scsi_watch))
100 #endif
101 
/*
 * Values for sw_state
 *
 * The watch thread only walks requests while the state is SW_RUNNING;
 * in any other state it blocks on sw_cv.
 */
#define	SW_RUNNING		0
#define	SW_SUSPEND_REQUESTED	1
#define	SW_SUSPENDED		2

/*
 * values for sw_flags
 *
 * SW_START_HEAD is set when a request has been removed from the list by
 * scsi_watch_request_terminate(); the watch thread then restarts its walk
 * from sw_head and clears the flag.
 */
#define	SW_START_HEAD		0x1
113 
/*
 * One watch request.  Requests live on the doubly linked list headed by
 * sw.sw_head; the opaque token handed back to clients is the address of
 * this structure.
 */
struct scsi_watch_request {
	struct scsi_watch_request *swr_next;	/* linked request list	*/
	struct scsi_watch_request *swr_prev;
	clock_t			swr_interval;	/* interval between TURs, */
						/* from drv_usectohz() */
	clock_t			swr_timeout;	/* count down		*/
	uchar_t			swr_busy;	/* TUR in progress	*/
	uchar_t			swr_what;	/* watch or stop	*/
						/* (one of SWR_*) */
	uchar_t			swr_sense_length; /* required sense length */
	struct scsi_pkt		*swr_pkt;	/* TUR pkt itself	*/
						/* (or WRITE(10) / GESN) */
	struct scsi_pkt		*swr_rqpkt;	/* request sense pkt	*/
	struct buf		*swr_rqbp;	/* bp for request sense data */
	struct buf		*swr_mmcbp;	/* bp for MMC command data; */
						/* NULL for non-MMC requests */
	int			(*swr_callback)(); /* callback to driver */
	caddr_t			swr_callback_arg;
	kcondvar_t		swr_terminate_cv; /* cv to wait on to cleanup */
						/* request synchronously */
	int			swr_ref;	/*  refer count to the swr */
	uchar_t			suspend_destroy; /* flag for free later: */
						/* SUSPEND_DESTROY means the */
						/* watch thread frees the swr */
};
133 
/*
 * value for the suspend_destroy field of a scsi_watch_request
 */
#define	SUSPEND_DESTROY		1

#if !defined(lint)
_NOTE(SCHEME_PROTECTS_DATA("unshared data", scsi_watch_request))
#endif

/*
 * values for swr_what
 */
#define	SWR_WATCH		0	/* device watch */
#define	SWR_STOP		1	/* stop monitoring and destroy swr */
#define	SWR_SUSPEND_REQUESTED	2	/* req. pending suspend */
#define	SWR_SUSPENDED		3	/* req. is suspended */
150 
/*
 * forward declarations of the file-local routines
 */
static opaque_t scsi_watch_request_submit_impl(struct scsi_device *devp,
    int interval, int sense_length, int (*callback)(), caddr_t cb_arg,
    boolean_t mmc);
static void scsi_watch_request_destroy(struct scsi_watch_request *swr);
static void scsi_watch_thread(void);
static void scsi_watch_request_intr(struct scsi_pkt *pkt);
157 
158 /*
159  * setup, called from _init(), the thread is created when we need it
160  * and exits when there is nothing to do anymore and everything has been
161  * cleaned up (ie. resources deallocated)
162  */
163 void
164 scsi_watch_init()
165 {
166 /* NO OTHER THREADS ARE RUNNING */
167 	mutex_init(&sw.sw_mutex, NULL, MUTEX_DRIVER, NULL);
168 	cv_init(&sw.sw_cv, NULL, CV_DRIVER, NULL);
169 	sw.sw_state = SW_RUNNING;
170 	sw.sw_flags = 0;
171 	sw.swr_current = NULL;
172 }
173 
174 /*
175  * cleaning up, called from _fini()
176  */
177 void
178 scsi_watch_fini()
179 {
180 /* NO OTHER THREADS ARE RUNNING */
181 	/*
182 	 * hope and pray that the thread has exited
183 	 */
184 	ASSERT(sw.sw_thread == 0);
185 	mutex_destroy(&sw.sw_mutex);
186 	cv_destroy(&sw.sw_cv);
187 }
188 
189 /*
190  * allocate an swr (scsi watch request structure) and initialize pkts
191  */
/*
 * address of the target device; relies on a variable named devp being in
 * scope at the point of use.  Parenthesized so it can be used safely in
 * any expression context.
 */
#define	ROUTE		(&devp->sd_address)
193 
194 opaque_t
195 scsi_watch_request_submit(
196 	struct scsi_device	*devp,
197 	int			interval,
198 	int			sense_length,
199 	int			(*callback)(),	/* callback function */
200 	caddr_t			cb_arg)		/* device number */
201 {
202 	return (scsi_watch_request_submit_impl(devp, interval, sense_length,
203 	    callback, cb_arg, B_FALSE));
204 }
205 
206 opaque_t
207 scsi_mmc_watch_request_submit(
208 	struct scsi_device	*devp,
209 	int			interval,
210 	int			sense_length,
211 	int			(*callback)(),	/* callback function */
212 	caddr_t			cb_arg)		/* device number */
213 {
214 	return (scsi_watch_request_submit_impl(devp, interval, sense_length,
215 	    callback, cb_arg, B_TRUE));
216 }
217 
218 static opaque_t
219 scsi_watch_request_submit_impl(
220 	struct scsi_device	*devp,
221 	int			interval,
222 	int			sense_length,
223 	int			(*callback)(),	/* callback function */
224 	caddr_t			cb_arg,		/* device number */
225 	boolean_t		mmc)
226 {
227 	register struct scsi_watch_request	*swr = NULL;
228 	register struct scsi_watch_request	*sswr, *p;
229 	struct buf				*bp = NULL;
230 	struct buf				*mmcbp = NULL;
231 	struct scsi_pkt				*rqpkt = NULL;
232 	struct scsi_pkt				*pkt = NULL;
233 	uchar_t					dtype;
234 
235 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
236 	    "scsi_watch_request_submit: Entering ...\n");
237 
238 	mutex_enter(&sw.sw_mutex);
239 	if (sw.sw_thread == 0) {
240 		register kthread_t	*t;
241 
242 		t = thread_create((caddr_t)NULL, 0, scsi_watch_thread,
243 		    NULL, 0, &p0, TS_RUN, v.v_maxsyspri - 2);
244 		sw.sw_thread = t;
245 	}
246 
247 	for (p = sw.sw_head; p != NULL; p = p->swr_next) {
248 		if ((p->swr_callback_arg == cb_arg) &&
249 		    (p->swr_callback == callback))
250 			break;
251 	}
252 
253 	/* update time interval for an existing request */
254 	if (p) {
255 		if (p->swr_what != SWR_STOP) {
256 			p->swr_timeout = p->swr_interval
257 			    = drv_usectohz(interval);
258 			p->swr_what = SWR_WATCH;
259 			p->swr_ref++;
260 			cv_signal(&sw.sw_cv);
261 			mutex_exit(&sw.sw_mutex);
262 			return ((opaque_t)p);
263 		}
264 	}
265 	mutex_exit(&sw.sw_mutex);
266 
267 	/*
268 	 * allocate space for scsi_watch_request
269 	 */
270 	swr = kmem_zalloc(sizeof (struct scsi_watch_request), KM_SLEEP);
271 
272 	/*
273 	 * allocate request sense bp and pkt and make cmd
274 	 * we shouldn't really need it if ARQ is enabled but it is useful
275 	 * if the ARQ failed.
276 	 */
277 	bp = scsi_alloc_consistent_buf(ROUTE, NULL,
278 	    sense_length, B_READ, SLEEP_FUNC, NULL);
279 
280 	rqpkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL,
281 	    bp, CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
282 
283 	(void) scsi_setup_cdb((union scsi_cdb *)rqpkt->pkt_cdbp,
284 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
285 	FILL_SCSI1_LUN(devp, rqpkt);
286 	rqpkt->pkt_private = (opaque_t)swr;
287 	rqpkt->pkt_time = scsi_watch_io_time;
288 	rqpkt->pkt_comp = scsi_watch_request_intr;
289 	rqpkt->pkt_flags |= FLAG_HEAD;
290 
291 	/*
292 	 * Create TUR pkt or GET STATUS EVENT NOTIFICATION for MMC requests or
293 	 * a zero byte WRITE(10) based on the disk-type for reservation state.
294 	 * For inq_dtype of SBC (DIRECT, dtype == 0)
295 	 * OR for RBC devices (dtype is 0xE) AND for
296 	 * ANSI version of SPC/SPC-2/SPC-3 (inq_ansi == 3-5).
297 	 */
298 
299 	dtype = devp->sd_inq->inq_dtype & DTYPE_MASK;
300 	if (mmc) {
301 		mmcbp = scsi_alloc_consistent_buf(ROUTE, NULL,
302 		    8, B_READ, SLEEP_FUNC, NULL);
303 
304 		pkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL, mmcbp,
305 		    CDB_GROUP1, sizeof (struct scsi_arq_status),
306 		    0, 0, SLEEP_FUNC, NULL);
307 
308 		(void) scsi_setup_cdb((union scsi_cdb *)pkt->pkt_cdbp,
309 		    SCMD_GET_EVENT_STATUS_NOTIFICATION, 0, 8, 0);
310 		pkt->pkt_cdbp[1] = 1; /* polled */
311 		pkt->pkt_cdbp[4] = 1 << SD_GESN_MEDIA_CLASS;
312 	} else if (((dtype == 0) || (dtype == 0xE)) &&
313 	    (devp->sd_inq->inq_ansi > 2)) {
314 		pkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL, NULL,
315 		    CDB_GROUP1, sizeof (struct scsi_arq_status),
316 		    0, 0, SLEEP_FUNC, NULL);
317 
318 		(void) scsi_setup_cdb((union scsi_cdb *)pkt->pkt_cdbp,
319 		    SCMD_WRITE_G1, 0, 0, 0);
320 	} else {
321 		pkt = scsi_init_pkt(ROUTE, (struct scsi_pkt *)NULL, NULL,
322 		    CDB_GROUP0, sizeof (struct scsi_arq_status),
323 		    0, 0, SLEEP_FUNC, NULL);
324 
325 		(void) scsi_setup_cdb((union scsi_cdb *)pkt->pkt_cdbp,
326 		    SCMD_TEST_UNIT_READY, 0, 0, 0);
327 		FILL_SCSI1_LUN(devp, pkt);
328 	}
329 
330 	pkt->pkt_private = (opaque_t)swr;
331 	pkt->pkt_time = scsi_watch_io_time;
332 	pkt->pkt_comp = scsi_watch_request_intr;
333 	if (scsi_ifgetcap(&pkt->pkt_address, "tagged-qing", 1) == 1) {
334 		pkt->pkt_flags |= FLAG_STAG;
335 	}
336 
337 	/*
338 	 * set the allocated resources in swr
339 	 */
340 	swr->swr_rqbp = bp;
341 	swr->swr_rqpkt = rqpkt;
342 	swr->swr_mmcbp = mmcbp;
343 	swr->swr_pkt = pkt;
344 	swr->swr_timeout = swr->swr_interval = drv_usectohz(interval);
345 	swr->swr_callback = callback;
346 	swr->swr_callback_arg = cb_arg;
347 	swr->swr_what = SWR_WATCH;
348 	swr->swr_sense_length = (uchar_t)sense_length;
349 	swr->swr_ref = 1;
350 	cv_init(&swr->swr_terminate_cv, NULL, CV_DRIVER, NULL);
351 
352 	/*
353 	 * add to the list and wake up the thread
354 	 */
355 	mutex_enter(&sw.sw_mutex);
356 	swr->swr_next = sw.sw_head;
357 	swr->swr_prev = NULL;
358 	if (sw.sw_head) {
359 		sw.sw_head->swr_prev = swr;
360 	}
361 	sw.sw_head = swr;
362 
363 	/*
364 	 * reset all timeouts, so all requests are in sync again
365 	 * XXX there is a small window where the watch thread releases
366 	 * the mutex so that could upset the resyncing
367 	 */
368 	sswr = swr;
369 	while (sswr) {
370 		sswr->swr_timeout = swr->swr_interval;
371 		sswr = sswr->swr_next;
372 	}
373 	cv_signal(&sw.sw_cv);
374 	mutex_exit(&sw.sw_mutex);
375 	return ((opaque_t)swr);
376 }
377 
378 
379 /*
380  * called by (eg. pwr management) to resume the scsi_watch_thread
381  */
382 void
383 scsi_watch_resume(opaque_t token)
384 {
385 	struct scsi_watch_request *swr = (struct scsi_watch_request *)NULL;
386 	/*
387 	 * Change the state to SW_RUNNING and wake up the scsi_watch_thread
388 	 */
389 	SW_DEBUG(0, sw_label, SCSI_DEBUG, "scsi_watch_resume:\n");
390 	mutex_enter(&sw.sw_mutex);
391 
392 	if (!sw.sw_head)
393 		goto exit;
394 
395 	/* search for token */
396 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
397 		if (swr == (struct scsi_watch_request *)token)
398 			break;
399 	}
400 
401 	/* if we can't find this value, then we just do nothing */
402 	if (swr == (struct scsi_watch_request *)NULL)
403 		goto exit;
404 
405 	swr->swr_what = SWR_WATCH;
406 
407 
408 	/* see if all swr's are awake, then start the thread again */
409 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
410 		if (swr->swr_what != SWR_WATCH)
411 			goto exit;
412 	}
413 
414 	sw.sw_state = SW_RUNNING;
415 	cv_signal(&sw.sw_cv);
416 
417 exit:
418 	mutex_exit(&sw.sw_mutex);
419 }
420 
421 
422 /*
423  * called by clients (eg. pwr management) to suspend the scsi_watch_thread
424  */
425 void
426 scsi_watch_suspend(opaque_t token)
427 {
428 	struct scsi_watch_request *swr = (struct scsi_watch_request *)NULL;
429 	clock_t	now;
430 	clock_t halfsec_delay = drv_usectohz(500000);
431 
432 	SW_DEBUG(0, sw_label, SCSI_DEBUG, "scsi_watch_suspend:\n");
433 
434 	mutex_enter(&sw.sw_mutex);
435 
436 	if (!sw.sw_head)
437 		goto exit;
438 
439 	/* search for token */
440 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
441 		if (swr == (struct scsi_watch_request *)token)
442 			break;
443 	}
444 
445 	/* if we can't find this value, then we just do nothing */
446 	if (swr == (struct scsi_watch_request *)NULL)
447 		goto exit;
448 
449 
450 	for (;;) {
451 		if (swr->swr_busy) {
452 			/*
453 			 * XXX: Assumes that this thread can rerun
454 			 * till all outstanding cmds are complete
455 			 */
456 			swr->swr_what = SWR_SUSPEND_REQUESTED;
457 			now = ddi_get_lbolt();
458 			(void) cv_timedwait(&sw.sw_cv, &sw.sw_mutex,
459 			    now + halfsec_delay);
460 		} else {
461 			swr->swr_what = SWR_SUSPENDED;
462 			break;
463 		}
464 	}
465 
466 	/* see if all swr's are suspended, then suspend the thread */
467 	for (swr = sw.sw_head; swr; swr = swr->swr_next) {
468 		if (swr->swr_what != SWR_SUSPENDED)
469 			goto exit;
470 	}
471 
472 	sw.sw_state = SW_SUSPENDED;
473 
474 exit:
475 	mutex_exit(&sw.sw_mutex);
476 }
477 
478 /*
479  * destroy swr, called for watch thread
480  */
481 static void
482 scsi_watch_request_destroy(struct scsi_watch_request *swr)
483 {
484 	ASSERT(MUTEX_HELD(&sw.sw_mutex));
485 	ASSERT(swr->swr_busy == 0);
486 
487 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
488 	    "scsi_watch_request_destroy: Entering ...\n");
489 	if (swr->swr_ref != 0)
490 		return;
491 
492 	/*
493 	 * remove swr from linked list and destroy pkts
494 	 */
495 	if (swr->swr_prev) {
496 		swr->swr_prev->swr_next = swr->swr_next;
497 	}
498 	if (swr->swr_next) {
499 		swr->swr_next->swr_prev = swr->swr_prev;
500 	}
501 	if (sw.sw_head == swr) {
502 		sw.sw_head = swr->swr_next;
503 	}
504 	if (sw.swr_current == swr) {
505 		swr->suspend_destroy = SUSPEND_DESTROY;
506 		sw.swr_current = NULL;
507 	}
508 
509 	scsi_destroy_pkt(swr->swr_rqpkt);
510 	scsi_free_consistent_buf(swr->swr_rqbp);
511 	if (swr->swr_mmcbp != NULL) {
512 		scsi_free_consistent_buf(swr->swr_mmcbp);
513 	}
514 	scsi_destroy_pkt(swr->swr_pkt);
515 	cv_signal(&swr->swr_terminate_cv);
516 }
517 
/*
 * scsi_watch_request_terminate()
 * called by requestor to terminate any pending watch request.
 * if the request is currently "busy", and the caller cannot wait, failure
 * is returned. O/w the request is cleaned up immediately.
 *
 * token is the value returned by one of the submit routines; flags is
 * compared against SCSI_WATCH_TERMINATE_NOWAIT and
 * SCSI_WATCH_TERMINATE_ALL_WAIT.
 *
 * Returns SCSI_WATCH_TERMINATE_FAIL if the token is not on the list, or if
 * the request is busy and flags is SCSI_WATCH_TERMINATE_NOWAIT; otherwise
 * SCSI_WATCH_TERMINATE_SUCCESS.  Each call that finds the token drops one
 * reference first, including the busy/NOWAIT call that then fails.
 */
int
scsi_watch_request_terminate(opaque_t token, int flags)
{
	struct scsi_watch_request *swr =
	    (struct scsi_watch_request *)token;
	struct scsi_watch_request *sswr;

	int count = 0;		/* references left after this call */
	int free_flag = 0;	/* 1 when this call should free the swr */

	/*
	 * We try to clean up this request if we can. We also inform
	 * the watch thread that we mucked around the list so it has
	 * to start reading from head of list again.
	 */
	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
	    "scsi_watch_request_terminate: Entering(0x%p) ...\n",
	    (void *)swr);
	mutex_enter(&sw.sw_mutex);

	/*
	 * check if it is still in the list
	 */
	sswr = sw.sw_head;
	while (sswr) {
		if (sswr == swr) {
			swr->swr_ref--;
			count = swr->swr_ref;

			if (swr->swr_busy) {
				/* command outstanding and caller can't wait */
				if (flags == SCSI_WATCH_TERMINATE_NOWAIT) {
					mutex_exit(&sw.sw_mutex);
					return (SCSI_WATCH_TERMINATE_FAIL);
				}
				/* other references remain: just drop ours */
				if (count != 0 && flags !=
				    SCSI_WATCH_TERMINATE_ALL_WAIT) {
					mutex_exit(&sw.sw_mutex);
					return (SCSI_WATCH_TERMINATE_SUCCESS);
				}
				/* ALL_WAIT discards every reference */
				if (SCSI_WATCH_TERMINATE_ALL_WAIT == flags) {
					swr->swr_ref = 0;
					count = 0;
				}
				/*
				 * ask the watch thread to destroy the request
				 * and sleep until the destroy routine signals
				 * swr_terminate_cv
				 */
				swr->swr_what = SWR_STOP;
				cv_wait(&swr->swr_terminate_cv, &sw.sw_mutex);
				free_flag = 1;
				goto done;
			} else {
				/* NOWAIT and ALL_WAIT discard every reference */
				if (SCSI_WATCH_TERMINATE_NOWAIT == flags ||
				    SCSI_WATCH_TERMINATE_ALL_WAIT == flags) {
					swr->swr_ref = 0;
					count = 0;
				}
				/* a no-op while swr_ref is still non-zero */
				scsi_watch_request_destroy(swr);
				if (0 == count) {
					sw.sw_flags |= SW_START_HEAD;
					free_flag = 1;
				}
				goto done;
			}
		}
		sswr = sswr->swr_next;
	}
done:
	mutex_exit(&sw.sw_mutex);
	/* the walk fell off the end of the list: token was not found */
	if (!sswr) {
		return (SCSI_WATCH_TERMINATE_FAIL);
	}
	/*
	 * Free the swr here unless the destroy routine flagged it
	 * SUSPEND_DESTROY, in which case the watch thread frees it
	 * when it wakes up.
	 */
	if (1 == free_flag &&
	    sswr->suspend_destroy != SUSPEND_DESTROY) {
		cv_destroy(&swr->swr_terminate_cv);
		kmem_free((caddr_t)swr, sizeof (struct scsi_watch_request));
	}

	return (SCSI_WATCH_TERMINATE_SUCCESS);
}
600 
601 
602 /*
603  * The routines scsi_watch_thread & scsi_watch_request_intr are
604  * on different threads.
605  * If there is no work to be done by the lower level driver
606  * then swr->swr_busy will not be set.
607  * In this case we will call CALLB_CPR_SAFE_BEGIN before
608  * calling cv_timedwait.
609  * In the other case where there is work to be done by
610  * the lower level driver then the flag swr->swr_busy will
611  * be set.
612  * We cannot call CALLB_CPR_SAFE_BEGIN at this point the reason
613  * is the intr thread can interfere with our operations. So
614  * we do a cv_timedwait here. Now at the completion of the
615  * lower level driver's work we will call CALLB_CPR_SAFE_BEGIN
616  * in scsi_watch_request_intr.
617  * In all the cases we will call CALLB_CPR_SAFE_END only if
618  * we already called a CALLB_CPR_SAFE_BEGIN and this is flagged
619  * by sw_cpr_flag.
 * Warlock has a problem when we use different locks
 * on the same type of structure in different contexts.
 * We use callb_cpr_t in both the scsi_watch and esp_callback threads,
 * and we use different mutexes in the different threads, which
 * is not acceptable to warlock. To avoid this
 * problem we use the same name for the mutex in
 * both scsi_watch & esp_callback. When __lock_lint is not defined
 * esp_callback uses the mutex on the stack and scsi_watch uses
 * a static variable. But when __lock_lint is defined
 * we make a mutex which is global in esp_callback and
 * an external mutex for scsi_watch.
 */
/*
 * sw_cmd_count: number of watch commands handed to scsi_transport() that
 * have not completed yet; incremented by the watch thread and decremented
 * on transport failure or in scsi_watch_request_intr().
 */
static int sw_cmd_count = 0;
/* set to 1 after CALLB_CPR_SAFE_BEGIN, cleared after CALLB_CPR_SAFE_END */
static int sw_cpr_flag = 0;
/* CPR callback state for the watch thread, initialized by CALLB_CPR_INIT */
static callb_cpr_t cpr_info;
#ifndef __lock_lint
static kmutex_t cpr_mutex;
#else
extern kmutex_t cpr_mutex;
#endif

#if !defined(lint)
_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cpr_info))
_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, sw_cmd_count))
#endif
/*
 * the scsi watch thread:
 * it either wakes up if there is work to do or if the cv_timedwait
 * timed out
 * normally, it wakes up every <delay> seconds and checks the list.
 * the interval is not very accurate if the cv was signalled but that
 * really doesn't matter much
 * it is more important that we fire off all TURs simultaneously so
 * we don't have to wake up frequently
 *
 * Each pass walks the request list with sw_mutex held, counts down every
 * request's timeout by the length of the previous sleep, submits the watch
 * command for requests whose timeout has expired, and destroys idle
 * requests marked SWR_STOP.  The thread exits when it finds the list
 * empty after a sleep.
 */
static void
scsi_watch_thread()
{
	struct scsi_watch_request	*swr, *next;
	clock_t				now;
	clock_t				last_delay = 0;	/* previous sleep */
	clock_t				next_delay = 0;	/* upcoming sleep */
	clock_t				onesec = drv_usectohz(1000000);
	clock_t				exit_delay = 60 * onesec;

	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
	    "scsi_watch_thread: Entering ...\n");

#if !defined(lint)
	_NOTE(NO_COMPETING_THREADS_NOW);
#endif
	/* set up the CPR (suspend/resume) bookkeeping for this thread */
	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&cpr_info,
	    &cpr_mutex, callb_generic_cpr, "scsi_watch");
	sw_cpr_flag = 0;
#if !defined(lint)
	/*LINTED*/
	_NOTE(COMPETING_THREADS_NOW);
#endif
	/*
	 * grab the mutex and wait for work
	 */
	mutex_enter(&sw.sw_mutex);
	if (sw.sw_head == NULL) {
		cv_wait(&sw.sw_cv, &sw.sw_mutex);
	}

	/*
	 * now loop forever for work; if queue is empty exit
	 */
	for (;;) {
head:
		swr = sw.sw_head;
		while (swr) {

			/*
			 * If state is not running, wait for scsi_watch_resume
			 * to signal restart, but before going into cv_wait
			 * need to let the PM framework know that it is safe
			 * to stop this thread for CPR
			 */
			if (sw.sw_state != SW_RUNNING) {
				SW_DEBUG(0, sw_label, SCSI_DEBUG,
				    "scsi_watch_thread suspended\n");
				mutex_enter(&cpr_mutex);
				if (!sw_cmd_count) {
					CALLB_CPR_SAFE_BEGIN(&cpr_info);
					sw_cpr_flag = 1;
				}
				mutex_exit(&cpr_mutex);
				/* remember which request we are parked on */
				sw.swr_current = swr;
				cv_wait(&sw.sw_cv, &sw.sw_mutex);


				/*
				 * Need to let the PM framework know that it
				 * is no longer safe to stop the thread for
				 * CPR.
				 */
				mutex_exit(&sw.sw_mutex);
				mutex_enter(&cpr_mutex);
				if (sw_cpr_flag == 1) {
					CALLB_CPR_SAFE_END(
					    &cpr_info, &cpr_mutex);
					sw_cpr_flag = 0;
				}
				mutex_exit(&cpr_mutex);
				mutex_enter(&sw.sw_mutex);
				/*
				 * The request was destroyed while we slept:
				 * it is already off the list, so free it
				 * here and restart from the head.
				 */
				if (SUSPEND_DESTROY == swr->suspend_destroy) {
					cv_destroy(&swr->swr_terminate_cv);
					kmem_free((caddr_t)swr,
					    sizeof (struct scsi_watch_request));
					goto head;
				} else {
					sw.swr_current = NULL;
				}
			}
			/*
			 * next_delay tracks the smallest swr_timeout seen on
			 * this pass; note it is sampled before last_delay is
			 * subtracted below.
			 */
			if (next_delay == 0) {
				next_delay = swr->swr_timeout;
			} else {
				next_delay = min(swr->swr_timeout, next_delay);
			}

			swr->swr_timeout -= last_delay;
			/* saved now: swr may be unlinked by the destroy below */
			next = swr->swr_next;

			SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
			    "scsi_watch_thread: "
			    "swr(0x%p),what=%x,timeout=%lx,"
			    "interval=%lx,delay=%lx\n",
			    (void *)swr, swr->swr_what, swr->swr_timeout,
			    swr->swr_interval, last_delay);

			switch (swr->swr_what) {
			case SWR_SUSPENDED:
			case SWR_SUSPEND_REQUESTED:
				/* if we are suspended, don't do anything */
				break;

			case SWR_STOP:
				/* a busy request is retried on a later pass */
				if (swr->swr_busy == 0) {
					scsi_watch_request_destroy(swr);
				}
				break;

			default:
				if (swr->swr_timeout <= 0 && !swr->swr_busy) {
					swr->swr_busy = 1;
					swr->swr_timeout = swr->swr_interval;

					/*
					 * submit the cmd and let the completion
					 * function handle the result
					 * release the mutex (good practice)
					 * this should be safe even if the list
					 * is changing
					 */
					mutex_exit(&sw.sw_mutex);
					mutex_enter(&cpr_mutex);
					sw_cmd_count++;
					mutex_exit(&cpr_mutex);
					SW_DEBUG((dev_info_t *)NULL,
					    sw_label, SCSI_DEBUG,
					    "scsi_watch_thread: "
					    "Starting TUR\n");
					if (scsi_transport(swr->swr_pkt) !=
					    TRAN_ACCEPT) {

						/*
						 * try again later
						 */
						swr->swr_busy = 0;
						SW_DEBUG((dev_info_t *)NULL,
						    sw_label, SCSI_DEBUG,
						    "scsi_watch_thread: "
						    "Transport Failed\n");
						mutex_enter(&cpr_mutex);
						sw_cmd_count--;
						mutex_exit(&cpr_mutex);
					}
					mutex_enter(&sw.sw_mutex);
				}
				break;
			}
			swr = next;
			/* the list was changed under us: start over */
			if (sw.sw_flags & SW_START_HEAD) {
				sw.sw_flags &= ~SW_START_HEAD;
				goto head;
			}
		}

		/*
		 * delay using cv_timedwait; we return when
		 * signalled or timed out
		 */
		if (sw.sw_head != NULL) {
			if (next_delay <= 0) {
				next_delay = onesec;
			}
		} else {
			/* empty list: linger for exit_delay before exiting */
			next_delay = exit_delay;
		}
		now = ddi_get_lbolt();

		/* only CPR-safe when no watch command is outstanding */
		mutex_enter(&cpr_mutex);
		if (!sw_cmd_count) {
			CALLB_CPR_SAFE_BEGIN(&cpr_info);
			sw_cpr_flag = 1;
		}
		mutex_exit(&cpr_mutex);
		/*
		 * if we return from cv_timedwait because we were
		 * signalled, the delay is not accurate but that doesn't
		 * really matter
		 */
		(void) cv_timedwait(&sw.sw_cv, &sw.sw_mutex, now + next_delay);
		/* sw_mutex is dropped while cpr_mutex is taken */
		mutex_exit(&sw.sw_mutex);
		mutex_enter(&cpr_mutex);
		if (sw_cpr_flag == 1) {
			CALLB_CPR_SAFE_END(&cpr_info, &cpr_mutex);
			sw_cpr_flag = 0;
		}
		mutex_exit(&cpr_mutex);
		mutex_enter(&sw.sw_mutex);
		last_delay = next_delay;
		next_delay = 0;

		/*
		 * is there still work to do?
		 */
		if (sw.sw_head == NULL) {
			break;
		}
	}

	/*
	 * no more work to do, reset sw_thread and exit
	 */
	sw.sw_thread = 0;
	mutex_exit(&sw.sw_mutex);
#ifndef __lock_lint
	mutex_enter(&cpr_mutex);
	CALLB_CPR_EXIT(&cpr_info);
#endif
	mutex_destroy(&cpr_mutex);
	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
	    "scsi_watch_thread: Exiting ...\n");
}
868 
/*
 * callback completion function for scsi watch pkt
 *
 * Installed as pkt_comp on both the watch packet and the request sense
 * packet of a request; pkt_private of either packet points back at the
 * owning scsi_watch_request.
 *
 * SCBP gives the packet's status block as a struct scsi_status pointer;
 * SCBP_C gives its first status byte masked with STATUS_MASK.
 */
#define	SCBP(pkt)	((struct scsi_status *)(pkt)->pkt_scbp)
#define	SCBP_C(pkt)	((*(pkt)->pkt_scbp) & STATUS_MASK)

static void
scsi_watch_request_intr(struct scsi_pkt *pkt)
{
	struct scsi_watch_result	result;
	struct scsi_watch_request	*swr =
	    (struct scsi_watch_request *)pkt->pkt_private;
	struct scsi_status		*rqstatusp;
	struct scsi_extended_sense	*rqsensep = NULL;
	int				amt = 0;	/* sense bytes received */

	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
	    "scsi_watch_intr: Entering ...\n");

	/*
	 * first check if it is the TUR or RQS pkt
	 */
	if (pkt == swr->swr_pkt) {
		if (SCBP_C(pkt) != STATUS_GOOD &&
		    SCBP_C(pkt) != STATUS_RESERVATION_CONFLICT) {
			/* check condition without auto request sense data */
			if (SCBP(pkt)->sts_chk &&
			    ((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {

				/*
				 * submit the request sense pkt
				 */
				SW_DEBUG((dev_info_t *)NULL,
				    sw_label, SCSI_DEBUG,
				    "scsi_watch_intr: "
				    "Submitting a Request Sense "
				    "Packet\n");
				if (scsi_transport(swr->swr_rqpkt) !=
				    TRAN_ACCEPT) {

					/*
					 * just give up and try again later
					 */
					SW_DEBUG((dev_info_t *)NULL,
					    sw_label, SCSI_DEBUG,
					    "scsi_watch_intr: "
					    "Request Sense "
					    "Transport Failed\n");
					goto done;
				}

				/*
				 * wait for rqsense to complete
				 * (this routine runs again for swr_rqpkt;
				 * swr_busy stays set until then)
				 */
				return;

			} else	if (SCBP(pkt)->sts_chk) {

				/*
				 * check the autorequest sense data
				 */
				struct scsi_arq_status	*arqstat =
				    (struct scsi_arq_status *)pkt->pkt_scbp;

				rqstatusp = &arqstat->sts_rqpkt_status;
				rqsensep = &arqstat->sts_sensedata;
				amt = swr->swr_sense_length -
				    arqstat->sts_rqpkt_resid;
				SW_DEBUG((dev_info_t *)NULL,
				    sw_label, SCSI_DEBUG,
				    "scsi_watch_intr: "
				    "Auto Request Sense, amt=%x\n", amt);
			}
		}

	} else if (pkt == swr->swr_rqpkt) {

		/*
		 * check the request sense data
		 */
		rqstatusp = (struct scsi_status *)pkt->pkt_scbp;
		rqsensep = (struct scsi_extended_sense *)
		    swr->swr_rqbp->b_un.b_addr;
		amt = swr->swr_sense_length - pkt->pkt_resid;
		SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
		    "scsi_watch_intr: "
		    "Request Sense Completed, amt=%x\n", amt);
	} else {

		/*
		 * should not reach here!!!
		 */
		scsi_log((dev_info_t *)NULL, sw_label, CE_PANIC,
		    "scsi_watch_intr: Bad Packet(0x%p)", (void *)pkt);
	}

	/* rqstatusp is only valid when rqsensep has been set above */
	if (rqsensep) {

		/*
		 * check rqsense status and data
		 */
		if (rqstatusp->sts_busy || rqstatusp->sts_chk) {

			/*
			 * try again later
			 */
			SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
			    "scsi_watch_intr: "
			    "Auto Request Sense Failed - "
			    "Busy or Check Condition\n");
			goto done;
		}

		SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
		    "scsi_watch_intr: "
		    "es_key=%x, adq=%x, amt=%x\n",
		    rqsensep->es_key, rqsensep->es_add_code, amt);
	}

	/*
	 * callback to target driver to do the real work
	 */
	result.statusp = SCBP(swr->swr_pkt);
	result.sensep = rqsensep;
	result.actual_sense_length = (uchar_t)amt;
	result.pkt = swr->swr_pkt;
	/* MMC requests also hand back the 8 bytes of event data */
	if (swr->swr_mmcbp != NULL) {
		bcopy(swr->swr_mmcbp->b_un.b_addr, result.mmc_data, 8);
	}

	/* a non-zero return from the callback stops the watch */
	if ((*swr->swr_callback)(swr->swr_callback_arg, &result)) {
		swr->swr_what = SWR_STOP;
	}

done:
	/* command finished: clear busy and update the CPR bookkeeping */
	swr->swr_busy = 0;
	mutex_enter(&cpr_mutex);
	sw_cmd_count --;
	if (!sw_cmd_count) {
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		sw_cpr_flag = 1;
	}
	mutex_exit(&cpr_mutex);
}
1012 
1013 /*
1014  * scsi_watch_get_ref_count
1015  * called by clients to query the reference count for a given token.
1016  * return the number of reference count or 0 if the given token is
1017  * not found.
1018  */
1019 int
1020 scsi_watch_get_ref_count(opaque_t token)
1021 {
1022 	struct scsi_watch_request *swr =
1023 	    (struct scsi_watch_request *)token;
1024 	struct scsi_watch_request *sswr;
1025 	int rval = 0;
1026 
1027 	SW_DEBUG((dev_info_t *)NULL, sw_label, SCSI_DEBUG,
1028 	    "scsi_watch_get_ref_count: Entering(0x%p) ...\n",
1029 	    (void *)swr);
1030 	mutex_enter(&sw.sw_mutex);
1031 
1032 	sswr = sw.sw_head;
1033 	while (sswr) {
1034 		if (sswr == swr) {
1035 			rval = swr->swr_ref;
1036 			mutex_exit(&sw.sw_mutex);
1037 			return (rval);
1038 		}
1039 		sswr = sswr->swr_next;
1040 	}
1041 
1042 	mutex_exit(&sw.sw_mutex);
1043 	return (rval);
1044 }
1045