xref: /illumos-gate/usr/src/uts/sun4u/serengeti/io/sbdp_quiesce.c (revision d5ebc4938a50bb2fb1914062e396761dc9161a51)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2023 Oxide Computer Company
29  */
30 
31 /*
32  * A CPR derivative specifically for sbd
33  */
34 
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/machparam.h>
38 #include <sys/machsystm.h>
39 #include <sys/ddi.h>
40 #define	SUNDDI_IMPL
41 #include <sys/sunddi.h>
42 #include <sys/sunndi.h>
43 #include <sys/devctl.h>
44 #include <sys/time.h>
45 #include <sys/kmem.h>
46 #include <nfs/lm.h>
47 #include <sys/ddi_impldefs.h>
48 #include <sys/ndi_impldefs.h>
49 #include <sys/obpdefs.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/errno.h>
53 #include <sys/callb.h>
54 #include <sys/clock.h>
55 #include <sys/x_call.h>
56 #include <sys/cpuvar.h>
57 #include <sys/epm.h>
58 #include <sys/vfs.h>
59 
60 #ifdef DEBUG
61 #include <sys/note.h>
62 #endif
63 
64 #include <sys/promif.h>
65 #include <sys/conf.h>
66 #include <sys/cyclic.h>
67 
68 #include <sys/sbd_ioctl.h>
69 #include <sys/sbd.h>
70 #include <sys/sbdp_priv.h>
71 #include <sys/cpu_sgnblk_defs.h>
72 
/*
 * Return the error-resource string buffer embedded in an sbd_error_t;
 * callers format failure details (device path, major number, process
 * args, etc.) directly into this buffer before calling sbdp_set_err().
 */
static char *
sbdp_get_err_buf(sbd_error_t *ep)
{
	return (ep->e_rsc);
}
78 
79 extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
80 extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
81 extern int	is_pseudo_device(dev_info_t *dip);
82 
83 extern kmutex_t	cpu_lock;
84 
85 static int	sbdp_is_real_device(dev_info_t *dip);
86 #ifdef DEBUG
87 static int	sbdp_bypass_device(char *dname);
88 #endif
89 static int	sbdp_check_dip(dev_info_t *dip, void *arg, uint_t ref);
90 
91 static int	sbdp_resolve_devname(dev_info_t *dip, char *buffer,
92 				char *alias);
93 
94 int sbdp_test_suspend(sbdp_handle_t *hp);
95 
/* Accessors for the suspend/resume handle's state and failed-dip fields */
#define	SR_STATE(srh)			((srh)->sr_suspend_state)
#define	SR_SET_STATE(srh, state)	(SR_STATE((srh)) = (state))
#define	SR_FAILED_DIP(srh)		((srh)->sr_failed_dip)

/* sr_flags bit: set when the hardware watchdog was disabled at suspend */
#define	SR_FLAG_WATCHDOG	0x1
#define	SR_CHECK_FLAG(srh, flag)	((srh)->sr_flags & (flag))
#define	SR_SET_FLAG(srh, flag)		((srh)->sr_flags |= (flag))
#define	SR_CLEAR_FLAG(srh, flag)	((srh)->sr_flags &= ~(flag))

#ifdef DEBUG
/*
 * Just for testing. List of drivers to bypass when performing a suspend.
 * The list is terminated by an empty string.
 */
static char *sbdp_bypass_list[] = {
	/* "sgsbbc", this is an example when needed */
	""
};
#endif

#define		SKIP_SYNC	/* bypass sync ops in sbdp_suspend */

/*
 * sbdp_skip_user_threads is used to control if user threads should
 * be suspended.  If sbdp_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, sbdp_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and sbdp_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of sbdp_check_user_stop_result
 * and sbdp_allow_blocked_threads, even though it might not make much
 * sense to not allow blocked threads when we don't even check stop
 * result.
 */
static int	sbdp_skip_user_threads = 0;		/* default to FALSE */
static int	sbdp_check_user_stop_result = 1;	/* default to TRUE */
static int	sbdp_allow_blocked_threads = 1;		/* default to TRUE */
132 
133 
/*
 * Quiesce interrupt-driven activity on this CPU: disable kernel
 * preemption and suspend the cyclic subsystem (clock and other
 * cyclics).  Paired with sbdp_enable_intr().
 */
static void
sbdp_stop_intr(void)
{
	kpreempt_disable();
	cyclic_suspend();
}
140 
/*
 * Undo sbdp_stop_intr(): resume the cyclic subsystem and re-enable
 * kernel preemption, in the reverse order they were disabled.
 */
static void
sbdp_enable_intr(void)
{
	cyclic_resume();
	kpreempt_enable();
}
147 
148 sbdp_sr_handle_t *
sbdp_get_sr_handle(void)149 sbdp_get_sr_handle(void)
150 {
151 	sbdp_sr_handle_t *srh;
152 	srh = kmem_zalloc(sizeof (sbdp_sr_handle_t), KM_SLEEP);
153 
154 	return (srh);
155 }
156 
157 void
sbdp_release_sr_handle(sbdp_sr_handle_t * srh)158 sbdp_release_sr_handle(sbdp_sr_handle_t *srh)
159 {
160 	ASSERT(SR_FAILED_DIP(srh) == NULL);
161 	kmem_free((caddr_t)srh, sizeof (sbdp_sr_handle_t));
162 }
163 
/*
 * Decide whether dip represents a "real" (suspendable) hardware device.
 * Returns 1 for real devices, 0 otherwise:
 *  - no bound driver			-> not real
 *  - PMC_NEEDS_SR or PMC_PARENTAL_SR	-> real
 *  - PMC_NO_SR				-> not real
 *  - otherwise, real iff the node has a "reg" property
 */
static int
sbdp_is_real_device(dev_info_t *dip)
{
	struct regspec *regbuf = NULL;
	int length = 0;
	int rc;

	if (ddi_get_driver(dip) == NULL)
		return (0);

	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
		return (1);
	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
		return (0);

	/*
	 * now the general case: probe for a "reg" property
	 */
	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
	    (caddr_t)&regbuf, &length);
	ASSERT(rc != DDI_PROP_NO_MEMORY);
	if (rc != DDI_PROP_SUCCESS) {
		return (0);
	} else {
		/* free the buffer ddi_getlongprop() allocated for us */
		if ((length > 0) && (regbuf != NULL))
			kmem_free(regbuf, length);
		return (1);
	}
}
193 
#ifdef DEBUG
/*
 * Return 1 if dname appears on the debug-only suspend bypass list
 * (terminated by an empty string), 0 otherwise.
 *
 * Fix: the original advanced two cursors in lockstep — a pointer
 * (lname++) in the loop header and an index (i++) buried inside the
 * strcmp() argument — which was fragile and easy to desynchronize.
 * A single index now drives both the termination test and the lookup.
 */
static int
sbdp_bypass_device(char *dname)
{
	int i;

	/* check the bypass list */
	for (i = 0; sbdp_bypass_list[i][0] != '\0'; i++) {
		SBDP_DBG_QR("Checking %s\n", sbdp_bypass_list[i]);
		if (strcmp(dname, sbdp_bypass_list[i]) == 0)
			return (1);
	}
	return (0);
}
#endif
209 
210 static int
sbdp_resolve_devname(dev_info_t * dip,char * buffer,char * alias)211 sbdp_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
212 {
213 	major_t	devmajor;
214 	char	*aka, *name;
215 
216 	*buffer = *alias = 0;
217 
218 	if (dip == NULL)
219 		return (-1);
220 
221 	if ((name = ddi_get_name(dip)) == NULL)
222 		name = "<null name>";
223 
224 	aka = name;
225 
226 	if ((devmajor = ddi_name_to_major(aka)) != -1)
227 		aka = ddi_major_to_name(devmajor);
228 
229 	(void) strcpy(buffer, name);
230 
231 	if (strcmp(name, aka))
232 		(void) strcpy(alias, aka);
233 	else
234 		*alias = 0;
235 
236 	return (0);
237 }
238 
/*
 * Walk-callback context for sbdp_check_dip(): accumulates reference
 * counts for devices found under a branch and records error details.
 */
typedef struct sbdp_ref {
	int *refcount;			/* total referenced devices found */
	int *refcount_non_gldv3;	/* subset: non-GLDv3 network drivers */
	sbd_error_t *sep;		/* where error details are recorded */
} sbdp_ref_t;
244 
/*
 * e_ddi_branch_referenced() callback: decide whether dip blocks a
 * quiesce.  "ref" is non-zero when the framework found the device
 * referenced.  Returns DDI_WALK_TERMINATE once a blocking device has
 * been recorded in sbrp->sep (with ESBD_BUSY and the device path in
 * the error buffer), DDI_WALK_CONTINUE otherwise.
 */
static int
sbdp_check_dip(dev_info_t *dip, void *arg, uint_t ref)
{
	char		*dname;
	sbdp_ref_t	*sbrp = (sbdp_ref_t *)arg;

	if (dip == NULL)
		return (DDI_WALK_CONTINUE);

	ASSERT(sbrp->sep != NULL);
	ASSERT(sbrp->refcount != NULL);

	if (!sbdp_is_real_device(dip))
		return (DDI_WALK_CONTINUE);

	dname = ddi_binding_name(dip);

	/*
	 * These two PCI class bindings are always treated as busy,
	 * whether or not the framework found them referenced.
	 */
	if ((strcmp(dname, "pciclass,060940") == 0) || (strcmp(dname,
	    "pciclass,060980") == 0)) {
		(void) ddi_pathname(dip, sbdp_get_err_buf(sbrp->sep));
		sbdp_set_err(sbrp->sep, ESBD_BUSY, NULL);
		(*sbrp->refcount)++;
		return (DDI_WALK_TERMINATE);
	}

#ifdef DEBUG
	if (sbdp_bypass_device(dname))
		return (DDI_WALK_CONTINUE);
#endif

	if (ref) {
		major_t	major;

		(*sbrp->refcount)++;
		SBDP_DBG_QR("\n%s (major# %d) is referenced\n",
		    dname, ddi_name_to_major(dname));
		(void) ddi_pathname(dip, sbdp_get_err_buf(sbrp->sep));
		major = ddi_driver_major(dip);
		/*
		 * Referenced non-GLDv3 physical network drivers are
		 * counted separately and do not terminate the walk;
		 * the caller decides how to treat them.
		 */
		if (sbrp->refcount_non_gldv3 && NETWORK_PHYSDRV(major) &&
		    !GLDV3_DRV(major)) {
			(*sbrp->refcount_non_gldv3)++;
			return (DDI_WALK_CONTINUE);
		}
		sbdp_set_err(sbrp->sep, ESBD_BUSY, NULL);
		return (DDI_WALK_TERMINATE);
	}
	return (DDI_WALK_CONTINUE);
}
293 
/*
 * Count devices under dip (a held branch) that are referenced and
 * would therefore block a quiesce.  refcount and refcount_non_gldv3
 * are updated via sbdp_check_dip(); error details land in sep.
 */
void
sbdp_check_devices(dev_info_t *dip, int *refcount, sbd_error_t *sep,
    int *refcount_non_gldv3)
{
	sbdp_ref_t sbr;

	sbr.refcount = refcount;
	sbr.refcount_non_gldv3 = refcount_non_gldv3;
	sbr.sep = sep;

	ASSERT(e_ddi_branch_held(dip));

	(void) e_ddi_branch_referenced(dip, sbdp_check_dip, &sbr);
}
308 
309 /*
310  * Starting from the root node suspend all devices in the device tree.
311  * Assumes that all devices have already been marked busy.
312  */
313 static int
sbdp_suspend_devices_(dev_info_t * dip,sbdp_sr_handle_t * srh)314 sbdp_suspend_devices_(dev_info_t *dip, sbdp_sr_handle_t *srh)
315 {
316 	major_t	major;
317 	char	*dname;
318 
319 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
320 		char	d_name[40], d_alias[40], *d_info;
321 
322 		if (sbdp_suspend_devices_(ddi_get_child(dip), srh)) {
323 			return (ENXIO);
324 		}
325 
326 		if (!sbdp_is_real_device(dip))
327 			continue;
328 
329 		major = (major_t)-1;
330 		if ((dname = DEVI(dip)->devi_binding_name) != NULL)
331 			major = ddi_name_to_major(dname);
332 
333 #ifdef DEBUG
334 		if (sbdp_bypass_device(dname)) {
335 			SBDP_DBG_QR("bypassed suspend of %s (major# %d)\n",
336 			    dname, major);
337 			continue;
338 		}
339 #endif
340 
341 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
342 			d_info = "<null>";
343 
344 		d_name[0] = 0;
345 		if (sbdp_resolve_devname(dip, d_name, d_alias) == 0) {
346 			if (d_alias[0] != 0) {
347 				SBDP_DBG_QR("\tsuspending %s@%s (aka %s)\n",
348 				    d_name, d_info, d_alias);
349 			} else {
350 				SBDP_DBG_QR("\tsuspending %s@%s\n",
351 				    d_name, d_info);
352 			}
353 		} else {
354 			SBDP_DBG_QR("\tsuspending %s@%s\n", dname, d_info);
355 		}
356 
357 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
358 			(void) sprintf(sbdp_get_err_buf(&srh->sep),
359 			    "%d", major);
360 
361 			sbdp_set_err(&srh->sep, ESGT_SUSPEND, NULL);
362 			ndi_hold_devi(dip);
363 			SR_FAILED_DIP(srh) = dip;
364 			return (DDI_FAILURE);
365 		}
366 	}
367 
368 	return (DDI_SUCCESS);
369 }
370 
371 /*ARGSUSED*/
372 static int
sbdp_suspend_devices_enter(dev_info_t * dip,void * arg)373 sbdp_suspend_devices_enter(dev_info_t *dip, void *arg)
374 {
375 	struct dev_info *devi = DEVI(dip);
376 	ndi_devi_enter(dip);
377 	return (DDI_WALK_CONTINUE);
378 }
379 
380 /*ARGSUSED*/
381 static int
sbdp_suspend_devices_exit(dev_info_t * dip,void * arg)382 sbdp_suspend_devices_exit(dev_info_t *dip, void *arg)
383 {
384 	struct dev_info *devi = DEVI(dip);
385 	ndi_devi_exit(dip);
386 	return (DDI_WALK_CONTINUE);
387 }
388 
389 /*
390  * Before suspending devices first mark all device nodes busy. This
391  * avoids a deadlock situation when another thread holds a device busy
392  * and accesses an already suspended device.
393  */
394 static int
sbdp_suspend_devices(dev_info_t * dip,sbdp_sr_handle_t * srh)395 sbdp_suspend_devices(dev_info_t *dip, sbdp_sr_handle_t *srh)
396 {
397 	int	rv;
398 
399 	/* assumes dip is ddi_root_node so no ndi_devi_enter required */
400 	ASSERT(dip == ddi_root_node());
401 	ddi_walk_devs(dip, sbdp_suspend_devices_enter, NULL);
402 	rv = sbdp_suspend_devices_(dip, srh);
403 	ddi_walk_devs(dip, sbdp_suspend_devices_exit, NULL);
404 	return (rv);
405 }
406 
407 static void
sbdp_resume_devices(dev_info_t * start,sbdp_sr_handle_t * srh)408 sbdp_resume_devices(dev_info_t *start, sbdp_sr_handle_t *srh)
409 {
410 	dev_info_t	*dip, *next, *last = NULL;
411 	char		*bn;
412 	sbd_error_t	*sep;
413 
414 	sep = &srh->sep;
415 
416 	/* attach in reverse device tree order */
417 	while (last != start) {
418 		dip = start;
419 		next = ddi_get_next_sibling(dip);
420 		while (next != last && dip != SR_FAILED_DIP(srh)) {
421 			dip = next;
422 			next = ddi_get_next_sibling(dip);
423 		}
424 		if (dip == SR_FAILED_DIP(srh)) {
425 			/* Release hold acquired in sbdp_suspend_devices() */
426 			ndi_rele_devi(dip);
427 			SR_FAILED_DIP(srh) = NULL;
428 		} else if (sbdp_is_real_device(dip) &&
429 		    SR_FAILED_DIP(srh) == NULL) {
430 
431 			if (DEVI(dip)->devi_binding_name != NULL) {
432 				bn = ddi_binding_name(dip);
433 			}
434 #ifdef DEBUG
435 			if (!sbdp_bypass_device(bn)) {
436 #else
437 			{
438 #endif
439 				char	d_name[40], d_alias[40], *d_info;
440 
441 				d_name[0] = 0;
442 				d_info = ddi_get_name_addr(dip);
443 				if (d_info == NULL)
444 					d_info = "<null>";
445 
446 				if (!sbdp_resolve_devname(dip, d_name,
447 				    d_alias)) {
448 					if (d_alias[0] != 0) {
449 						SBDP_DBG_QR("\tresuming "
450 						    "%s@%s (aka %s)\n",
451 						    d_name, d_info,
452 						    d_alias);
453 					} else {
454 						SBDP_DBG_QR("\tresuming "
455 						    "%s@%s\n",
456 						    d_name, d_info);
457 					}
458 				} else {
459 					SBDP_DBG_QR("\tresuming %s@%s\n",
460 					    bn, d_info);
461 				}
462 
463 				if (devi_attach(dip, DDI_RESUME) !=
464 				    DDI_SUCCESS) {
465 					/*
466 					 * Print a console warning,
467 					 * set an errno of ESGT_RESUME,
468 					 * and save the driver major
469 					 * number in the e_str.
470 					 */
471 
472 					(void) sprintf(sbdp_get_err_buf(sep),
473 					    "%s@%s",
474 					    d_name[0] ? d_name : bn, d_info);
475 					SBDP_DBG_QR("\tFAILED to resume "
476 					    "%s\n", sbdp_get_err_buf(sep));
477 					sbdp_set_err(sep,
478 					    ESGT_RESUME, NULL);
479 				}
480 			}
481 		}
482 		ndi_devi_enter(dip);
483 		sbdp_resume_devices(ddi_get_child(dip), srh);
484 		ndi_devi_exit(dip);
485 		last = dip;
486 	}
487 }
488 
489 /*
490  * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
491  * but from DR point of view.  These user threads are waiting in
492  * the kernel.  Once they return from kernel, they will process
493  * the stop signal and stop.
494  */
495 #define	SBDP_VSTOPPED(t)			\
496 	((t)->t_state == TS_SLEEP &&		\
497 	(t)->t_wchan != NULL &&			\
498 	(t)->t_astflag &&		\
499 	((t)->t_proc_flag & TP_CHKPT))
500 
501 
/*
 * Stop (checkpoint) all user threads ahead of a quiesce.  Makes up to
 * SBDP_UTSTOP_RETRY passes: each pass marks every user thread with
 * TP_CHKPT and pokes it, waits (with a growing timeout), then re-walks
 * the thread list to see whether everything stopped.  Returns
 * DDI_SUCCESS when all user threads are stopped (or virtually
 * stopped, if sbdp_allow_blocked_threads is set); otherwise returns
 * ESRCH with the offending process's psargs recorded in srh->sep.
 * No-op returning DDI_SUCCESS when sbdp_skip_user_threads is set.
 */
static int
sbdp_stop_user_threads(sbdp_sr_handle_t *srh)
{
	int		count;
	char		cache_psargs[PSARGSZ];
	kthread_id_t	cache_tp;
	uint_t		cache_t_state;
	int		bailout;
	sbd_error_t	*sep;
	kthread_id_t	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	SBDP_UTSTOP_RETRY	4
#define	SBDP_UTSTOP_WAIT	hz

	if (sbdp_skip_user_threads)
		return (DDI_SUCCESS);

	sep = &srh->sep;
	ASSERT(sep);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	for (count = 0; count < SBDP_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Locks are dropped around
				 * add_one_utstop() and reacquired;
				 * presumably it may block — confirm.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}
			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/* let everything catch up */
		utstop_timedwait(count * count * SBDP_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(sbdp_allow_blocked_threads &&
			    SBDP_VSTOPPED(tp))) {

				/* nope, cache the details for later */
				bcopy(p->p_user.u_psargs, cache_psargs,
				    sizeof (cache_psargs));
				cache_tp = tp;
				cache_t_state = tp->t_state;
				bailout = 1;
			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		/* report the last unstoppable thread seen */
		cmn_err(CE_NOTE, "process: %s id: %p state: %x\n",
		    cache_psargs, (void *)cache_tp, cache_t_state);

		(void) sprintf(sbdp_get_err_buf(sep), "%s", cache_psargs);
		sbdp_set_err(sep, ESGT_UTHREAD, NULL);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
623 
/*
 * Undo sbdp_stop_user_threads(): clear TP_CHKPT on every user thread
 * and put checkpoint-stopped threads back on a run queue.
 */
static void
sbdp_start_user_threads(void)
{
	kthread_id_t tp;

	mutex_enter(&pidlock);

	/* walk all threads and release them */
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t *p = ttoproc(tp);

		/* skip kernel threads */
		if (ttoproc(tp)->p_as == &kas)
			continue;

		mutex_enter(&p->p_lock);
		tp->t_proc_flag &= ~TP_CHKPT;
		mutex_exit(&p->p_lock);

		thread_lock(tp);
		if (CPR_ISTOPPED(tp)) {
			/* back on the runq */
			tp->t_schedflag |= TS_RESUME;
			setrun_locked(tp);
		}
		thread_unlock(tp);
	}

	mutex_exit(&pidlock);
}
654 
/*
 * Post signal sig (e.g. SIGTHAW at resume) to every user process
 * except zombies, init, and the current process, then delay one
 * second to give them a chance to run.
 */
static void
sbdp_signal_user(int sig)
{
	struct proc *p;

	mutex_enter(&pidlock);

	for (p = practive; p != NULL; p = p->p_next) {
		/* only user threads */
		if (p->p_exec == NULL || p->p_stat == SZOMB ||
		    p == proc_init || p == ttoproc(curthread))
			continue;

		mutex_enter(&p->p_lock);
		sigtoproc(p, NULL, sig);
		mutex_exit(&p->p_lock);
	}

	mutex_exit(&pidlock);

	/* add a bit of delay */
	delay(hz);
}
678 
/* watchdog timeout saved at suspend so sbdp_resume() can restore it */
static uint_t saved_watchdog_seconds;
680 
/*
 * Undo a (possibly partial) sbdp_suspend().  SR_STATE(srh) records how
 * far suspend got; each case falls through to undo the earlier stages
 * in reverse order (CPUs/interrupts, drivers/lock manager, user
 * threads, then SIGTHAW notification).
 */
void
sbdp_resume(sbdp_sr_handle_t *srh)
{
	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
	    CPU->cpu_id);

	switch (SR_STATE(srh)) {
	case SBDP_SRSTATE_FULL:

		/* sbdp_suspend() returned holding cpu_lock */
		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		sbdp_enable_intr();	/* enable intr & clock */

		/*
		 * release all the other cpus
		 * using start_cpus() vice sbdp_release_cpus()
		 */
		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (SR_CHECK_FLAG(srh, SR_FLAG_WATCHDOG)) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    saved_watchdog_seconds);
			mutex_exit(&tod_lock);
		}

		/* FALLTHROUGH */

	case SBDP_SRSTATE_DRIVER:
		/*
		 * resume devices: root node doesn't have to
		 * be held in any way.
		 */
		sbdp_resume_devices(ddi_root_node(), srh);

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case SBDP_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!sbdp_skip_user_threads) {
			SBDP_DBG_QR("DR: resuming user threads...\n");
			sbdp_start_user_threads();
		}
		/* FALLTHROUGH */

	case SBDP_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		SBDP_DBG_QR("sending SIGTHAW...\n");
		sbdp_signal_user(SIGTHAW);
		break;
	}

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);

	SBDP_DBG_QR("DR: resume COMPLETED\n");
}
766 
/*
 * Quiesce the system for board DR: stop user threads, suspend the
 * lock manager and all drivers, then pause the other CPUs and stop
 * interrupts.  On any stage failure the completed stages are undone
 * via sbdp_resume() and the error is returned.  On success (rc ==
 * DDI_SUCCESS) the caller is left holding cpu_lock with all other
 * CPUs paused; sbdp_resume() releases it.
 */
int
sbdp_suspend(sbdp_sr_handle_t *srh)
{
	int force;
	int rc = DDI_SUCCESS;

	force = (srh && (srh->sr_flags & SBDP_IOCTL_FLAG_FORCE));

	/*
	 * if no force flag, check for unsafe drivers
	 * NOTE(review): only a debug message is emitted here; no
	 * unsafe-driver check is actually performed in this function.
	 */
	if (force) {
		SBDP_DBG_QR("\nsbdp_suspend invoked with force flag");
	}

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
	    CPU->cpu_id);

	/*
	 * first, stop all user threads
	 */
	SBDP_DBG_QR("SBDP: suspending user threads...\n");
	SR_SET_STATE(srh, SBDP_SRSTATE_USER);
	if (((rc = sbdp_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    sbdp_check_user_stop_result) {
		sbdp_resume(srh);
		return (rc);
	}

#ifndef	SKIP_SYNC
	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef	SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();

#endif
	/*
	 * now suspend drivers
	 */
	SBDP_DBG_QR("SBDP: suspending drivers...\n");
	SR_SET_STATE(srh, SBDP_SRSTATE_DRIVER);

	/*
	 * Root node doesn't have to be held in any way.
	 */
	if ((rc = sbdp_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		sbdp_resume(srh);
		return (rc);
	}

	/*
	 * finally, grab all cpus
	 */
	SR_SET_STATE(srh, SBDP_SRSTATE_FULL);

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		saved_watchdog_seconds = tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		SR_SET_FLAG(srh, SR_FLAG_WATCHDOG);
	} else {
		SR_CLEAR_FLAG(srh, SR_FLAG_WATCHDOG);
	}

	/* cpu_lock is intentionally held across the return on success */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL, NULL);
	sbdp_stop_intr();

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);

	return (rc);
}
860 
861 /*ARGSUSED*/
862 int
863 sbdp_test_suspend(sbdp_handle_t *hp)
864 {
865 	sbdp_sr_handle_t	*srh;
866 	int			err;
867 
868 	SBDP_DBG_QR("%s...\n", "sbdp_test_suspend");
869 
870 	srh = sbdp_get_sr_handle();
871 
872 	srh->sr_flags = hp->h_flags;
873 
874 	if ((err = sbdp_suspend(srh)) == DDI_SUCCESS) {
875 		sbdp_resume(srh);
876 	} else {
877 		SBDP_DBG_MISC("sbdp_suspend() failed, err = 0x%x\n", err);
878 	}
879 	sbdp_release_sr_handle(srh);
880 
881 	return (0);
882 }
883 
#ifdef	DEBUG
/*
 * Debug-only passthru entry point used to exercise the quiesce
 * (suspend/resume) path via sbdp_test_suspend().
 */
int
sbdp_passthru_test_quiesce(sbdp_handle_t *hp, void *arg)
{
	_NOTE(ARGUNUSED(arg))

	return (sbdp_test_suspend(hp));
}
#endif
893