xref: /illumos-gate/usr/src/uts/i86pc/io/dr/dr_quiesce.c (revision a2cdcdd260232b58202b11a9bfc0103c9449ed52)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * A CPR derivative specifically for starfire/starcat
28  * X86 doesn't make use of the quiesce interfaces, it's kept for simplicity.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/machparam.h>
34 #include <sys/machsystm.h>
35 #include <sys/ddi.h>
36 #define	SUNDDI_IMPL
37 #include <sys/sunddi.h>
38 #include <sys/sunndi.h>
39 #include <sys/devctl.h>
40 #include <sys/time.h>
41 #include <sys/kmem.h>
42 #include <nfs/lm.h>
43 #include <sys/ddi_impldefs.h>
44 #include <sys/ndi_impldefs.h>
45 #include <sys/obpdefs.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/errno.h>
49 #include <sys/callb.h>
50 #include <sys/clock.h>
51 #include <sys/x_call.h>
52 #include <sys/cpuvar.h>
53 #include <sys/epm.h>
54 #include <sys/vfs.h>
55 #include <sys/promif.h>
56 #include <sys/conf.h>
57 #include <sys/cyclic.h>
58 
59 #include <sys/dr.h>
60 #include <sys/dr_util.h>
61 
/*
 * Symbols that are declared here but defined outside of this file.
 */
extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
extern int	is_pseudo_device(dev_info_t *dip);

extern kmutex_t	cpu_lock;
extern dr_unsafe_devs_t dr_unsafe_devs;

/*
 * Forward declarations for the static helpers defined later in this file.
 */
static int		dr_is_real_device(dev_info_t *dip);
static int		dr_is_unsafe_major(major_t major);
static int		dr_bypass_device(char *dname);
static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
				char *alias);
static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
				int majors);
static int		dr_add_int(uint64_t *arr, int idx, int len,
				uint64_t val);

/* Non-static test entry point; defined near the end of this file. */
int dr_pt_test_suspend(dr_handle_t *hp);
81 
/*
 * dr_quiesce.c interface
 * NOTE: states used internally by dr_suspend and dr_resume
 *
 * dr_suspend() stores the step it is about to attempt in the handle's
 * sr_suspend_state; dr_resume() switches on that value and falls through
 * the cases so that every step at or below the recorded one is undone.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* dr_resume() only sends SIGTHAW */
	DR_SRSTATE_USER,	/* set before user threads are stopped */
	DR_SRSTATE_DRIVER,	/* set before drivers are suspended */
	DR_SRSTATE_FULL		/* set before the CPUs are paused */
} suspend_state_t;
92 
/*
 * Per-operation suspend/resume bookkeeping, created by dr_get_sr_handle()
 * and destroyed by dr_release_sr_handle().
 */
struct dr_sr_handle {
	/* DR handle this suspend/resume belongs to; errors land in h_err */
	dr_handle_t		*sr_dr_handlep;
	/*
	 * Device whose DDI_SUSPEND failed.  Held in dr_suspend_devices()
	 * and released (and reset to NULL) in dr_resume_devices().
	 */
	dev_info_t		*sr_failed_dip;
	/* how far dr_suspend() progressed; consumed by dr_resume() */
	suspend_state_t		sr_suspend_state;
	/* NOTE(review): not referenced in this file; see SR_FLAG_* below */
	uint_t			sr_flags;
	/* values (driver majors or PIDs) collected for error reporting */
	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
	/* number of valid entries in sr_err_ints */
	int			sr_err_idx;
};

/* Presumably a bit for sr_flags; it has no user in this file. */
#define	SR_FLAG_WATCHDOG	0x1
103 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * List of drivers to bypass when performing a suspend.
 *
 * The list must end with an empty string: dr_bypass_device() stops
 * scanning at the first entry whose first character is '\0'.  As shipped
 * the list holds only that terminator, so nothing is bypassed.
 */
static char *dr_bypass_list[] = {
	""
};


/*
 * While SKIP_SYNC is defined, the vfs_sync()/sync() calls in dr_suspend()
 * are compiled out.
 */
#define		SKIP_SYNC	/* bypass sync ops in dr_suspend */
115 
/*
 * dr_skip_user_threads controls whether user threads are suspended at
 * all.  If dr_skip_user_threads is true, the other two flags are not
 * used.  If it is false, dr_check_user_stop_result controls whether
 * the result of stopping the user threads is checked, and
 * dr_allow_blocked_threads controls whether the suspend may continue
 * while some threads are still blocked.  All combinations of
 * dr_check_user_stop_result and dr_allow_blocked_threads are allowed,
 * even though it does not make much sense to disallow blocked threads
 * when the stop result is not checked anyway.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */

/* NOTE(review): not referenced anywhere in this file; confirm still needed */
#define	DR_CPU_LOOP_MSEC	1000
133 
/*
 * Final quiesce step used by dr_suspend(): disable kernel preemption on
 * the calling thread and suspend the cyclic subsystem.  The caller must
 * hold cpu_lock.  Undone by dr_enable_intr().
 */
static void
dr_stop_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();
	cyclic_suspend();
}
142 
/*
 * Inverse of dr_stop_intr(): resume the cyclic subsystem and then
 * re-enable kernel preemption, i.e. the reverse order of the suspend
 * side.  The caller must hold cpu_lock.
 */
static void
dr_enable_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cyclic_resume();
	kpreempt_enable();
}
151 
152 dr_sr_handle_t *
153 dr_get_sr_handle(dr_handle_t *hp)
154 {
155 	dr_sr_handle_t *srh;
156 
157 	srh = GETSTRUCT(dr_sr_handle_t, 1);
158 	srh->sr_dr_handlep = hp;
159 
160 	return (srh);
161 }
162 
/*
 * Free a handle obtained from dr_get_sr_handle().  By this point any
 * device recorded in sr_failed_dip must already have been released
 * (dr_resume_devices() does that), hence the assertion.
 */
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
	ASSERT(srh->sr_failed_dip == NULL);
	FREESTRUCT(srh, dr_sr_handle_t, 1);
}
169 
170 static int
171 dr_is_real_device(dev_info_t *dip)
172 {
173 	struct regspec *regbuf = NULL;
174 	int length = 0;
175 	int rc;
176 
177 	if (ddi_get_driver(dip) == NULL)
178 		return (0);
179 
180 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
181 		return (1);
182 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
183 		return (0);
184 
185 	/*
186 	 * now the general case
187 	 */
188 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
189 	    (caddr_t)&regbuf, &length);
190 	ASSERT(rc != DDI_PROP_NO_MEMORY);
191 	if (rc != DDI_PROP_SUCCESS) {
192 		return (0);
193 	} else {
194 		if ((length > 0) && (regbuf != NULL))
195 			kmem_free(regbuf, length);
196 		return (1);
197 	}
198 }
199 
200 static int
201 dr_is_unsafe_major(major_t major)
202 {
203 	char    *dname, **cpp;
204 	int	i, ndevs;
205 
206 	if ((dname = ddi_major_to_name(major)) == NULL) {
207 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
208 		return (0);
209 	}
210 
211 	ndevs = dr_unsafe_devs.ndevs;
212 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
213 		if (strcmp(dname, *cpp++) == 0)
214 			return (1);
215 	}
216 	return (0);
217 }
218 
219 static int
220 dr_bypass_device(char *dname)
221 {
222 	int i;
223 	char **lname;
224 
225 	if (dname == NULL)
226 		return (0);
227 
228 	/* check the bypass list */
229 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
230 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
231 			return (1);
232 	}
233 	return (0);
234 }
235 
236 static int
237 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
238 {
239 	major_t	devmajor;
240 	char	*aka, *name;
241 
242 	*buffer = *alias = 0;
243 
244 	if (dip == NULL)
245 		return (-1);
246 
247 	if ((name = ddi_get_name(dip)) == NULL)
248 		name = "<null name>";
249 
250 	aka = name;
251 
252 	if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE)
253 		aka = ddi_major_to_name(devmajor);
254 
255 	(void) strcpy(buffer, name);
256 
257 	if (strcmp(name, aka))
258 		(void) strcpy(alias, aka);
259 	else
260 		*alias = 0;
261 
262 	return (0);
263 }
264 
/*
 * Argument block handed to dr_check_dip() through the device tree walkers.
 * Any of the pointers may be NULL, in which case dr_check_dip() skips the
 * corresponding bookkeeping.
 */
struct dr_ref {
	int		*refcount;		/* sum of reference counts */
	int		*refcount_non_gldv3;	/* same, non-GLDv3 NICs only */
	uint64_t	*arr;			/* majors of unsafe drivers */
	int		*idx;			/* entries used in arr */
	int		len;			/* capacity of arr */
};
272 
273 /* ARGSUSED */
274 static int
275 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
276 {
277 	major_t		major;
278 	char		*dname;
279 	struct dr_ref	*rp = (struct dr_ref *)arg;
280 
281 	if (dip == NULL)
282 		return (DDI_WALK_CONTINUE);
283 
284 	if (!dr_is_real_device(dip))
285 		return (DDI_WALK_CONTINUE);
286 
287 	dname = ddi_binding_name(dip);
288 
289 	if (dr_bypass_device(dname))
290 		return (DDI_WALK_CONTINUE);
291 
292 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
293 		if (ref && rp->refcount) {
294 			*rp->refcount += ref;
295 			PR_QR("\n  %s (major# %d) is referenced(%u)\n", dname,
296 			    major, ref);
297 		}
298 		if (ref && rp->refcount_non_gldv3) {
299 			if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
300 				*rp->refcount_non_gldv3 += ref;
301 		}
302 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
303 			PR_QR("\n  %s (major# %d) not hotpluggable\n", dname,
304 			    major);
305 			if (rp->arr != NULL && rp->idx != NULL)
306 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
307 				    rp->len, (uint64_t)major);
308 		}
309 	}
310 	return (DDI_WALK_CONTINUE);
311 }
312 
/*
 * ddi_walk_devs() callback used by dr_suspend().  It runs dr_check_dip()
 * with a reference count of zero, so only the unsafe-driver check can have
 * an effect.
 */
static int
dr_check_unsafe_major(dev_info_t *dip, void *arg)
{
	return (dr_check_dip(dip, arg, 0));
}
318 
319 
320 /*ARGSUSED*/
321 void
322 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
323     uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
324 {
325 	struct dr_ref bref = {0};
326 
327 	if (dip == NULL)
328 		return;
329 
330 	bref.refcount = refcount;
331 	bref.refcount_non_gldv3 = refcount_non_gldv3;
332 	bref.arr = arr;
333 	bref.idx = idx;
334 	bref.len = len;
335 
336 	ASSERT(e_ddi_branch_held(dip));
337 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
338 }
339 
340 /*
341  * The "dip" argument's parent (if it exists) must be held busy.
342  */
343 static int
344 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
345 {
346 	dr_handle_t	*handle;
347 	major_t		major;
348 	char		*dname;
349 	int		circ;
350 
351 	/*
352 	 * If dip is the root node, it has no siblings and it is
353 	 * always held. If dip is not the root node, dr_suspend_devices()
354 	 * will be invoked with the parent held busy.
355 	 */
356 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
357 		char	d_name[40], d_alias[40], *d_info;
358 
359 		ndi_devi_enter(dip, &circ);
360 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
361 			ndi_devi_exit(dip, circ);
362 			return (ENXIO);
363 		}
364 		ndi_devi_exit(dip, circ);
365 
366 		if (!dr_is_real_device(dip))
367 			continue;
368 
369 		major = (major_t)-1;
370 		if ((dname = ddi_binding_name(dip)) != NULL)
371 			major = ddi_name_to_major(dname);
372 
373 		if (dr_bypass_device(dname)) {
374 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
375 			    major);
376 			continue;
377 		}
378 
379 		if (drmach_verify_sr(dip, 1)) {
380 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
381 			    major);
382 			continue;
383 		}
384 
385 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
386 			d_info = "<null>";
387 
388 		d_name[0] = 0;
389 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
390 			if (d_alias[0] != 0) {
391 				prom_printf("\tsuspending %s@%s (aka %s)\n",
392 				    d_name, d_info, d_alias);
393 			} else {
394 				prom_printf("\tsuspending %s@%s\n", d_name,
395 				    d_info);
396 			}
397 		} else {
398 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
399 		}
400 
401 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
402 			prom_printf("\tFAILED to suspend %s@%s\n",
403 			    d_name[0] ? d_name : dname, d_info);
404 
405 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
406 			    srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
407 
408 			ndi_hold_devi(dip);
409 			srh->sr_failed_dip = dip;
410 
411 			handle = srh->sr_dr_handlep;
412 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
413 			    d_name[0] ? d_name : dname, d_info);
414 
415 			return (DDI_FAILURE);
416 		}
417 	}
418 
419 	return (DDI_SUCCESS);
420 }
421 
422 static void
423 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
424 {
425 	dr_handle_t	*handle;
426 	dev_info_t	*dip, *next, *last = NULL;
427 	major_t		major;
428 	char		*bn;
429 	int		circ;
430 
431 	major = (major_t)-1;
432 
433 	/* attach in reverse device tree order */
434 	while (last != start) {
435 		dip = start;
436 		next = ddi_get_next_sibling(dip);
437 		while (next != last && dip != srh->sr_failed_dip) {
438 			dip = next;
439 			next = ddi_get_next_sibling(dip);
440 		}
441 		if (dip == srh->sr_failed_dip) {
442 			/* release hold acquired in dr_suspend_devices() */
443 			srh->sr_failed_dip = NULL;
444 			ndi_rele_devi(dip);
445 		} else if (dr_is_real_device(dip) &&
446 		    srh->sr_failed_dip == NULL) {
447 
448 			if ((bn = ddi_binding_name(dip)) != NULL) {
449 				major = ddi_name_to_major(bn);
450 			} else {
451 				bn = "<null>";
452 			}
453 			if (!dr_bypass_device(bn) &&
454 			    !drmach_verify_sr(dip, 0)) {
455 				char	d_name[40], d_alias[40], *d_info;
456 
457 				d_name[0] = 0;
458 				d_info = ddi_get_name_addr(dip);
459 				if (d_info == NULL)
460 					d_info = "<null>";
461 
462 				if (!dr_resolve_devname(dip, d_name, d_alias)) {
463 					if (d_alias[0] != 0) {
464 						prom_printf("\tresuming "
465 						    "%s@%s (aka %s)\n", d_name,
466 						    d_info, d_alias);
467 					} else {
468 						prom_printf("\tresuming "
469 						    "%s@%s\n", d_name, d_info);
470 					}
471 				} else {
472 					prom_printf("\tresuming %s@%s\n", bn,
473 					    d_info);
474 				}
475 
476 				if (devi_attach(dip, DDI_RESUME) !=
477 				    DDI_SUCCESS) {
478 					/*
479 					 * Print a console warning,
480 					 * set an e_code of ESBD_RESUME,
481 					 * and save the driver major
482 					 * number in the e_rsc.
483 					 */
484 					prom_printf("\tFAILED to resume %s@%s",
485 					    d_name[0] ? d_name : bn, d_info);
486 
487 					srh->sr_err_idx =
488 					    dr_add_int(srh->sr_err_ints,
489 					    srh->sr_err_idx, DR_MAX_ERR_INT,
490 					    (uint64_t)major);
491 
492 					handle = srh->sr_dr_handlep;
493 
494 					dr_op_err(CE_IGNORE, handle,
495 					    ESBD_RESUME, "%s@%s",
496 					    d_name[0] ? d_name : bn, d_info);
497 				}
498 			}
499 		}
500 
501 		/* Hold parent busy while walking its children */
502 		ndi_devi_enter(dip, &circ);
503 		dr_resume_devices(ddi_get_child(dip), srh);
504 		ndi_devi_exit(dip, circ);
505 		last = dip;
506 	}
507 }
508 
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 *
 * Concretely: the thread is asleep on a wait channel, has its AST flag
 * set, and carries TP_CHKPT.  dr_stop_user_threads() sets the latter two
 * on every user thread it asks to stop.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
520 
/*
 * Ask every user thread in the system to stop, then verify that they did.
 *
 * Up to DR_UTSTOP_RETRY attempts are made.  Each attempt marks all user
 * threads with TP_CHKPT and posts an AST, waits, and then re-walks the
 * thread list looking for threads that are neither stopped nor (when
 * dr_allow_blocked_threads is set) "virtually stopped" per DR_VSTOPPED().
 * On the last attempt the PID of every offending thread is recorded in
 * srh->sr_err_ints and a warning is logged.
 *
 * Returns DDI_SUCCESS if all threads stopped, or immediately if
 * dr_skip_user_threads is set.  Otherwise returns ESRCH with the DR
 * handle's h_err set to an ESBD_UTHREAD error listing the PIDs.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
	int		count;
	int		bailout;
	dr_handle_t	*handle = srh->sr_dr_handlep;
	static fn_t	f = "dr_stop_user_threads";
	kthread_id_t 	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	DR_UTSTOP_RETRY	4	/* number of stop attempts */
#define	DR_UTSTOP_WAIT	hz	/* base unit for the per-attempt wait */

	if (dr_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	srh->sr_err_idx = 0;
	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Both locks are dropped around
				 * add_one_utstop() and retaken afterwards.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/*
		 * let everything catch up; the wait grows quadratically
		 * with the attempt number and is zero on the first attempt
		 */
		utstop_timedwait(count * count * DR_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(dr_allow_blocked_threads &&
			    DR_VSTOPPED(tp))) {
				bailout = 1;
				/* only report on the final attempt */
				if (count == DR_UTSTOP_RETRY - 1) {
					/*
					 * save the pid for later reporting
					 */
					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)p->p_pid);

					cmn_err(CE_WARN, "%s: "
					    "failed to stop thread: "
					    "process=%s, pid=%d",
					    f, p->p_user.u_psargs, p->p_pid);

					PR_QR("%s: failed to stop thread: "
					    "process=%s, pid=%d, t_id=0x%p, "
					    "t_state=0x%x, t_proc_flag=0x%x, "
					    "t_schedflag=0x%x\n",
					    f, p->p_user.u_psargs, p->p_pid,
					    (void *)tp, tp->t_state,
					    tp->t_proc_flag, tp->t_schedflag);
				}

			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		/* last argument 0: the array holds PIDs, not major numbers */
		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
		    srh->sr_err_idx, 0);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
654 
655 static void
656 dr_start_user_threads(void)
657 {
658 	kthread_id_t tp;
659 
660 	mutex_enter(&pidlock);
661 
662 	/* walk all threads and release them */
663 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
664 		proc_t *p = ttoproc(tp);
665 
666 		/* skip kernel threads */
667 		if (ttoproc(tp)->p_as == &kas)
668 			continue;
669 
670 		mutex_enter(&p->p_lock);
671 		tp->t_proc_flag &= ~TP_CHKPT;
672 		mutex_exit(&p->p_lock);
673 
674 		thread_lock(tp);
675 		if (CPR_ISTOPPED(tp)) {
676 			/* back on the runq */
677 			tp->t_schedflag |= TS_RESUME;
678 			setrun_locked(tp);
679 		}
680 		thread_unlock(tp);
681 	}
682 
683 	mutex_exit(&pidlock);
684 }
685 
686 static void
687 dr_signal_user(int sig)
688 {
689 	struct proc *p;
690 
691 	mutex_enter(&pidlock);
692 
693 	for (p = practive; p != NULL; p = p->p_next) {
694 		/* only user threads */
695 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
696 		    p == proc_init || p == ttoproc(curthread))
697 			continue;
698 
699 		mutex_enter(&p->p_lock);
700 		sigtoproc(p, NULL, sig);
701 		mutex_exit(&p->p_lock);
702 	}
703 
704 	mutex_exit(&pidlock);
705 
706 	/* add a bit of delay */
707 	delay(hz);
708 }
709 
/*
 * Undo whatever dr_suspend() managed to do.
 *
 * The switch is entered at the state recorded in srh->sr_suspend_state
 * and deliberately falls through every lower state, so the steps are
 * undone in the reverse order of dr_suspend():
 *   FULL   - re-enable cyclics/preemption, restart the CPUs, drop
 *            cpu_lock (taken in dr_suspend()), drmach_resume_first()
 *   DRIVER - resume the devices and the lock manager
 *   USER   - restart the user threads
 *   BEGIN  - send SIGTHAW to user processes
 *
 * Device resume failures are reported through the DR handle's h_err as
 * an ESBD_RESUME error.
 */
void
dr_resume(dr_sr_handle_t *srh)
{
	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		dr_enable_intr(); 	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		/* any majors collected above become an ESBD_RESUME error */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	prom_printf("DR: resume COMPLETED\n");
}
784 
/*
 * Quiesce the system for a DR operation.
 *
 * Steps, in order:
 *   1. stop user threads (state DR_SRSTATE_USER);
 *   2. unless SBD_FLAG_FORCE is set, refuse to continue if any attached
 *      device is bound to a driver on the unsafe list;
 *   3. suspend the lock manager;
 *   4. suspend all drivers (state DR_SRSTATE_DRIVER);
 *   5. drmach_suspend_last(), then pause the other CPUs and stop
 *      cyclics/preemption (state DR_SRSTATE_FULL).
 *
 * On success DDI_SUCCESS is returned with cpu_lock held; the matching
 * dr_resume() call releases it.  On any failure dr_resume() is called
 * here to undo the completed steps, an error is normally left in the DR
 * handle's h_err, and a non-zero value is returned.
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	int		force;
	int		dev_errs_idx;
	uint64_t	dev_errs[DR_MAX_ERR_INT];
	int		rc = DDI_SUCCESS;

	handle = srh->sr_dr_handlep;

	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

	prom_printf("\nDR: suspending user threads...\n");
	srh->sr_suspend_state = DR_SRSTATE_USER;
	/*
	 * A failure to stop user threads is only fatal when
	 * dr_check_user_stop_result is set; otherwise we carry on.
	 */
	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    dr_check_user_stop_result) {
		dr_resume(srh);
		return (rc);
	}

	if (!force) {
		struct dr_ref drc = {0};

		prom_printf("\nDR: checking devices...\n");
		dev_errs_idx = 0;

		/* only the unsafe-major outputs are wanted here */
		drc.arr = dev_errs;
		drc.idx = &dev_errs_idx;
		drc.len = DR_MAX_ERR_INT;

		/*
		 * Since the root node can never go away, it
		 * doesn't have to be held.
		 */
		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
		if (dev_errs_idx) {
			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
			    dev_errs_idx, 1);
			dr_resume(srh);
			return (DDI_FAILURE);
		}
		PR_QR("done\n");
	} else {
		prom_printf("\nDR: dr_suspend invoked with force flag\n");
	}

#ifndef	SKIP_SYNC
	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef	SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();
#endif

	/*
	 * now suspend drivers
	 */
	prom_printf("DR: suspending drivers...\n");
	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
	srh->sr_err_idx = 0;
	/* No parent to hold busy */
	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		/* report the majors of the drivers that refused to suspend */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}
		dr_resume(srh);
		return (rc);
	}

	drmach_suspend_last();

	/*
	 * finally, grab all cpus
	 */
	srh->sr_suspend_state = DR_SRSTATE_FULL;

	/* cpu_lock stays held on return; dr_resume() drops it */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL, NULL);
	dr_stop_intr();

	return (rc);
}
880 
881 int
882 dr_pt_test_suspend(dr_handle_t *hp)
883 {
884 	dr_sr_handle_t *srh;
885 	int		err;
886 	uint_t		psmerr;
887 	static fn_t	f = "dr_pt_test_suspend";
888 
889 	PR_QR("%s...\n", f);
890 
891 	srh = dr_get_sr_handle(hp);
892 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
893 		dr_resume(srh);
894 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
895 			PR_QR("%s: error on dr_resume()", f);
896 			switch (psmerr) {
897 			case ESBD_RESUME:
898 				PR_QR("Couldn't resume devices: %s\n",
899 				    DR_GET_E_RSC(hp->h_err));
900 				break;
901 
902 			case ESBD_KTHREAD:
903 				PR_ALL("psmerr is ESBD_KTHREAD\n");
904 				break;
905 			default:
906 				PR_ALL("Resume error unknown = %d\n", psmerr);
907 				break;
908 			}
909 		}
910 	} else {
911 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
912 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
913 		switch (psmerr) {
914 		case ESBD_UNSAFE:
915 			PR_ALL("Unsafe devices (major #): %s\n",
916 			    DR_GET_E_RSC(hp->h_err));
917 			break;
918 
919 		case ESBD_RTTHREAD:
920 			PR_ALL("RT threads (PIDs): %s\n",
921 			    DR_GET_E_RSC(hp->h_err));
922 			break;
923 
924 		case ESBD_UTHREAD:
925 			PR_ALL("User threads (PIDs): %s\n",
926 			    DR_GET_E_RSC(hp->h_err));
927 			break;
928 
929 		case ESBD_SUSPEND:
930 			PR_ALL("Non-suspendable devices (major #): %s\n",
931 			    DR_GET_E_RSC(hp->h_err));
932 			break;
933 
934 		case ESBD_RESUME:
935 			PR_ALL("Could not resume devices (major #): %s\n",
936 			    DR_GET_E_RSC(hp->h_err));
937 			break;
938 
939 		case ESBD_KTHREAD:
940 			PR_ALL("psmerr is ESBD_KTHREAD\n");
941 			break;
942 
943 		case ESBD_NOERROR:
944 			PR_ALL("sbd_error_t error code not set\n");
945 			break;
946 
947 		default:
948 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
949 			break;
950 		}
951 	}
952 	dr_release_sr_handle(srh);
953 
954 	return (0);
955 }
956 
/*
 * Append "val" to the integer set stored in arr[0 .. idx - 1], whose
 * capacity is "len" entries.
 *
 * The value is dropped if the array is already full or if it is already
 * present.  Returns the resulting number of entries, or 0 if "arr" is
 * NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos;

	if (arr == NULL)
		return (0);

	/* no room left */
	if (idx >= len)
		return (idx);

	/* look for an existing copy of the value */
	for (pos = 0; pos < idx && arr[pos] != val; pos++)
		;
	if (pos < idx)
		return (idx);

	arr[idx] = val;
	return (idx + 1);
}
982 
983 /*
984  * Construct an sbd_error_t featuring a string representation of an array of
985  * integers as its e_rsc.
986  */
987 static sbd_error_t *
988 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
989 {
990 	int		i, n, buf_len, buf_idx, buf_avail;
991 	char		*dname;
992 	char		*buf;
993 	sbd_error_t	*new_sbd_err;
994 	static char	s_ellipsis[] = "...";
995 
996 	if (arr == NULL || idx <= 0)
997 		return (NULL);
998 
999 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1000 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1001 
1002 	/*
1003 	 * This is the total working area of the buffer.  It must be computed
1004 	 * as the size of 'buf', minus reserved space for the null terminator
1005 	 * and the ellipsis string.
1006 	 */
1007 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1008 
1009 	/* Construct a string representation of the array values */
1010 	for (buf_idx = 0, i = 0; i < idx; i++) {
1011 		buf_avail = buf_len - buf_idx;
1012 		if (majors) {
1013 			dname = ddi_major_to_name(arr[i]);
1014 			if (dname) {
1015 				n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1016 				    dname);
1017 			} else {
1018 				n = snprintf(&buf[buf_idx], buf_avail,
1019 				    "major %" PRIu64 ", ", arr[i]);
1020 			}
1021 		} else {
1022 			n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ",
1023 			    arr[i]);
1024 		}
1025 
1026 		/* An ellipsis gets appended when no more values fit */
1027 		if (n >= buf_avail) {
1028 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1029 			break;
1030 		}
1031 
1032 		buf_idx += n;
1033 	}
1034 
1035 	/* If all the contents fit, remove the trailing comma */
1036 	if (n < buf_avail) {
1037 		buf[--buf_idx] = '\0';
1038 		buf[--buf_idx] = '\0';
1039 	}
1040 
1041 	/* Return an sbd_error_t with the buffer and e_code */
1042 	new_sbd_err = drerr_new(1, e_code, buf);
1043 	kmem_free(buf, MAXPATHLEN);
1044 	return (new_sbd_err);
1045 }
1046