xref: /titanic_50/usr/src/uts/i86pc/io/dr/dr_quiesce.c (revision 861a91627796c35220e75654dac61e5707536dcd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * A CPR derivative specifically for starfire/starcat
29  * X86 doesn't make use of the quiesce interfaces, it's kept for simplicity.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/ddi.h>
37 #define	SUNDDI_IMPL
38 #include <sys/sunddi.h>
39 #include <sys/sunndi.h>
40 #include <sys/devctl.h>
41 #include <sys/time.h>
42 #include <sys/kmem.h>
43 #include <nfs/lm.h>
44 #include <sys/ddi_impldefs.h>
45 #include <sys/ndi_impldefs.h>
46 #include <sys/obpdefs.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/errno.h>
50 #include <sys/callb.h>
51 #include <sys/clock.h>
52 #include <sys/x_call.h>
53 #include <sys/cpuvar.h>
54 #include <sys/epm.h>
55 #include <sys/vfs.h>
56 #include <sys/promif.h>
57 #include <sys/conf.h>
58 #include <sys/cyclic.h>
59 
60 #include <sys/dr.h>
61 #include <sys/dr_util.h>
62 
/*
 * External kernel routines.  None of these three is called in the portion
 * of this file that follows; they are declared here only.
 */
extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
extern int	is_pseudo_device(dev_info_t *dip);

/* cpu_lock must be held around dr_stop_intr()/dr_enable_intr(). */
extern kmutex_t	cpu_lock;
/* Driver-name table searched by dr_is_unsafe_major(). */
extern dr_unsafe_devs_t dr_unsafe_devs;

/* Forward declarations of file-local helpers. */
static int		dr_is_real_device(dev_info_t *dip);
static int		dr_is_unsafe_major(major_t major);
static int		dr_bypass_device(char *dname);
static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
				char *alias);
static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
				int majors);
static int		dr_add_int(uint64_t *arr, int idx, int len,
				uint64_t val);

/* Suspend/resume self-test; has external linkage. */
int dr_pt_test_suspend(dr_handle_t *hp);
82 
/*
 * dr_quiesce.c interface
 * NOTE: states used internally by dr_suspend and dr_resume
 *
 * The states are ordered by how far dr_suspend() has progressed.
 * dr_resume() switches on the recorded state and falls through the
 * lower states, so it undoes only the steps that were actually reached.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* nothing stopped; resume only sends SIGTHAW */
	DR_SRSTATE_USER,	/* set before user threads are stopped */
	DR_SRSTATE_DRIVER,	/* set before drivers are suspended */
	DR_SRSTATE_FULL		/* set before CPUs are paused */
} suspend_state_t;
93 
/*
 * Per-operation suspend/resume bookkeeping, created by dr_get_sr_handle()
 * and destroyed by dr_release_sr_handle().
 */
struct dr_sr_handle {
	/* owning DR handle; errors are reported through its h_err */
	dr_handle_t		*sr_dr_handlep;
	/*
	 * Device whose DDI_SUSPEND failed.  dr_suspend_devices() takes a
	 * hold on it and dr_resume_devices() releases that hold and resets
	 * the field to NULL.
	 */
	dev_info_t		*sr_failed_dip;
	/* how far dr_suspend() got; drives dr_resume() */
	suspend_state_t		sr_suspend_state;
	/* not referenced in this file as shown */
	uint_t			sr_flags;
	/* majors or pids collected with dr_add_int() for error reporting */
	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
	/* number of valid entries in sr_err_ints */
	int			sr_err_idx;
};

/* NOTE(review): presumably a bit for sr_flags; unused in this file - confirm */
#define	SR_FLAG_WATCHDOG	0x1
104 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * List of drivers to bypass when performing a suspend.
 *
 * The list must end with an empty string: dr_bypass_device() stops
 * scanning at the first entry whose first character is '\0'.  As shipped
 * the list holds only that terminator, so no driver is bypassed.
 */
static char *dr_bypass_list[] = {
	""
};
113 
114 
/*
 * While SKIP_SYNC is defined, the vfs_sync() and sync() calls in
 * dr_suspend() are compiled out.
 */
#define		SKIP_SYNC	/* bypass sync ops in dr_suspend */

/*
 * dr_skip_user_threads is used to control if user threads should
 * be suspended.  If dr_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, dr_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and dr_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of dr_check_user_stop_result
 * and dr_allow_blocked_threads, even though it might not make much
 * sense to not allow blocked threads when we don't even check stop
 * result.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */

/* Not referenced in this file as shown. */
#define	DR_CPU_LOOP_MSEC	1000
134 
/*
 * Last step of dr_suspend(): disable kernel preemption on the calling
 * thread and suspend the cyclic subsystem.  The caller must hold cpu_lock.
 * Undone by dr_enable_intr().
 */
static void
dr_stop_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();
	cyclic_suspend();
}
143 
/*
 * Reverse dr_stop_intr(): resume the cyclic subsystem, then re-enable
 * kernel preemption (the opposite order to dr_stop_intr()).  The caller
 * must hold cpu_lock.
 */
static void
dr_enable_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cyclic_resume();
	kpreempt_enable();
}
152 
/*
 * Allocate a suspend/resume handle bound to the DR handle hp.  The result
 * must be released with dr_release_sr_handle().
 *
 * NOTE(review): dr_release_sr_handle() asserts sr_failed_dip == NULL and
 * no other field is initialized here, so GETSTRUCT presumably returns
 * zero-filled memory - confirm against its definition.
 */
dr_sr_handle_t *
dr_get_sr_handle(dr_handle_t *hp)
{
	dr_sr_handle_t *srh;

	srh = GETSTRUCT(dr_sr_handle_t, 1);
	srh->sr_dr_handlep = hp;

	return (srh);
}
163 
/*
 * Free a handle obtained from dr_get_sr_handle().  By this point any hold
 * on a failed device must already have been dropped by
 * dr_resume_devices(), which is what the assertion checks.
 */
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
	ASSERT(srh->sr_failed_dip == NULL);
	FREESTRUCT(srh, dr_sr_handle_t, 1);
}
170 
171 static int
172 dr_is_real_device(dev_info_t *dip)
173 {
174 	struct regspec *regbuf = NULL;
175 	int length = 0;
176 	int rc;
177 
178 	if (ddi_get_driver(dip) == NULL)
179 		return (0);
180 
181 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
182 		return (1);
183 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
184 		return (0);
185 
186 	/*
187 	 * now the general case
188 	 */
189 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
190 	    (caddr_t)&regbuf, &length);
191 	ASSERT(rc != DDI_PROP_NO_MEMORY);
192 	if (rc != DDI_PROP_SUCCESS) {
193 		return (0);
194 	} else {
195 		if ((length > 0) && (regbuf != NULL))
196 			kmem_free(regbuf, length);
197 		return (1);
198 	}
199 }
200 
201 static int
202 dr_is_unsafe_major(major_t major)
203 {
204 	char    *dname, **cpp;
205 	int	i, ndevs;
206 
207 	if ((dname = ddi_major_to_name(major)) == NULL) {
208 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
209 		return (0);
210 	}
211 
212 	ndevs = dr_unsafe_devs.ndevs;
213 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
214 		if (strcmp(dname, *cpp++) == 0)
215 			return (1);
216 	}
217 	return (0);
218 }
219 
220 static int
221 dr_bypass_device(char *dname)
222 {
223 	int i;
224 	char **lname;
225 	/* check the bypass list */
226 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
227 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
228 			return (1);
229 	}
230 	return (0);
231 }
232 
233 static int
234 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
235 {
236 	major_t	devmajor;
237 	char	*aka, *name;
238 
239 	*buffer = *alias = 0;
240 
241 	if (dip == NULL)
242 		return (-1);
243 
244 	if ((name = ddi_get_name(dip)) == NULL)
245 		name = "<null name>";
246 
247 	aka = name;
248 
249 	if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE)
250 		aka = ddi_major_to_name(devmajor);
251 
252 	(void) strcpy(buffer, name);
253 
254 	if (strcmp(name, aka))
255 		(void) strcpy(alias, aka);
256 	else
257 		*alias = 0;
258 
259 	return (0);
260 }
261 
/*
 * Argument block handed to dr_check_dip() by the device-tree walkers.
 * Any of the pointers may be NULL, in which case dr_check_dip() skips the
 * corresponding accounting.
 */
struct dr_ref {
	int		*refcount;	/* running total of references */
	int		*refcount_non_gldv3; /* refs on non-GLDv3 net drivers */
	uint64_t	*arr;		/* majors of unsafe attached drivers */
	int		*idx;		/* number of entries used in arr */
	int		len;		/* capacity of arr */
};
269 
270 /* ARGSUSED */
271 static int
272 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
273 {
274 	major_t		major;
275 	char		*dname;
276 	struct dr_ref	*rp = (struct dr_ref *)arg;
277 
278 	if (dip == NULL)
279 		return (DDI_WALK_CONTINUE);
280 
281 	if (!dr_is_real_device(dip))
282 		return (DDI_WALK_CONTINUE);
283 
284 	dname = ddi_binding_name(dip);
285 
286 	if (dr_bypass_device(dname))
287 		return (DDI_WALK_CONTINUE);
288 
289 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
290 		if (ref && rp->refcount) {
291 			*rp->refcount += ref;
292 			PR_QR("\n  %s (major# %d) is referenced(%u)\n", dname,
293 			    major, ref);
294 		}
295 		if (ref && rp->refcount_non_gldv3) {
296 			if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
297 				*rp->refcount_non_gldv3 += ref;
298 		}
299 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
300 			PR_QR("\n  %s (major# %d) not hotpluggable\n", dname,
301 			    major);
302 			if (rp->arr != NULL && rp->idx != NULL)
303 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
304 				    rp->len, (uint64_t)major);
305 		}
306 	}
307 	return (DDI_WALK_CONTINUE);
308 }
309 
/*
 * ddi_walk_devs() callback used by dr_suspend(): run dr_check_dip() with
 * a reference count of zero, so only the unsafe-driver check has any
 * effect.
 */
static int
dr_check_unsafe_major(dev_info_t *dip, void *arg)
{
	return (dr_check_dip(dip, arg, 0));
}
315 
316 
317 /*ARGSUSED*/
318 void
319 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
320     uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
321 {
322 	struct dr_ref bref = {0};
323 
324 	if (dip == NULL)
325 		return;
326 
327 	bref.refcount = refcount;
328 	bref.refcount_non_gldv3 = refcount_non_gldv3;
329 	bref.arr = arr;
330 	bref.idx = idx;
331 	bref.len = len;
332 
333 	ASSERT(e_ddi_branch_held(dip));
334 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
335 }
336 
337 /*
338  * The "dip" argument's parent (if it exists) must be held busy.
339  */
340 static int
341 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
342 {
343 	dr_handle_t	*handle;
344 	major_t		major;
345 	char		*dname;
346 	int		circ;
347 
348 	/*
349 	 * If dip is the root node, it has no siblings and it is
350 	 * always held. If dip is not the root node, dr_suspend_devices()
351 	 * will be invoked with the parent held busy.
352 	 */
353 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
354 		char	d_name[40], d_alias[40], *d_info;
355 
356 		ndi_devi_enter(dip, &circ);
357 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
358 			ndi_devi_exit(dip, circ);
359 			return (ENXIO);
360 		}
361 		ndi_devi_exit(dip, circ);
362 
363 		if (!dr_is_real_device(dip))
364 			continue;
365 
366 		major = (major_t)-1;
367 		if ((dname = ddi_binding_name(dip)) != NULL)
368 			major = ddi_name_to_major(dname);
369 
370 		if (dr_bypass_device(dname)) {
371 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
372 			    major);
373 			continue;
374 		}
375 
376 		if (drmach_verify_sr(dip, 1)) {
377 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
378 			    major);
379 			continue;
380 		}
381 
382 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
383 			d_info = "<null>";
384 
385 		d_name[0] = 0;
386 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
387 			if (d_alias[0] != 0) {
388 				prom_printf("\tsuspending %s@%s (aka %s)\n",
389 				    d_name, d_info, d_alias);
390 			} else {
391 				prom_printf("\tsuspending %s@%s\n", d_name,
392 				    d_info);
393 			}
394 		} else {
395 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
396 		}
397 
398 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
399 			prom_printf("\tFAILED to suspend %s@%s\n",
400 			    d_name[0] ? d_name : dname, d_info);
401 
402 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
403 			    srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
404 
405 			ndi_hold_devi(dip);
406 			srh->sr_failed_dip = dip;
407 
408 			handle = srh->sr_dr_handlep;
409 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
410 			    d_name[0] ? d_name : dname, d_info);
411 
412 			return (DDI_FAILURE);
413 		}
414 	}
415 
416 	return (DDI_SUCCESS);
417 }
418 
419 static void
420 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
421 {
422 	dr_handle_t	*handle;
423 	dev_info_t	*dip, *next, *last = NULL;
424 	major_t		major;
425 	char		*bn;
426 	int		circ;
427 
428 	major = (major_t)-1;
429 
430 	/* attach in reverse device tree order */
431 	while (last != start) {
432 		dip = start;
433 		next = ddi_get_next_sibling(dip);
434 		while (next != last && dip != srh->sr_failed_dip) {
435 			dip = next;
436 			next = ddi_get_next_sibling(dip);
437 		}
438 		if (dip == srh->sr_failed_dip) {
439 			/* release hold acquired in dr_suspend_devices() */
440 			srh->sr_failed_dip = NULL;
441 			ndi_rele_devi(dip);
442 		} else if (dr_is_real_device(dip) &&
443 		    srh->sr_failed_dip == NULL) {
444 
445 			if ((bn = ddi_binding_name(dip)) != NULL) {
446 				major = ddi_name_to_major(bn);
447 			} else {
448 				bn = "<null>";
449 			}
450 			if (!dr_bypass_device(bn) &&
451 			    !drmach_verify_sr(dip, 0)) {
452 				char	d_name[40], d_alias[40], *d_info;
453 
454 				d_name[0] = 0;
455 				d_info = ddi_get_name_addr(dip);
456 				if (d_info == NULL)
457 					d_info = "<null>";
458 
459 				if (!dr_resolve_devname(dip, d_name, d_alias)) {
460 					if (d_alias[0] != 0) {
461 						prom_printf("\tresuming "
462 						    "%s@%s (aka %s)\n", d_name,
463 						    d_info, d_alias);
464 					} else {
465 						prom_printf("\tresuming "
466 						    "%s@%s\n", d_name, d_info);
467 					}
468 				} else {
469 					prom_printf("\tresuming %s@%s\n", bn,
470 					    d_info);
471 				}
472 
473 				if (devi_attach(dip, DDI_RESUME) !=
474 				    DDI_SUCCESS) {
475 					/*
476 					 * Print a console warning,
477 					 * set an e_code of ESBD_RESUME,
478 					 * and save the driver major
479 					 * number in the e_rsc.
480 					 */
481 					prom_printf("\tFAILED to resume %s@%s",
482 					    d_name[0] ? d_name : bn, d_info);
483 
484 					srh->sr_err_idx =
485 					    dr_add_int(srh->sr_err_ints,
486 					    srh->sr_err_idx, DR_MAX_ERR_INT,
487 					    (uint64_t)major);
488 
489 					handle = srh->sr_dr_handlep;
490 
491 					dr_op_err(CE_IGNORE, handle,
492 					    ESBD_RESUME, "%s@%s",
493 					    d_name[0] ? d_name : bn, d_info);
494 				}
495 			}
496 		}
497 
498 		/* Hold parent busy while walking its children */
499 		ndi_devi_enter(dip, &circ);
500 		dr_resume_devices(ddi_get_child(dip), srh);
501 		ndi_devi_exit(dip, circ);
502 		last = dip;
503 	}
504 }
505 
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 *
 * Concretely: the thread is asleep (TS_SLEEP) on a wait channel, has its
 * AST flag set, and carries TP_CHKPT in t_proc_flag.
 * dr_stop_user_threads() evaluates this with the thread lock held.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
517 
/*
 * Ask every user thread to stop and wait for them to do so.
 *
 * Up to DR_UTSTOP_RETRY rounds are made.  Each round:
 *   1. walks the thread list under pidlock and marks every user thread
 *      with TP_CHKPT, posts an AST and kicks it if it is wakeable, waiting
 *      or running on another CPU (already-stopped threads merely lose
 *      TS_RESUME);
 *   2. waits count^2 * DR_UTSTOP_WAIT ticks (so no wait in round 0);
 *   3. walks the list again and checks that each user thread is stopped,
 *      or - if dr_allow_blocked_threads is set - "virtually stopped"
 *      (DR_VSTOPPED).
 * Threads of kernel processes (p_as == &kas) and zombies are skipped.
 *
 * On the final round the pid of every thread that refused to stop is
 * recorded in srh->sr_err_ints and a warning is logged.
 *
 * Returns DDI_SUCCESS if all threads stopped or dr_skip_user_threads is
 * set; otherwise posts an ESBD_UTHREAD error listing the pids on the DR
 * handle and returns ESRCH.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
	int		count;
	int		bailout;
	dr_handle_t	*handle = srh->sr_dr_handlep;
	static fn_t	f = "dr_stop_user_threads";
	kthread_id_t 	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	DR_UTSTOP_RETRY	4
#define	DR_UTSTOP_WAIT	hz

	if (dr_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	srh->sr_err_idx = 0;
	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Both locks are dropped around
				 * add_one_utstop() and retaken afterwards.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/* let everything catch up */
		utstop_timedwait(count * count * DR_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(dr_allow_blocked_threads &&
			    DR_VSTOPPED(tp))) {
				bailout = 1;
				/* only the last round reports offenders */
				if (count == DR_UTSTOP_RETRY - 1) {
					/*
					 * save the pid for later reporting
					 */
					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)p->p_pid);

					cmn_err(CE_WARN, "%s: "
					    "failed to stop thread: "
					    "process=%s, pid=%d",
					    f, p->p_user.u_psargs, p->p_pid);

					PR_QR("%s: failed to stop thread: "
					    "process=%s, pid=%d, t_id=0x%p, "
					    "t_state=0x%x, t_proc_flag=0x%x, "
					    "t_schedflag=0x%x\n",
					    f, p->p_user.u_psargs, p->p_pid,
					    (void *)tp, tp->t_state,
					    tp->t_proc_flag, tp->t_schedflag);
				}

			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
		    srh->sr_err_idx, 0);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
651 
652 static void
653 dr_start_user_threads(void)
654 {
655 	kthread_id_t tp;
656 
657 	mutex_enter(&pidlock);
658 
659 	/* walk all threads and release them */
660 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
661 		proc_t *p = ttoproc(tp);
662 
663 		/* skip kernel threads */
664 		if (ttoproc(tp)->p_as == &kas)
665 			continue;
666 
667 		mutex_enter(&p->p_lock);
668 		tp->t_proc_flag &= ~TP_CHKPT;
669 		mutex_exit(&p->p_lock);
670 
671 		thread_lock(tp);
672 		if (CPR_ISTOPPED(tp)) {
673 			/* back on the runq */
674 			tp->t_schedflag |= TS_RESUME;
675 			setrun_locked(tp);
676 		}
677 		thread_unlock(tp);
678 	}
679 
680 	mutex_exit(&pidlock);
681 }
682 
683 static void
684 dr_signal_user(int sig)
685 {
686 	struct proc *p;
687 
688 	mutex_enter(&pidlock);
689 
690 	for (p = practive; p != NULL; p = p->p_next) {
691 		/* only user threads */
692 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
693 		    p == proc_init || p == ttoproc(curthread))
694 			continue;
695 
696 		mutex_enter(&p->p_lock);
697 		sigtoproc(p, NULL, sig);
698 		mutex_exit(&p->p_lock);
699 	}
700 
701 	mutex_exit(&pidlock);
702 
703 	/* add a bit of delay */
704 	delay(hz);
705 }
706 
/*
 * Undo dr_suspend(), starting from whatever stage it reached.
 *
 * srh->sr_suspend_state selects the entry point and every case falls
 * through to the ones below it, so the steps run in the reverse of the
 * suspend order:
 *   FULL:   mark the TOD as resumed, re-enable cyclics/preemption,
 *           restart the CPUs, drop cpu_lock (taken by dr_suspend()), and
 *           call drmach_resume_first();
 *   DRIVER: resume all drivers (failures are collected into an
 *           ESBD_RESUME error on the DR handle) and the lock manager;
 *   USER:   restart user threads unless dr_skip_user_threads is set;
 *   BEGIN:  send SIGTHAW to user processes.
 * Finally device tree changes are allowed again.
 */
void
dr_resume(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;

	handle = srh->sr_dr_handlep;

	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		dr_enable_intr(); 	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		/* report the majors of any drivers that failed to resume */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	/* pairs with i_ndi_block_device_tree_changes() in dr_suspend() */
	i_ndi_allow_device_tree_changes(handle->h_ndi);

	prom_printf("DR: resume COMPLETED\n");
}
787 
/*
 * Quiesce the system for a DR operation.
 *
 * Steps, in order (srh->sr_suspend_state records the progress so that
 * dr_resume() can unwind exactly what was done):
 *   1. block device tree changes;
 *   2. stop user threads (a failure is ignored when
 *      dr_check_user_stop_result is clear);
 *   3. unless SBD_FLAG_FORCE is set, refuse with ESBD_UNSAFE if any
 *      attached driver is on the unsafe list;
 *   4. suspend the lock manager;
 *   5. suspend all drivers;
 *   6. call drmach_suspend_last(), take cpu_lock, pause the CPUs and stop
 *      cyclics/preemption.
 *
 * On success returns DDI_SUCCESS with cpu_lock held; the caller must call
 * dr_resume(), which releases it.  On any failure dr_resume() has already
 * been called here to unwind, an error may have been posted on the DR
 * handle, and a nonzero value is returned.
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	int		force;
	int		dev_errs_idx;
	uint64_t	dev_errs[DR_MAX_ERR_INT];
	int		rc = DDI_SUCCESS;

	handle = srh->sr_dr_handlep;

	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

	i_ndi_block_device_tree_changes(&handle->h_ndi);

	prom_printf("\nDR: suspending user threads...\n");
	srh->sr_suspend_state = DR_SRSTATE_USER;
	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    dr_check_user_stop_result) {
		dr_resume(srh);
		return (rc);
	}

	if (!force) {
		struct dr_ref drc = {0};

		prom_printf("\nDR: checking devices...\n");
		dev_errs_idx = 0;

		/* only the unsafe-major list is collected here */
		drc.arr = dev_errs;
		drc.idx = &dev_errs_idx;
		drc.len = DR_MAX_ERR_INT;

		/*
		 * Since the root node can never go away, it
		 * doesn't have to be held.
		 */
		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
		if (dev_errs_idx) {
			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
			    dev_errs_idx, 1);
			dr_resume(srh);
			return (DDI_FAILURE);
		}
		PR_QR("done\n");
	} else {
		prom_printf("\nDR: dr_suspend invoked with force flag\n");
	}

#ifndef	SKIP_SYNC
	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef	SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();
#endif

	/*
	 * now suspend drivers
	 */
	prom_printf("DR: suspending drivers...\n");
	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
	srh->sr_err_idx = 0;
	/* No parent to hold busy */
	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}
		dr_resume(srh);
		return (rc);
	}

	drmach_suspend_last();

	/*
	 * finally, grab all cpus
	 */
	srh->sr_suspend_state = DR_SRSTATE_FULL;

	/* cpu_lock stays held on return; dr_resume() drops it */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL);
	dr_stop_intr();

	return (rc);
}
885 
886 int
887 dr_pt_test_suspend(dr_handle_t *hp)
888 {
889 	dr_sr_handle_t *srh;
890 	int		err;
891 	uint_t		psmerr;
892 	static fn_t	f = "dr_pt_test_suspend";
893 
894 	PR_QR("%s...\n", f);
895 
896 	srh = dr_get_sr_handle(hp);
897 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
898 		dr_resume(srh);
899 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
900 			PR_QR("%s: error on dr_resume()", f);
901 			switch (psmerr) {
902 			case ESBD_RESUME:
903 				PR_QR("Couldn't resume devices: %s\n",
904 				    DR_GET_E_RSC(hp->h_err));
905 				break;
906 
907 			case ESBD_KTHREAD:
908 				PR_ALL("psmerr is ESBD_KTHREAD\n");
909 				break;
910 			default:
911 				PR_ALL("Resume error unknown = %d\n", psmerr);
912 				break;
913 			}
914 		}
915 	} else {
916 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
917 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
918 		switch (psmerr) {
919 		case ESBD_UNSAFE:
920 			PR_ALL("Unsafe devices (major #): %s\n",
921 			    DR_GET_E_RSC(hp->h_err));
922 			break;
923 
924 		case ESBD_RTTHREAD:
925 			PR_ALL("RT threads (PIDs): %s\n",
926 			    DR_GET_E_RSC(hp->h_err));
927 			break;
928 
929 		case ESBD_UTHREAD:
930 			PR_ALL("User threads (PIDs): %s\n",
931 			    DR_GET_E_RSC(hp->h_err));
932 			break;
933 
934 		case ESBD_SUSPEND:
935 			PR_ALL("Non-suspendable devices (major #): %s\n",
936 			    DR_GET_E_RSC(hp->h_err));
937 			break;
938 
939 		case ESBD_RESUME:
940 			PR_ALL("Could not resume devices (major #): %s\n",
941 			    DR_GET_E_RSC(hp->h_err));
942 			break;
943 
944 		case ESBD_KTHREAD:
945 			PR_ALL("psmerr is ESBD_KTHREAD\n");
946 			break;
947 
948 		case ESBD_NOERROR:
949 			PR_ALL("sbd_error_t error code not set\n");
950 			break;
951 
952 		default:
953 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
954 			break;
955 		}
956 	}
957 	dr_release_sr_handle(srh);
958 
959 	return (0);
960 }
961 
/*
 * Append val to a set kept in the first idx slots of arr.
 *
 *   arr - storage for the set; NULL yields a return of 0
 *   idx - number of entries currently stored
 *   len - capacity of arr
 *   val - value to add
 *
 * Nothing is stored when the array is already full (idx >= len) or when
 * val is already among the first idx entries.  Returns the resulting
 * number of entries.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos;

	if (arr == NULL)
		return (0);

	/* no room left: leave the array as it is */
	if (idx >= len)
		return (idx);

	/* ignore values that are already recorded */
	pos = 0;
	while (pos < idx) {
		if (arr[pos] == val)
			return (idx);
		pos++;
	}

	arr[idx] = val;

	return (idx + 1);
}
987 
988 /*
989  * Construct an sbd_error_t featuring a string representation of an array of
990  * integers as its e_rsc.
991  */
992 static sbd_error_t *
993 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
994 {
995 	int		i, n, buf_len, buf_idx, buf_avail;
996 	char		*dname;
997 	char		*buf;
998 	sbd_error_t	*new_sbd_err;
999 	static char	s_ellipsis[] = "...";
1000 
1001 	if (arr == NULL || idx <= 0)
1002 		return (NULL);
1003 
1004 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1005 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1006 
1007 	/*
1008 	 * This is the total working area of the buffer.  It must be computed
1009 	 * as the size of 'buf', minus reserved space for the null terminator
1010 	 * and the ellipsis string.
1011 	 */
1012 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1013 
1014 	/* Construct a string representation of the array values */
1015 	for (buf_idx = 0, i = 0; i < idx; i++) {
1016 		buf_avail = buf_len - buf_idx;
1017 		if (majors) {
1018 			dname = ddi_major_to_name(arr[i]);
1019 			if (dname) {
1020 				n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1021 				    dname);
1022 			} else {
1023 				n = snprintf(&buf[buf_idx], buf_avail,
1024 				    "major %" PRIu64 ", ", arr[i]);
1025 			}
1026 		} else {
1027 			n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ",
1028 			    arr[i]);
1029 		}
1030 
1031 		/* An ellipsis gets appended when no more values fit */
1032 		if (n >= buf_avail) {
1033 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1034 			break;
1035 		}
1036 
1037 		buf_idx += n;
1038 	}
1039 
1040 	/* If all the contents fit, remove the trailing comma */
1041 	if (n < buf_avail) {
1042 		buf[--buf_idx] = '\0';
1043 		buf[--buf_idx] = '\0';
1044 	}
1045 
1046 	/* Return an sbd_error_t with the buffer and e_code */
1047 	new_sbd_err = drerr_new(1, e_code, buf);
1048 	kmem_free(buf, MAXPATHLEN);
1049 	return (new_sbd_err);
1050 }
1051