xref: /illumos-gate/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision 560f878bce5cdf0661659001415019ca5c8a01b4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * A CPR derivative specifically for starfire/starcat
31  */
32 
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/ddi.h>
38 #define	SUNDDI_IMPL
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/devctl.h>
42 #include <sys/time.h>
43 #include <sys/kmem.h>
44 #include <nfs/lm.h>
45 #include <sys/ddi_impldefs.h>
46 #include <sys/ndi_impldefs.h>
47 #include <sys/obpdefs.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/errno.h>
51 #include <sys/callb.h>
52 #include <sys/clock.h>
53 #include <sys/x_call.h>
54 #include <sys/cpuvar.h>
55 #include <sys/epm.h>
56 #include <sys/vfs.h>
57 
58 #include <sys/cpu_sgnblk_defs.h>
59 #include <sys/dr.h>
60 #include <sys/dr_util.h>
61 
62 #include <sys/promif.h>
63 #include <sys/conf.h>
64 #include <sys/cyclic.h>
65 
66 extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
67 extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
68 extern int	is_pseudo_device(dev_info_t *dip);
69 
70 extern kmutex_t	cpu_lock;
71 extern dr_unsafe_devs_t dr_unsafe_devs;
72 
73 static int		dr_is_real_device(dev_info_t *dip);
74 static int		dr_is_unsafe_major(major_t major);
75 static int		dr_bypass_device(char *dname);
76 static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
77 static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
78 				char *alias);
79 static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
80 				int majors);
81 static int		dr_add_int(uint64_t *arr, int idx, int len,
82 				uint64_t val);
83 
84 int dr_pt_test_suspend(dr_handle_t *hp);
85 
/*
 * dr_quiesce.c interface
 * NOTE: states used internally by dr_suspend and dr_resume
 *
 * dr_suspend() advances sr_suspend_state as each stage is entered;
 * dr_resume() uses the recorded state to decide which stages must be
 * undone.  The numeric order matters: dr_resume() compares the state
 * against DR_SRSTATE_FULL.
 */
typedef enum dr_suspend_state {
	/* initial value (0); dr_resume() only sends SIGTHAW */
	DR_SRSTATE_BEGIN = 0,
	/* set before user threads are stopped */
	DR_SRSTATE_USER,
	/* set before device drivers are suspended */
	DR_SRSTATE_DRIVER,
	/* set after drmach_suspend_last(), before the cpus are paused */
	DR_SRSTATE_FULL
} suspend_state_t;
96 
97 struct dr_sr_handle {
98 	dr_handle_t		*sr_dr_handlep;
99 	dev_info_t		*sr_failed_dip;
100 	suspend_state_t		sr_suspend_state;
101 	uint_t			sr_flags;
102 	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
103 	int			sr_err_idx;
104 };
105 
106 #define	SR_FLAG_WATCHDOG	0x1
107 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * List of drivers to bypass when performing a suspend.
 *
 * The list is terminated by an empty string; dr_bypass_device() stops
 * scanning at the first empty entry, so the terminator must stay last.
 */
static char *dr_bypass_list[] = { "" };
116 
117 
#define	SKIP_SYNC	/* bypass sync ops in dr_suspend */

/*
 * Tunables governing how dr_suspend() treats user threads.
 *
 * dr_skip_user_threads:	when true, user threads are not suspended
 *				at all and the other two flags are unused.
 * dr_check_user_stop_result:	when true, a failure to stop user threads
 *				aborts the suspend.
 * dr_allow_blocked_threads:	when true, a thread that is blocked in the
 *				kernel with a stop pending (see
 *				DR_VSTOPPED) counts as stopped.
 *
 * All combinations of dr_check_user_stop_result and
 * dr_allow_blocked_threads are permitted, even though it might not make
 * much sense to disallow blocked threads when the stop result is not
 * even checked.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */

#define	DR_CPU_LOOP_MSEC	1000
137 
138 static void
139 dr_stop_intr(void)
140 {
141 	ASSERT(MUTEX_HELD(&cpu_lock));
142 
143 	kpreempt_disable();
144 	cyclic_suspend();
145 }
146 
147 static void
148 dr_enable_intr(void)
149 {
150 	ASSERT(MUTEX_HELD(&cpu_lock));
151 
152 	cyclic_resume();
153 	kpreempt_enable();
154 }
155 
156 dr_sr_handle_t *
157 dr_get_sr_handle(dr_handle_t *hp)
158 {
159 	dr_sr_handle_t *srh;
160 
161 	srh = GETSTRUCT(dr_sr_handle_t, 1);
162 	srh->sr_dr_handlep = hp;
163 
164 	return (srh);
165 }
166 
167 void
168 dr_release_sr_handle(dr_sr_handle_t *srh)
169 {
170 	ASSERT(srh->sr_failed_dip == NULL);
171 	FREESTRUCT(srh, dr_sr_handle_t, 1);
172 }
173 
174 static int
175 dr_is_real_device(dev_info_t *dip)
176 {
177 	struct regspec *regbuf = NULL;
178 	int length = 0;
179 	int rc;
180 
181 	if (ddi_get_driver(dip) == NULL)
182 		return (0);
183 
184 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
185 		return (1);
186 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
187 		return (0);
188 
189 	/*
190 	 * now the general case
191 	 */
192 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
193 		(caddr_t)&regbuf, &length);
194 	ASSERT(rc != DDI_PROP_NO_MEMORY);
195 	if (rc != DDI_PROP_SUCCESS) {
196 		return (0);
197 	} else {
198 		if ((length > 0) && (regbuf != NULL))
199 			kmem_free(regbuf, length);
200 		return (1);
201 	}
202 }
203 
204 static int
205 dr_is_unsafe_major(major_t major)
206 {
207 	char    *dname, **cpp;
208 	int	i, ndevs;
209 
210 	if ((dname = ddi_major_to_name(major)) == NULL) {
211 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
212 		return (0);
213 	}
214 
215 	ndevs = dr_unsafe_devs.ndevs;
216 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
217 		if (strcmp(dname, *cpp++) == 0)
218 			return (1);
219 	}
220 	return (0);
221 }
222 
223 static int
224 dr_bypass_device(char *dname)
225 {
226 	int i;
227 	char **lname;
228 	/* check the bypass list */
229 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
230 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
231 			return (1);
232 	}
233 	return (0);
234 }
235 
236 static int
237 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
238 {
239 	major_t	devmajor;
240 	char	*aka, *name;
241 
242 	*buffer = *alias = 0;
243 
244 	if (dip == NULL)
245 		return (-1);
246 
247 	if ((name = ddi_get_name(dip)) == NULL)
248 		name = "<null name>";
249 
250 	aka = name;
251 
252 	if ((devmajor = ddi_name_to_major(aka)) != -1)
253 		aka = ddi_major_to_name(devmajor);
254 
255 	strcpy(buffer, name);
256 
257 	if (strcmp(name, aka))
258 		strcpy(alias, aka);
259 	else
260 		*alias = 0;
261 
262 	return (0);
263 }
264 
265 struct dr_ref {
266 	int		*refcount;
267 	uint64_t	*arr;
268 	int		*idx;
269 	int		len;
270 };
271 
272 /* ARGSUSED */
273 static int
274 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
275 {
276 	major_t		major;
277 	char		*dname;
278 	struct dr_ref	*rp = (struct dr_ref *)arg;
279 
280 	if (dip == NULL)
281 		return (DDI_WALK_CONTINUE);
282 
283 	if (!dr_is_real_device(dip))
284 		return (DDI_WALK_CONTINUE);
285 
286 	dname = ddi_binding_name(dip);
287 
288 	if (dr_bypass_device(dname))
289 		return (DDI_WALK_CONTINUE);
290 
291 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
292 		if (ref && rp->refcount) {
293 			*rp->refcount += ref;
294 			PR_QR("\n  %s (major# %d) is referenced(%u)\n",
295 				dname, major, ref);
296 		}
297 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
298 			PR_QR("\n  %s (major# %d) not hotpluggable\n",
299 				dname, major);
300 			if (rp->arr != NULL && rp->idx != NULL)
301 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
302 					rp->len, (uint64_t)major);
303 		}
304 	}
305 	return (DDI_WALK_CONTINUE);
306 }
307 
308 static int
309 dr_check_unsafe_major(dev_info_t *dip, void *arg)
310 {
311 	return (dr_check_dip(dip, arg, 0));
312 }
313 
314 
315 /*ARGSUSED*/
316 void
317 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
318     uint64_t *arr, int *idx, int len)
319 {
320 	struct dr_ref bref = {0};
321 
322 	if (dip == NULL)
323 		return;
324 
325 	bref.refcount = refcount;
326 	bref.arr = arr;
327 	bref.idx = idx;
328 	bref.len = len;
329 
330 	ASSERT(e_ddi_branch_held(dip));
331 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
332 }
333 
334 /*
335  * The "dip" argument's parent (if it exists) must be held busy.
336  */
337 static int
338 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
339 {
340 	dr_handle_t	*handle;
341 	major_t		major;
342 	char		*dname;
343 	int		circ;
344 
345 	/*
346 	 * If dip is the root node, it has no siblings and it is
347 	 * always held. If dip is not the root node, dr_suspend_devices()
348 	 * will be invoked with the parent held busy.
349 	 */
350 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
351 		char	d_name[40], d_alias[40], *d_info;
352 
353 		ndi_devi_enter(dip, &circ);
354 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
355 			ndi_devi_exit(dip, circ);
356 			return (ENXIO);
357 		}
358 		ndi_devi_exit(dip, circ);
359 
360 		if (!dr_is_real_device(dip))
361 			continue;
362 
363 		major = (major_t)-1;
364 		if ((dname = ddi_binding_name(dip)) != NULL)
365 			major = ddi_name_to_major(dname);
366 
367 		if (dr_bypass_device(dname)) {
368 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
369 				major);
370 			continue;
371 		}
372 
373 		if (drmach_verify_sr(dip, 1)) {
374 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
375 				major);
376 			continue;
377 		}
378 
379 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
380 			d_info = "<null>";
381 
382 		d_name[0] = 0;
383 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
384 			if (d_alias[0] != 0) {
385 				prom_printf("\tsuspending %s@%s (aka %s)\n",
386 					d_name, d_info, d_alias);
387 			} else {
388 				prom_printf("\tsuspending %s@%s\n",
389 					d_name, d_info);
390 			}
391 		} else {
392 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
393 		}
394 
395 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
396 			prom_printf("\tFAILED to suspend %s@%s\n",
397 				d_name[0] ? d_name : dname, d_info);
398 
399 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
400 				srh->sr_err_idx, DR_MAX_ERR_INT,
401 				(uint64_t)major);
402 
403 			ndi_hold_devi(dip);
404 			srh->sr_failed_dip = dip;
405 
406 			handle = srh->sr_dr_handlep;
407 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
408 				d_name[0] ? d_name : dname, d_info);
409 
410 			return (DDI_FAILURE);
411 		}
412 	}
413 
414 	return (DDI_SUCCESS);
415 }
416 
417 static void
418 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
419 {
420 	dr_handle_t	*handle;
421 	dev_info_t	*dip, *next, *last = NULL;
422 	major_t		major;
423 	char		*bn;
424 	int		circ;
425 
426 	major = (major_t)-1;
427 
428 	/* attach in reverse device tree order */
429 	while (last != start) {
430 		dip = start;
431 		next = ddi_get_next_sibling(dip);
432 		while (next != last && dip != srh->sr_failed_dip) {
433 			dip = next;
434 			next = ddi_get_next_sibling(dip);
435 		}
436 		if (dip == srh->sr_failed_dip) {
437 			/* release hold acquired in dr_suspend_devices() */
438 			srh->sr_failed_dip = NULL;
439 			ndi_rele_devi(dip);
440 		} else if (dr_is_real_device(dip) &&
441 				srh->sr_failed_dip == NULL) {
442 
443 			if ((bn = ddi_binding_name(dip)) != NULL) {
444 				major = ddi_name_to_major(bn);
445 			} else {
446 				bn = "<null>";
447 			}
448 			if (!dr_bypass_device(bn) &&
449 				!drmach_verify_sr(dip, 0)) {
450 				char	d_name[40], d_alias[40], *d_info;
451 
452 				d_name[0] = 0;
453 				d_info = ddi_get_name_addr(dip);
454 				if (d_info == NULL)
455 					d_info = "<null>";
456 
457 				if (!dr_resolve_devname(dip, d_name,
458 								d_alias)) {
459 					if (d_alias[0] != 0) {
460 						prom_printf("\tresuming "
461 							"%s@%s (aka %s)\n",
462 							d_name, d_info,
463 							d_alias);
464 					} else {
465 						prom_printf("\tresuming "
466 							"%s@%s\n",
467 							d_name, d_info);
468 					}
469 				} else {
470 					prom_printf("\tresuming %s@%s\n",
471 						bn, d_info);
472 				}
473 
474 				if (devi_attach(dip, DDI_RESUME) !=
475 							DDI_SUCCESS) {
476 					/*
477 					 * Print a console warning,
478 					 * set an e_code of ESBD_RESUME,
479 					 * and save the driver major
480 					 * number in the e_rsc.
481 					 */
482 					prom_printf("\tFAILED to resume %s@%s",
483 					    d_name[0] ? d_name : bn, d_info);
484 
485 					srh->sr_err_idx =
486 						dr_add_int(srh->sr_err_ints,
487 						srh->sr_err_idx, DR_MAX_ERR_INT,
488 						(uint64_t)major);
489 
490 					handle = srh->sr_dr_handlep;
491 
492 					dr_op_err(CE_IGNORE, handle,
493 					    ESBD_RESUME, "%s@%s",
494 					    d_name[0] ? d_name : bn, d_info);
495 				}
496 			}
497 		}
498 
499 		/* Hold parent busy while walking its children */
500 		ndi_devi_enter(dip, &circ);
501 		dr_resume_devices(ddi_get_child(dip), srh);
502 		ndi_devi_exit(dip, circ);
503 		last = dip;
504 	}
505 }
506 
/*
 * DR's notion of a "virtually stopped" thread, similar to
 * CPR_VSTOPPED.  Such a user thread is asleep on a wait channel inside
 * the kernel with an AST posted and TP_CHKPT set; once it finishes in
 * the kernel it will process the stop request and stop.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
518 
519 /* ARGSUSED */
520 static int
521 dr_stop_user_threads(dr_sr_handle_t *srh)
522 {
523 	int		count;
524 	int		bailout;
525 	dr_handle_t	*handle = srh->sr_dr_handlep;
526 	static fn_t	f = "dr_stop_user_threads";
527 	kthread_id_t 	tp;
528 
529 	extern void add_one_utstop();
530 	extern void utstop_timedwait(clock_t);
531 	extern void utstop_init(void);
532 
533 #define	DR_UTSTOP_RETRY	4
534 #define	DR_UTSTOP_WAIT	hz
535 
536 	if (dr_skip_user_threads)
537 		return (DDI_SUCCESS);
538 
539 	utstop_init();
540 
541 	/* we need to try a few times to get past fork, etc. */
542 	srh->sr_err_idx = 0;
543 	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
544 		/* walk the entire threadlist */
545 		mutex_enter(&pidlock);
546 		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
547 			proc_t *p = ttoproc(tp);
548 
549 			/* handle kernel threads separately */
550 			if (p->p_as == &kas || p->p_stat == SZOMB)
551 				continue;
552 
553 			mutex_enter(&p->p_lock);
554 			thread_lock(tp);
555 
556 			if (tp->t_state == TS_STOPPED) {
557 				/* add another reason to stop this thread */
558 				tp->t_schedflag &= ~TS_RESUME;
559 			} else {
560 				tp->t_proc_flag |= TP_CHKPT;
561 
562 				thread_unlock(tp);
563 				mutex_exit(&p->p_lock);
564 				add_one_utstop();
565 				mutex_enter(&p->p_lock);
566 				thread_lock(tp);
567 
568 				aston(tp);
569 
570 				if (tp->t_state == TS_SLEEP &&
571 				    (tp->t_flag & T_WAKEABLE)) {
572 					setrun_locked(tp);
573 				}
574 
575 			}
576 
577 			/* grab thread if needed */
578 			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
579 				poke_cpu(tp->t_cpu->cpu_id);
580 
581 
582 			thread_unlock(tp);
583 			mutex_exit(&p->p_lock);
584 		}
585 		mutex_exit(&pidlock);
586 
587 
588 		/* let everything catch up */
589 		utstop_timedwait(count * count * DR_UTSTOP_WAIT);
590 
591 
592 		/* now, walk the threadlist again to see if we are done */
593 		mutex_enter(&pidlock);
594 		for (tp = curthread->t_next, bailout = 0;
595 		    tp != curthread; tp = tp->t_next) {
596 			proc_t *p = ttoproc(tp);
597 
598 			/* handle kernel threads separately */
599 			if (p->p_as == &kas || p->p_stat == SZOMB)
600 				continue;
601 
602 			/*
603 			 * If this thread didn't stop, and we don't allow
604 			 * unstopped blocked threads, bail.
605 			 */
606 			thread_lock(tp);
607 			if (!CPR_ISTOPPED(tp) &&
608 			    !(dr_allow_blocked_threads &&
609 			    DR_VSTOPPED(tp))) {
610 				bailout = 1;
611 				if (count == DR_UTSTOP_RETRY - 1) {
612 					/*
613 					 * save the pid for later reporting
614 					 */
615 					srh->sr_err_idx =
616 					    dr_add_int(srh->sr_err_ints,
617 					    srh->sr_err_idx, DR_MAX_ERR_INT,
618 					    (uint64_t)p->p_pid);
619 
620 					cmn_err(CE_WARN, "%s: "
621 					    "failed to stop thread: "
622 					    "process=%s, pid=%d",
623 					    f, p->p_user.u_psargs, p->p_pid);
624 
625 					PR_QR("%s: failed to stop thread: "
626 					    "process=%s, pid=%d, t_id=0x%p, "
627 					    "t_state=0x%x, t_proc_flag=0x%x, "
628 					    "t_schedflag=0x%x\n",
629 					    f, p->p_user.u_psargs, p->p_pid,
630 					    tp, tp->t_state, tp->t_proc_flag,
631 					    tp->t_schedflag);
632 				}
633 
634 			}
635 			thread_unlock(tp);
636 		}
637 		mutex_exit(&pidlock);
638 
639 		/* were all the threads stopped? */
640 		if (!bailout)
641 			break;
642 	}
643 
644 	/* were we unable to stop all threads after a few tries? */
645 	if (bailout) {
646 		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
647 			srh->sr_err_idx, 0);
648 		return (ESRCH);
649 	}
650 
651 	return (DDI_SUCCESS);
652 }
653 
654 static void
655 dr_start_user_threads(void)
656 {
657 	kthread_id_t tp;
658 
659 	mutex_enter(&pidlock);
660 
661 	/* walk all threads and release them */
662 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
663 		proc_t *p = ttoproc(tp);
664 
665 		/* skip kernel threads */
666 		if (ttoproc(tp)->p_as == &kas)
667 			continue;
668 
669 		mutex_enter(&p->p_lock);
670 		tp->t_proc_flag &= ~TP_CHKPT;
671 		mutex_exit(&p->p_lock);
672 
673 		thread_lock(tp);
674 		if (CPR_ISTOPPED(tp)) {
675 			/* back on the runq */
676 			tp->t_schedflag |= TS_RESUME;
677 			setrun_locked(tp);
678 		}
679 		thread_unlock(tp);
680 	}
681 
682 	mutex_exit(&pidlock);
683 }
684 
685 static void
686 dr_signal_user(int sig)
687 {
688 	struct proc *p;
689 
690 	mutex_enter(&pidlock);
691 
692 	for (p = practive; p != NULL; p = p->p_next) {
693 		/* only user threads */
694 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
695 		    p == proc_init || p == ttoproc(curthread))
696 			continue;
697 
698 		mutex_enter(&p->p_lock);
699 		sigtoproc(p, NULL, sig);
700 		mutex_exit(&p->p_lock);
701 	}
702 
703 	mutex_exit(&pidlock);
704 
705 	/* add a bit of delay */
706 	delay(hz);
707 }
708 
709 void
710 dr_resume(dr_sr_handle_t *srh)
711 {
712 	dr_handle_t	*handle;
713 
714 	handle = srh->sr_dr_handlep;
715 
716 	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
717 		/*
718 		 * Update the signature block.
719 		 * If cpus are not paused, this can be done now.
720 		 * See comments below.
721 		 */
722 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
723 		    CPU->cpu_id);
724 	}
725 
726 	switch (srh->sr_suspend_state) {
727 	case DR_SRSTATE_FULL:
728 
729 		ASSERT(MUTEX_HELD(&cpu_lock));
730 
731 		dr_enable_intr(); 	/* enable intr & clock */
732 
733 		start_cpus();
734 		mutex_exit(&cpu_lock);
735 
736 		/*
737 		 * Update the signature block.
738 		 * This must not be done while cpus are paused, since on
739 		 * Starcat the cpu signature update aquires an adaptive
740 		 * mutex in the iosram driver. Blocking with cpus paused
741 		 * can lead to deadlock.
742 		 */
743 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
744 		    CPU->cpu_id);
745 
746 		/*
747 		 * If we suspended hw watchdog at suspend,
748 		 * re-enable it now.
749 		 */
750 
751 		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
752 			mutex_enter(&tod_lock);
753 			tod_ops.tod_set_watchdog_timer(
754 				watchdog_timeout_seconds);
755 			mutex_exit(&tod_lock);
756 		}
757 
758 		/*
759 		 * This should only be called if drmach_suspend_last()
760 		 * was called and state transitioned to DR_SRSTATE_FULL
761 		 * to prevent resume attempts on device instances that
762 		 * were not previously suspended.
763 		 */
764 		drmach_resume_first();
765 
766 		/* FALLTHROUGH */
767 
768 	case DR_SRSTATE_DRIVER:
769 		/*
770 		 * resume drivers
771 		 */
772 		srh->sr_err_idx = 0;
773 
774 		/* no parent dip to hold busy */
775 		dr_resume_devices(ddi_root_node(), srh);
776 
777 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
778 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
779 				srh->sr_err_ints, srh->sr_err_idx, 1);
780 		}
781 
782 		/*
783 		 * resume the lock manager
784 		 */
785 		lm_cprresume();
786 
787 		/* FALLTHROUGH */
788 
789 	case DR_SRSTATE_USER:
790 		/*
791 		 * finally, resume user threads
792 		 */
793 		if (!dr_skip_user_threads) {
794 			prom_printf("DR: resuming user threads...\n");
795 			dr_start_user_threads();
796 		}
797 		/* FALLTHROUGH */
798 
799 	case DR_SRSTATE_BEGIN:
800 	default:
801 		/*
802 		 * let those who care know that we've just resumed
803 		 */
804 		PR_QR("sending SIGTHAW...\n");
805 		dr_signal_user(SIGTHAW);
806 		break;
807 	}
808 
809 	i_ndi_allow_device_tree_changes(handle->h_ndi);
810 
811 	/*
812 	 * update the signature block
813 	 */
814 	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);
815 
816 	prom_printf("DR: resume COMPLETED\n");
817 }
818 
819 int
820 dr_suspend(dr_sr_handle_t *srh)
821 {
822 	dr_handle_t	*handle;
823 	int		force;
824 	int		dev_errs_idx;
825 	uint64_t	dev_errs[DR_MAX_ERR_INT];
826 	int		rc = DDI_SUCCESS;
827 
828 	handle = srh->sr_dr_handlep;
829 
830 	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
831 
832 	/*
833 	 * update the signature block
834 	 */
835 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
836 	    CPU->cpu_id);
837 
838 	i_ndi_block_device_tree_changes(&handle->h_ndi);
839 
840 	prom_printf("\nDR: suspending user threads...\n");
841 	srh->sr_suspend_state = DR_SRSTATE_USER;
842 	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
843 	    dr_check_user_stop_result) {
844 		dr_resume(srh);
845 		return (rc);
846 	}
847 
848 	if (!force) {
849 		struct dr_ref drc = {0};
850 
851 		prom_printf("\nDR: checking devices...\n");
852 		dev_errs_idx = 0;
853 
854 		drc.arr = dev_errs;
855 		drc.idx = &dev_errs_idx;
856 		drc.len = DR_MAX_ERR_INT;
857 
858 		/*
859 		 * Since the root node can never go away, it
860 		 * doesn't have to be held.
861 		 */
862 		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
863 		if (dev_errs_idx) {
864 			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
865 				dev_errs_idx, 1);
866 			dr_resume(srh);
867 			return (DDI_FAILURE);
868 		}
869 		PR_QR("done\n");
870 	} else {
871 		prom_printf("\nDR: dr_suspend invoked with force flag\n");
872 	}
873 
874 #ifndef	SKIP_SYNC
875 	/*
876 	 * This sync swap out all user pages
877 	 */
878 	vfs_sync(SYNC_ALL);
879 #endif
880 
881 	/*
882 	 * special treatment for lock manager
883 	 */
884 	lm_cprsuspend();
885 
886 #ifndef	SKIP_SYNC
887 	/*
888 	 * sync the file system in case we never make it back
889 	 */
890 	sync();
891 #endif
892 
893 	/*
894 	 * now suspend drivers
895 	 */
896 	prom_printf("DR: suspending drivers...\n");
897 	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
898 	srh->sr_err_idx = 0;
899 	/* No parent to hold busy */
900 	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
901 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
902 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
903 				srh->sr_err_ints, srh->sr_err_idx, 1);
904 		}
905 		dr_resume(srh);
906 		return (rc);
907 	}
908 
909 	drmach_suspend_last();
910 
911 	/*
912 	 * finally, grab all cpus
913 	 */
914 	srh->sr_suspend_state = DR_SRSTATE_FULL;
915 
916 	/*
917 	 * if watchdog was activated, disable it
918 	 */
919 	if (watchdog_activated) {
920 		mutex_enter(&tod_lock);
921 		tod_ops.tod_clear_watchdog_timer();
922 		mutex_exit(&tod_lock);
923 		srh->sr_flags |= SR_FLAG_WATCHDOG;
924 	} else {
925 		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
926 	}
927 
928 	/*
929 	 * Update the signature block.
930 	 * This must be done before cpus are paused, since on Starcat the
931 	 * cpu signature update aquires an adaptive mutex in the iosram driver.
932 	 * Blocking with cpus paused can lead to deadlock.
933 	 */
934 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);
935 
936 	mutex_enter(&cpu_lock);
937 	pause_cpus(NULL);
938 	dr_stop_intr();
939 
940 	return (rc);
941 }
942 
943 int
944 dr_pt_test_suspend(dr_handle_t *hp)
945 {
946 	dr_sr_handle_t *srh;
947 	int		err;
948 	uint_t		psmerr;
949 	static fn_t	f = "dr_pt_test_suspend";
950 
951 	PR_QR("%s...\n", f);
952 
953 	srh = dr_get_sr_handle(hp);
954 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
955 		dr_resume(srh);
956 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
957 			PR_QR("%s: error on dr_resume()", f);
958 			switch (psmerr) {
959 			case ESBD_RESUME:
960 				PR_QR("Couldn't resume devices: %s\n",
961 					DR_GET_E_RSC(hp->h_err));
962 				break;
963 
964 			case ESBD_KTHREAD:
965 				PR_ALL("psmerr is ESBD_KTHREAD\n");
966 				break;
967 			default:
968 				PR_ALL("Resume error unknown = %d\n",
969 					psmerr);
970 				break;
971 			}
972 		}
973 	} else {
974 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n",
975 			f, err);
976 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
977 		switch (psmerr) {
978 		case ESBD_UNSAFE:
979 			PR_ALL("Unsafe devices (major #): %s\n",
980 				DR_GET_E_RSC(hp->h_err));
981 			break;
982 
983 		case ESBD_RTTHREAD:
984 			PR_ALL("RT threads (PIDs): %s\n",
985 				DR_GET_E_RSC(hp->h_err));
986 			break;
987 
988 		case ESBD_UTHREAD:
989 			PR_ALL("User threads (PIDs): %s\n",
990 				DR_GET_E_RSC(hp->h_err));
991 			break;
992 
993 		case ESBD_SUSPEND:
994 			PR_ALL("Non-suspendable devices (major #): %s\n",
995 				DR_GET_E_RSC(hp->h_err));
996 			break;
997 
998 		case ESBD_RESUME:
999 			PR_ALL("Could not resume devices (major #): %s\n",
1000 				DR_GET_E_RSC(hp->h_err));
1001 			break;
1002 
1003 		case ESBD_KTHREAD:
1004 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1005 			break;
1006 
1007 		case ESBD_NOERROR:
1008 			PR_ALL("sbd_error_t error code not set\n");
1009 			break;
1010 
1011 		default:
1012 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1013 			break;
1014 		}
1015 	}
1016 	dr_release_sr_handle(srh);
1017 
1018 	return (0);
1019 }
1020 
/*
 * Append 'val' to the first 'idx' entries of 'arr', which has room for
 * 'len' entries in total.  The value is not stored if the array is
 * already full or if it is already present.
 *
 * Returns the resulting number of entries, or 0 if 'arr' is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos;

	if (arr == NULL)
		return (0);

	/* no room left: the count is unchanged */
	if (idx >= len)
		return (idx);

	/* duplicates are not recorded twice */
	for (pos = idx - 1; pos >= 0; pos--) {
		if (arr[pos] == val)
			return (idx);
	}

	arr[idx] = val;

	return (idx + 1);
}
1046 
1047 /*
1048  * Construct an sbd_error_t featuring a string representation of an array of
1049  * integers as its e_rsc.
1050  */
1051 static sbd_error_t *
1052 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1053 {
1054 	int		i, n, buf_len, buf_idx, buf_avail;
1055 	char		*dname;
1056 	char		*buf;
1057 	sbd_error_t	*new_sbd_err;
1058 	static char	s_ellipsis[] = "...";
1059 
1060 	if (arr == NULL || idx <= 0)
1061 		return (NULL);
1062 
1063 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1064 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1065 
1066 	/*
1067 	 * This is the total working area of the buffer.  It must be computed
1068 	 * as the size of 'buf', minus reserved space for the null terminator
1069 	 * and the ellipsis string.
1070 	 */
1071 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1072 
1073 	/* Construct a string representation of the array values */
1074 	for (buf_idx = 0, i = 0; i < idx; i++) {
1075 		buf_avail = buf_len - buf_idx;
1076 		if (majors) {
1077 			dname = ddi_major_to_name(arr[i]);
1078 			if (dname) {
1079 				n = snprintf(&buf[buf_idx], buf_avail,
1080 					"%s, ", dname);
1081 			} else {
1082 				n = snprintf(&buf[buf_idx], buf_avail,
1083 					"major %lu, ", arr[i]);
1084 			}
1085 		} else {
1086 			n = snprintf(&buf[buf_idx], buf_avail, "%lu, ",
1087 				arr[i]);
1088 		}
1089 
1090 		/* An ellipsis gets appended when no more values fit */
1091 		if (n >= buf_avail) {
1092 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1093 			break;
1094 		}
1095 
1096 		buf_idx += n;
1097 	}
1098 
1099 	/* If all the contents fit, remove the trailing comma */
1100 	if (n < buf_avail) {
1101 		buf[--buf_idx] = '\0';
1102 		buf[--buf_idx] = '\0';
1103 	}
1104 
1105 	/* Return an sbd_error_t with the buffer and e_code */
1106 	new_sbd_err = drerr_new(1, e_code, buf);
1107 	kmem_free(buf, MAXPATHLEN);
1108 	return (new_sbd_err);
1109 }
1110