xref: /titanic_41/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision 60405de4d8688d96dd05157c28db3ade5c9bc234)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * A CPR derivative specifically for starfire/starcat
30  */
31 
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/ddi.h>
37 #define	SUNDDI_IMPL
38 #include <sys/sunddi.h>
39 #include <sys/sunndi.h>
40 #include <sys/devctl.h>
41 #include <sys/time.h>
42 #include <sys/kmem.h>
43 #include <nfs/lm.h>
44 #include <sys/ddi_impldefs.h>
45 #include <sys/ndi_impldefs.h>
46 #include <sys/obpdefs.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/errno.h>
50 #include <sys/callb.h>
51 #include <sys/clock.h>
52 #include <sys/x_call.h>
53 #include <sys/cpuvar.h>
54 #include <sys/epm.h>
55 #include <sys/vfs.h>
56 
57 #include <sys/cpu_sgnblk_defs.h>
58 #include <sys/dr.h>
59 #include <sys/dr_util.h>
60 
61 #include <sys/promif.h>
62 #include <sys/conf.h>
63 #include <sys/cyclic.h>
64 
65 extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
66 extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
67 extern int	is_pseudo_device(dev_info_t *dip);
68 
69 extern kmutex_t	cpu_lock;
70 extern dr_unsafe_devs_t dr_unsafe_devs;
71 
72 static int		dr_is_real_device(dev_info_t *dip);
73 static int		dr_is_unsafe_major(major_t major);
74 static int		dr_bypass_device(char *dname);
75 static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
76 static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
77 				char *alias);
78 static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
79 				int majors);
80 static int		dr_add_int(uint64_t *arr, int idx, int len,
81 				uint64_t val);
82 
83 int dr_pt_test_suspend(dr_handle_t *hp);
84 
/*
 * dr_quiesce.c interface
 * NOTE: these states are private to dr_suspend() and dr_resume().  The
 * enumerators are ordered by how far a suspend has progressed; dr_resume()
 * uses the recorded value to decide which steps have to be undone.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* no suspend step recorded yet */
	DR_SRSTATE_USER,	/* set before user threads are stopped */
	DR_SRSTATE_DRIVER,	/* set before drivers are suspended */
	DR_SRSTATE_FULL		/* set before the cpus are paused */
} suspend_state_t;
95 
96 struct dr_sr_handle {
97 	dr_handle_t		*sr_dr_handlep;
98 	dev_info_t		*sr_failed_dip;
99 	suspend_state_t		sr_suspend_state;
100 	uint_t			sr_flags;
101 	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
102 	int			sr_err_idx;
103 };
104 
105 #define	SR_FLAG_WATCHDOG	0x1
106 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * Names of drivers to bypass when performing a suspend.  The list is
 * terminated by an empty string; dr_bypass_device() stops scanning there.
 */
static char *dr_bypass_list[] = {
	""
};
115 
116 
117 #define		SKIP_SYNC	/* bypass sync ops in dr_suspend */
118 
/*
 * Tunables controlling how user threads are treated during a suspend.
 *
 * dr_skip_user_threads
 *	When true, user threads are neither stopped nor restarted and the
 *	two flags below are not used.
 * dr_check_user_stop_result
 *	When true, dr_suspend() gives up if the user threads could not all
 *	be stopped.
 * dr_allow_blocked_threads
 *	When true, a thread that satisfies DR_VSTOPPED() is accepted as
 *	stopped.
 *
 * All combinations of dr_check_user_stop_result and
 * dr_allow_blocked_threads are allowed, even though it might not make much
 * sense to disallow blocked threads when the stop result is not even
 * checked.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */
134 
135 #define	DR_CPU_LOOP_MSEC	1000
136 
137 static void
138 dr_stop_intr(void)
139 {
140 	ASSERT(MUTEX_HELD(&cpu_lock));
141 
142 	kpreempt_disable();
143 	cyclic_suspend();
144 }
145 
146 static void
147 dr_enable_intr(void)
148 {
149 	ASSERT(MUTEX_HELD(&cpu_lock));
150 
151 	cyclic_resume();
152 	kpreempt_enable();
153 }
154 
155 dr_sr_handle_t *
156 dr_get_sr_handle(dr_handle_t *hp)
157 {
158 	dr_sr_handle_t *srh;
159 
160 	srh = GETSTRUCT(dr_sr_handle_t, 1);
161 	srh->sr_dr_handlep = hp;
162 
163 	return (srh);
164 }
165 
166 void
167 dr_release_sr_handle(dr_sr_handle_t *srh)
168 {
169 	ASSERT(srh->sr_failed_dip == NULL);
170 	FREESTRUCT(srh, dr_sr_handle_t, 1);
171 }
172 
173 static int
174 dr_is_real_device(dev_info_t *dip)
175 {
176 	struct regspec *regbuf = NULL;
177 	int length = 0;
178 	int rc;
179 
180 	if (ddi_get_driver(dip) == NULL)
181 		return (0);
182 
183 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
184 		return (1);
185 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
186 		return (0);
187 
188 	/*
189 	 * now the general case
190 	 */
191 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
192 		(caddr_t)&regbuf, &length);
193 	ASSERT(rc != DDI_PROP_NO_MEMORY);
194 	if (rc != DDI_PROP_SUCCESS) {
195 		return (0);
196 	} else {
197 		if ((length > 0) && (regbuf != NULL))
198 			kmem_free(regbuf, length);
199 		return (1);
200 	}
201 }
202 
203 static int
204 dr_is_unsafe_major(major_t major)
205 {
206 	char    *dname, **cpp;
207 	int	i, ndevs;
208 
209 	if ((dname = ddi_major_to_name(major)) == NULL) {
210 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
211 		return (0);
212 	}
213 
214 	ndevs = dr_unsafe_devs.ndevs;
215 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
216 		if (strcmp(dname, *cpp++) == 0)
217 			return (1);
218 	}
219 	return (0);
220 }
221 
222 static int
223 dr_bypass_device(char *dname)
224 {
225 	int i;
226 	char **lname;
227 	/* check the bypass list */
228 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
229 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
230 			return (1);
231 	}
232 	return (0);
233 }
234 
235 static int
236 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
237 {
238 	major_t	devmajor;
239 	char	*aka, *name;
240 
241 	*buffer = *alias = 0;
242 
243 	if (dip == NULL)
244 		return (-1);
245 
246 	if ((name = ddi_get_name(dip)) == NULL)
247 		name = "<null name>";
248 
249 	aka = name;
250 
251 	if ((devmajor = ddi_name_to_major(aka)) != -1)
252 		aka = ddi_major_to_name(devmajor);
253 
254 	strcpy(buffer, name);
255 
256 	if (strcmp(name, aka))
257 		strcpy(alias, aka);
258 	else
259 		*alias = 0;
260 
261 	return (0);
262 }
263 
264 struct dr_ref {
265 	int		*refcount;
266 	uint64_t	*arr;
267 	int		*idx;
268 	int		len;
269 };
270 
271 /* ARGSUSED */
272 static int
273 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
274 {
275 	major_t		major;
276 	char		*dname;
277 	struct dr_ref	*rp = (struct dr_ref *)arg;
278 
279 	if (dip == NULL)
280 		return (DDI_WALK_CONTINUE);
281 
282 	if (!dr_is_real_device(dip))
283 		return (DDI_WALK_CONTINUE);
284 
285 	dname = ddi_binding_name(dip);
286 
287 	if (dr_bypass_device(dname))
288 		return (DDI_WALK_CONTINUE);
289 
290 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
291 		if (ref && rp->refcount) {
292 			*rp->refcount += ref;
293 			PR_QR("\n  %s (major# %d) is referenced(%u)\n",
294 				dname, major, ref);
295 		}
296 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
297 			PR_QR("\n  %s (major# %d) not hotpluggable\n",
298 				dname, major);
299 			if (rp->arr != NULL && rp->idx != NULL)
300 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
301 					rp->len, (uint64_t)major);
302 		}
303 	}
304 	return (DDI_WALK_CONTINUE);
305 }
306 
307 static int
308 dr_check_unsafe_major(dev_info_t *dip, void *arg)
309 {
310 	return (dr_check_dip(dip, arg, 0));
311 }
312 
313 
314 /*ARGSUSED*/
315 void
316 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
317     uint64_t *arr, int *idx, int len)
318 {
319 	struct dr_ref bref = {0};
320 
321 	if (dip == NULL)
322 		return;
323 
324 	bref.refcount = refcount;
325 	bref.arr = arr;
326 	bref.idx = idx;
327 	bref.len = len;
328 
329 	ASSERT(e_ddi_branch_held(dip));
330 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
331 }
332 
333 /*
334  * The "dip" argument's parent (if it exists) must be held busy.
335  */
336 static int
337 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
338 {
339 	dr_handle_t	*handle;
340 	major_t		major;
341 	char		*dname;
342 	int		circ;
343 
344 	/*
345 	 * If dip is the root node, it has no siblings and it is
346 	 * always held. If dip is not the root node, dr_suspend_devices()
347 	 * will be invoked with the parent held busy.
348 	 */
349 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
350 		char	d_name[40], d_alias[40], *d_info;
351 
352 		ndi_devi_enter(dip, &circ);
353 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
354 			ndi_devi_exit(dip, circ);
355 			return (ENXIO);
356 		}
357 		ndi_devi_exit(dip, circ);
358 
359 		if (!dr_is_real_device(dip))
360 			continue;
361 
362 		major = (major_t)-1;
363 		if ((dname = ddi_binding_name(dip)) != NULL)
364 			major = ddi_name_to_major(dname);
365 
366 		if (dr_bypass_device(dname)) {
367 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
368 				major);
369 			continue;
370 		}
371 
372 		if (drmach_verify_sr(dip, 1)) {
373 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
374 				major);
375 			continue;
376 		}
377 
378 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
379 			d_info = "<null>";
380 
381 		d_name[0] = 0;
382 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
383 			if (d_alias[0] != 0) {
384 				prom_printf("\tsuspending %s@%s (aka %s)\n",
385 					d_name, d_info, d_alias);
386 			} else {
387 				prom_printf("\tsuspending %s@%s\n",
388 					d_name, d_info);
389 			}
390 		} else {
391 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
392 		}
393 
394 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
395 			prom_printf("\tFAILED to suspend %s@%s\n",
396 				d_name[0] ? d_name : dname, d_info);
397 
398 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
399 				srh->sr_err_idx, DR_MAX_ERR_INT,
400 				(uint64_t)major);
401 
402 			ndi_hold_devi(dip);
403 			srh->sr_failed_dip = dip;
404 
405 			handle = srh->sr_dr_handlep;
406 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
407 				d_name[0] ? d_name : dname, d_info);
408 
409 			return (DDI_FAILURE);
410 		}
411 	}
412 
413 	return (DDI_SUCCESS);
414 }
415 
416 static void
417 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
418 {
419 	dr_handle_t	*handle;
420 	dev_info_t	*dip, *next, *last = NULL;
421 	major_t		major;
422 	char		*bn;
423 	int		circ;
424 
425 	major = (major_t)-1;
426 
427 	/* attach in reverse device tree order */
428 	while (last != start) {
429 		dip = start;
430 		next = ddi_get_next_sibling(dip);
431 		while (next != last && dip != srh->sr_failed_dip) {
432 			dip = next;
433 			next = ddi_get_next_sibling(dip);
434 		}
435 		if (dip == srh->sr_failed_dip) {
436 			/* release hold acquired in dr_suspend_devices() */
437 			srh->sr_failed_dip = NULL;
438 			ndi_rele_devi(dip);
439 		} else if (dr_is_real_device(dip) &&
440 				srh->sr_failed_dip == NULL) {
441 
442 			if ((bn = ddi_binding_name(dip)) != NULL) {
443 				major = ddi_name_to_major(bn);
444 			} else {
445 				bn = "<null>";
446 			}
447 			if (!dr_bypass_device(bn) &&
448 				!drmach_verify_sr(dip, 0)) {
449 				char	d_name[40], d_alias[40], *d_info;
450 
451 				d_name[0] = 0;
452 				d_info = ddi_get_name_addr(dip);
453 				if (d_info == NULL)
454 					d_info = "<null>";
455 
456 				if (!dr_resolve_devname(dip, d_name,
457 								d_alias)) {
458 					if (d_alias[0] != 0) {
459 						prom_printf("\tresuming "
460 							"%s@%s (aka %s)\n",
461 							d_name, d_info,
462 							d_alias);
463 					} else {
464 						prom_printf("\tresuming "
465 							"%s@%s\n",
466 							d_name, d_info);
467 					}
468 				} else {
469 					prom_printf("\tresuming %s@%s\n",
470 						bn, d_info);
471 				}
472 
473 				if (devi_attach(dip, DDI_RESUME) !=
474 							DDI_SUCCESS) {
475 					/*
476 					 * Print a console warning,
477 					 * set an e_code of ESBD_RESUME,
478 					 * and save the driver major
479 					 * number in the e_rsc.
480 					 */
481 					prom_printf("\tFAILED to resume %s@%s",
482 					    d_name[0] ? d_name : bn, d_info);
483 
484 					srh->sr_err_idx =
485 						dr_add_int(srh->sr_err_ints,
486 						srh->sr_err_idx, DR_MAX_ERR_INT,
487 						(uint64_t)major);
488 
489 					handle = srh->sr_dr_handlep;
490 
491 					dr_op_err(CE_IGNORE, handle,
492 					    ESBD_RESUME, "%s@%s",
493 					    d_name[0] ? d_name : bn, d_info);
494 				}
495 			}
496 		}
497 
498 		/* Hold parent busy while walking its children */
499 		ndi_devi_enter(dip, &circ);
500 		dr_resume_devices(ddi_get_child(dip), srh);
501 		ndi_devi_exit(dip, circ);
502 		last = dip;
503 	}
504 }
505 
/*
 * DR's notion of a "virtually stopped" thread, modelled on CPR_VSTOPPED.
 * Such a user thread is asleep on a wait channel inside the kernel with
 * an AST pending and TP_CHKPT set; once it finishes in the kernel it will
 * process the stop request and stop.
 */
#define	DR_VSTOPPED(t)						\
	((t)->t_state == TS_SLEEP && (t)->t_wchan != NULL &&	\
	(t)->t_astflag && ((t)->t_proc_flag & TP_CHKPT))
517 
518 /* ARGSUSED */
519 static int
520 dr_stop_user_threads(dr_sr_handle_t *srh)
521 {
522 	int		count;
523 	int		bailout;
524 	dr_handle_t	*handle = srh->sr_dr_handlep;
525 	static fn_t	f = "dr_stop_user_threads";
526 	kthread_id_t 	tp;
527 
528 	extern void add_one_utstop();
529 	extern void utstop_timedwait(clock_t);
530 	extern void utstop_init(void);
531 
532 #define	DR_UTSTOP_RETRY	4
533 #define	DR_UTSTOP_WAIT	hz
534 
535 	if (dr_skip_user_threads)
536 		return (DDI_SUCCESS);
537 
538 	utstop_init();
539 
540 	/* we need to try a few times to get past fork, etc. */
541 	srh->sr_err_idx = 0;
542 	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
543 		/* walk the entire threadlist */
544 		mutex_enter(&pidlock);
545 		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
546 			proc_t *p = ttoproc(tp);
547 
548 			/* handle kernel threads separately */
549 			if (p->p_as == &kas || p->p_stat == SZOMB)
550 				continue;
551 
552 			mutex_enter(&p->p_lock);
553 			thread_lock(tp);
554 
555 			if (tp->t_state == TS_STOPPED) {
556 				/* add another reason to stop this thread */
557 				tp->t_schedflag &= ~TS_RESUME;
558 			} else {
559 				tp->t_proc_flag |= TP_CHKPT;
560 
561 				thread_unlock(tp);
562 				mutex_exit(&p->p_lock);
563 				add_one_utstop();
564 				mutex_enter(&p->p_lock);
565 				thread_lock(tp);
566 
567 				aston(tp);
568 
569 				if (tp->t_state == TS_SLEEP &&
570 				    (tp->t_flag & T_WAKEABLE)) {
571 					setrun_locked(tp);
572 				}
573 
574 			}
575 
576 			/* grab thread if needed */
577 			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
578 				poke_cpu(tp->t_cpu->cpu_id);
579 
580 
581 			thread_unlock(tp);
582 			mutex_exit(&p->p_lock);
583 		}
584 		mutex_exit(&pidlock);
585 
586 
587 		/* let everything catch up */
588 		utstop_timedwait(count * count * DR_UTSTOP_WAIT);
589 
590 
591 		/* now, walk the threadlist again to see if we are done */
592 		mutex_enter(&pidlock);
593 		for (tp = curthread->t_next, bailout = 0;
594 		    tp != curthread; tp = tp->t_next) {
595 			proc_t *p = ttoproc(tp);
596 
597 			/* handle kernel threads separately */
598 			if (p->p_as == &kas || p->p_stat == SZOMB)
599 				continue;
600 
601 			/*
602 			 * If this thread didn't stop, and we don't allow
603 			 * unstopped blocked threads, bail.
604 			 */
605 			thread_lock(tp);
606 			if (!CPR_ISTOPPED(tp) &&
607 			    !(dr_allow_blocked_threads &&
608 			    DR_VSTOPPED(tp))) {
609 				bailout = 1;
610 				if (count == DR_UTSTOP_RETRY - 1) {
611 					/*
612 					 * save the pid for later reporting
613 					 */
614 					srh->sr_err_idx =
615 					    dr_add_int(srh->sr_err_ints,
616 					    srh->sr_err_idx, DR_MAX_ERR_INT,
617 					    (uint64_t)p->p_pid);
618 
619 					cmn_err(CE_WARN, "%s: "
620 					    "failed to stop thread: "
621 					    "process=%s, pid=%d",
622 					    f, p->p_user.u_psargs, p->p_pid);
623 
624 					PR_QR("%s: failed to stop thread: "
625 					    "process=%s, pid=%d, t_id=0x%p, "
626 					    "t_state=0x%x, t_proc_flag=0x%x, "
627 					    "t_schedflag=0x%x\n",
628 					    f, p->p_user.u_psargs, p->p_pid,
629 					    tp, tp->t_state, tp->t_proc_flag,
630 					    tp->t_schedflag);
631 				}
632 
633 			}
634 			thread_unlock(tp);
635 		}
636 		mutex_exit(&pidlock);
637 
638 		/* were all the threads stopped? */
639 		if (!bailout)
640 			break;
641 	}
642 
643 	/* were we unable to stop all threads after a few tries? */
644 	if (bailout) {
645 		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
646 			srh->sr_err_idx, 0);
647 		return (ESRCH);
648 	}
649 
650 	return (DDI_SUCCESS);
651 }
652 
653 static void
654 dr_start_user_threads(void)
655 {
656 	kthread_id_t tp;
657 
658 	mutex_enter(&pidlock);
659 
660 	/* walk all threads and release them */
661 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
662 		proc_t *p = ttoproc(tp);
663 
664 		/* skip kernel threads */
665 		if (ttoproc(tp)->p_as == &kas)
666 			continue;
667 
668 		mutex_enter(&p->p_lock);
669 		tp->t_proc_flag &= ~TP_CHKPT;
670 		mutex_exit(&p->p_lock);
671 
672 		thread_lock(tp);
673 		if (CPR_ISTOPPED(tp)) {
674 			/* back on the runq */
675 			tp->t_schedflag |= TS_RESUME;
676 			setrun_locked(tp);
677 		}
678 		thread_unlock(tp);
679 	}
680 
681 	mutex_exit(&pidlock);
682 }
683 
684 static void
685 dr_signal_user(int sig)
686 {
687 	struct proc *p;
688 
689 	mutex_enter(&pidlock);
690 
691 	for (p = practive; p != NULL; p = p->p_next) {
692 		/* only user threads */
693 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
694 		    p == proc_init || p == ttoproc(curthread))
695 			continue;
696 
697 		mutex_enter(&p->p_lock);
698 		sigtoproc(p, NULL, sig);
699 		mutex_exit(&p->p_lock);
700 	}
701 
702 	mutex_exit(&pidlock);
703 
704 	/* add a bit of delay */
705 	delay(hz);
706 }
707 
708 void
709 dr_resume(dr_sr_handle_t *srh)
710 {
711 	dr_handle_t	*handle;
712 
713 	handle = srh->sr_dr_handlep;
714 
715 	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
716 		/*
717 		 * Update the signature block.
718 		 * If cpus are not paused, this can be done now.
719 		 * See comments below.
720 		 */
721 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
722 		    CPU->cpu_id);
723 	}
724 
725 	switch (srh->sr_suspend_state) {
726 	case DR_SRSTATE_FULL:
727 
728 		ASSERT(MUTEX_HELD(&cpu_lock));
729 
730 		/*
731 		 * Prevent false alarm in tod_validate() due to tod
732 		 * value change between suspend and resume
733 		 */
734 		mutex_enter(&tod_lock);
735 		tod_fault_reset();
736 		mutex_exit(&tod_lock);
737 
738 		dr_enable_intr(); 	/* enable intr & clock */
739 
740 		start_cpus();
741 		mutex_exit(&cpu_lock);
742 
743 		/*
744 		 * Update the signature block.
745 		 * This must not be done while cpus are paused, since on
746 		 * Starcat the cpu signature update aquires an adaptive
747 		 * mutex in the iosram driver. Blocking with cpus paused
748 		 * can lead to deadlock.
749 		 */
750 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
751 		    CPU->cpu_id);
752 
753 		/*
754 		 * If we suspended hw watchdog at suspend,
755 		 * re-enable it now.
756 		 */
757 		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
758 			mutex_enter(&tod_lock);
759 			tod_ops.tod_set_watchdog_timer(
760 				watchdog_timeout_seconds);
761 			mutex_exit(&tod_lock);
762 		}
763 
764 		/*
765 		 * This should only be called if drmach_suspend_last()
766 		 * was called and state transitioned to DR_SRSTATE_FULL
767 		 * to prevent resume attempts on device instances that
768 		 * were not previously suspended.
769 		 */
770 		drmach_resume_first();
771 
772 		/* FALLTHROUGH */
773 
774 	case DR_SRSTATE_DRIVER:
775 		/*
776 		 * resume drivers
777 		 */
778 		srh->sr_err_idx = 0;
779 
780 		/* no parent dip to hold busy */
781 		dr_resume_devices(ddi_root_node(), srh);
782 
783 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
784 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
785 				srh->sr_err_ints, srh->sr_err_idx, 1);
786 		}
787 
788 		/*
789 		 * resume the lock manager
790 		 */
791 		lm_cprresume();
792 
793 		/* FALLTHROUGH */
794 
795 	case DR_SRSTATE_USER:
796 		/*
797 		 * finally, resume user threads
798 		 */
799 		if (!dr_skip_user_threads) {
800 			prom_printf("DR: resuming user threads...\n");
801 			dr_start_user_threads();
802 		}
803 		/* FALLTHROUGH */
804 
805 	case DR_SRSTATE_BEGIN:
806 	default:
807 		/*
808 		 * let those who care know that we've just resumed
809 		 */
810 		PR_QR("sending SIGTHAW...\n");
811 		dr_signal_user(SIGTHAW);
812 		break;
813 	}
814 
815 	i_ndi_allow_device_tree_changes(handle->h_ndi);
816 
817 	/*
818 	 * update the signature block
819 	 */
820 	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);
821 
822 	prom_printf("DR: resume COMPLETED\n");
823 }
824 
825 int
826 dr_suspend(dr_sr_handle_t *srh)
827 {
828 	dr_handle_t	*handle;
829 	int		force;
830 	int		dev_errs_idx;
831 	uint64_t	dev_errs[DR_MAX_ERR_INT];
832 	int		rc = DDI_SUCCESS;
833 
834 	handle = srh->sr_dr_handlep;
835 
836 	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
837 
838 	/*
839 	 * update the signature block
840 	 */
841 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
842 	    CPU->cpu_id);
843 
844 	i_ndi_block_device_tree_changes(&handle->h_ndi);
845 
846 	prom_printf("\nDR: suspending user threads...\n");
847 	srh->sr_suspend_state = DR_SRSTATE_USER;
848 	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
849 	    dr_check_user_stop_result) {
850 		dr_resume(srh);
851 		return (rc);
852 	}
853 
854 	if (!force) {
855 		struct dr_ref drc = {0};
856 
857 		prom_printf("\nDR: checking devices...\n");
858 		dev_errs_idx = 0;
859 
860 		drc.arr = dev_errs;
861 		drc.idx = &dev_errs_idx;
862 		drc.len = DR_MAX_ERR_INT;
863 
864 		/*
865 		 * Since the root node can never go away, it
866 		 * doesn't have to be held.
867 		 */
868 		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
869 		if (dev_errs_idx) {
870 			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
871 				dev_errs_idx, 1);
872 			dr_resume(srh);
873 			return (DDI_FAILURE);
874 		}
875 		PR_QR("done\n");
876 	} else {
877 		prom_printf("\nDR: dr_suspend invoked with force flag\n");
878 	}
879 
880 #ifndef	SKIP_SYNC
881 	/*
882 	 * This sync swap out all user pages
883 	 */
884 	vfs_sync(SYNC_ALL);
885 #endif
886 
887 	/*
888 	 * special treatment for lock manager
889 	 */
890 	lm_cprsuspend();
891 
892 #ifndef	SKIP_SYNC
893 	/*
894 	 * sync the file system in case we never make it back
895 	 */
896 	sync();
897 #endif
898 
899 	/*
900 	 * now suspend drivers
901 	 */
902 	prom_printf("DR: suspending drivers...\n");
903 	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
904 	srh->sr_err_idx = 0;
905 	/* No parent to hold busy */
906 	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
907 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
908 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
909 				srh->sr_err_ints, srh->sr_err_idx, 1);
910 		}
911 		dr_resume(srh);
912 		return (rc);
913 	}
914 
915 	drmach_suspend_last();
916 
917 	/*
918 	 * finally, grab all cpus
919 	 */
920 	srh->sr_suspend_state = DR_SRSTATE_FULL;
921 
922 	/*
923 	 * if watchdog was activated, disable it
924 	 */
925 	if (watchdog_activated) {
926 		mutex_enter(&tod_lock);
927 		tod_ops.tod_clear_watchdog_timer();
928 		mutex_exit(&tod_lock);
929 		srh->sr_flags |= SR_FLAG_WATCHDOG;
930 	} else {
931 		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
932 	}
933 
934 	/*
935 	 * Update the signature block.
936 	 * This must be done before cpus are paused, since on Starcat the
937 	 * cpu signature update aquires an adaptive mutex in the iosram driver.
938 	 * Blocking with cpus paused can lead to deadlock.
939 	 */
940 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);
941 
942 	mutex_enter(&cpu_lock);
943 	pause_cpus(NULL);
944 	dr_stop_intr();
945 
946 	return (rc);
947 }
948 
949 int
950 dr_pt_test_suspend(dr_handle_t *hp)
951 {
952 	dr_sr_handle_t *srh;
953 	int		err;
954 	uint_t		psmerr;
955 	static fn_t	f = "dr_pt_test_suspend";
956 
957 	PR_QR("%s...\n", f);
958 
959 	srh = dr_get_sr_handle(hp);
960 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
961 		dr_resume(srh);
962 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
963 			PR_QR("%s: error on dr_resume()", f);
964 			switch (psmerr) {
965 			case ESBD_RESUME:
966 				PR_QR("Couldn't resume devices: %s\n",
967 					DR_GET_E_RSC(hp->h_err));
968 				break;
969 
970 			case ESBD_KTHREAD:
971 				PR_ALL("psmerr is ESBD_KTHREAD\n");
972 				break;
973 			default:
974 				PR_ALL("Resume error unknown = %d\n",
975 					psmerr);
976 				break;
977 			}
978 		}
979 	} else {
980 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n",
981 			f, err);
982 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
983 		switch (psmerr) {
984 		case ESBD_UNSAFE:
985 			PR_ALL("Unsafe devices (major #): %s\n",
986 				DR_GET_E_RSC(hp->h_err));
987 			break;
988 
989 		case ESBD_RTTHREAD:
990 			PR_ALL("RT threads (PIDs): %s\n",
991 				DR_GET_E_RSC(hp->h_err));
992 			break;
993 
994 		case ESBD_UTHREAD:
995 			PR_ALL("User threads (PIDs): %s\n",
996 				DR_GET_E_RSC(hp->h_err));
997 			break;
998 
999 		case ESBD_SUSPEND:
1000 			PR_ALL("Non-suspendable devices (major #): %s\n",
1001 				DR_GET_E_RSC(hp->h_err));
1002 			break;
1003 
1004 		case ESBD_RESUME:
1005 			PR_ALL("Could not resume devices (major #): %s\n",
1006 				DR_GET_E_RSC(hp->h_err));
1007 			break;
1008 
1009 		case ESBD_KTHREAD:
1010 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1011 			break;
1012 
1013 		case ESBD_NOERROR:
1014 			PR_ALL("sbd_error_t error code not set\n");
1015 			break;
1016 
1017 		default:
1018 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1019 			break;
1020 		}
1021 	}
1022 	dr_release_sr_handle(srh);
1023 
1024 	return (0);
1025 }
1026 
1027 /*
1028  * Add a new integer value to the end of an array.  Don't allow duplicates to
1029  * appear in the array, and don't allow the array to overflow.  Return the new
1030  * total number of entries in the array.
1031  */
1032 static int
1033 dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
1034 {
1035 	int i;
1036 
1037 	if (arr == NULL)
1038 		return (0);
1039 
1040 	if (idx >= len)
1041 		return (idx);
1042 
1043 	for (i = 0; i < idx; i++) {
1044 		if (arr[i] == val)
1045 			return (idx);
1046 	}
1047 
1048 	arr[idx++] = val;
1049 
1050 	return (idx);
1051 }
1052 
1053 /*
1054  * Construct an sbd_error_t featuring a string representation of an array of
1055  * integers as its e_rsc.
1056  */
1057 static sbd_error_t *
1058 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1059 {
1060 	int		i, n, buf_len, buf_idx, buf_avail;
1061 	char		*dname;
1062 	char		*buf;
1063 	sbd_error_t	*new_sbd_err;
1064 	static char	s_ellipsis[] = "...";
1065 
1066 	if (arr == NULL || idx <= 0)
1067 		return (NULL);
1068 
1069 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1070 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1071 
1072 	/*
1073 	 * This is the total working area of the buffer.  It must be computed
1074 	 * as the size of 'buf', minus reserved space for the null terminator
1075 	 * and the ellipsis string.
1076 	 */
1077 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1078 
1079 	/* Construct a string representation of the array values */
1080 	for (buf_idx = 0, i = 0; i < idx; i++) {
1081 		buf_avail = buf_len - buf_idx;
1082 		if (majors) {
1083 			dname = ddi_major_to_name(arr[i]);
1084 			if (dname) {
1085 				n = snprintf(&buf[buf_idx], buf_avail,
1086 					"%s, ", dname);
1087 			} else {
1088 				n = snprintf(&buf[buf_idx], buf_avail,
1089 					"major %lu, ", arr[i]);
1090 			}
1091 		} else {
1092 			n = snprintf(&buf[buf_idx], buf_avail, "%lu, ",
1093 				arr[i]);
1094 		}
1095 
1096 		/* An ellipsis gets appended when no more values fit */
1097 		if (n >= buf_avail) {
1098 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1099 			break;
1100 		}
1101 
1102 		buf_idx += n;
1103 	}
1104 
1105 	/* If all the contents fit, remove the trailing comma */
1106 	if (n < buf_avail) {
1107 		buf[--buf_idx] = '\0';
1108 		buf[--buf_idx] = '\0';
1109 	}
1110 
1111 	/* Return an sbd_error_t with the buffer and e_code */
1112 	new_sbd_err = drerr_new(1, e_code, buf);
1113 	kmem_free(buf, MAXPATHLEN);
1114 	return (new_sbd_err);
1115 }
1116