xref: /illumos-gate/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision 533affcbc7fc4d0c8132976ea454aaa715fe2307)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2023 Oxide Computer Company
28  */
29 
30 /*
31  * A CPR derivative specifically for starfire/starcat
32  */
33 
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/machparam.h>
37 #include <sys/machsystm.h>
38 #include <sys/ddi.h>
39 #define	SUNDDI_IMPL
40 #include <sys/sunddi.h>
41 #include <sys/sunndi.h>
42 #include <sys/devctl.h>
43 #include <sys/time.h>
44 #include <sys/kmem.h>
45 #include <nfs/lm.h>
46 #include <sys/ddi_impldefs.h>
47 #include <sys/ndi_impldefs.h>
48 #include <sys/obpdefs.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/errno.h>
52 #include <sys/callb.h>
53 #include <sys/clock.h>
54 #include <sys/x_call.h>
55 #include <sys/cpuvar.h>
56 #include <sys/epm.h>
57 #include <sys/vfs.h>
58 
59 #include <sys/cpu_sgnblk_defs.h>
60 #include <sys/dr.h>
61 #include <sys/dr_util.h>
62 
63 #include <sys/promif.h>
64 #include <sys/conf.h>
65 #include <sys/cyclic.h>
66 
/*
 * External functions and data referenced by this file and declared
 * locally.
 */
extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
extern int	is_pseudo_device(dev_info_t *dip);

extern kmutex_t	cpu_lock;
/* Driver names that must not be suspended; see dr_is_unsafe_major(). */
extern dr_unsafe_devs_t dr_unsafe_devs;

/* Forward declarations for the file-local helpers defined below. */
static int		dr_is_real_device(dev_info_t *dip);
static int		dr_is_unsafe_major(major_t major);
static int		dr_bypass_device(char *dname);
static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
				char *alias);
static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
				int majors);
static int		dr_add_int(uint64_t *arr, int idx, int len,
				uint64_t val);

/* Suspend/resume self-test entry point; defined at the end of this file. */
int dr_pt_test_suspend(dr_handle_t *hp);
86 
/*
 * dr_quiesce.c interface
 * NOTE: states used internally by dr_suspend and dr_resume
 *
 * The states are ordered.  dr_suspend() records the phase it is entering
 * in sr_suspend_state, and dr_resume() switches on that value and falls
 * through every lower state to undo the work in reverse order.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,
	DR_SRSTATE_USER,
	DR_SRSTATE_DRIVER,
	DR_SRSTATE_FULL
} suspend_state_t;

/*
 * Per-operation suspend/resume context, allocated by dr_get_sr_handle()
 * and freed by dr_release_sr_handle().
 */
struct dr_sr_handle {
	/* DR handle this suspend/resume belongs to; receives h_err */
	dr_handle_t		*sr_dr_handlep;
	/* held devinfo node whose DDI_SUSPEND failed, else NULL */
	dev_info_t		*sr_failed_dip;
	/* furthest phase entered by dr_suspend() */
	suspend_state_t		sr_suspend_state;
	/* SR_FLAG_* bits */
	uint_t			sr_flags;
	/* major numbers or pids collected for error reporting */
	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
	/* number of valid entries in sr_err_ints */
	int			sr_err_idx;
};

/* hw watchdog was disabled by dr_suspend() and must be re-armed on resume */
#define	SR_FLAG_WATCHDOG	0x1
108 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * List of drivers to bypass when performing a suspend.
 *
 * The list is terminated by an empty string; dr_bypass_device() stops
 * scanning at the first entry whose first character is '\0'.
 */
static char *dr_bypass_list[] = {
	""
};


/*
 * While SKIP_SYNC is defined, the "#ifndef SKIP_SYNC" sections of
 * dr_suspend() (vfs_sync() and sync()) are compiled out.
 */
#define		SKIP_SYNC	/* bypass sync ops in dr_suspend */
120 
/*
 * dr_skip_user_threads is used to control if user threads should
 * be suspended.  If dr_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, dr_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and dr_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of dr_check_user_stop_result
 * and dr_allow_blocked_threads, even though it might not make much
 * sense to not allow blocked threads when we don't even check stop
 * result.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */

#define	DR_CPU_LOOP_MSEC	1000
138 
/*
 * Disable kernel preemption and then suspend the cyclic subsystem.
 * The caller must hold cpu_lock.  dr_enable_intr() undoes both steps in
 * the opposite order.
 */
static void
dr_stop_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();
	cyclic_suspend();
}
147 
/*
 * Counterpart of dr_stop_intr(): resume the cyclic subsystem and then
 * re-enable kernel preemption.  The caller must hold cpu_lock.
 */
static void
dr_enable_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cyclic_resume();
	kpreempt_enable();
}
156 
157 dr_sr_handle_t *
158 dr_get_sr_handle(dr_handle_t *hp)
159 {
160 	dr_sr_handle_t *srh;
161 
162 	srh = GETSTRUCT(dr_sr_handle_t, 1);
163 	srh->sr_dr_handlep = hp;
164 
165 	return (srh);
166 }
167 
/*
 * Free a handle obtained from dr_get_sr_handle().  By this point the
 * hold on any failed devinfo node must already have been dropped (see
 * dr_resume_devices()), which is what the assertion checks.
 */
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
	ASSERT(srh->sr_failed_dip == NULL);
	FREESTRUCT(srh, dr_sr_handle_t, 1);
}
174 
175 static int
176 dr_is_real_device(dev_info_t *dip)
177 {
178 	struct regspec *regbuf = NULL;
179 	int length = 0;
180 	int rc;
181 
182 	if (ddi_get_driver(dip) == NULL)
183 		return (0);
184 
185 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
186 		return (1);
187 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
188 		return (0);
189 
190 	/*
191 	 * now the general case
192 	 */
193 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
194 	    (caddr_t)&regbuf, &length);
195 	ASSERT(rc != DDI_PROP_NO_MEMORY);
196 	if (rc != DDI_PROP_SUCCESS) {
197 		return (0);
198 	} else {
199 		if ((length > 0) && (regbuf != NULL))
200 			kmem_free(regbuf, length);
201 		return (1);
202 	}
203 }
204 
205 static int
206 dr_is_unsafe_major(major_t major)
207 {
208 	char    *dname, **cpp;
209 	int	i, ndevs;
210 
211 	if ((dname = ddi_major_to_name(major)) == NULL) {
212 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
213 		return (0);
214 	}
215 
216 	ndevs = dr_unsafe_devs.ndevs;
217 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
218 		if (strcmp(dname, *cpp++) == 0)
219 			return (1);
220 	}
221 	return (0);
222 }
223 
224 static int
225 dr_bypass_device(char *dname)
226 {
227 	int i;
228 	char **lname;
229 
230 	if (dname == NULL)
231 		return (0);
232 
233 	/* check the bypass list */
234 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
235 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
236 			return (1);
237 	}
238 	return (0);
239 }
240 
241 static int
242 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
243 {
244 	major_t	devmajor;
245 	char	*aka, *name;
246 
247 	*buffer = *alias = 0;
248 
249 	if (dip == NULL)
250 		return (-1);
251 
252 	if ((name = ddi_get_name(dip)) == NULL)
253 		name = "<null name>";
254 
255 	aka = name;
256 
257 	if ((devmajor = ddi_name_to_major(aka)) != -1)
258 		aka = ddi_major_to_name(devmajor);
259 
260 	(void) strcpy(buffer, name);
261 
262 	if (strcmp(name, aka))
263 		(void) strcpy(alias, aka);
264 	else
265 		*alias = 0;
266 
267 	return (0);
268 }
269 
/*
 * Argument block handed to dr_check_dip() through the device-tree walk
 * callbacks.  Any of the pointers may be NULL, in which case the
 * corresponding result is not collected.
 */
struct dr_ref {
	int		*refcount;		/* running total of references */
	int		*refcount_non_gldv3;	/* refs on non-GLDv3 net drivers */
	uint64_t	*arr;			/* majors of unsafe drivers */
	int		*idx;			/* number of entries used in arr */
	int		len;			/* capacity of arr */
};
277 
278 /* ARGSUSED */
279 static int
280 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
281 {
282 	major_t		major;
283 	char		*dname;
284 	struct dr_ref	*rp = (struct dr_ref *)arg;
285 
286 	if (dip == NULL)
287 		return (DDI_WALK_CONTINUE);
288 
289 	if (!dr_is_real_device(dip))
290 		return (DDI_WALK_CONTINUE);
291 
292 	dname = ddi_binding_name(dip);
293 
294 	if (dr_bypass_device(dname))
295 		return (DDI_WALK_CONTINUE);
296 
297 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
298 		if (ref && rp->refcount) {
299 			*rp->refcount += ref;
300 			PR_QR("\n  %s (major# %d) is referenced(%u)\n", dname,
301 			    major, ref);
302 		}
303 		if (ref && rp->refcount_non_gldv3) {
304 			if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
305 				*rp->refcount_non_gldv3 += ref;
306 		}
307 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
308 			PR_QR("\n  %s (major# %d) not hotpluggable\n", dname,
309 			    major);
310 			if (rp->arr != NULL && rp->idx != NULL)
311 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
312 				    rp->len, (uint64_t)major);
313 		}
314 	}
315 	return (DDI_WALK_CONTINUE);
316 }
317 
/*
 * Two-argument adapter so that dr_check_dip() can be used as a
 * ddi_walk_devs() callback.  Passing a reference count of 0 means only
 * the unsafe-driver check has any effect.
 */
static int
dr_check_unsafe_major(dev_info_t *dip, void *arg)
{
	return (dr_check_dip(dip, arg, 0));
}
323 
324 
325 /*ARGSUSED*/
326 void
327 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
328     uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
329 {
330 	struct dr_ref bref = {0};
331 
332 	if (dip == NULL)
333 		return;
334 
335 	bref.refcount = refcount;
336 	bref.refcount_non_gldv3 = refcount_non_gldv3;
337 	bref.arr = arr;
338 	bref.idx = idx;
339 	bref.len = len;
340 
341 	ASSERT(e_ddi_branch_held(dip));
342 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
343 }
344 
/*
 * Suspend 'dip', its siblings and all of their descendants, children
 * first, by calling devi_detach(DDI_SUSPEND) on every real device that is
 * neither on the bypass list nor rejected by drmach_verify_sr().
 *
 * The "dip" argument's parent (if it exists) must be held busy.
 *
 * Returns DDI_SUCCESS when everything was suspended.  The walk stops at
 * the first failure: the failing node is held and recorded in
 * srh->sr_failed_dip, its major number is appended to srh->sr_err_ints,
 * and an ESBD_SUSPEND error is posted on the DR handle.  The node that
 * failed returns DDI_FAILURE, while each enclosing recursion level
 * returns ENXIO; the caller in this file only compares the result with
 * DDI_SUCCESS.
 */
static int
dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	major_t		major;
	char		*dname;

	/*
	 * If dip is the root node, it has no siblings and it is
	 * always held. If dip is not the root node, dr_suspend_devices()
	 * will be invoked with the parent held busy.
	 */
	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		char	d_name[40], d_alias[40], *d_info;

		/* suspend the subtree below this node before the node itself */
		ndi_devi_enter(dip);
		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
			ndi_devi_exit(dip);
			return (ENXIO);
		}
		ndi_devi_exit(dip);

		if (!dr_is_real_device(dip))
			continue;

		major = (major_t)-1;
		if ((dname = ddi_binding_name(dip)) != NULL)
			major = ddi_name_to_major(dname);

		if (dr_bypass_device(dname)) {
			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
			    major);
			continue;
		}

		if (drmach_verify_sr(dip, 1)) {
			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
			    major);
			continue;
		}

		if ((d_info = ddi_get_name_addr(dip)) == NULL)
			d_info = "<null>";

		/* d_name stays empty if the name cannot be resolved */
		d_name[0] = 0;
		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
			if (d_alias[0] != 0) {
				prom_printf("\tsuspending %s@%s (aka %s)\n",
				    d_name, d_info, d_alias);
			} else {
				prom_printf("\tsuspending %s@%s\n", d_name,
				    d_info);
			}
		} else {
			prom_printf("\tsuspending %s@%s\n", dname, d_info);
		}

		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
			prom_printf("\tFAILED to suspend %s@%s\n",
			    d_name[0] ? d_name : dname, d_info);

			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
			    srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);

			/*
			 * Hold the failing node; dr_resume_devices() uses
			 * sr_failed_dip to find where the suspend stopped
			 * and releases this hold.
			 */
			ndi_hold_devi(dip);
			srh->sr_failed_dip = dip;

			handle = srh->sr_dr_handlep;
			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
			    d_name[0] ? d_name : dname, d_info);

			return (DDI_FAILURE);
		}
	}

	return (DDI_SUCCESS);
}
425 
426 static void
427 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
428 {
429 	dr_handle_t	*handle;
430 	dev_info_t	*dip, *next, *last = NULL;
431 	major_t		major;
432 	char		*bn;
433 
434 	major = (major_t)-1;
435 
436 	/* attach in reverse device tree order */
437 	while (last != start) {
438 		dip = start;
439 		next = ddi_get_next_sibling(dip);
440 		while (next != last && dip != srh->sr_failed_dip) {
441 			dip = next;
442 			next = ddi_get_next_sibling(dip);
443 		}
444 		if (dip == srh->sr_failed_dip) {
445 			/* release hold acquired in dr_suspend_devices() */
446 			srh->sr_failed_dip = NULL;
447 			ndi_rele_devi(dip);
448 		} else if (dr_is_real_device(dip) &&
449 		    srh->sr_failed_dip == NULL) {
450 
451 			if ((bn = ddi_binding_name(dip)) != NULL) {
452 				major = ddi_name_to_major(bn);
453 			} else {
454 				bn = "<null>";
455 			}
456 			if (!dr_bypass_device(bn) &&
457 			    !drmach_verify_sr(dip, 0)) {
458 				char	d_name[40], d_alias[40], *d_info;
459 
460 				d_name[0] = 0;
461 				d_info = ddi_get_name_addr(dip);
462 				if (d_info == NULL)
463 					d_info = "<null>";
464 
465 				if (!dr_resolve_devname(dip, d_name, d_alias)) {
466 					if (d_alias[0] != 0) {
467 						prom_printf("\tresuming "
468 						    "%s@%s (aka %s)\n", d_name,
469 						    d_info, d_alias);
470 					} else {
471 						prom_printf("\tresuming "
472 						    "%s@%s\n", d_name, d_info);
473 					}
474 				} else {
475 					prom_printf("\tresuming %s@%s\n", bn,
476 					    d_info);
477 				}
478 
479 				if (devi_attach(dip, DDI_RESUME) !=
480 				    DDI_SUCCESS) {
481 					/*
482 					 * Print a console warning,
483 					 * set an e_code of ESBD_RESUME,
484 					 * and save the driver major
485 					 * number in the e_rsc.
486 					 */
487 					prom_printf("\tFAILED to resume %s@%s",
488 					    d_name[0] ? d_name : bn, d_info);
489 
490 					srh->sr_err_idx =
491 					    dr_add_int(srh->sr_err_ints,
492 					    srh->sr_err_idx, DR_MAX_ERR_INT,
493 					    (uint64_t)major);
494 
495 					handle = srh->sr_dr_handlep;
496 
497 					dr_op_err(CE_IGNORE, handle,
498 					    ESBD_RESUME, "%s@%s",
499 					    d_name[0] ? d_name : bn, d_info);
500 				}
501 			}
502 		}
503 
504 		/* Hold parent busy while walking its children */
505 		ndi_devi_enter(dip);
506 		dr_resume_devices(ddi_get_child(dip), srh);
507 		ndi_devi_exit(dip);
508 		last = dip;
509 	}
510 }
511 
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 *
 * The test is: thread state is TS_SLEEP, it has a wait channel, its
 * t_astflag is set and TP_CHKPT is set in t_proc_flag.  The argument is
 * evaluated more than once, so it must have no side effects.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
523 
/*
 * Ask every user thread in the system to stop for the duration of the
 * DR operation.
 *
 * Each attempt makes two passes over the thread list: the first flags
 * every user thread with TP_CHKPT and posts an AST (or, for a thread that
 * is already stopped, clears TS_RESUME so that it stays stopped); the
 * second checks whether every user thread is now stopped or, when
 * dr_allow_blocked_threads is set, "virtually stopped" (DR_VSTOPPED).
 * Up to DR_UTSTOP_RETRY attempts are made.
 *
 * Returns DDI_SUCCESS if all threads stopped or if dr_skip_user_threads
 * is set.  Otherwise returns ESRCH, with the pids of the offending
 * processes (collected on the final attempt) reported through an
 * ESBD_UTHREAD error in the DR handle's h_err.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
	int		count;
	int		bailout;
	dr_handle_t	*handle = srh->sr_dr_handlep;
	static fn_t	f = "dr_stop_user_threads";
	kthread_id_t	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	DR_UTSTOP_RETRY	4
#define	DR_UTSTOP_WAIT	hz

	if (dr_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	srh->sr_err_idx = 0;
	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Both locks are dropped around the call to
				 * add_one_utstop() and re-taken afterwards.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/*
		 * let everything catch up; the timeout argument grows with
		 * the square of the attempt number and is 0 on the first
		 * attempt
		 */
		utstop_timedwait(count * count * DR_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(dr_allow_blocked_threads &&
			    DR_VSTOPPED(tp))) {
				bailout = 1;
				/* only report on the final attempt */
				if (count == DR_UTSTOP_RETRY - 1) {
					/*
					 * save the pid for later reporting
					 */
					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)p->p_pid);

					cmn_err(CE_WARN, "%s: "
					    "failed to stop thread: "
					    "process=%s, pid=%d",
					    f, p->p_user.u_psargs, p->p_pid);

					PR_QR("%s: failed to stop thread: "
					    "process=%s, pid=%d, t_id=0x%p, "
					    "t_state=0x%x, t_proc_flag=0x%x, "
					    "t_schedflag=0x%x\n",
					    f, p->p_user.u_psargs, p->p_pid,
					    (void *)tp, tp->t_state,
					    tp->t_proc_flag, tp->t_schedflag);
				}

			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
		    srh->sr_err_idx, 0);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
657 
658 static void
659 dr_start_user_threads(void)
660 {
661 	kthread_id_t tp;
662 
663 	mutex_enter(&pidlock);
664 
665 	/* walk all threads and release them */
666 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
667 		proc_t *p = ttoproc(tp);
668 
669 		/* skip kernel threads */
670 		if (ttoproc(tp)->p_as == &kas)
671 			continue;
672 
673 		mutex_enter(&p->p_lock);
674 		tp->t_proc_flag &= ~TP_CHKPT;
675 		mutex_exit(&p->p_lock);
676 
677 		thread_lock(tp);
678 		if (CPR_ISTOPPED(tp)) {
679 			/* back on the runq */
680 			tp->t_schedflag |= TS_RESUME;
681 			setrun_locked(tp);
682 		}
683 		thread_unlock(tp);
684 	}
685 
686 	mutex_exit(&pidlock);
687 }
688 
689 static void
690 dr_signal_user(int sig)
691 {
692 	struct proc *p;
693 
694 	mutex_enter(&pidlock);
695 
696 	for (p = practive; p != NULL; p = p->p_next) {
697 		/* only user threads */
698 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
699 		    p == proc_init || p == ttoproc(curthread))
700 			continue;
701 
702 		mutex_enter(&p->p_lock);
703 		sigtoproc(p, NULL, sig);
704 		mutex_exit(&p->p_lock);
705 	}
706 
707 	mutex_exit(&pidlock);
708 
709 	/* add a bit of delay */
710 	delay(hz);
711 }
712 
/*
 * Undo whatever dr_suspend() managed to do.
 *
 * srh->sr_suspend_state records the furthest phase dr_suspend() entered.
 * The switch enters at that phase and falls through all earlier ones, so
 * the system is brought back in reverse order: cpus/clock/watchdog (FULL),
 * drivers and lock manager (DRIVER), user threads (USER), and finally
 * SIGTHAW to user processes (every state).
 *
 * In the DR_SRSTATE_FULL case the caller must hold cpu_lock (as left by
 * a successful dr_suspend()); it is released here.
 *
 * Driver resume failures are reported through an ESBD_RESUME error in the
 * DR handle's h_err.
 */
void
dr_resume(dr_sr_handle_t *srh)
{
	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
		/*
		 * Update the signature block.
		 * If cpus are not paused, this can be done now.
		 * See comments below.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);
	}

	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		dr_enable_intr();	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * Update the signature block.
		 * This must not be done while cpus are paused, since on
		 * Starcat the cpu signature update acquires an adaptive
		 * mutex in the iosram driver. Blocking with cpus paused
		 * can lead to deadlock.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		/* report the majors of any drivers that failed to resume */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);

	prom_printf("DR: resume COMPLETED\n");
}
823 
/*
 * Quiesce the system for a DR operation.
 *
 * Phases, in order:
 *   1. stop user threads (DR_SRSTATE_USER);
 *   2. unless SBD_FLAG_FORCE is set, refuse to continue if any attached
 *      driver is on the unsafe list (ESBD_UNSAFE);
 *   3. suspend the lock manager;
 *   4. suspend all drivers (DR_SRSTATE_DRIVER);
 *   5. disable the hw watchdog if it was active, pause the other cpus
 *      and stop cyclics (DR_SRSTATE_FULL).
 *
 * On success, returns DDI_SUCCESS with cpu_lock held, cpus paused and
 * preemption disabled; the caller must call dr_resume() to undo this.
 * On failure, dr_resume() has already been called to back out the
 * completed phases, an error may have been recorded in the DR handle's
 * h_err, and a non-DDI_SUCCESS value is returned.
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	int		force;
	int		dev_errs_idx;
	uint64_t	dev_errs[DR_MAX_ERR_INT];
	int		rc = DDI_SUCCESS;

	handle = srh->sr_dr_handlep;

	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
	    CPU->cpu_id);

	prom_printf("\nDR: suspending user threads...\n");
	srh->sr_suspend_state = DR_SRSTATE_USER;
	/* a failure to stop threads is fatal only if the tunable says so */
	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    dr_check_user_stop_result) {
		dr_resume(srh);
		return (rc);
	}

	if (!force) {
		struct dr_ref drc = {0};

		prom_printf("\nDR: checking devices...\n");
		dev_errs_idx = 0;

		/* only the unsafe-major list is collected on this walk */
		drc.arr = dev_errs;
		drc.idx = &dev_errs_idx;
		drc.len = DR_MAX_ERR_INT;

		/*
		 * Since the root node can never go away, it
		 * doesn't have to be held.
		 */
		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
		if (dev_errs_idx) {
			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
			    dev_errs_idx, 1);
			dr_resume(srh);
			return (DDI_FAILURE);
		}
		PR_QR("done\n");
	} else {
		prom_printf("\nDR: dr_suspend invoked with force flag\n");
	}

#ifndef	SKIP_SYNC
	/*
	 * This sync swaps out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef	SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();
#endif

	/*
	 * now suspend drivers
	 */
	prom_printf("DR: suspending drivers...\n");
	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
	srh->sr_err_idx = 0;
	/* No parent to hold busy */
	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}
		dr_resume(srh);
		return (rc);
	}

	drmach_suspend_last();

	/*
	 * finally, grab all cpus
	 */
	srh->sr_suspend_state = DR_SRSTATE_FULL;

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		srh->sr_flags |= SR_FLAG_WATCHDOG;
	} else {
		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
	}

	/*
	 * Update the signature block.
	 * This must be done before cpus are paused, since on Starcat the
	 * cpu signature update acquires an adaptive mutex in the iosram driver.
	 * Blocking with cpus paused can lead to deadlock.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);

	/* cpu_lock stays held on return; dr_resume() releases it */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL, NULL);
	dr_stop_intr();

	return (rc);
}
945 
946 int
947 dr_pt_test_suspend(dr_handle_t *hp)
948 {
949 	dr_sr_handle_t *srh;
950 	int		err;
951 	uint_t		psmerr;
952 	static fn_t	f = "dr_pt_test_suspend";
953 
954 	PR_QR("%s...\n", f);
955 
956 	srh = dr_get_sr_handle(hp);
957 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
958 		dr_resume(srh);
959 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
960 			PR_QR("%s: error on dr_resume()", f);
961 			switch (psmerr) {
962 			case ESBD_RESUME:
963 				PR_QR("Couldn't resume devices: %s\n",
964 				    DR_GET_E_RSC(hp->h_err));
965 				break;
966 
967 			case ESBD_KTHREAD:
968 				PR_ALL("psmerr is ESBD_KTHREAD\n");
969 				break;
970 			default:
971 				PR_ALL("Resume error unknown = %d\n", psmerr);
972 				break;
973 			}
974 		}
975 	} else {
976 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
977 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
978 		switch (psmerr) {
979 		case ESBD_UNSAFE:
980 			PR_ALL("Unsafe devices (major #): %s\n",
981 			    DR_GET_E_RSC(hp->h_err));
982 			break;
983 
984 		case ESBD_RTTHREAD:
985 			PR_ALL("RT threads (PIDs): %s\n",
986 			    DR_GET_E_RSC(hp->h_err));
987 			break;
988 
989 		case ESBD_UTHREAD:
990 			PR_ALL("User threads (PIDs): %s\n",
991 			    DR_GET_E_RSC(hp->h_err));
992 			break;
993 
994 		case ESBD_SUSPEND:
995 			PR_ALL("Non-suspendable devices (major #): %s\n",
996 			    DR_GET_E_RSC(hp->h_err));
997 			break;
998 
999 		case ESBD_RESUME:
1000 			PR_ALL("Could not resume devices (major #): %s\n",
1001 			    DR_GET_E_RSC(hp->h_err));
1002 			break;
1003 
1004 		case ESBD_KTHREAD:
1005 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1006 			break;
1007 
1008 		case ESBD_NOERROR:
1009 			PR_ALL("sbd_error_t error code not set\n");
1010 			break;
1011 
1012 		default:
1013 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1014 			break;
1015 		}
1016 	}
1017 	dr_release_sr_handle(srh);
1018 
1019 	return (0);
1020 }
1021 
/*
 * Append 'val' to the first 'idx' entries of 'arr', which has room for
 * 'len' entries in total.
 *
 * The value is not stored if the array is already full or if it is
 * already present among the first 'idx' entries.  Returns the resulting
 * number of entries: idx + 1 when the value was stored, idx when it was
 * not, and 0 when 'arr' is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int scan = 0;

	if (arr == NULL)
		return (0);

	/* no room left: drop the value */
	if (idx >= len)
		return (idx);

	/* duplicates are not recorded twice */
	while (scan < idx) {
		if (arr[scan] == val)
			return (idx);
		scan++;
	}

	arr[idx] = val;

	return (idx + 1);
}
1047 
1048 /*
1049  * Construct an sbd_error_t featuring a string representation of an array of
1050  * integers as its e_rsc.
1051  */
1052 static sbd_error_t *
1053 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1054 {
1055 	int		i, n, buf_len, buf_idx, buf_avail;
1056 	char		*dname;
1057 	char		*buf;
1058 	sbd_error_t	*new_sbd_err;
1059 	static char	s_ellipsis[] = "...";
1060 
1061 	if (arr == NULL || idx <= 0)
1062 		return (NULL);
1063 
1064 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1065 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1066 
1067 	/*
1068 	 * This is the total working area of the buffer.  It must be computed
1069 	 * as the size of 'buf', minus reserved space for the null terminator
1070 	 * and the ellipsis string.
1071 	 */
1072 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1073 
1074 	/* Construct a string representation of the array values */
1075 	for (buf_idx = 0, i = 0; i < idx; i++) {
1076 		buf_avail = buf_len - buf_idx;
1077 		if (majors) {
1078 			dname = ddi_major_to_name(arr[i]);
1079 			if (dname) {
1080 				n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1081 				    dname);
1082 			} else {
1083 				n = snprintf(&buf[buf_idx], buf_avail,
1084 				    "major %lu, ", arr[i]);
1085 			}
1086 		} else {
1087 			n = snprintf(&buf[buf_idx], buf_avail, "%lu, ", arr[i]);
1088 		}
1089 
1090 		/* An ellipsis gets appended when no more values fit */
1091 		if (n >= buf_avail) {
1092 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1093 			break;
1094 		}
1095 
1096 		buf_idx += n;
1097 	}
1098 
1099 	/* If all the contents fit, remove the trailing comma */
1100 	if (n < buf_avail) {
1101 		buf[--buf_idx] = '\0';
1102 		buf[--buf_idx] = '\0';
1103 	}
1104 
1105 	/* Return an sbd_error_t with the buffer and e_code */
1106 	new_sbd_err = drerr_new(1, e_code, buf);
1107 	kmem_free(buf, MAXPATHLEN);
1108 	return (new_sbd_err);
1109 }
1110