xref: /titanic_52/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision 2dea4eed7ad1c66ae4770263aa2911815a8b86eb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * A CPR derivative specifically for starfire/starcat
29  */
30 
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/machparam.h>
34 #include <sys/machsystm.h>
35 #include <sys/ddi.h>
36 #define	SUNDDI_IMPL
37 #include <sys/sunddi.h>
38 #include <sys/sunndi.h>
39 #include <sys/devctl.h>
40 #include <sys/time.h>
41 #include <sys/kmem.h>
42 #include <nfs/lm.h>
43 #include <sys/ddi_impldefs.h>
44 #include <sys/ndi_impldefs.h>
45 #include <sys/obpdefs.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/errno.h>
49 #include <sys/callb.h>
50 #include <sys/clock.h>
51 #include <sys/x_call.h>
52 #include <sys/cpuvar.h>
53 #include <sys/epm.h>
54 #include <sys/vfs.h>
55 
56 #include <sys/cpu_sgnblk_defs.h>
57 #include <sys/dr.h>
58 #include <sys/dr_util.h>
59 
60 #include <sys/promif.h>
61 #include <sys/conf.h>
62 #include <sys/cyclic.h>
63 
64 extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
65 extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
66 extern int	is_pseudo_device(dev_info_t *dip);
67 
68 extern kmutex_t	cpu_lock;
69 extern dr_unsafe_devs_t dr_unsafe_devs;
70 
71 static int		dr_is_real_device(dev_info_t *dip);
72 static int		dr_is_unsafe_major(major_t major);
73 static int		dr_bypass_device(char *dname);
74 static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
75 static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
76 				char *alias);
77 static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
78 				int majors);
79 static int		dr_add_int(uint64_t *arr, int idx, int len,
80 				uint64_t val);
81 
82 int dr_pt_test_suspend(dr_handle_t *hp);
83 
84 /*
85  * dr_quiesce.c interface
86  * NOTE: states used internally by dr_suspend and dr_resume
87  */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* dr_resume() only sends SIGTHAW */
	DR_SRSTATE_USER,	/* set before user threads are stopped */
	DR_SRSTATE_DRIVER,	/* set before drivers are suspended */
	DR_SRSTATE_FULL		/* set before the cpus are paused */
} suspend_state_t;
94 
/*
 * Per-operation suspend/resume context, created by dr_get_sr_handle() and
 * destroyed by dr_release_sr_handle().
 */
struct dr_sr_handle {
	/* DR handle this operation reports its errors through */
	dr_handle_t		*sr_dr_handlep;
	/*
	 * Device whose DDI_SUSPEND failed.  dr_suspend_devices() takes a
	 * hold on it and dr_resume_devices() drops that hold.
	 */
	dev_info_t		*sr_failed_dip;
	/* how far dr_suspend() progressed; consumed by dr_resume() */
	suspend_state_t		sr_suspend_state;
	/* SR_FLAG_* bits */
	uint_t			sr_flags;
	/* driver majors or pids collected for error reporting */
	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
	/* number of valid entries in sr_err_ints */
	int			sr_err_idx;
};

/* sr_flags: dr_suspend() cleared the watchdog; dr_resume() must re-arm it */
#define	SR_FLAG_WATCHDOG	0x1
105 
106 /*
107  * XXX
108  * This hack will go away before RTI.  Just for testing.
109  * List of drivers to bypass when performing a suspend.
110  */
static char *dr_bypass_list[] = {
	/* the empty string terminates the list; see dr_bypass_device() */
	""
};
114 
115 
116 #define		SKIP_SYNC	/* bypass sync ops in dr_suspend */
117 
118 /*
119  * dr_skip_user_threads is used to control if user threads should
120  * be suspended.  If dr_skip_user_threads is true, the rest of the
121  * flags are not used; if it is false, dr_check_user_stop_result
122  * will be used to control whether or not we need to check suspend
123  * result, and dr_allow_blocked_threads will be used to control
124  * whether or not we allow suspend to continue if there are blocked
125  * threads.  We allow all combinations of dr_check_user_stop_result
 * and dr_allow_blocked_threads, even though it might not make much
 * sense to not allow blocked threads when we don't even check stop
128  * result.
129  */
130 static int	dr_skip_user_threads = 0;	/* default to FALSE */
131 static int	dr_check_user_stop_result = 1;	/* default to TRUE */
132 static int	dr_allow_blocked_threads = 1;	/* default to TRUE */
133 
134 #define	DR_CPU_LOOP_MSEC	1000
135 
/*
 * Last step of dr_suspend(): with cpu_lock held, disable kernel preemption
 * and suspend the cyclic subsystem.  Undone by dr_enable_intr().
 */
static void
dr_stop_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();
	cyclic_suspend();
}
144 
/*
 * Reverse of dr_stop_intr(): with cpu_lock held, resume the cyclic
 * subsystem and then re-enable kernel preemption (the opposite order of
 * dr_stop_intr()).
 */
static void
dr_enable_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cyclic_resume();
	kpreempt_enable();
}
153 
154 dr_sr_handle_t *
155 dr_get_sr_handle(dr_handle_t *hp)
156 {
157 	dr_sr_handle_t *srh;
158 
159 	srh = GETSTRUCT(dr_sr_handle_t, 1);
160 	srh->sr_dr_handlep = hp;
161 
162 	return (srh);
163 }
164 
/*
 * Free a handle obtained from dr_get_sr_handle().  Any hold recorded in
 * sr_failed_dip must already have been dropped (dr_resume_devices() does
 * this), hence the assertion.
 */
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
	ASSERT(srh->sr_failed_dip == NULL);
	FREESTRUCT(srh, dr_sr_handle_t, 1);
}
171 
172 static int
173 dr_is_real_device(dev_info_t *dip)
174 {
175 	struct regspec *regbuf = NULL;
176 	int length = 0;
177 	int rc;
178 
179 	if (ddi_get_driver(dip) == NULL)
180 		return (0);
181 
182 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
183 		return (1);
184 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
185 		return (0);
186 
187 	/*
188 	 * now the general case
189 	 */
190 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
191 		(caddr_t)&regbuf, &length);
192 	ASSERT(rc != DDI_PROP_NO_MEMORY);
193 	if (rc != DDI_PROP_SUCCESS) {
194 		return (0);
195 	} else {
196 		if ((length > 0) && (regbuf != NULL))
197 			kmem_free(regbuf, length);
198 		return (1);
199 	}
200 }
201 
202 static int
203 dr_is_unsafe_major(major_t major)
204 {
205 	char    *dname, **cpp;
206 	int	i, ndevs;
207 
208 	if ((dname = ddi_major_to_name(major)) == NULL) {
209 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
210 		return (0);
211 	}
212 
213 	ndevs = dr_unsafe_devs.ndevs;
214 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
215 		if (strcmp(dname, *cpp++) == 0)
216 			return (1);
217 	}
218 	return (0);
219 }
220 
221 static int
222 dr_bypass_device(char *dname)
223 {
224 	int i;
225 	char **lname;
226 	/* check the bypass list */
227 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
228 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
229 			return (1);
230 	}
231 	return (0);
232 }
233 
234 static int
235 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
236 {
237 	major_t	devmajor;
238 	char	*aka, *name;
239 
240 	*buffer = *alias = 0;
241 
242 	if (dip == NULL)
243 		return (-1);
244 
245 	if ((name = ddi_get_name(dip)) == NULL)
246 		name = "<null name>";
247 
248 	aka = name;
249 
250 	if ((devmajor = ddi_name_to_major(aka)) != -1)
251 		aka = ddi_major_to_name(devmajor);
252 
253 	(void) strcpy(buffer, name);
254 
255 	if (strcmp(name, aka))
256 		(void) strcpy(alias, aka);
257 	else
258 		*alias = 0;
259 
260 	return (0);
261 }
262 
/*
 * Argument block handed to dr_check_dip() through the device tree walkers.
 * Any of the pointers may be NULL, in which case the corresponding result
 * is not collected.
 */
struct dr_ref {
	int		*refcount;	/* running total of references */
	uint64_t	*arr;		/* majors of unsafe attached devices */
	int		*idx;		/* number of valid entries in arr */
	int		len;		/* capacity of arr */
};
269 
270 /* ARGSUSED */
271 static int
272 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
273 {
274 	major_t		major;
275 	char		*dname;
276 	struct dr_ref	*rp = (struct dr_ref *)arg;
277 
278 	if (dip == NULL)
279 		return (DDI_WALK_CONTINUE);
280 
281 	if (!dr_is_real_device(dip))
282 		return (DDI_WALK_CONTINUE);
283 
284 	dname = ddi_binding_name(dip);
285 
286 	if (dr_bypass_device(dname))
287 		return (DDI_WALK_CONTINUE);
288 
289 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
290 		if (ref && rp->refcount) {
291 			*rp->refcount += ref;
292 			PR_QR("\n  %s (major# %d) is referenced(%u)\n",
293 				dname, major, ref);
294 		}
295 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
296 			PR_QR("\n  %s (major# %d) not hotpluggable\n",
297 				dname, major);
298 			if (rp->arr != NULL && rp->idx != NULL)
299 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
300 					rp->len, (uint64_t)major);
301 		}
302 	}
303 	return (DDI_WALK_CONTINUE);
304 }
305 
/*
 * ddi_walk_devs() callback used by dr_suspend(): runs dr_check_dip() with a
 * reference count of zero, so only the unsafe-driver check takes effect.
 */
static int
dr_check_unsafe_major(dev_info_t *dip, void *arg)
{
	return (dr_check_dip(dip, arg, 0));
}
311 
312 
313 /*ARGSUSED*/
314 void
315 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
316     uint64_t *arr, int *idx, int len)
317 {
318 	struct dr_ref bref = {0};
319 
320 	if (dip == NULL)
321 		return;
322 
323 	bref.refcount = refcount;
324 	bref.arr = arr;
325 	bref.idx = idx;
326 	bref.len = len;
327 
328 	ASSERT(e_ddi_branch_held(dip));
329 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
330 }
331 
332 /*
333  * The "dip" argument's parent (if it exists) must be held busy.
334  */
335 static int
336 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
337 {
338 	dr_handle_t	*handle;
339 	major_t		major;
340 	char		*dname;
341 	int		circ;
342 
343 	/*
344 	 * If dip is the root node, it has no siblings and it is
345 	 * always held. If dip is not the root node, dr_suspend_devices()
346 	 * will be invoked with the parent held busy.
347 	 */
348 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
349 		char	d_name[40], d_alias[40], *d_info;
350 
351 		ndi_devi_enter(dip, &circ);
352 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
353 			ndi_devi_exit(dip, circ);
354 			return (ENXIO);
355 		}
356 		ndi_devi_exit(dip, circ);
357 
358 		if (!dr_is_real_device(dip))
359 			continue;
360 
361 		major = (major_t)-1;
362 		if ((dname = ddi_binding_name(dip)) != NULL)
363 			major = ddi_name_to_major(dname);
364 
365 		if (dr_bypass_device(dname)) {
366 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
367 				major);
368 			continue;
369 		}
370 
371 		if (drmach_verify_sr(dip, 1)) {
372 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
373 				major);
374 			continue;
375 		}
376 
377 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
378 			d_info = "<null>";
379 
380 		d_name[0] = 0;
381 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
382 			if (d_alias[0] != 0) {
383 				prom_printf("\tsuspending %s@%s (aka %s)\n",
384 					d_name, d_info, d_alias);
385 			} else {
386 				prom_printf("\tsuspending %s@%s\n",
387 					d_name, d_info);
388 			}
389 		} else {
390 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
391 		}
392 
393 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
394 			prom_printf("\tFAILED to suspend %s@%s\n",
395 				d_name[0] ? d_name : dname, d_info);
396 
397 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
398 				srh->sr_err_idx, DR_MAX_ERR_INT,
399 				(uint64_t)major);
400 
401 			ndi_hold_devi(dip);
402 			srh->sr_failed_dip = dip;
403 
404 			handle = srh->sr_dr_handlep;
405 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
406 				d_name[0] ? d_name : dname, d_info);
407 
408 			return (DDI_FAILURE);
409 		}
410 	}
411 
412 	return (DDI_SUCCESS);
413 }
414 
415 static void
416 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
417 {
418 	dr_handle_t	*handle;
419 	dev_info_t	*dip, *next, *last = NULL;
420 	major_t		major;
421 	char		*bn;
422 	int		circ;
423 
424 	major = (major_t)-1;
425 
426 	/* attach in reverse device tree order */
427 	while (last != start) {
428 		dip = start;
429 		next = ddi_get_next_sibling(dip);
430 		while (next != last && dip != srh->sr_failed_dip) {
431 			dip = next;
432 			next = ddi_get_next_sibling(dip);
433 		}
434 		if (dip == srh->sr_failed_dip) {
435 			/* release hold acquired in dr_suspend_devices() */
436 			srh->sr_failed_dip = NULL;
437 			ndi_rele_devi(dip);
438 		} else if (dr_is_real_device(dip) &&
439 				srh->sr_failed_dip == NULL) {
440 
441 			if ((bn = ddi_binding_name(dip)) != NULL) {
442 				major = ddi_name_to_major(bn);
443 			} else {
444 				bn = "<null>";
445 			}
446 			if (!dr_bypass_device(bn) &&
447 				!drmach_verify_sr(dip, 0)) {
448 				char	d_name[40], d_alias[40], *d_info;
449 
450 				d_name[0] = 0;
451 				d_info = ddi_get_name_addr(dip);
452 				if (d_info == NULL)
453 					d_info = "<null>";
454 
455 				if (!dr_resolve_devname(dip, d_name,
456 								d_alias)) {
457 					if (d_alias[0] != 0) {
458 						prom_printf("\tresuming "
459 							"%s@%s (aka %s)\n",
460 							d_name, d_info,
461 							d_alias);
462 					} else {
463 						prom_printf("\tresuming "
464 							"%s@%s\n",
465 							d_name, d_info);
466 					}
467 				} else {
468 					prom_printf("\tresuming %s@%s\n",
469 						bn, d_info);
470 				}
471 
472 				if (devi_attach(dip, DDI_RESUME) !=
473 							DDI_SUCCESS) {
474 					/*
475 					 * Print a console warning,
476 					 * set an e_code of ESBD_RESUME,
477 					 * and save the driver major
478 					 * number in the e_rsc.
479 					 */
480 					prom_printf("\tFAILED to resume %s@%s",
481 					    d_name[0] ? d_name : bn, d_info);
482 
483 					srh->sr_err_idx =
484 						dr_add_int(srh->sr_err_ints,
485 						srh->sr_err_idx, DR_MAX_ERR_INT,
486 						(uint64_t)major);
487 
488 					handle = srh->sr_dr_handlep;
489 
490 					dr_op_err(CE_IGNORE, handle,
491 					    ESBD_RESUME, "%s@%s",
492 					    d_name[0] ? d_name : bn, d_info);
493 				}
494 			}
495 		}
496 
497 		/* Hold parent busy while walking its children */
498 		ndi_devi_enter(dip, &circ);
499 		dr_resume_devices(ddi_get_child(dip), srh);
500 		ndi_devi_exit(dip, circ);
501 		last = dip;
502 	}
503 }
504 
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 *
 * The test is: sleeping on a wait channel, with an AST posted and
 * TP_CHKPT set.  Note that the argument is evaluated more than once.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
516 
/*
 * Stop all user threads.
 *
 * Each pass marks every user thread with TP_CHKPT and posts an AST (or, for
 * a thread that is already TS_STOPPED, clears TS_RESUME so it stays
 * stopped), waits via utstop_timedwait(), and then re-walks the thread list
 * to see whether every thread is stopped.  Up to DR_UTSTOP_RETRY passes are
 * made; the wait grows with the square of the pass number, so the first
 * pass does not wait at all.
 *
 * A thread that merely satisfies DR_VSTOPPED() is accepted as stopped when
 * dr_allow_blocked_threads is set.
 *
 * Returns DDI_SUCCESS if all user threads stopped, or immediately if
 * dr_skip_user_threads is set.  Otherwise returns ESRCH, with the pids of
 * the offending processes (gathered on the last pass) reported through an
 * ESBD_UTHREAD error stored in the DR handle's h_err.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
	int		count;
	int		bailout;
	dr_handle_t	*handle = srh->sr_dr_handlep;
	static fn_t	f = "dr_stop_user_threads";
	kthread_id_t 	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	DR_UTSTOP_RETRY	4
#define	DR_UTSTOP_WAIT	hz

	if (dr_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	srh->sr_err_idx = 0;
	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* skip kernel threads and zombie processes */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * add_one_utstop() is called with both the
				 * thread lock and p_lock dropped; they are
				 * retaken afterwards.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/* let everything catch up */
		utstop_timedwait(count * count * DR_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* skip kernel threads and zombie processes */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(dr_allow_blocked_threads &&
			    DR_VSTOPPED(tp))) {
				bailout = 1;
				/* report only on the final attempt */
				if (count == DR_UTSTOP_RETRY - 1) {
					/*
					 * save the pid for later reporting
					 */
					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)p->p_pid);

					cmn_err(CE_WARN, "%s: "
					    "failed to stop thread: "
					    "process=%s, pid=%d",
					    f, p->p_user.u_psargs, p->p_pid);

					PR_QR("%s: failed to stop thread: "
					    "process=%s, pid=%d, t_id=0x%p, "
					    "t_state=0x%x, t_proc_flag=0x%x, "
					    "t_schedflag=0x%x\n",
					    f, p->p_user.u_psargs, p->p_pid,
					    (void *)tp, tp->t_state,
					    tp->t_proc_flag, tp->t_schedflag);
				}

			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
			srh->sr_err_idx, 0);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
650 
651 static void
652 dr_start_user_threads(void)
653 {
654 	kthread_id_t tp;
655 
656 	mutex_enter(&pidlock);
657 
658 	/* walk all threads and release them */
659 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
660 		proc_t *p = ttoproc(tp);
661 
662 		/* skip kernel threads */
663 		if (ttoproc(tp)->p_as == &kas)
664 			continue;
665 
666 		mutex_enter(&p->p_lock);
667 		tp->t_proc_flag &= ~TP_CHKPT;
668 		mutex_exit(&p->p_lock);
669 
670 		thread_lock(tp);
671 		if (CPR_ISTOPPED(tp)) {
672 			/* back on the runq */
673 			tp->t_schedflag |= TS_RESUME;
674 			setrun_locked(tp);
675 		}
676 		thread_unlock(tp);
677 	}
678 
679 	mutex_exit(&pidlock);
680 }
681 
682 static void
683 dr_signal_user(int sig)
684 {
685 	struct proc *p;
686 
687 	mutex_enter(&pidlock);
688 
689 	for (p = practive; p != NULL; p = p->p_next) {
690 		/* only user threads */
691 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
692 		    p == proc_init || p == ttoproc(curthread))
693 			continue;
694 
695 		mutex_enter(&p->p_lock);
696 		sigtoproc(p, NULL, sig);
697 		mutex_exit(&p->p_lock);
698 	}
699 
700 	mutex_exit(&pidlock);
701 
702 	/* add a bit of delay */
703 	delay(hz);
704 }
705 
/*
 * Undo dr_suspend() as far as it got.
 *
 * srh->sr_suspend_state selects the entry point into the switch below and
 * each case falls through to the next, so the phases are unwound in the
 * reverse of the order dr_suspend() performed them: cpus and interrupts,
 * then drivers and the lock manager, then user threads, and finally the
 * SIGTHAW notification.
 *
 * In the DR_SRSTATE_FULL case the caller must hold cpu_lock (dr_suspend()
 * returns with it held); it is dropped here.
 *
 * Driver resume failures are reported through an ESBD_RESUME error in the
 * DR handle's h_err.
 */
void
dr_resume(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;

	handle = srh->sr_dr_handlep;

	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
		/*
		 * Update the signature block.
		 * If cpus are not paused, this can be done now.
		 * See comments below.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);
	}

	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_fault_reset();
		mutex_exit(&tod_lock);

		dr_enable_intr(); 	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * Update the signature block.
		 * This must not be done while cpus are paused, since on
		 * Starcat the cpu signature update acquires an adaptive
		 * mutex in the iosram driver. Blocking with cpus paused
		 * can lead to deadlock.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
				watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		/* report the majors of any drivers that failed to resume */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
				srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	/* pairs with i_ndi_block_device_tree_changes() in dr_suspend() */
	i_ndi_allow_device_tree_changes(handle->h_ndi);

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);

	prom_printf("DR: resume COMPLETED\n");
}
822 
/*
 * Quiesce the system for a DR operation.
 *
 * In order: block device tree changes, stop user threads, refuse if unsafe
 * drivers are attached (unless SBD_FLAG_FORCE was given), suspend the lock
 * manager, suspend all drivers, disable the hardware watchdog if it was
 * active, then pause the other cpus and stop interrupts.
 * srh->sr_suspend_state is advanced before each major phase so that
 * dr_resume() knows how much to undo.
 *
 * On success DDI_SUCCESS is returned with cpu_lock held and the cpus
 * paused; dr_resume() is what releases them.  On failure dr_resume() has
 * already been called here, the reason is stored in the DR handle's h_err
 * (where one is available), and a non-zero value is returned.
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	int		force;
	int		dev_errs_idx;
	uint64_t	dev_errs[DR_MAX_ERR_INT];
	int		rc = DDI_SUCCESS;

	handle = srh->sr_dr_handlep;

	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
	    CPU->cpu_id);

	/* undone by i_ndi_allow_device_tree_changes() in dr_resume() */
	i_ndi_block_device_tree_changes(&handle->h_ndi);

	prom_printf("\nDR: suspending user threads...\n");
	srh->sr_suspend_state = DR_SRSTATE_USER;
	/* a failure here is fatal only if dr_check_user_stop_result is set */
	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    dr_check_user_stop_result) {
		dr_resume(srh);
		return (rc);
	}

	if (!force) {
		struct dr_ref drc = {0};

		prom_printf("\nDR: checking devices...\n");
		dev_errs_idx = 0;

		/* refcount stays NULL: only unsafe majors are collected */
		drc.arr = dev_errs;
		drc.idx = &dev_errs_idx;
		drc.len = DR_MAX_ERR_INT;

		/*
		 * Since the root node can never go away, it
		 * doesn't have to be held.
		 */
		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
		if (dev_errs_idx) {
			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
				dev_errs_idx, 1);
			dr_resume(srh);
			return (DDI_FAILURE);
		}
		PR_QR("done\n");
	} else {
		prom_printf("\nDR: dr_suspend invoked with force flag\n");
	}

#ifndef	SKIP_SYNC
	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef	SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();
#endif

	/*
	 * now suspend drivers
	 */
	prom_printf("DR: suspending drivers...\n");
	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
	srh->sr_err_idx = 0;
	/* No parent to hold busy */
	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		/* report the majors of the drivers that failed to suspend */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
				srh->sr_err_ints, srh->sr_err_idx, 1);
		}
		dr_resume(srh);
		return (rc);
	}

	drmach_suspend_last();

	/*
	 * finally, grab all cpus
	 */
	srh->sr_suspend_state = DR_SRSTATE_FULL;

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		srh->sr_flags |= SR_FLAG_WATCHDOG;
	} else {
		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
	}

	/*
	 * Update the signature block.
	 * This must be done before cpus are paused, since on Starcat the
	 * cpu signature update acquires an adaptive mutex in the iosram driver.
	 * Blocking with cpus paused can lead to deadlock.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);

	/* cpu_lock stays held on return; dr_resume() drops it */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL);
	dr_stop_intr();

	return (rc);
}
946 
947 int
948 dr_pt_test_suspend(dr_handle_t *hp)
949 {
950 	dr_sr_handle_t *srh;
951 	int		err;
952 	uint_t		psmerr;
953 	static fn_t	f = "dr_pt_test_suspend";
954 
955 	PR_QR("%s...\n", f);
956 
957 	srh = dr_get_sr_handle(hp);
958 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
959 		dr_resume(srh);
960 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
961 			PR_QR("%s: error on dr_resume()", f);
962 			switch (psmerr) {
963 			case ESBD_RESUME:
964 				PR_QR("Couldn't resume devices: %s\n",
965 					DR_GET_E_RSC(hp->h_err));
966 				break;
967 
968 			case ESBD_KTHREAD:
969 				PR_ALL("psmerr is ESBD_KTHREAD\n");
970 				break;
971 			default:
972 				PR_ALL("Resume error unknown = %d\n",
973 					psmerr);
974 				break;
975 			}
976 		}
977 	} else {
978 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n",
979 			f, err);
980 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
981 		switch (psmerr) {
982 		case ESBD_UNSAFE:
983 			PR_ALL("Unsafe devices (major #): %s\n",
984 				DR_GET_E_RSC(hp->h_err));
985 			break;
986 
987 		case ESBD_RTTHREAD:
988 			PR_ALL("RT threads (PIDs): %s\n",
989 				DR_GET_E_RSC(hp->h_err));
990 			break;
991 
992 		case ESBD_UTHREAD:
993 			PR_ALL("User threads (PIDs): %s\n",
994 				DR_GET_E_RSC(hp->h_err));
995 			break;
996 
997 		case ESBD_SUSPEND:
998 			PR_ALL("Non-suspendable devices (major #): %s\n",
999 				DR_GET_E_RSC(hp->h_err));
1000 			break;
1001 
1002 		case ESBD_RESUME:
1003 			PR_ALL("Could not resume devices (major #): %s\n",
1004 				DR_GET_E_RSC(hp->h_err));
1005 			break;
1006 
1007 		case ESBD_KTHREAD:
1008 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1009 			break;
1010 
1011 		case ESBD_NOERROR:
1012 			PR_ALL("sbd_error_t error code not set\n");
1013 			break;
1014 
1015 		default:
1016 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1017 			break;
1018 		}
1019 	}
1020 	dr_release_sr_handle(srh);
1021 
1022 	return (0);
1023 }
1024 
/*
 * Append 'val' to the first 'idx' entries of 'arr', which has room for
 * 'len' entries in total.  The value is not stored if it is already present
 * or if the array is full.
 *
 * Returns the resulting number of entries, or 0 if 'arr' is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos;

	if (arr == NULL)
		return (0);

	/* no room left: leave the array as it is */
	if (idx >= len)
		return (idx);

	/* duplicates are dropped; the scan direction does not matter */
	pos = idx;
	while (--pos >= 0) {
		if (arr[pos] == val)
			return (idx);
	}

	arr[idx] = val;

	return (idx + 1);
}
1050 
1051 /*
1052  * Construct an sbd_error_t featuring a string representation of an array of
1053  * integers as its e_rsc.
1054  */
1055 static sbd_error_t *
1056 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1057 {
1058 	int		i, n, buf_len, buf_idx, buf_avail;
1059 	char		*dname;
1060 	char		*buf;
1061 	sbd_error_t	*new_sbd_err;
1062 	static char	s_ellipsis[] = "...";
1063 
1064 	if (arr == NULL || idx <= 0)
1065 		return (NULL);
1066 
1067 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1068 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1069 
1070 	/*
1071 	 * This is the total working area of the buffer.  It must be computed
1072 	 * as the size of 'buf', minus reserved space for the null terminator
1073 	 * and the ellipsis string.
1074 	 */
1075 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1076 
1077 	/* Construct a string representation of the array values */
1078 	for (buf_idx = 0, i = 0; i < idx; i++) {
1079 		buf_avail = buf_len - buf_idx;
1080 		if (majors) {
1081 			dname = ddi_major_to_name(arr[i]);
1082 			if (dname) {
1083 				n = snprintf(&buf[buf_idx], buf_avail,
1084 					"%s, ", dname);
1085 			} else {
1086 				n = snprintf(&buf[buf_idx], buf_avail,
1087 					"major %lu, ", arr[i]);
1088 			}
1089 		} else {
1090 			n = snprintf(&buf[buf_idx], buf_avail, "%lu, ",
1091 				arr[i]);
1092 		}
1093 
1094 		/* An ellipsis gets appended when no more values fit */
1095 		if (n >= buf_avail) {
1096 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1097 			break;
1098 		}
1099 
1100 		buf_idx += n;
1101 	}
1102 
1103 	/* If all the contents fit, remove the trailing comma */
1104 	if (n < buf_avail) {
1105 		buf[--buf_idx] = '\0';
1106 		buf[--buf_idx] = '\0';
1107 	}
1108 
1109 	/* Return an sbd_error_t with the buffer and e_code */
1110 	new_sbd_err = drerr_new(1, e_code, buf);
1111 	kmem_free(buf, MAXPATHLEN);
1112 	return (new_sbd_err);
1113 }
1114