xref: /titanic_44/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision d485aa23b5e424dd136afdf657683389f93f72d6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * A CPR derivative specifically for starfire/starcat
31  */
32 
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/ddi.h>
38 #define	SUNDDI_IMPL
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/devctl.h>
42 #include <sys/time.h>
43 #include <sys/kmem.h>
44 #include <nfs/lm.h>
45 #include <sys/ddi_impldefs.h>
46 #include <sys/ndi_impldefs.h>
47 #include <sys/obpdefs.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/errno.h>
51 #include <sys/callb.h>
52 #include <sys/clock.h>
53 #include <sys/x_call.h>
54 #include <sys/cpuvar.h>
55 #include <sys/epm.h>
56 #include <sys/vfs.h>
57 
58 #include <sys/cpu_sgnblk_defs.h>
59 #include <sys/dr.h>
60 #include <sys/dr_util.h>
61 
62 #include <sys/promif.h>
63 #include <sys/conf.h>
64 #include <sys/cyclic.h>
65 
/*
 * Device-tree helpers defined outside this file.
 * NOTE(review): none of these three are referenced in the visible part of
 * this file -- confirm they are still needed.
 */
extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
extern int	is_pseudo_device(dev_info_t *dip);

/* Taken by dr_suspend() before pausing cpus; dropped by dr_resume(). */
extern kmutex_t	cpu_lock;
/* Table of driver names consulted by dr_is_unsafe_major(). */
extern dr_unsafe_devs_t dr_unsafe_devs;

/* Forward declarations of file-local helpers defined below. */
static int		dr_is_real_device(dev_info_t *dip);
static int		dr_is_unsafe_major(major_t major);
static int		dr_bypass_device(char *dname);
static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
				char *alias);
static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
				int majors);
static int		dr_add_int(uint64_t *arr, int idx, int len,
				uint64_t val);

/* Suspend/resume self-test entry point; not static, so visible externally. */
int dr_pt_test_suspend(dr_handle_t *hp);
85 
86 /*
87  * dr_quiesce.c interface
88  * NOTE: states used internally by dr_suspend and dr_resume
89  */
/*
 * How far dr_suspend() has progressed.  dr_resume() switches on this value
 * and falls through from the recorded stage down to DR_SRSTATE_BEGIN, so
 * only the stages that were actually reached are undone.
 */
typedef enum dr_suspend_state {
	/* nothing stopped yet (value 0) */
	DR_SRSTATE_BEGIN = 0,
	/* set just before user threads are stopped */
	DR_SRSTATE_USER,
	/* set just before device drivers are suspended */
	DR_SRSTATE_DRIVER,
	/* set after drmach_suspend_last(), just before cpus are paused */
	DR_SRSTATE_FULL
} suspend_state_t;
96 
/*
 * Per-operation suspend/resume context, created by dr_get_sr_handle() and
 * freed by dr_release_sr_handle().
 */
struct dr_sr_handle {
	/* DR handle this suspend/resume belongs to; errors are posted here */
	dr_handle_t		*sr_dr_handlep;
	/*
	 * Device whose DDI_SUSPEND failed.  dr_suspend_devices() holds it
	 * with ndi_hold_devi(); dr_resume_devices() releases it and resets
	 * this field to NULL.
	 */
	dev_info_t		*sr_failed_dip;
	/* stage reached by dr_suspend(); drives dr_resume() */
	suspend_state_t		sr_suspend_state;
	/* SR_FLAG_* bits */
	uint_t			sr_flags;
	/* driver majors or pids collected for error reporting */
	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
	/* number of valid entries in sr_err_ints */
	int			sr_err_idx;
};

/* sr_flags: the hw watchdog was cleared by dr_suspend() and must be re-armed */
#define	SR_FLAG_WATCHDOG	0x1
107 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * List of drivers to bypass when performing a suspend.
 *
 * The list is terminated by an empty string: dr_bypass_device() stops
 * scanning at the first entry whose first character is '\0'.  As shipped
 * the list holds only that terminator, so no driver is bypassed.
 */
static char *dr_bypass_list[] = {
	""
};
116 
117 
/*
 * While SKIP_SYNC is defined, the vfs_sync(SYNC_ALL) and sync() calls in
 * dr_suspend() are compiled out (they sit under #ifndef SKIP_SYNC).
 */
#define		SKIP_SYNC	/* bypass sync ops in dr_suspend */
119 
120 /*
121  * dr_skip_user_threads is used to control if user threads should
122  * be suspended.  If dr_skip_user_threads is true, the rest of the
123  * flags are not used; if it is false, dr_check_user_stop_result
124  * will be used to control whether or not we need to check suspend
125  * result, and dr_allow_blocked_threads will be used to control
126  * whether or not we allow suspend to continue if there are blocked
127  * threads.  We allow all combinations of dr_check_user_stop_result
 * and dr_allow_blocked_threads, even though it might not make much
 * sense to disallow blocked threads when we don't even check the stop
130  * result.
131  */
/* nonzero: dr_stop_user_threads() returns success without stopping anyone */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
/* nonzero: dr_suspend() aborts when dr_stop_user_threads() fails */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
/* nonzero: a thread matching DR_VSTOPPED() counts as stopped */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */

/* NOTE(review): not referenced in the visible part of this file -- confirm. */
#define	DR_CPU_LOOP_MSEC	1000
137 
138 static void
139 dr_stop_intr(void)
140 {
141 	ASSERT(MUTEX_HELD(&cpu_lock));
142 
143 	kpreempt_disable();
144 	cyclic_suspend();
145 }
146 
147 static void
148 dr_enable_intr(void)
149 {
150 	ASSERT(MUTEX_HELD(&cpu_lock));
151 
152 	cyclic_resume();
153 	kpreempt_enable();
154 }
155 
156 dr_sr_handle_t *
157 dr_get_sr_handle(dr_handle_t *hp)
158 {
159 	dr_sr_handle_t *srh;
160 
161 	srh = GETSTRUCT(dr_sr_handle_t, 1);
162 	srh->sr_dr_handlep = hp;
163 
164 	return (srh);
165 }
166 
167 void
168 dr_release_sr_handle(dr_sr_handle_t *srh)
169 {
170 	ASSERT(srh->sr_failed_dip == NULL);
171 	FREESTRUCT(srh, dr_sr_handle_t, 1);
172 }
173 
174 static int
175 dr_is_real_device(dev_info_t *dip)
176 {
177 	struct regspec *regbuf = NULL;
178 	int length = 0;
179 	int rc;
180 
181 	if (ddi_get_driver(dip) == NULL)
182 		return (0);
183 
184 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
185 		return (1);
186 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
187 		return (0);
188 
189 	/*
190 	 * now the general case
191 	 */
192 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
193 		(caddr_t)&regbuf, &length);
194 	ASSERT(rc != DDI_PROP_NO_MEMORY);
195 	if (rc != DDI_PROP_SUCCESS) {
196 		return (0);
197 	} else {
198 		if ((length > 0) && (regbuf != NULL))
199 			kmem_free(regbuf, length);
200 		return (1);
201 	}
202 }
203 
204 static int
205 dr_is_unsafe_major(major_t major)
206 {
207 	char    *dname, **cpp;
208 	int	i, ndevs;
209 
210 	if ((dname = ddi_major_to_name(major)) == NULL) {
211 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
212 		return (0);
213 	}
214 
215 	ndevs = dr_unsafe_devs.ndevs;
216 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
217 		if (strcmp(dname, *cpp++) == 0)
218 			return (1);
219 	}
220 	return (0);
221 }
222 
223 static int
224 dr_bypass_device(char *dname)
225 {
226 	int i;
227 	char **lname;
228 	/* check the bypass list */
229 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
230 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
231 			return (1);
232 	}
233 	return (0);
234 }
235 
236 static int
237 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
238 {
239 	major_t	devmajor;
240 	char	*aka, *name;
241 
242 	*buffer = *alias = 0;
243 
244 	if (dip == NULL)
245 		return (-1);
246 
247 	if ((name = ddi_get_name(dip)) == NULL)
248 		name = "<null name>";
249 
250 	aka = name;
251 
252 	if ((devmajor = ddi_name_to_major(aka)) != -1)
253 		aka = ddi_major_to_name(devmajor);
254 
255 	strcpy(buffer, name);
256 
257 	if (strcmp(name, aka))
258 		strcpy(alias, aka);
259 	else
260 		*alias = 0;
261 
262 	return (0);
263 }
264 
/*
 * Argument block handed to dr_check_dip() through the device-tree walkers.
 * Any of the pointers may be NULL, in which case that piece of bookkeeping
 * is skipped.
 */
struct dr_ref {
	/* running total of references reported by the walker */
	int		*refcount;
	/* output array of unsafe driver major numbers */
	uint64_t	*arr;
	/* in/out: number of valid entries in arr */
	int		*idx;
	/* capacity of arr, in entries */
	int		len;
};
271 
272 /* ARGSUSED */
273 static int
274 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
275 {
276 	major_t		major;
277 	char		*dname;
278 	struct dr_ref	*rp = (struct dr_ref *)arg;
279 
280 	if (dip == NULL)
281 		return (DDI_WALK_CONTINUE);
282 
283 	if (!dr_is_real_device(dip))
284 		return (DDI_WALK_CONTINUE);
285 
286 	dname = ddi_binding_name(dip);
287 
288 	if (dr_bypass_device(dname))
289 		return (DDI_WALK_CONTINUE);
290 
291 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
292 		if (ref && rp->refcount) {
293 			*rp->refcount += ref;
294 			PR_QR("\n  %s (major# %d) is referenced(%u)\n",
295 				dname, major, ref);
296 		}
297 		if (dr_is_unsafe_major(major) &&
298 		    i_ddi_node_state(dip) >= DS_ATTACHED) {
299 			PR_QR("\n  %s (major# %d) not hotpluggable\n",
300 				dname, major);
301 			if (rp->arr != NULL && rp->idx != NULL)
302 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
303 					rp->len, (uint64_t)major);
304 		}
305 	}
306 	return (DDI_WALK_CONTINUE);
307 }
308 
309 static int
310 dr_check_unsafe_major(dev_info_t *dip, void *arg)
311 {
312 	return (dr_check_dip(dip, arg, 0));
313 }
314 
315 
316 /*ARGSUSED*/
317 void
318 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
319     uint64_t *arr, int *idx, int len)
320 {
321 	struct dr_ref bref = {0};
322 
323 	if (dip == NULL)
324 		return;
325 
326 	bref.refcount = refcount;
327 	bref.arr = arr;
328 	bref.idx = idx;
329 	bref.len = len;
330 
331 	ASSERT(e_ddi_branch_held(dip));
332 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
333 }
334 
335 /*
336  * The "dip" argument's parent (if it exists) must be held busy.
337  */
338 static int
339 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
340 {
341 	dr_handle_t	*handle;
342 	major_t		major;
343 	char		*dname;
344 	int		circ;
345 
346 	/*
347 	 * If dip is the root node, it has no siblings and it is
348 	 * always held. If dip is not the root node, dr_suspend_devices()
349 	 * will be invoked with the parent held busy.
350 	 */
351 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
352 		char	d_name[40], d_alias[40], *d_info;
353 
354 		ndi_devi_enter(dip, &circ);
355 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
356 			ndi_devi_exit(dip, circ);
357 			return (ENXIO);
358 		}
359 		ndi_devi_exit(dip, circ);
360 
361 		if (!dr_is_real_device(dip))
362 			continue;
363 
364 		major = (major_t)-1;
365 		if ((dname = ddi_binding_name(dip)) != NULL)
366 			major = ddi_name_to_major(dname);
367 
368 		if (dr_bypass_device(dname)) {
369 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
370 				major);
371 			continue;
372 		}
373 
374 		if (drmach_verify_sr(dip, 1)) {
375 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
376 				major);
377 			continue;
378 		}
379 
380 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
381 			d_info = "<null>";
382 
383 		d_name[0] = 0;
384 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
385 			if (d_alias[0] != 0) {
386 				prom_printf("\tsuspending %s@%s (aka %s)\n",
387 					d_name, d_info, d_alias);
388 			} else {
389 				prom_printf("\tsuspending %s@%s\n",
390 					d_name, d_info);
391 			}
392 		} else {
393 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
394 		}
395 
396 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
397 			prom_printf("\tFAILED to suspend %s@%s\n",
398 				d_name[0] ? d_name : dname, d_info);
399 
400 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
401 				srh->sr_err_idx, DR_MAX_ERR_INT,
402 				(uint64_t)major);
403 
404 			ndi_hold_devi(dip);
405 			srh->sr_failed_dip = dip;
406 
407 			handle = srh->sr_dr_handlep;
408 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
409 				d_name[0] ? d_name : dname, d_info);
410 
411 			return (DDI_FAILURE);
412 		}
413 	}
414 
415 	return (DDI_SUCCESS);
416 }
417 
418 static void
419 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
420 {
421 	dr_handle_t	*handle;
422 	dev_info_t	*dip, *next, *last = NULL;
423 	major_t		major;
424 	char		*bn;
425 	int		circ;
426 
427 	major = (major_t)-1;
428 
429 	/* attach in reverse device tree order */
430 	while (last != start) {
431 		dip = start;
432 		next = ddi_get_next_sibling(dip);
433 		while (next != last && dip != srh->sr_failed_dip) {
434 			dip = next;
435 			next = ddi_get_next_sibling(dip);
436 		}
437 		if (dip == srh->sr_failed_dip) {
438 			/* release hold acquired in dr_suspend_devices() */
439 			srh->sr_failed_dip = NULL;
440 			ndi_rele_devi(dip);
441 		} else if (dr_is_real_device(dip) &&
442 				srh->sr_failed_dip == NULL) {
443 
444 			if ((bn = ddi_binding_name(dip)) != NULL) {
445 				major = ddi_name_to_major(bn);
446 			} else {
447 				bn = "<null>";
448 			}
449 			if (!dr_bypass_device(bn) &&
450 				!drmach_verify_sr(dip, 0)) {
451 				char	d_name[40], d_alias[40], *d_info;
452 
453 				d_name[0] = 0;
454 				d_info = ddi_get_name_addr(dip);
455 				if (d_info == NULL)
456 					d_info = "<null>";
457 
458 				if (!dr_resolve_devname(dip, d_name,
459 								d_alias)) {
460 					if (d_alias[0] != 0) {
461 						prom_printf("\tresuming "
462 							"%s@%s (aka %s)\n",
463 							d_name, d_info,
464 							d_alias);
465 					} else {
466 						prom_printf("\tresuming "
467 							"%s@%s\n",
468 							d_name, d_info);
469 					}
470 				} else {
471 					prom_printf("\tresuming %s@%s\n",
472 						bn, d_info);
473 				}
474 
475 				if (devi_attach(dip, DDI_RESUME) !=
476 							DDI_SUCCESS) {
477 					/*
478 					 * Print a console warning,
479 					 * set an e_code of ESBD_RESUME,
480 					 * and save the driver major
481 					 * number in the e_rsc.
482 					 */
483 					prom_printf("\tFAILED to resume %s@%s",
484 					    d_name[0] ? d_name : bn, d_info);
485 
486 					srh->sr_err_idx =
487 						dr_add_int(srh->sr_err_ints,
488 						srh->sr_err_idx, DR_MAX_ERR_INT,
489 						(uint64_t)major);
490 
491 					handle = srh->sr_dr_handlep;
492 
493 					dr_op_err(CE_IGNORE, handle,
494 					    ESBD_RESUME, "%s@%s",
495 					    d_name[0] ? d_name : bn, d_info);
496 				}
497 			}
498 		}
499 
500 		/* Hold parent busy while walking its children */
501 		ndi_devi_enter(dip, &circ);
502 		dr_resume_devices(ddi_get_child(dip), srh);
503 		ndi_devi_exit(dip, circ);
504 		last = dip;
505 	}
506 }
507 
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 *
 * Concretely, the thread must be asleep on a wait channel, have an AST
 * pending (t_astflag) and carry TP_CHKPT in t_proc_flag.  The argument is
 * evaluated several times, so it must have no side effects.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
519 
/*
 * Ask every user thread in the system to stop, making up to
 * DR_UTSTOP_RETRY passes.
 *
 * Each pass walks the thread list under pidlock.  A thread that is already
 * TS_STOPPED has TS_RESUME cleared; any other user thread is flagged with
 * TP_CHKPT, counted via add_one_utstop(), given an AST and, if it is in a
 * wakeable sleep, set running.  After waiting count * count *
 * DR_UTSTOP_WAIT ticks, a second walk verifies that each user thread is
 * either CPR_ISTOPPED() or, when dr_allow_blocked_threads is set,
 * DR_VSTOPPED().
 *
 * Returns DDI_SUCCESS immediately when dr_skip_user_threads is set, or
 * once a pass finds every user thread stopped.  Otherwise returns ESRCH,
 * with the pids that failed the final pass saved in srh->sr_err_ints and
 * an ESBD_UTHREAD error stored in the DR handle's h_err.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
	int		count;
	int		bailout;
	dr_handle_t	*handle = srh->sr_dr_handlep;
	static fn_t	f = "dr_stop_user_threads";
	kthread_id_t 	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	DR_UTSTOP_RETRY	4
#define	DR_UTSTOP_WAIT	hz

	if (dr_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	srh->sr_err_idx = 0;
	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * add_one_utstop() is called with neither
				 * the thread lock nor p_lock held; both are
				 * re-taken afterwards.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				/* post an AST to the thread */
				aston(tp);

				/* make a wakeable sleeper runnable */
				if (tp->t_state == TS_SLEEP &&
				    (tp->t_flag & T_WAKEABLE)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/* let everything catch up */
		/* wait grows quadratically: 0, 1, 4, 9 times DR_UTSTOP_WAIT */
		utstop_timedwait(count * count * DR_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(dr_allow_blocked_threads &&
			    DR_VSTOPPED(tp))) {
				bailout = 1;
				/* only the final pass reports offenders */
				if (count == DR_UTSTOP_RETRY - 1) {
					/*
					 * save the pid for later reporting
					 */
					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)p->p_pid);

					cmn_err(CE_WARN, "%s: "
					    "failed to stop thread: "
					    "process=%s, pid=%d",
					    f, p->p_user.u_psargs, p->p_pid);

					PR_QR("%s: failed to stop thread: "
					    "process=%s, pid=%d, t_id=0x%p, "
					    "t_state=0x%x, t_proc_flag=0x%x, "
					    "t_schedflag=0x%x\n",
					    f, p->p_user.u_psargs, p->p_pid,
					    tp, tp->t_state, tp->t_proc_flag,
					    tp->t_schedflag);
				}

			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		/* final argument 0: the saved values are pids, not majors */
		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
			srh->sr_err_idx, 0);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
654 
655 static void
656 dr_start_user_threads(void)
657 {
658 	kthread_id_t tp;
659 
660 	mutex_enter(&pidlock);
661 
662 	/* walk all threads and release them */
663 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
664 		proc_t *p = ttoproc(tp);
665 
666 		/* skip kernel threads */
667 		if (ttoproc(tp)->p_as == &kas)
668 			continue;
669 
670 		mutex_enter(&p->p_lock);
671 		tp->t_proc_flag &= ~TP_CHKPT;
672 		mutex_exit(&p->p_lock);
673 
674 		thread_lock(tp);
675 		if (CPR_ISTOPPED(tp)) {
676 			/* back on the runq */
677 			tp->t_schedflag |= TS_RESUME;
678 			setrun_locked(tp);
679 		}
680 		thread_unlock(tp);
681 	}
682 
683 	mutex_exit(&pidlock);
684 }
685 
686 static void
687 dr_signal_user(int sig)
688 {
689 	struct proc *p;
690 
691 	mutex_enter(&pidlock);
692 
693 	for (p = practive; p != NULL; p = p->p_next) {
694 		/* only user threads */
695 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
696 		    p == proc_init || p == ttoproc(curthread))
697 			continue;
698 
699 		mutex_enter(&p->p_lock);
700 		sigtoproc(p, NULL, sig);
701 		mutex_exit(&p->p_lock);
702 	}
703 
704 	mutex_exit(&pidlock);
705 
706 	/* add a bit of delay */
707 	delay(hz);
708 }
709 
/*
 * Undo dr_suspend(), starting from whatever stage it reached.
 *
 * srh->sr_suspend_state selects the entry point into the switch below and
 * every case falls through to the next lower stage:
 *
 *   DR_SRSTATE_FULL	expects cpu_lock held (asserted); re-enables the
 *			cyclic subsystem and preemption, restarts the cpus,
 *			drops cpu_lock, re-arms the hw watchdog if
 *			SR_FLAG_WATCHDOG is set and calls
 *			drmach_resume_first()
 *   DR_SRSTATE_DRIVER	resumes devices and the lock manager; resume
 *			failures are reported through the DR handle's h_err
 *			as ESBD_RESUME
 *   DR_SRSTATE_USER	restarts user threads unless dr_skip_user_threads
 *   DR_SRSTATE_BEGIN	sends SIGTHAW to user processes
 *
 * Afterwards device tree changes are allowed again and the cpu signature
 * is set to SIGST_RUN.
 */
void
dr_resume(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;

	handle = srh->sr_dr_handlep;

	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
		/*
		 * Update the signature block.
		 * If cpus are not paused, this can be done now.
		 * See comments below.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);
	}

	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		/* cpu_lock was taken by dr_suspend() and is released here */
		ASSERT(MUTEX_HELD(&cpu_lock));

		dr_enable_intr(); 	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * Update the signature block.
		 * This must not be done while cpus are paused, since on
		 * Starcat the cpu signature update acquires an adaptive
		 * mutex in the iosram driver. Blocking with cpus paused
		 * can lead to deadlock.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */

		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
				watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		/* final argument 1: the saved values are driver majors */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
				srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	/* pairs with i_ndi_block_device_tree_changes() in dr_suspend() */
	i_ndi_allow_device_tree_changes(handle->h_ndi);

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);

	prom_printf("DR: resume COMPLETED\n");
}
819 
/*
 * Quiesce the system for a DR operation.
 *
 * In order: block device tree changes, stop user threads, (unless the
 * SBD_FLAG_FORCE flag is set) refuse to continue if any attached unsafe
 * driver is found, suspend the lock manager, suspend all device drivers,
 * call drmach_suspend_last(), clear the hw watchdog if it was active, then
 * take cpu_lock, pause the other cpus and stop cyclics/preemption.
 * srh->sr_suspend_state records the stage reached.
 *
 * Returns DDI_SUCCESS with the cpus paused and cpu_lock still held; the
 * caller must follow up with dr_resume().  On any failure dr_resume() is
 * called from here to unwind, an error is recorded in the DR handle's
 * h_err where available, and a value other than DDI_SUCCESS is returned.
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	int		force;
	int		dev_errs_idx;
	uint64_t	dev_errs[DR_MAX_ERR_INT];
	int		rc = DDI_SUCCESS;

	handle = srh->sr_dr_handlep;

	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
	    CPU->cpu_id);

	/* undone by i_ndi_allow_device_tree_changes() in dr_resume() */
	i_ndi_block_device_tree_changes(&handle->h_ndi);

	prom_printf("\nDR: suspending user threads...\n");
	srh->sr_suspend_state = DR_SRSTATE_USER;
	/* a failure here is fatal only if dr_check_user_stop_result is set */
	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    dr_check_user_stop_result) {
		dr_resume(srh);
		return (rc);
	}

	if (!force) {
		struct dr_ref drc = {0};

		prom_printf("\nDR: checking devices...\n");
		dev_errs_idx = 0;

		/* refcount stays NULL: only unsafe majors are collected */
		drc.arr = dev_errs;
		drc.idx = &dev_errs_idx;
		drc.len = DR_MAX_ERR_INT;

		/*
		 * Since the root node can never go away, it
		 * doesn't have to be held.
		 */
		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
		if (dev_errs_idx) {
			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
				dev_errs_idx, 1);
			dr_resume(srh);
			return (DDI_FAILURE);
		}
		PR_QR("done\n");
	} else {
		prom_printf("\nDR: dr_suspend invoked with force flag\n");
	}

#ifndef	SKIP_SYNC
	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef	SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();
#endif

	/*
	 * now suspend drivers
	 */
	prom_printf("DR: suspending drivers...\n");
	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
	srh->sr_err_idx = 0;
	/* No parent to hold busy */
	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
				srh->sr_err_ints, srh->sr_err_idx, 1);
		}
		dr_resume(srh);
		return (rc);
	}

	drmach_suspend_last();

	/*
	 * finally, grab all cpus
	 */
	srh->sr_suspend_state = DR_SRSTATE_FULL;

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		/* tells dr_resume() to re-arm the watchdog */
		srh->sr_flags |= SR_FLAG_WATCHDOG;
	} else {
		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
	}

	/*
	 * Update the signature block.
	 * This must be done before cpus are paused, since on Starcat the
	 * cpu signature update acquires an adaptive mutex in the iosram driver.
	 * Blocking with cpus paused can lead to deadlock.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);

	/* cpu_lock stays held on return; dr_resume() drops it */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL);
	dr_stop_intr();

	return (rc);
}
943 
944 int
945 dr_pt_test_suspend(dr_handle_t *hp)
946 {
947 	dr_sr_handle_t *srh;
948 	int		err;
949 	uint_t		psmerr;
950 	static fn_t	f = "dr_pt_test_suspend";
951 
952 	PR_QR("%s...\n", f);
953 
954 	srh = dr_get_sr_handle(hp);
955 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
956 		dr_resume(srh);
957 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
958 			PR_QR("%s: error on dr_resume()", f);
959 			switch (psmerr) {
960 			case ESBD_RESUME:
961 				PR_QR("Couldn't resume devices: %s\n",
962 					DR_GET_E_RSC(hp->h_err));
963 				break;
964 
965 			case ESBD_KTHREAD:
966 				PR_ALL("psmerr is ESBD_KTHREAD\n");
967 				break;
968 			default:
969 				PR_ALL("Resume error unknown = %d\n",
970 					psmerr);
971 				break;
972 			}
973 		}
974 	} else {
975 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n",
976 			f, err);
977 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
978 		switch (psmerr) {
979 		case ESBD_UNSAFE:
980 			PR_ALL("Unsafe devices (major #): %s\n",
981 				DR_GET_E_RSC(hp->h_err));
982 			break;
983 
984 		case ESBD_RTTHREAD:
985 			PR_ALL("RT threads (PIDs): %s\n",
986 				DR_GET_E_RSC(hp->h_err));
987 			break;
988 
989 		case ESBD_UTHREAD:
990 			PR_ALL("User threads (PIDs): %s\n",
991 				DR_GET_E_RSC(hp->h_err));
992 			break;
993 
994 		case ESBD_SUSPEND:
995 			PR_ALL("Non-suspendable devices (major #): %s\n",
996 				DR_GET_E_RSC(hp->h_err));
997 			break;
998 
999 		case ESBD_RESUME:
1000 			PR_ALL("Could not resume devices (major #): %s\n",
1001 				DR_GET_E_RSC(hp->h_err));
1002 			break;
1003 
1004 		case ESBD_KTHREAD:
1005 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1006 			break;
1007 
1008 		case ESBD_NOERROR:
1009 			PR_ALL("sbd_error_t error code not set\n");
1010 			break;
1011 
1012 		default:
1013 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1014 			break;
1015 		}
1016 	}
1017 	dr_release_sr_handle(srh);
1018 
1019 	return (0);
1020 }
1021 
/*
 * Append "val" to the first "idx" entries of "arr", which has room for
 * "len" entries in total.  The value is dropped when the array is already
 * full or when it is already present.
 *
 * Returns the resulting number of entries: idx + 1 when the value was
 * stored, idx when it was dropped, and 0 when "arr" is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos;

	if (arr == NULL)
		return (0);

	/* no room left: keep what we have */
	if (idx >= len)
		return (idx);

	/* reject duplicates */
	pos = 0;
	while (pos < idx) {
		if (arr[pos] == val)
			return (idx);
		pos++;
	}

	arr[idx] = val;

	return (idx + 1);
}
1047 
1048 /*
1049  * Construct an sbd_error_t featuring a string representation of an array of
1050  * integers as its e_rsc.
1051  */
1052 static sbd_error_t *
1053 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1054 {
1055 	int		i, n, buf_len, buf_idx, buf_avail;
1056 	char		*dname;
1057 	char		*buf;
1058 	sbd_error_t	*new_sbd_err;
1059 	static char	s_ellipsis[] = "...";
1060 
1061 	if (arr == NULL || idx <= 0)
1062 		return (NULL);
1063 
1064 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1065 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1066 
1067 	/*
1068 	 * This is the total working area of the buffer.  It must be computed
1069 	 * as the size of 'buf', minus reserved space for the null terminator
1070 	 * and the ellipsis string.
1071 	 */
1072 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1073 
1074 	/* Construct a string representation of the array values */
1075 	for (buf_idx = 0, i = 0; i < idx; i++) {
1076 		buf_avail = buf_len - buf_idx;
1077 		if (majors) {
1078 			dname = ddi_major_to_name(arr[i]);
1079 			if (dname) {
1080 				n = snprintf(&buf[buf_idx], buf_avail,
1081 					"%s, ", dname);
1082 			} else {
1083 				n = snprintf(&buf[buf_idx], buf_avail,
1084 					"major %lu, ", arr[i]);
1085 			}
1086 		} else {
1087 			n = snprintf(&buf[buf_idx], buf_avail, "%lu, ",
1088 				arr[i]);
1089 		}
1090 
1091 		/* An ellipsis gets appended when no more values fit */
1092 		if (n >= buf_avail) {
1093 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1094 			break;
1095 		}
1096 
1097 		buf_idx += n;
1098 	}
1099 
1100 	/* If all the contents fit, remove the trailing comma */
1101 	if (n < buf_avail) {
1102 		buf[--buf_idx] = '\0';
1103 		buf[--buf_idx] = '\0';
1104 	}
1105 
1106 	/* Return an sbd_error_t with the buffer and e_code */
1107 	new_sbd_err = drerr_new(1, e_code, buf);
1108 	kmem_free(buf, MAXPATHLEN);
1109 	return (new_sbd_err);
1110 }
1111