xref: /titanic_52/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision bdfc6d18da790deeec2e0eb09c625902defe2498)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * A CPR derivative specifically for starfire/starcat
31  */
32 
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/ddi.h>
38 #define	SUNDDI_IMPL
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/devctl.h>
42 #include <sys/time.h>
43 #include <sys/kmem.h>
44 #include <nfs/lm.h>
45 #include <sys/ddi_impldefs.h>
46 #include <sys/ndi_impldefs.h>
47 #include <sys/obpdefs.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/errno.h>
51 #include <sys/callb.h>
52 #include <sys/clock.h>
53 #include <sys/x_call.h>
54 #include <sys/cpuvar.h>
55 #include <sys/epm.h>
56 #include <sys/vfs.h>
57 
58 #include <sys/cpu_sgnblk_defs.h>
59 #include <sys/dr.h>
60 #include <sys/dr_util.h>
61 
62 #include <sys/promif.h>
63 #include <sys/conf.h>
64 #include <sys/cyclic.h>
65 
66 extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
67 extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
68 extern int	is_pseudo_device(dev_info_t *dip);
69 
70 extern kmutex_t	cpu_lock;
71 extern dr_unsafe_devs_t dr_unsafe_devs;
72 
73 static int		dr_is_real_device(dev_info_t *dip);
74 static int		dr_is_unsafe_major(major_t major);
75 static int		dr_bypass_device(char *dname);
76 static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
77 static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
78 				char *alias);
79 static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
80 				int majors);
81 static int		dr_add_int(uint64_t *arr, int idx, int len,
82 				uint64_t val);
83 
84 int dr_pt_test_suspend(dr_handle_t *hp);
85 
86 /*
87  * dr_quiesce.c interface
88  * NOTE: states used internally by dr_suspend and dr_resume
89  */
/*
 * Progress markers for a quiesce.  dr_suspend() records each stage just
 * before attempting it, and dr_resume() switches on the recorded stage and
 * falls through to undo every earlier stage as well.
 */
90 typedef enum dr_suspend_state {
91 	DR_SRSTATE_BEGIN = 0,	/* dr_resume() only sends SIGTHAW */
92 	DR_SRSTATE_USER,	/* set before stopping user threads */
93 	DR_SRSTATE_DAEMON,	/* set before stopping kernel daemons */
94 	DR_SRSTATE_DRIVER,	/* set before suspending drivers */
95 	DR_SRSTATE_FULL		/* set before pausing the cpus */
96 } suspend_state_t;
97 
/*
 * Per-operation suspend/resume context, created by dr_get_sr_handle() and
 * destroyed by dr_release_sr_handle().
 */
98 struct dr_sr_handle {
	/* DR handle this operation belongs to; errors are posted on it */
99 	dr_handle_t		*sr_dr_handlep;
	/* held dip whose DDI_SUSPEND failed; released in dr_resume_devices() */
100 	dev_info_t		*sr_failed_dip;
	/* how far dr_suspend() got; drives the unwind in dr_resume() */
101 	suspend_state_t		sr_suspend_state;
	/* SR_FLAG_* bits */
102 	uint_t			sr_flags;
	/* majors or pids collected for error reporting (see dr_add_int()) */
103 	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
	/* number of valid entries in sr_err_ints */
104 	int			sr_err_idx;
105 };
106 
107 #define	SR_FLAG_WATCHDOG	0x1
108 
109 /*
110  * XXX
111  * This hack will go away before RTI.  Just for testing.
112  * List of drivers to bypass when performing a suspend.
113  */
114 static char *dr_bypass_list[] = {
115 	""
116 };
117 
118 
119 static int	dr_skip_kernel_threads = 1;	/* "TRUE" */
120 #define		SKIP_SYNC	/* bypass sync ops in dr_suspend */
121 
122 /*
123  * dr_skip_user_threads is used to control if user threads should
124  * be suspended.  If dr_skip_user_threads is true, the rest of the
125  * flags are not used; if it is false, dr_check_user_stop_result
126  * will be used to control whether or not we need to check suspend
127  * result, and dr_allow_blocked_threads will be used to control
128  * whether or not we allow suspend to continue if there are blocked
129  * threads.  We allow all combinations of dr_check_user_stop_result
130  * and dr_allow_blocked_threads, even though it might not make much
131  * sense to disallow blocked threads when we don't even check the stop
132  * result.
133  */
134 static int	dr_skip_user_threads = 0;	/* default to FALSE */
135 static int	dr_check_user_stop_result = 1;	/* default to TRUE */
136 static int	dr_allow_blocked_threads = 1;	/* default to TRUE */
137 
138 #define	DR_CPU_LOOP_MSEC	1000
139 
140 static void
141 dr_stop_intr(void)
142 {
143 	ASSERT(MUTEX_HELD(&cpu_lock));
144 
145 	kpreempt_disable();
146 	cyclic_suspend();
147 }
148 
149 static void
150 dr_enable_intr(void)
151 {
152 	ASSERT(MUTEX_HELD(&cpu_lock));
153 
154 	cyclic_resume();
155 	kpreempt_enable();
156 }
157 
158 dr_sr_handle_t *
159 dr_get_sr_handle(dr_handle_t *hp)
160 {
161 	dr_sr_handle_t *srh;
162 
163 	srh = GETSTRUCT(dr_sr_handle_t, 1);
164 	srh->sr_dr_handlep = hp;
165 
166 	return (srh);
167 }
168 
169 void
170 dr_release_sr_handle(dr_sr_handle_t *srh)
171 {
172 	ASSERT(srh->sr_failed_dip == NULL);
173 	FREESTRUCT(srh, dr_sr_handle_t, 1);
174 }
175 
176 static int
177 dr_is_real_device(dev_info_t *dip)
178 {
179 	struct regspec *regbuf = NULL;
180 	int length = 0;
181 	int rc;
182 
183 	if (ddi_get_driver(dip) == NULL)
184 		return (0);
185 
186 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
187 		return (1);
188 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
189 		return (0);
190 
191 	/*
192 	 * now the general case
193 	 */
194 	rc = ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg",
195 		(caddr_t)&regbuf, &length);
196 	ASSERT(rc != DDI_PROP_NO_MEMORY);
197 	if (rc != DDI_PROP_SUCCESS) {
198 		return (0);
199 	} else {
200 		if ((length > 0) && (regbuf != NULL))
201 			kmem_free(regbuf, length);
202 		return (1);
203 	}
204 }
205 
206 static int
207 dr_is_unsafe_major(major_t major)
208 {
209 	char    *dname, **cpp;
210 	int	i, ndevs;
211 
212 	if ((dname = ddi_major_to_name(major)) == NULL) {
213 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
214 		return (0);
215 	}
216 
217 	ndevs = dr_unsafe_devs.ndevs;
218 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
219 		if (strcmp(dname, *cpp++) == 0)
220 			return (1);
221 	}
222 	return (0);
223 }
224 
225 static int
226 dr_bypass_device(char *dname)
227 {
228 	int i;
229 	char **lname;
230 	/* check the bypass list */
231 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
232 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
233 			return (1);
234 	}
235 	return (0);
236 }
237 
238 static int
239 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
240 {
241 	major_t	devmajor;
242 	char	*aka, *name;
243 
244 	*buffer = *alias = 0;
245 
246 	if (dip == NULL)
247 		return (-1);
248 
249 	if ((name = ddi_get_name(dip)) == NULL)
250 		name = "<null name>";
251 
252 	aka = name;
253 
254 	if ((devmajor = ddi_name_to_major(aka)) != -1)
255 		aka = ddi_major_to_name(devmajor);
256 
257 	strcpy(buffer, name);
258 
259 	if (strcmp(name, aka))
260 		strcpy(alias, aka);
261 	else
262 		*alias = 0;
263 
264 	return (0);
265 }
266 
/*
 * Argument bundle passed as the opaque 'arg' to dr_check_dip().
 */
267 struct dr_ref {
	/* if non-NULL, accumulates the 'ref' values seen by dr_check_dip() */
268 	int		*refcount;
	/* if non-NULL (together with idx), collects unsafe major numbers */
269 	uint64_t	*arr;
	/* in/out: number of entries currently stored in arr */
270 	int		*idx;
	/* capacity of arr, in entries */
271 	int		len;
272 };
273 
274 /* ARGSUSED */
275 static int
276 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
277 {
278 	major_t		major;
279 	char		*dname;
280 	struct dr_ref	*rp = (struct dr_ref *)arg;
281 
282 	if (dip == NULL)
283 		return (DDI_WALK_CONTINUE);
284 
285 	if (!dr_is_real_device(dip))
286 		return (DDI_WALK_CONTINUE);
287 
288 	dname = ddi_binding_name(dip);
289 
290 	if (dr_bypass_device(dname))
291 		return (DDI_WALK_CONTINUE);
292 
293 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
294 		if (ref && rp->refcount) {
295 			*rp->refcount += ref;
296 			PR_QR("\n  %s (major# %d) is referenced(%u)\n",
297 				dname, major, ref);
298 		}
299 		if (dr_is_unsafe_major(major) &&
300 		    i_ddi_node_state(dip) >= DS_ATTACHED) {
301 			PR_QR("\n  %s (major# %d) not hotpluggable\n",
302 				dname, major);
303 			if (rp->arr != NULL && rp->idx != NULL)
304 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
305 					rp->len, (uint64_t)major);
306 		}
307 	}
308 	return (DDI_WALK_CONTINUE);
309 }
310 
311 static int
312 dr_check_unsafe_major(dev_info_t *dip, void *arg)
313 {
314 	return (dr_check_dip(dip, arg, 0));
315 }
316 
317 
318 /*ARGSUSED*/
319 void
320 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
321     uint64_t *arr, int *idx, int len)
322 {
323 	struct dr_ref bref = {0};
324 
325 	if (dip == NULL)
326 		return;
327 
328 	bref.refcount = refcount;
329 	bref.arr = arr;
330 	bref.idx = idx;
331 	bref.len = len;
332 
333 	ASSERT(e_ddi_branch_held(dip));
334 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
335 }
336 
337 /*
338  * The "dip" argument's parent (if it exists) must be held busy.
339  */
/*
 * Suspend 'dip', its following siblings, and all of their descendants with
 * DDI_SUSPEND.  Children are suspended before their parent (the recursion
 * happens before the node itself is detached).
 *
 * Nodes that are not real devices, are on the bypass list, or for which
 * drmach_verify_sr() returns non-zero are skipped.
 *
 * Returns DDI_SUCCESS if everything was suspended.  On the first failure
 * the walk stops: the level where devi_detach() failed returns DDI_FAILURE
 * and every enclosing level returns ENXIO.  The failing dip is held and
 * remembered in srh->sr_failed_dip, its major is recorded in
 * srh->sr_err_ints, and an ESBD_SUSPEND error is posted on the DR handle.
 */
340 static int
341 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
342 {
343 	dr_handle_t	*handle;
344 	major_t		major;
345 	char		*dname;
346 	int		circ;
347 
348 	/*
349 	 * If dip is the root node, it has no siblings and it is
350 	 * always held. If dip is not the root node, dr_suspend_devices()
351 	 * will be invoked with the parent held busy.
352 	 */
353 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
354 		char	d_name[40], d_alias[40], *d_info;
355 
		/* hold this node busy while its children are suspended */
356 		ndi_devi_enter(dip, &circ);
357 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
358 			ndi_devi_exit(dip, circ);
359 			return (ENXIO);
360 		}
361 		ndi_devi_exit(dip, circ);
362 
363 		if (!dr_is_real_device(dip))
364 			continue;
365 
		/* major stays (major_t)-1 when there is no binding name */
366 		major = (major_t)-1;
367 		if ((dname = ddi_binding_name(dip)) != NULL)
368 			major = ddi_name_to_major(dname);
369 
370 		if (dr_bypass_device(dname)) {
371 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
372 				major);
373 			continue;
374 		}
375 
376 		if (drmach_verify_sr(dip, 1)) {
377 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
378 				major);
379 			continue;
380 		}
381 
382 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
383 			d_info = "<null>";
384 
		/* d_name[0] == 0 later means "fall back to dname" */
385 		d_name[0] = 0;
386 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
387 			if (d_alias[0] != 0) {
388 				prom_printf("\tsuspending %s@%s (aka %s)\n",
389 					d_name, d_info, d_alias);
390 			} else {
391 				prom_printf("\tsuspending %s@%s\n",
392 					d_name, d_info);
393 			}
394 		} else {
395 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
396 		}
397 
398 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
399 			prom_printf("\tFAILED to suspend %s@%s\n",
400 				d_name[0] ? d_name : dname, d_info);
401 
402 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
403 				srh->sr_err_idx, DR_MAX_ERR_INT,
404 				(uint64_t)major);
405 
			/* this hold is dropped in dr_resume_devices() */
406 			ndi_hold_devi(dip);
407 			srh->sr_failed_dip = dip;
408 
409 			handle = srh->sr_dr_handlep;
410 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
411 				d_name[0] ? d_name : dname, d_info);
412 
413 			return (DDI_FAILURE);
414 		}
415 	}
416 
417 	return (DDI_SUCCESS);
418 }
419 
420 static void
421 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
422 {
423 	dr_handle_t	*handle;
424 	dev_info_t	*dip, *next, *last = NULL;
425 	major_t		major;
426 	char		*bn;
427 	int		circ;
428 
429 	major = (major_t)-1;
430 
431 	/* attach in reverse device tree order */
432 	while (last != start) {
433 		dip = start;
434 		next = ddi_get_next_sibling(dip);
435 		while (next != last && dip != srh->sr_failed_dip) {
436 			dip = next;
437 			next = ddi_get_next_sibling(dip);
438 		}
439 		if (dip == srh->sr_failed_dip) {
440 			/* release hold acquired in dr_suspend_devices() */
441 			srh->sr_failed_dip = NULL;
442 			ndi_rele_devi(dip);
443 		} else if (dr_is_real_device(dip) &&
444 				srh->sr_failed_dip == NULL) {
445 
446 			if ((bn = ddi_binding_name(dip)) != NULL) {
447 				major = ddi_name_to_major(bn);
448 			} else {
449 				bn = "<null>";
450 			}
451 			if (!dr_bypass_device(bn) &&
452 				!drmach_verify_sr(dip, 0)) {
453 				char	d_name[40], d_alias[40], *d_info;
454 
455 				d_name[0] = 0;
456 				d_info = ddi_get_name_addr(dip);
457 				if (d_info == NULL)
458 					d_info = "<null>";
459 
460 				if (!dr_resolve_devname(dip, d_name,
461 								d_alias)) {
462 					if (d_alias[0] != 0) {
463 						prom_printf("\tresuming "
464 							"%s@%s (aka %s)\n",
465 							d_name, d_info,
466 							d_alias);
467 					} else {
468 						prom_printf("\tresuming "
469 							"%s@%s\n",
470 							d_name, d_info);
471 					}
472 				} else {
473 					prom_printf("\tresuming %s@%s\n",
474 						bn, d_info);
475 				}
476 
477 				if (devi_attach(dip, DDI_RESUME) !=
478 							DDI_SUCCESS) {
479 					/*
480 					 * Print a console warning,
481 					 * set an e_code of ESBD_RESUME,
482 					 * and save the driver major
483 					 * number in the e_rsc.
484 					 */
485 					prom_printf("\tFAILED to resume %s@%s",
486 					    d_name[0] ? d_name : bn, d_info);
487 
488 					srh->sr_err_idx =
489 						dr_add_int(srh->sr_err_ints,
490 						srh->sr_err_idx, DR_MAX_ERR_INT,
491 						(uint64_t)major);
492 
493 					handle = srh->sr_dr_handlep;
494 
495 					dr_op_err(CE_IGNORE, handle,
496 					    ESBD_RESUME, "%s@%s",
497 					    d_name[0] ? d_name : bn, d_info);
498 				}
499 			}
500 		}
501 
502 		/* Hold parent busy while walking its children */
503 		ndi_devi_enter(dip, &circ);
504 		dr_resume_devices(ddi_get_child(dip), srh);
505 		ndi_devi_exit(dip, circ);
506 		last = dip;
507 	}
508 }
509 
510 /*
511  * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
512  * but from DR point of view.  These user threads are waiting in
513  * the kernel.  Once they complete in the kernel, they will process
514  * the stop signal and stop.
515  */
516 #define	DR_VSTOPPED(t)			\
517 	((t)->t_state == TS_SLEEP &&	\
518 	(t)->t_wchan != NULL &&		\
519 	(t)->t_astflag &&		\
520 	((t)->t_proc_flag & TP_CHKPT))
521 
/*
 * Ask every user thread in the system to stop and wait for them to do so.
 *
 * Each of up to DR_UTSTOP_RETRY passes marks all user threads with
 * TP_CHKPT and posts an AST, waits (count * count * DR_UTSTOP_WAIT ticks,
 * so the first pass does not wait), and then re-walks the thread list to
 * see whether every thread is stopped.  A thread that is merely blocked
 * in the kernel (DR_VSTOPPED) is accepted when dr_allow_blocked_threads
 * is set.
 *
 * Returns DDI_SUCCESS when all threads stopped, or immediately when
 * dr_skip_user_threads is set.  Otherwise returns ESRCH, with the pids of
 * the offending processes (collected on the last pass only) reported as
 * an ESBD_UTHREAD error on the DR handle.
 */
522 /* ARGSUSED */
523 static int
524 dr_stop_user_threads(dr_sr_handle_t *srh)
525 {
526 	int		count;
527 	int		bailout;
528 	dr_handle_t	*handle = srh->sr_dr_handlep;
529 	static fn_t	f = "dr_stop_user_threads";
530 	kthread_id_t 	tp;
531 
532 	extern void add_one_utstop();
533 	extern void utstop_timedwait(clock_t);
534 	extern void utstop_init(void);
535 
536 #define	DR_UTSTOP_RETRY	4
537 #define	DR_UTSTOP_WAIT	hz
538 
539 	if (dr_skip_user_threads)
540 		return (DDI_SUCCESS);
541 
542 	utstop_init();
543 
544 	/* we need to try a few times to get past fork, etc. */
545 	srh->sr_err_idx = 0;
546 	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
547 		/* walk the entire threadlist */
548 		mutex_enter(&pidlock);
549 		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
550 			proc_t *p = ttoproc(tp);
551 
552 			/* handle kernel threads separately */
553 			if (p->p_as == &kas || p->p_stat == SZOMB)
554 				continue;
555 
556 			mutex_enter(&p->p_lock);
557 			thread_lock(tp);
558 
559 			if (tp->t_state == TS_STOPPED) {
560 				/* add another reason to stop this thread */
561 				tp->t_schedflag &= ~TS_RESUME;
562 			} else {
563 				tp->t_proc_flag |= TP_CHKPT;
564 
				/* both locks are dropped around add_one_utstop() */
565 				thread_unlock(tp);
566 				mutex_exit(&p->p_lock);
567 				add_one_utstop();
568 				mutex_enter(&p->p_lock);
569 				thread_lock(tp);
570 
571 				aston(tp);
572 
				/* make a wakeable sleeper runnable again */
573 				if (tp->t_state == TS_SLEEP &&
574 				    (tp->t_flag & T_WAKEABLE)) {
575 					setrun_locked(tp);
576 				}
577 
578 			}
579 
580 			/* grab thread if needed */
581 			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
582 				poke_cpu(tp->t_cpu->cpu_id);
583 
584 
585 			thread_unlock(tp);
586 			mutex_exit(&p->p_lock);
587 		}
588 		mutex_exit(&pidlock);
589 
590 
591 		/* let everything catch up */
592 		utstop_timedwait(count * count * DR_UTSTOP_WAIT);
593 
594 
595 		/* now, walk the threadlist again to see if we are done */
596 		mutex_enter(&pidlock);
597 		for (tp = curthread->t_next, bailout = 0;
598 		    tp != curthread; tp = tp->t_next) {
599 			proc_t *p = ttoproc(tp);
600 
601 			/* handle kernel threads separately */
602 			if (p->p_as == &kas || p->p_stat == SZOMB)
603 				continue;
604 
605 			/*
606 			 * If this thread didn't stop, and we don't allow
607 			 * unstopped blocked threads, bail.
608 			 */
609 			thread_lock(tp);
610 			if (!CPR_ISTOPPED(tp) &&
611 			    !(dr_allow_blocked_threads &&
612 			    DR_VSTOPPED(tp))) {
613 				bailout = 1;
				/* only the final pass reports the offenders */
614 				if (count == DR_UTSTOP_RETRY - 1) {
615 					/*
616 					 * save the pid for later reporting
617 					 */
618 					srh->sr_err_idx =
619 					    dr_add_int(srh->sr_err_ints,
620 					    srh->sr_err_idx, DR_MAX_ERR_INT,
621 					    (uint64_t)p->p_pid);
622 
623 					cmn_err(CE_WARN, "%s: "
624 					    "failed to stop thread: "
625 					    "process=%s, pid=%d",
626 					    f, p->p_user.u_psargs, p->p_pid);
627 
628 					PR_QR("%s: failed to stop thread: "
629 					    "process=%s, pid=%d, t_id=0x%lx, "
630 					    "t_state=0x%x, t_proc_flag=0x%x, "
631 					    "t_schedflag=0x%x\n",
632 					    f, p->p_user.u_psargs, p->p_pid,
633 					    tp, tp->t_state, tp->t_proc_flag,
634 					    tp->t_schedflag);
635 				}
636 
637 			}
638 			thread_unlock(tp);
639 		}
640 		mutex_exit(&pidlock);
641 
642 		/* were all the threads stopped? */
643 		if (!bailout)
644 			break;
645 	}
646 
647 	/* were we unable to stop all threads after a few tries? */
648 	if (bailout) {
649 		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
650 			srh->sr_err_idx, 0);
651 		return (ESRCH);
652 	}
653 
654 	return (DDI_SUCCESS);
655 }
656 
/*
 * Checkpoint the kernel daemons through the CB_CL_CPR_DAEMON callback
 * class and then verify that every non-interrupt kernel thread reports
 * itself stopped.
 *
 * Returns DDI_SUCCESS on success, or immediately when
 * dr_skip_kernel_threads is set.  Returns EBUSY, with an ESBD_KTHREAD
 * error naming the offender posted on 'handle', if a callback fails or a
 * thread is found that is not stopped.
 *
 * Once callb_lock_table() has been called, this function returns with the
 * table still locked on every path; the matching callb_unlock_table() is
 * in dr_resume().
 */
657 static int
658 dr_stop_kernel_threads(dr_handle_t *handle)
659 {
660 	caddr_t		name;
661 	kthread_id_t	tp;
662 
663 	if (dr_skip_kernel_threads) {
664 		return (DDI_SUCCESS);
665 	}
666 
667 	/*
668 	 * Note: we unlock the table in resume.
669 	 * We need to lock the callback table only if we are actually
670 	 * suspending kernel threads.
671 	 */
672 	callb_lock_table();
673 	name = callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_CHKPT);
	/* a non-NULL result is treated as the name of a failed callback */
674 	if (name != NULL) {
675 		dr_op_err(CE_IGNORE, handle, ESBD_KTHREAD, name);
676 		return (EBUSY);
677 	}
678 
679 	/*
680 	 * Verify that all threads are accounted for
681 	 */
682 	mutex_enter(&pidlock);
683 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
684 		proc_t	*p = ttoproc(tp);
685 
		/* user threads were dealt with by dr_stop_user_threads() */
686 		if (p->p_as != &kas)
687 			continue;
688 
689 		if (tp->t_flag & T_INTR_THREAD)
690 			continue;
691 
692 		if (!callb_is_stopped(tp, &name)) {
693 			mutex_exit(&pidlock);
694 			dr_op_err(CE_IGNORE, handle, ESBD_KTHREAD, name);
695 			return (EBUSY);
696 		}
697 	}
698 
699 	mutex_exit(&pidlock);
700 	return (DDI_SUCCESS);
701 }
702 
703 static void
704 dr_start_user_threads(void)
705 {
706 	kthread_id_t tp;
707 
708 	mutex_enter(&pidlock);
709 
710 	/* walk all threads and release them */
711 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
712 		proc_t *p = ttoproc(tp);
713 
714 		/* skip kernel threads */
715 		if (ttoproc(tp)->p_as == &kas)
716 			continue;
717 
718 		mutex_enter(&p->p_lock);
719 		tp->t_proc_flag &= ~TP_CHKPT;
720 		mutex_exit(&p->p_lock);
721 
722 		thread_lock(tp);
723 		if (CPR_ISTOPPED(tp)) {
724 			/* back on the runq */
725 			tp->t_schedflag |= TS_RESUME;
726 			setrun_locked(tp);
727 		}
728 		thread_unlock(tp);
729 	}
730 
731 	mutex_exit(&pidlock);
732 }
733 
734 static void
735 dr_signal_user(int sig)
736 {
737 	struct proc *p;
738 
739 	mutex_enter(&pidlock);
740 
741 	for (p = practive; p != NULL; p = p->p_next) {
742 		/* only user threads */
743 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
744 		    p == proc_init || p == ttoproc(curthread))
745 			continue;
746 
747 		mutex_enter(&p->p_lock);
748 		sigtoproc(p, NULL, sig);
749 		mutex_exit(&p->p_lock);
750 	}
751 
752 	mutex_exit(&pidlock);
753 
754 	/* add a bit of delay */
755 	delay(hz);
756 }
757 
/*
 * Undo a (possibly partial) dr_suspend().
 *
 * srh->sr_suspend_state records the last stage dr_suspend() attempted.
 * The switch enters at that stage and falls through all earlier ones, so
 * the system is unwound in the reverse order it was quiesced: cpus and
 * interrupts, drivers and the lock manager, kernel daemons, user threads,
 * and finally a SIGTHAW to user processes.  Device tree changes are then
 * allowed again and the cpu signature is set back to SIGST_RUN.
 *
 * In the DR_SRSTATE_FULL case the caller must hold cpu_lock (taken by
 * dr_suspend()); it is dropped here.  Driver resume failures are reported
 * as an ESBD_RESUME error on the DR handle.
 */
758 void
759 dr_resume(dr_sr_handle_t *srh)
760 {
761 	dr_handle_t	*handle;
762 
763 	handle = srh->sr_dr_handlep;
764 
765 	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
766 		/*
767 		 * Update the signature block.
768 		 * If cpus are not paused, this can be done now.
769 		 * See comments below.
770 		 */
771 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
772 		    CPU->cpu_id);
773 	}
774 
775 	switch (srh->sr_suspend_state) {
776 	case DR_SRSTATE_FULL:
777 
778 		ASSERT(MUTEX_HELD(&cpu_lock));
779 
780 		dr_enable_intr(); 	/* enable intr & clock */
781 
782 		start_cpus();
783 		mutex_exit(&cpu_lock);
784 
785 		/*
786 		 * Update the signature block.
787 		 * This must not be done while cpus are paused, since on
788 		 * Starcat the cpu signature update acquires an adaptive
789 		 * mutex in the iosram driver. Blocking with cpus paused
790 		 * can lead to deadlock.
791 		 */
792 		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
793 		    CPU->cpu_id);
794 
795 		/*
796 		 * If we suspended hw watchdog at suspend,
797 		 * re-enable it now.
798 		 */
799 
800 		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
801 			mutex_enter(&tod_lock);
802 			tod_ops.tod_set_watchdog_timer(
803 				watchdog_timeout_seconds);
804 			mutex_exit(&tod_lock);
805 		}
806 
807 		/*
808 		 * This should only be called if drmach_suspend_last()
809 		 * was called and state transitioned to DR_SRSTATE_FULL
810 		 * to prevent resume attempts on device instances that
811 		 * were not previously suspended.
812 		 */
813 		drmach_resume_first();
814 
815 		/* FALLTHROUGH */
816 
817 	case DR_SRSTATE_DRIVER:
818 		/*
819 		 * resume drivers
820 		 */
		/* start a fresh list of failed majors for this resume */
821 		srh->sr_err_idx = 0;
822 
823 		/* no parent dip to hold busy */
824 		dr_resume_devices(ddi_root_node(), srh);
825 
826 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
827 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
828 				srh->sr_err_ints, srh->sr_err_idx, 1);
829 		}
830 
831 		/*
832 		 * resume the lock manager
833 		 */
834 		lm_cprresume();
835 
836 		/* FALLTHROUGH */
837 
838 	case DR_SRSTATE_DAEMON:
839 		/*
840 		 * resume kernel daemons
841 		 */
842 		if (!dr_skip_kernel_threads) {
843 			prom_printf("DR: resuming kernel daemons...\n");
844 			(void) callb_execute_class(CB_CL_CPR_DAEMON,
845 				CB_CODE_CPR_RESUME);
			/* pairs with callb_lock_table() in dr_stop_kernel_threads() */
846 			callb_unlock_table();
847 		}
848 
849 		/* FALLTHROUGH */
850 
851 	case DR_SRSTATE_USER:
852 		/*
853 		 * finally, resume user threads
854 		 */
855 		if (!dr_skip_user_threads) {
856 			prom_printf("DR: resuming user threads...\n");
857 			dr_start_user_threads();
858 		}
859 		/* FALLTHROUGH */
860 
861 	case DR_SRSTATE_BEGIN:
862 	default:
863 		/*
864 		 * let those who care know that we've just resumed
865 		 */
866 		PR_QR("sending SIGTHAW...\n");
867 		dr_signal_user(SIGTHAW);
868 		break;
869 	}
870 
	/* pairs with i_ndi_block_device_tree_changes() in dr_suspend() */
871 	i_ndi_allow_device_tree_changes(handle->h_ndi);
872 
873 	/*
874 	 * update the signature block
875 	 */
876 	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);
877 
878 	prom_printf("DR: resume COMPLETED\n");
879 }
880 
/*
 * Quiesce the system for a DR operation.
 *
 * Stages, in order: block device tree changes, stop user threads, check
 * for attached unsafe drivers (skipped when SBD_FLAG_FORCE is set), stop
 * kernel daemons, suspend the lock manager, suspend all drivers, disable
 * the hardware watchdog if it was active, and finally pause the other
 * cpus and stop interrupts.  srh->sr_suspend_state is advanced before
 * each stage so that dr_resume() knows how much to undo.
 *
 * On success returns DDI_SUCCESS with cpu_lock held, cpus paused and
 * dr_stop_intr() in effect; the caller must call dr_resume() to undo it.
 * On any failure the function calls dr_resume() itself, leaves an error
 * on the DR handle where one is available, and returns non-zero.
 */
881 int
882 dr_suspend(dr_sr_handle_t *srh)
883 {
884 	dr_handle_t	*handle;
885 	int		force;
886 	int		dev_errs_idx;
887 	uint64_t	dev_errs[DR_MAX_ERR_INT];
888 	int		rc = DDI_SUCCESS;
889 
890 	handle = srh->sr_dr_handlep;
891 
892 	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
893 
894 	/*
895 	 * update the signature block
896 	 */
897 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
898 	    CPU->cpu_id);
899 
900 	i_ndi_block_device_tree_changes(&handle->h_ndi);
901 
902 	prom_printf("\nDR: suspending user threads...\n");
903 	srh->sr_suspend_state = DR_SRSTATE_USER;
	/* a failure here is ignored unless dr_check_user_stop_result is set */
904 	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
905 	    dr_check_user_stop_result) {
906 		dr_resume(srh);
907 		return (rc);
908 	}
909 
910 	if (!force) {
911 		struct dr_ref drc = {0};
912 
913 		prom_printf("\nDR: checking devices...\n");
914 		dev_errs_idx = 0;
915 
		/* refcount stays NULL: only unsafe majors are collected */
916 		drc.arr = dev_errs;
917 		drc.idx = &dev_errs_idx;
918 		drc.len = DR_MAX_ERR_INT;
919 
920 		/*
921 		 * Since the root node can never go away, it
922 		 * doesn't have to be held.
923 		 */
924 		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
925 		if (dev_errs_idx) {
926 			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
927 				dev_errs_idx, 1);
928 			dr_resume(srh);
929 			return (DDI_FAILURE);
930 		}
931 		PR_QR("done\n");
932 	} else {
933 		prom_printf("\nDR: dr_suspend invoked with force flag\n");
934 	}
935 
936 	/*
937 	 * now stop daemon activities
938 	 */
939 	prom_printf("DR: suspending kernel daemons...\n");
940 	srh->sr_suspend_state = DR_SRSTATE_DAEMON;
941 	if ((rc = dr_stop_kernel_threads(handle)) != DDI_SUCCESS) {
942 		dr_resume(srh);
943 		return (rc);
944 	}
945 
	/* SKIP_SYNC is defined in this file, so the sync code is compiled out */
946 #ifndef	SKIP_SYNC
947 	/*
948 	 * This sync swap out all user pages
949 	 */
950 	vfs_sync(SYNC_ALL);
951 #endif
952 
953 	/*
954 	 * special treatment for lock manager
955 	 */
956 	lm_cprsuspend();
957 
958 #ifndef	SKIP_SYNC
959 	/*
960 	 * sync the file system in case we never make it back
961 	 */
962 	sync();
963 #endif
964 
965 	/*
966 	 * now suspend drivers
967 	 */
968 	prom_printf("DR: suspending drivers...\n");
969 	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
970 	srh->sr_err_idx = 0;
971 	/* No parent to hold busy */
972 	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
973 		if (srh->sr_err_idx && srh->sr_dr_handlep) {
974 			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
975 				srh->sr_err_ints, srh->sr_err_idx, 1);
976 		}
977 		dr_resume(srh);
978 		return (rc);
979 	}
980 
981 	drmach_suspend_last();
982 
983 	/*
984 	 * finally, grab all cpus
985 	 */
986 	srh->sr_suspend_state = DR_SRSTATE_FULL;
987 
988 	/*
989 	 * if watchdog was activated, disable it
990 	 */
991 	if (watchdog_activated) {
992 		mutex_enter(&tod_lock);
993 		tod_ops.tod_clear_watchdog_timer();
994 		mutex_exit(&tod_lock);
		/* tells dr_resume() to re-arm the watchdog */
995 		srh->sr_flags |= SR_FLAG_WATCHDOG;
996 	} else {
997 		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
998 	}
999 
1000 	/*
1001 	 * Update the signature block.
1002 	 * This must be done before cpus are paused, since on Starcat the
1003 	 * cpu signature update acquires an adaptive mutex in the iosram driver.
1004 	 * Blocking with cpus paused can lead to deadlock.
1005 	 */
1006 	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);
1007 
	/* cpu_lock stays held on return; dr_resume() drops it */
1008 	mutex_enter(&cpu_lock);
1009 	pause_cpus(NULL);
1010 	dr_stop_intr();
1011 
1012 	return (rc);
1013 }
1014 
1015 int
1016 dr_pt_test_suspend(dr_handle_t *hp)
1017 {
1018 	dr_sr_handle_t *srh;
1019 	int		err;
1020 	uint_t		psmerr;
1021 	static fn_t	f = "dr_pt_test_suspend";
1022 
1023 	PR_QR("%s...\n", f);
1024 
1025 	srh = dr_get_sr_handle(hp);
1026 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
1027 		dr_resume(srh);
1028 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
1029 			PR_QR("%s: error on dr_resume()", f);
1030 			switch (psmerr) {
1031 			case ESBD_RESUME:
1032 				PR_QR("Couldn't resume devices: %s\n",
1033 					DR_GET_E_RSC(hp->h_err));
1034 				break;
1035 
1036 			case ESBD_KTHREAD:
1037 				PR_ALL("psmerr is ESBD_KTHREAD\n");
1038 				break;
1039 			default:
1040 				PR_ALL("Resume error unknown = %d\n",
1041 					psmerr);
1042 				break;
1043 			}
1044 		}
1045 	} else {
1046 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n",
1047 			f, err);
1048 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
1049 		switch (psmerr) {
1050 		case ESBD_UNSAFE:
1051 			PR_ALL("Unsafe devices (major #): %s\n",
1052 				DR_GET_E_RSC(hp->h_err));
1053 			break;
1054 
1055 		case ESBD_RTTHREAD:
1056 			PR_ALL("RT threads (PIDs): %s\n",
1057 				DR_GET_E_RSC(hp->h_err));
1058 			break;
1059 
1060 		case ESBD_UTHREAD:
1061 			PR_ALL("User threads (PIDs): %s\n",
1062 				DR_GET_E_RSC(hp->h_err));
1063 			break;
1064 
1065 		case ESBD_SUSPEND:
1066 			PR_ALL("Non-suspendable devices (major #): %s\n",
1067 				DR_GET_E_RSC(hp->h_err));
1068 			break;
1069 
1070 		case ESBD_RESUME:
1071 			PR_ALL("Could not resume devices (major #): %s\n",
1072 				DR_GET_E_RSC(hp->h_err));
1073 			break;
1074 
1075 		case ESBD_KTHREAD:
1076 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1077 			break;
1078 
1079 		case ESBD_NOERROR:
1080 			PR_ALL("sbd_error_t error code not set\n");
1081 			break;
1082 
1083 		default:
1084 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1085 			break;
1086 		}
1087 	}
1088 	dr_release_sr_handle(srh);
1089 
1090 	return (0);
1091 }
1092 
/*
 * Append 'val' to the first 'idx' entries of 'arr', whose capacity is
 * 'len' entries.
 *
 * The value is dropped if the array is already full or if it is already
 * present among the existing entries.  Returns the resulting number of
 * entries, or 0 if 'arr' is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos;

	if (arr == NULL)
		return (0);

	/* No room left: the count is unchanged. */
	if (idx >= len)
		return (idx);

	/* Already recorded: the count is unchanged. */
	pos = 0;
	while (pos < idx) {
		if (arr[pos] == val)
			return (idx);
		pos++;
	}

	arr[idx] = val;
	return (idx + 1);
}
1118 
1119 /*
1120  * Construct an sbd_error_t featuring a string representation of an array of
1121  * integers as its e_rsc.
1122  */
1123 static sbd_error_t *
1124 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1125 {
1126 	int		i, n, buf_len, buf_idx, buf_avail;
1127 	char		*dname;
1128 	char		*buf;
1129 	sbd_error_t	*new_sbd_err;
1130 	static char	s_ellipsis[] = "...";
1131 
1132 	if (arr == NULL || idx <= 0)
1133 		return (NULL);
1134 
1135 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1136 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1137 
1138 	/*
1139 	 * This is the total working area of the buffer.  It must be computed
1140 	 * as the size of 'buf', minus reserved space for the null terminator
1141 	 * and the ellipsis string.
1142 	 */
1143 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1144 
1145 	/* Construct a string representation of the array values */
1146 	for (buf_idx = 0, i = 0; i < idx; i++) {
1147 		buf_avail = buf_len - buf_idx;
1148 		if (majors) {
1149 			dname = ddi_major_to_name(arr[i]);
1150 			if (dname) {
1151 				n = snprintf(&buf[buf_idx], buf_avail,
1152 					"%s, ", dname);
1153 			} else {
1154 				n = snprintf(&buf[buf_idx], buf_avail,
1155 					"major %llu, ", arr[i]);
1156 			}
1157 		} else {
1158 			n = snprintf(&buf[buf_idx], buf_avail, "%llu, ",
1159 				arr[i]);
1160 		}
1161 
1162 		/* An ellipsis gets appended when no more values fit */
1163 		if (n >= buf_avail) {
1164 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1165 			break;
1166 		}
1167 
1168 		buf_idx += n;
1169 	}
1170 
1171 	/* If all the contents fit, remove the trailing comma */
1172 	if (n < buf_avail) {
1173 		buf[--buf_idx] = '\0';
1174 		buf[--buf_idx] = '\0';
1175 	}
1176 
1177 	/* Return an sbd_error_t with the buffer and e_code */
1178 	new_sbd_err = drerr_new(1, e_code, buf);
1179 	kmem_free(buf, MAXPATHLEN);
1180 	return (new_sbd_err);
1181 }
1182