xref: /illumos-gate/usr/src/uts/sun4u/ngdr/io/dr_quiesce.c (revision df3cd224ef765c29101e4110546062199562f757)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * A CPR derivative specifically for starfire/starcat
29  */
30 
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/machparam.h>
34 #include <sys/machsystm.h>
35 #include <sys/ddi.h>
36 #define	SUNDDI_IMPL
37 #include <sys/sunddi.h>
38 #include <sys/sunndi.h>
39 #include <sys/devctl.h>
40 #include <sys/time.h>
41 #include <sys/kmem.h>
42 #include <nfs/lm.h>
43 #include <sys/ddi_impldefs.h>
44 #include <sys/ndi_impldefs.h>
45 #include <sys/obpdefs.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/errno.h>
49 #include <sys/callb.h>
50 #include <sys/clock.h>
51 #include <sys/x_call.h>
52 #include <sys/cpuvar.h>
53 #include <sys/epm.h>
54 #include <sys/vfs.h>
55 
56 #include <sys/cpu_sgnblk_defs.h>
57 #include <sys/dr.h>
58 #include <sys/dr_util.h>
59 
60 #include <sys/promif.h>
61 #include <sys/conf.h>
62 #include <sys/cyclic.h>
63 
/*
 * Routines and data that are defined outside this file.
 */
extern void	e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
extern void	e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
extern int	is_pseudo_device(dev_info_t *dip);

extern kmutex_t	cpu_lock;
extern dr_unsafe_devs_t dr_unsafe_devs;

/*
 * Forward declarations for helpers that are private to this file.
 */
static int		dr_is_real_device(dev_info_t *dip);
static int		dr_is_unsafe_major(major_t major);
static int		dr_bypass_device(char *dname);
static int		dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
static int		dr_resolve_devname(dev_info_t *dip, char *buffer,
				char *alias);
static sbd_error_t	*drerr_int(int e_code, uint64_t *arr, int idx,
				int majors);
static int		dr_add_int(uint64_t *arr, int idx, int len,
				uint64_t val);

/* Non-static suspend/resume self-test, defined later in this file. */
int dr_pt_test_suspend(dr_handle_t *hp);
83 
84 /*
85  * dr_quiesce.c interface
86  * NOTE: states used internally by dr_suspend and dr_resume
87  */
/*
 * How far dr_suspend() has progressed.  dr_resume() switches on this value
 * and falls through from the deepest state reached down to the shallowest,
 * so the enumerators must stay in order of increasing suspend depth.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* nothing suspended yet */
	DR_SRSTATE_USER,	/* set before user threads are stopped */
	DR_SRSTATE_DRIVER,	/* set before drivers are suspended */
	DR_SRSTATE_FULL		/* set before the cpus are paused */
} suspend_state_t;
94 
/*
 * Per-operation suspend/resume bookkeeping, allocated by dr_get_sr_handle()
 * and freed by dr_release_sr_handle().
 */
struct dr_sr_handle {
	/* DR handle this suspend/resume operation belongs to */
	dr_handle_t		*sr_dr_handlep;
	/*
	 * Device whose DDI_SUSPEND failed; held by dr_suspend_devices()
	 * and released (and reset to NULL) by dr_resume_devices().
	 */
	dev_info_t		*sr_failed_dip;
	/* how far dr_suspend() got; drives the switch in dr_resume() */
	suspend_state_t		sr_suspend_state;
	/* SR_FLAG_* bits */
	uint_t			sr_flags;
	/* driver majors or pids collected for error reporting */
	uint64_t		sr_err_ints[DR_MAX_ERR_INT];
	/* number of valid entries in sr_err_ints */
	int			sr_err_idx;
};
103 
/*
 * sr_flags bit: dr_suspend() cleared the hardware watchdog timer, so
 * dr_resume() must re-arm it.
 */
#define	SR_FLAG_WATCHDOG	0x1
105 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * List of drivers to bypass when performing a suspend.
 *
 * The list is terminated by an empty string, which dr_bypass_device()
 * uses as its end-of-list sentinel.  As it stands the list holds only
 * that terminator, so no driver is bypassed.
 */
static char *dr_bypass_list[] = {
	""
};
114 
115 
/*
 * While SKIP_SYNC is defined, the vfs_sync(SYNC_ALL) and sync() calls in
 * dr_suspend() are compiled out.
 */
#define		SKIP_SYNC	/* bypass sync ops in dr_suspend */
117 
/*
 * dr_skip_user_threads is used to control if user threads should
 * be suspended.  If dr_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, dr_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and dr_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of dr_check_user_stop_result
 * and dr_allow_blocked_threads, even though it might not make much
 * sense to disallow blocked threads when we don't even check the
 * stop result.
 */
static int	dr_skip_user_threads = 0;	/* default to FALSE */
static int	dr_check_user_stop_result = 1;	/* default to TRUE */
static int	dr_allow_blocked_threads = 1;	/* default to TRUE */
133 
/*
 * NOTE(review): no user of this constant is visible in this file; the
 * name suggests a value in milliseconds -- confirm before relying on it
 * or removing it.
 */
#define	DR_CPU_LOOP_MSEC	1000
135 
/*
 * Disable kernel preemption and then suspend the cyclic subsystem.
 * The caller must hold cpu_lock.  dr_enable_intr() undoes both steps
 * in the opposite order.
 */
static void
dr_stop_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();
	cyclic_suspend();
}
144 
/*
 * Counterpart of dr_stop_intr(): resume the cyclic subsystem and then
 * re-enable kernel preemption.  The caller must hold cpu_lock.
 */
static void
dr_enable_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cyclic_resume();
	kpreempt_enable();
}
153 
154 dr_sr_handle_t *
155 dr_get_sr_handle(dr_handle_t *hp)
156 {
157 	dr_sr_handle_t *srh;
158 
159 	srh = GETSTRUCT(dr_sr_handle_t, 1);
160 	srh->sr_dr_handlep = hp;
161 
162 	return (srh);
163 }
164 
/*
 * Free a handle obtained from dr_get_sr_handle().  By this point any
 * device recorded as having failed to suspend must already have been
 * released and cleared (dr_resume_devices() does that), hence the ASSERT.
 */
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
	ASSERT(srh->sr_failed_dip == NULL);
	FREESTRUCT(srh, dr_sr_handle_t, 1);
}
171 
172 static int
173 dr_is_real_device(dev_info_t *dip)
174 {
175 	struct regspec *regbuf = NULL;
176 	int length = 0;
177 	int rc;
178 
179 	if (ddi_get_driver(dip) == NULL)
180 		return (0);
181 
182 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
183 		return (1);
184 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
185 		return (0);
186 
187 	/*
188 	 * now the general case
189 	 */
190 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
191 	    (caddr_t)&regbuf, &length);
192 	ASSERT(rc != DDI_PROP_NO_MEMORY);
193 	if (rc != DDI_PROP_SUCCESS) {
194 		return (0);
195 	} else {
196 		if ((length > 0) && (regbuf != NULL))
197 			kmem_free(regbuf, length);
198 		return (1);
199 	}
200 }
201 
202 static int
203 dr_is_unsafe_major(major_t major)
204 {
205 	char    *dname, **cpp;
206 	int	i, ndevs;
207 
208 	if ((dname = ddi_major_to_name(major)) == NULL) {
209 		PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
210 		return (0);
211 	}
212 
213 	ndevs = dr_unsafe_devs.ndevs;
214 	for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
215 		if (strcmp(dname, *cpp++) == 0)
216 			return (1);
217 	}
218 	return (0);
219 }
220 
221 static int
222 dr_bypass_device(char *dname)
223 {
224 	int i;
225 	char **lname;
226 	/* check the bypass list */
227 	for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
228 		if (strcmp(dname, dr_bypass_list[i++]) == 0)
229 			return (1);
230 	}
231 	return (0);
232 }
233 
234 static int
235 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
236 {
237 	major_t	devmajor;
238 	char	*aka, *name;
239 
240 	*buffer = *alias = 0;
241 
242 	if (dip == NULL)
243 		return (-1);
244 
245 	if ((name = ddi_get_name(dip)) == NULL)
246 		name = "<null name>";
247 
248 	aka = name;
249 
250 	if ((devmajor = ddi_name_to_major(aka)) != -1)
251 		aka = ddi_major_to_name(devmajor);
252 
253 	(void) strcpy(buffer, name);
254 
255 	if (strcmp(name, aka))
256 		(void) strcpy(alias, aka);
257 	else
258 		*alias = 0;
259 
260 	return (0);
261 }
262 
/*
 * Argument block handed to dr_check_dip() through the device tree walkers.
 * Any of the pointers may be NULL, in which case dr_check_dip() skips the
 * corresponding bookkeeping.
 */
struct dr_ref {
	int		*refcount;		/* total references seen */
	int		*refcount_non_gldv3;	/* refs on non-GLDv3 net drivers */
	uint64_t	*arr;			/* majors of unsafe drivers */
	int		*idx;			/* number of entries in arr */
	int		len;			/* capacity of arr */
};
270 
271 /* ARGSUSED */
272 static int
273 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
274 {
275 	major_t		major;
276 	char		*dname;
277 	struct dr_ref	*rp = (struct dr_ref *)arg;
278 
279 	if (dip == NULL)
280 		return (DDI_WALK_CONTINUE);
281 
282 	if (!dr_is_real_device(dip))
283 		return (DDI_WALK_CONTINUE);
284 
285 	dname = ddi_binding_name(dip);
286 
287 	if (dr_bypass_device(dname))
288 		return (DDI_WALK_CONTINUE);
289 
290 	if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
291 		if (ref && rp->refcount) {
292 			*rp->refcount += ref;
293 			PR_QR("\n  %s (major# %d) is referenced(%u)\n",
294 			    dname, major, ref);
295 		}
296 		if (ref && rp->refcount_non_gldv3) {
297 			if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
298 				*rp->refcount_non_gldv3 += ref;
299 		}
300 		if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
301 			PR_QR("\n  %s (major# %d) not hotpluggable\n",
302 			    dname, major);
303 			if (rp->arr != NULL && rp->idx != NULL)
304 				*rp->idx = dr_add_int(rp->arr, *rp->idx,
305 				    rp->len, (uint64_t)major);
306 		}
307 	}
308 	return (DDI_WALK_CONTINUE);
309 }
310 
311 static int
312 dr_check_unsafe_major(dev_info_t *dip, void *arg)
313 {
314 	return (dr_check_dip(dip, arg, 0));
315 }
316 
317 
318 /*ARGSUSED*/
319 void
320 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
321     uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
322 {
323 	struct dr_ref bref = {0};
324 
325 	if (dip == NULL)
326 		return;
327 
328 	bref.refcount = refcount;
329 	bref.refcount_non_gldv3 = refcount_non_gldv3;
330 	bref.arr = arr;
331 	bref.idx = idx;
332 	bref.len = len;
333 
334 	ASSERT(e_ddi_branch_held(dip));
335 	(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
336 }
337 
338 /*
339  * The "dip" argument's parent (if it exists) must be held busy.
340  */
341 static int
342 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
343 {
344 	dr_handle_t	*handle;
345 	major_t		major;
346 	char		*dname;
347 	int		circ;
348 
349 	/*
350 	 * If dip is the root node, it has no siblings and it is
351 	 * always held. If dip is not the root node, dr_suspend_devices()
352 	 * will be invoked with the parent held busy.
353 	 */
354 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
355 		char	d_name[40], d_alias[40], *d_info;
356 
357 		ndi_devi_enter(dip, &circ);
358 		if (dr_suspend_devices(ddi_get_child(dip), srh)) {
359 			ndi_devi_exit(dip, circ);
360 			return (ENXIO);
361 		}
362 		ndi_devi_exit(dip, circ);
363 
364 		if (!dr_is_real_device(dip))
365 			continue;
366 
367 		major = (major_t)-1;
368 		if ((dname = ddi_binding_name(dip)) != NULL)
369 			major = ddi_name_to_major(dname);
370 
371 		if (dr_bypass_device(dname)) {
372 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
373 			    major);
374 			continue;
375 		}
376 
377 		if (drmach_verify_sr(dip, 1)) {
378 			PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
379 			    major);
380 			continue;
381 		}
382 
383 		if ((d_info = ddi_get_name_addr(dip)) == NULL)
384 			d_info = "<null>";
385 
386 		d_name[0] = 0;
387 		if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
388 			if (d_alias[0] != 0) {
389 				prom_printf("\tsuspending %s@%s (aka %s)\n",
390 				    d_name, d_info, d_alias);
391 			} else {
392 				prom_printf("\tsuspending %s@%s\n",
393 				    d_name, d_info);
394 			}
395 		} else {
396 			prom_printf("\tsuspending %s@%s\n", dname, d_info);
397 		}
398 
399 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
400 			prom_printf("\tFAILED to suspend %s@%s\n",
401 			    d_name[0] ? d_name : dname, d_info);
402 
403 			srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
404 			    srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
405 
406 			ndi_hold_devi(dip);
407 			srh->sr_failed_dip = dip;
408 
409 			handle = srh->sr_dr_handlep;
410 			dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
411 			    d_name[0] ? d_name : dname, d_info);
412 
413 			return (DDI_FAILURE);
414 		}
415 	}
416 
417 	return (DDI_SUCCESS);
418 }
419 
420 static void
421 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
422 {
423 	dr_handle_t	*handle;
424 	dev_info_t	*dip, *next, *last = NULL;
425 	major_t		major;
426 	char		*bn;
427 	int		circ;
428 
429 	major = (major_t)-1;
430 
431 	/* attach in reverse device tree order */
432 	while (last != start) {
433 		dip = start;
434 		next = ddi_get_next_sibling(dip);
435 		while (next != last && dip != srh->sr_failed_dip) {
436 			dip = next;
437 			next = ddi_get_next_sibling(dip);
438 		}
439 		if (dip == srh->sr_failed_dip) {
440 			/* release hold acquired in dr_suspend_devices() */
441 			srh->sr_failed_dip = NULL;
442 			ndi_rele_devi(dip);
443 		} else
444 		if (dr_is_real_device(dip) && srh->sr_failed_dip == NULL) {
445 
446 			if ((bn = ddi_binding_name(dip)) != NULL) {
447 				major = ddi_name_to_major(bn);
448 			} else {
449 				bn = "<null>";
450 			}
451 			if (!dr_bypass_device(bn) &&
452 			    !drmach_verify_sr(dip, 0)) {
453 				char	d_name[40], d_alias[40], *d_info;
454 
455 				d_name[0] = 0;
456 				d_info = ddi_get_name_addr(dip);
457 				if (d_info == NULL)
458 					d_info = "<null>";
459 
460 				if (!dr_resolve_devname(dip, d_name,
461 				    d_alias)) {
462 					if (d_alias[0] != 0) {
463 						prom_printf("\tresuming "
464 						    "%s@%s (aka %s)\n",
465 						    d_name, d_info, d_alias);
466 					} else {
467 						prom_printf("\tresuming "
468 						    "%s@%s\n", d_name, d_info);
469 					}
470 				} else {
471 					prom_printf("\tresuming %s@%s\n",
472 					    bn, d_info);
473 				}
474 
475 				if (devi_attach(dip, DDI_RESUME) !=
476 				    DDI_SUCCESS) {
477 					/*
478 					 * Print a console warning,
479 					 * set an e_code of ESBD_RESUME,
480 					 * and save the driver major
481 					 * number in the e_rsc.
482 					 */
483 					prom_printf("\tFAILED to resume %s@%s",
484 					    d_name[0] ? d_name : bn, d_info);
485 
486 					srh->sr_err_idx =
487 					    dr_add_int(srh->sr_err_ints,
488 					    srh->sr_err_idx, DR_MAX_ERR_INT,
489 					    (uint64_t)major);
490 
491 					handle = srh->sr_dr_handlep;
492 
493 					dr_op_err(CE_IGNORE, handle,
494 					    ESBD_RESUME, "%s@%s",
495 					    d_name[0] ? d_name : bn, d_info);
496 				}
497 			}
498 		}
499 
500 		/* Hold parent busy while walking its children */
501 		ndi_devi_enter(dip, &circ);
502 		dr_resume_devices(ddi_get_child(dip), srh);
503 		ndi_devi_exit(dip, circ);
504 		last = dip;
505 	}
506 }
507 
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 *
 * Concretely: the thread is asleep on a wait channel, has an AST
 * posted, and carries the TP_CHKPT flag that dr_stop_user_threads()
 * sets.  The argument is evaluated more than once, so it must not
 * have side effects.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
519 
/*
 * Ask every user thread in the system to stop and wait for them to do so.
 *
 * Does nothing and returns DDI_SUCCESS when dr_skip_user_threads is set.
 * Otherwise makes up to DR_UTSTOP_RETRY passes; each pass flags every
 * user thread with TP_CHKPT, waits, and then checks whether every thread
 * is stopped (or, if dr_allow_blocked_threads is set, virtually stopped
 * per DR_VSTOPPED).
 *
 * Returns DDI_SUCCESS once a pass finds every thread stopped.  If the
 * last pass still finds running threads, their pids are collected in
 * srh->sr_err_ints, an ESBD_UTHREAD error is stored in the DR handle and
 * ESRCH is returned.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
	int		count;
	int		bailout;
	dr_handle_t	*handle = srh->sr_dr_handlep;
	static fn_t	f = "dr_stop_user_threads";
	kthread_id_t 	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	DR_UTSTOP_RETRY	4
#define	DR_UTSTOP_WAIT	hz

	if (dr_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	srh->sr_err_idx = 0;
	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Both locks are dropped around the
				 * add_one_utstop() call and retaken
				 * afterwards.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				/* post an AST so the thread notices TP_CHKPT */
				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/*
		 * let everything catch up; the timeout grows with the
		 * square of the pass number, so it is 0 on the first pass
		 */
		utstop_timedwait(count * count * DR_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(dr_allow_blocked_threads &&
			    DR_VSTOPPED(tp))) {
				bailout = 1;
				/* report offenders only on the final pass */
				if (count == DR_UTSTOP_RETRY - 1) {
					/*
					 * save the pid for later reporting
					 */
					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)p->p_pid);

					cmn_err(CE_WARN, "%s: "
					    "failed to stop thread: "
					    "process=%s, pid=%d",
					    f, p->p_user.u_psargs, p->p_pid);

					PR_QR("%s: failed to stop thread: "
					    "process=%s, pid=%d, t_id=0x%p, "
					    "t_state=0x%x, t_proc_flag=0x%x, "
					    "t_schedflag=0x%x\n",
					    f, p->p_user.u_psargs, p->p_pid,
					    (void *)tp, tp->t_state,
					    tp->t_proc_flag, tp->t_schedflag);
				}

			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		/* final argument 0: the collected values are pids, not majors */
		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
		    srh->sr_err_idx, 0);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
653 
654 static void
655 dr_start_user_threads(void)
656 {
657 	kthread_id_t tp;
658 
659 	mutex_enter(&pidlock);
660 
661 	/* walk all threads and release them */
662 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
663 		proc_t *p = ttoproc(tp);
664 
665 		/* skip kernel threads */
666 		if (ttoproc(tp)->p_as == &kas)
667 			continue;
668 
669 		mutex_enter(&p->p_lock);
670 		tp->t_proc_flag &= ~TP_CHKPT;
671 		mutex_exit(&p->p_lock);
672 
673 		thread_lock(tp);
674 		if (CPR_ISTOPPED(tp)) {
675 			/* back on the runq */
676 			tp->t_schedflag |= TS_RESUME;
677 			setrun_locked(tp);
678 		}
679 		thread_unlock(tp);
680 	}
681 
682 	mutex_exit(&pidlock);
683 }
684 
685 static void
686 dr_signal_user(int sig)
687 {
688 	struct proc *p;
689 
690 	mutex_enter(&pidlock);
691 
692 	for (p = practive; p != NULL; p = p->p_next) {
693 		/* only user threads */
694 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
695 		    p == proc_init || p == ttoproc(curthread))
696 			continue;
697 
698 		mutex_enter(&p->p_lock);
699 		sigtoproc(p, NULL, sig);
700 		mutex_exit(&p->p_lock);
701 	}
702 
703 	mutex_exit(&pidlock);
704 
705 	/* add a bit of delay */
706 	delay(hz);
707 }
708 
/*
 * Undo as much of dr_suspend() as was done.
 *
 * srh->sr_suspend_state records how far the suspend got; the switch
 * below enters at that state and falls through every shallower one, so
 * the work is undone in the reverse of the order it was done:
 *
 *	DR_SRSTATE_FULL		restart clock/interrupts and cpus, drop
 *				cpu_lock, re-arm the watchdog if it was
 *				disabled, drmach_resume_first()
 *	DR_SRSTATE_DRIVER	resume drivers and the lock manager
 *	DR_SRSTATE_USER		restart user threads
 *	DR_SRSTATE_BEGIN	send SIGTHAW
 *
 * In the DR_SRSTATE_FULL case the caller must hold cpu_lock (it is taken
 * at the end of a successful dr_suspend()); it is released here.
 * Driver resume failures are reported through the DR handle's h_err.
 */
void
dr_resume(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;

	handle = srh->sr_dr_handlep;

	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
		/*
		 * Update the signature block.
		 * If cpus are not paused, this can be done now.
		 * See comments below.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);
	}

	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_fault_reset();
		mutex_exit(&tod_lock);

		dr_enable_intr(); 	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * Update the signature block.
		 * This must not be done while cpus are paused, since on
		 * Starcat the cpu signature update acquires an adaptive
		 * mutex in the iosram driver. Blocking with cpus paused
		 * can lead to deadlock.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		/* report the majors of any drivers that failed to resume */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	/* pairs with i_ndi_block_device_tree_changes() in dr_suspend() */
	i_ndi_allow_device_tree_changes(handle->h_ndi);

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);

	prom_printf("DR: resume COMPLETED\n");
}
825 
/*
 * Quiesce the system: stop user threads, check for unsafe drivers
 * (unless SBD_FLAG_FORCE is set on the handle), suspend the lock manager
 * and all drivers, disable the watchdog if it is active, and finally
 * pause the other cpus and stop the clock.
 *
 * srh->sr_suspend_state is advanced before each stage so that
 * dr_resume() knows how much to undo.
 *
 * On success returns DDI_SUCCESS with cpu_lock held, cpus paused and
 * preemption disabled; the caller must eventually call dr_resume().
 * On failure dr_resume() has already been called here to back out, the
 * reason is recorded in the DR handle's h_err where available, and a
 * non-zero value is returned.
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	int		force;
	int		dev_errs_idx;
	uint64_t	dev_errs[DR_MAX_ERR_INT];
	int		rc = DDI_SUCCESS;

	handle = srh->sr_dr_handlep;

	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
	    CPU->cpu_id);

	/* undone by i_ndi_allow_device_tree_changes() in dr_resume() */
	i_ndi_block_device_tree_changes(&handle->h_ndi);

	prom_printf("\nDR: suspending user threads...\n");
	srh->sr_suspend_state = DR_SRSTATE_USER;
	/* a failure to stop threads only counts if the tunable says so */
	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    dr_check_user_stop_result) {
		dr_resume(srh);
		return (rc);
	}

	if (!force) {
		struct dr_ref drc = {0};

		prom_printf("\nDR: checking devices...\n");
		dev_errs_idx = 0;

		/* only the unsafe-major fields are used by this walk */
		drc.arr = dev_errs;
		drc.idx = &dev_errs_idx;
		drc.len = DR_MAX_ERR_INT;

		/*
		 * Since the root node can never go away, it
		 * doesn't have to be held.
		 */
		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
		if (dev_errs_idx) {
			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
			    dev_errs_idx, 1);
			dr_resume(srh);
			return (DDI_FAILURE);
		}
		PR_QR("done\n");
	} else {
		prom_printf("\nDR: dr_suspend invoked with force flag\n");
	}

#ifndef	SKIP_SYNC
	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef	SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();
#endif

	/*
	 * now suspend drivers
	 */
	prom_printf("DR: suspending drivers...\n");
	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
	srh->sr_err_idx = 0;
	/* No parent to hold busy */
	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		/* report the majors of the drivers that refused to suspend */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}
		dr_resume(srh);
		return (rc);
	}

	drmach_suspend_last();

	/*
	 * finally, grab all cpus
	 */
	srh->sr_suspend_state = DR_SRSTATE_FULL;

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		srh->sr_flags |= SR_FLAG_WATCHDOG;
	} else {
		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
	}

	/*
	 * Update the signature block.
	 * This must be done before cpus are paused, since on Starcat the
	 * cpu signature update acquires an adaptive mutex in the iosram
	 * driver.  Blocking with cpus paused can lead to deadlock.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);

	/* cpu_lock stays held on return; dr_resume() releases it */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL);
	dr_stop_intr();

	return (rc);
}
949 
950 int
951 dr_pt_test_suspend(dr_handle_t *hp)
952 {
953 	dr_sr_handle_t *srh;
954 	int		err;
955 	uint_t		psmerr;
956 	static fn_t	f = "dr_pt_test_suspend";
957 
958 	PR_QR("%s...\n", f);
959 
960 	srh = dr_get_sr_handle(hp);
961 	if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
962 		dr_resume(srh);
963 		if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
964 			PR_QR("%s: error on dr_resume()", f);
965 			switch (psmerr) {
966 			case ESBD_RESUME:
967 				PR_QR("Couldn't resume devices: %s\n",
968 				    DR_GET_E_RSC(hp->h_err));
969 				break;
970 
971 			case ESBD_KTHREAD:
972 				PR_ALL("psmerr is ESBD_KTHREAD\n");
973 				break;
974 			default:
975 				PR_ALL("Resume error unknown = %d\n",
976 				    psmerr);
977 				break;
978 			}
979 		}
980 	} else {
981 		PR_ALL("%s: dr_suspend() failed, err = 0x%x\n",
982 		    f, err);
983 		psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
984 		switch (psmerr) {
985 		case ESBD_UNSAFE:
986 			PR_ALL("Unsafe devices (major #): %s\n",
987 			    DR_GET_E_RSC(hp->h_err));
988 			break;
989 
990 		case ESBD_RTTHREAD:
991 			PR_ALL("RT threads (PIDs): %s\n",
992 			    DR_GET_E_RSC(hp->h_err));
993 			break;
994 
995 		case ESBD_UTHREAD:
996 			PR_ALL("User threads (PIDs): %s\n",
997 			    DR_GET_E_RSC(hp->h_err));
998 			break;
999 
1000 		case ESBD_SUSPEND:
1001 			PR_ALL("Non-suspendable devices (major #): %s\n",
1002 			    DR_GET_E_RSC(hp->h_err));
1003 			break;
1004 
1005 		case ESBD_RESUME:
1006 			PR_ALL("Could not resume devices (major #): %s\n",
1007 			    DR_GET_E_RSC(hp->h_err));
1008 			break;
1009 
1010 		case ESBD_KTHREAD:
1011 			PR_ALL("psmerr is ESBD_KTHREAD\n");
1012 			break;
1013 
1014 		case ESBD_NOERROR:
1015 			PR_ALL("sbd_error_t error code not set\n");
1016 			break;
1017 
1018 		default:
1019 			PR_ALL("Unknown error psmerr = %d\n", psmerr);
1020 			break;
1021 		}
1022 	}
1023 	dr_release_sr_handle(srh);
1024 
1025 	return (0);
1026 }
1027 
/*
 * Append 'val' to the first free slot of 'arr', which currently holds
 * 'idx' entries and has room for 'len'.
 *
 * The value is dropped when the array is already full or already
 * contains it.  Returns the resulting entry count: idx + 1 if the value
 * was stored, idx if it was dropped, and 0 if 'arr' is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int scan;

	if (arr == NULL)
		return (0);

	/* no room left: keep what we have */
	if (idx >= len)
		return (idx);

	/* duplicates are not recorded twice */
	scan = 0;
	while (scan < idx) {
		if (arr[scan] == val)
			return (idx);
		scan++;
	}

	arr[idx] = val;

	return (idx + 1);
}
1053 
1054 /*
1055  * Construct an sbd_error_t featuring a string representation of an array of
1056  * integers as its e_rsc.
1057  */
1058 static sbd_error_t *
1059 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1060 {
1061 	int		i, n, buf_len, buf_idx, buf_avail;
1062 	char		*dname;
1063 	char		*buf;
1064 	sbd_error_t	*new_sbd_err;
1065 	static char	s_ellipsis[] = "...";
1066 
1067 	if (arr == NULL || idx <= 0)
1068 		return (NULL);
1069 
1070 	/* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1071 	buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1072 
1073 	/*
1074 	 * This is the total working area of the buffer.  It must be computed
1075 	 * as the size of 'buf', minus reserved space for the null terminator
1076 	 * and the ellipsis string.
1077 	 */
1078 	buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1079 
1080 	/* Construct a string representation of the array values */
1081 	for (buf_idx = 0, i = 0; i < idx; i++) {
1082 		buf_avail = buf_len - buf_idx;
1083 		if (majors) {
1084 			dname = ddi_major_to_name(arr[i]);
1085 			if (dname) {
1086 				n = snprintf(&buf[buf_idx], buf_avail,
1087 				    "%s, ", dname);
1088 			} else {
1089 				n = snprintf(&buf[buf_idx], buf_avail,
1090 				    "major %lu, ", arr[i]);
1091 			}
1092 		} else {
1093 			n = snprintf(&buf[buf_idx], buf_avail, "%lu, ", arr[i]);
1094 		}
1095 
1096 		/* An ellipsis gets appended when no more values fit */
1097 		if (n >= buf_avail) {
1098 			(void) strcpy(&buf[buf_idx], s_ellipsis);
1099 			break;
1100 		}
1101 
1102 		buf_idx += n;
1103 	}
1104 
1105 	/* If all the contents fit, remove the trailing comma */
1106 	if (n < buf_avail) {
1107 		buf[--buf_idx] = '\0';
1108 		buf[--buf_idx] = '\0';
1109 	}
1110 
1111 	/* Return an sbd_error_t with the buffer and e_code */
1112 	new_sbd_err = drerr_new(1, e_code, buf);
1113 	kmem_free(buf, MAXPATHLEN);
1114 	return (new_sbd_err);
1115 }
1116