1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2023 Oxide Computer Company
28 */
29
/*
 * A CPR derivative specifically for starfire/starcat.
 * x86 doesn't make use of the quiesce interfaces; they are kept for
 * simplicity.
 */
34
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/machparam.h>
38 #include <sys/machsystm.h>
39 #include <sys/ddi.h>
40 #define SUNDDI_IMPL
41 #include <sys/sunddi.h>
42 #include <sys/sunndi.h>
43 #include <sys/devctl.h>
44 #include <sys/time.h>
45 #include <sys/kmem.h>
46 #include <nfs/lm.h>
47 #include <sys/ddi_impldefs.h>
48 #include <sys/ndi_impldefs.h>
49 #include <sys/obpdefs.h>
50 #include <sys/cmn_err.h>
51 #include <sys/debug.h>
52 #include <sys/errno.h>
53 #include <sys/callb.h>
54 #include <sys/clock.h>
55 #include <sys/x_call.h>
56 #include <sys/cpuvar.h>
57 #include <sys/epm.h>
58 #include <sys/vfs.h>
59 #include <sys/promif.h>
60 #include <sys/conf.h>
61 #include <sys/cyclic.h>
62
63 #include <sys/dr.h>
64 #include <sys/dr_util.h>
65
66 extern void e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
67 extern void e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
68 extern int is_pseudo_device(dev_info_t *dip);
69
70 extern kmutex_t cpu_lock;
71 extern dr_unsafe_devs_t dr_unsafe_devs;
72
73 static int dr_is_real_device(dev_info_t *dip);
74 static int dr_is_unsafe_major(major_t major);
75 static int dr_bypass_device(char *dname);
76 static int dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
77 static int dr_resolve_devname(dev_info_t *dip, char *buffer,
78 char *alias);
79 static sbd_error_t *drerr_int(int e_code, uint64_t *arr, int idx,
80 int majors);
81 static int dr_add_int(uint64_t *arr, int idx, int len,
82 uint64_t val);
83
84 int dr_pt_test_suspend(dr_handle_t *hp);
85
/*
 * dr_quiesce.c interface
 *
 * Progress markers used internally by dr_suspend() and dr_resume().
 * dr_suspend() records each stage just before attempting it, and
 * dr_resume() uses the recorded stage to decide how much to undo.
 */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* nothing has been attempted yet */
	DR_SRSTATE_USER,	/* stopping user threads */
	DR_SRSTATE_DRIVER,	/* suspending device drivers */
	DR_SRSTATE_FULL		/* drivers suspended; grabbing the cpus */
} suspend_state_t;
96
97 struct dr_sr_handle {
98 dr_handle_t *sr_dr_handlep;
99 dev_info_t *sr_failed_dip;
100 suspend_state_t sr_suspend_state;
101 uint_t sr_flags;
102 uint64_t sr_err_ints[DR_MAX_ERR_INT];
103 int sr_err_idx;
104 };
105
106 #define SR_FLAG_WATCHDOG 0x1
107
/*
 * XXX
 * This hack will go away before RTI. Just for testing.
 *
 * Names of drivers to skip when performing a suspend. The list is
 * terminated by an empty string, which dr_bypass_device() relies on.
 */
static char *dr_bypass_list[] = {
	""
};


/* When defined, the file system sync operations in dr_suspend are skipped. */
#define	SKIP_SYNC
119
/*
 * Tunables governing how user threads are handled during suspend.
 *
 * dr_skip_user_threads: when true, user threads are not suspended at
 * all and the remaining two flags are not used.
 *
 * dr_check_user_stop_result: when true, a failure to stop user threads
 * aborts the suspend; when false, the result is not checked.
 *
 * dr_allow_blocked_threads: when true, the suspend may continue even
 * though some threads are blocked rather than stopped.
 *
 * All combinations of dr_check_user_stop_result and
 * dr_allow_blocked_threads are allowed, even though it might not make
 * much sense to disallow blocked threads when the stop result is not
 * even checked.
 */
static int dr_skip_user_threads = 0;		/* default to FALSE */
static int dr_check_user_stop_result = 1;	/* default to TRUE */
static int dr_allow_blocked_threads = 1;	/* default to TRUE */

/* NOTE(review): not referenced in this file -- confirm it is still needed */
#define	DR_CPU_LOOP_MSEC	1000
137
138 static void
dr_stop_intr(void)139 dr_stop_intr(void)
140 {
141 ASSERT(MUTEX_HELD(&cpu_lock));
142
143 kpreempt_disable();
144 cyclic_suspend();
145 }
146
147 static void
dr_enable_intr(void)148 dr_enable_intr(void)
149 {
150 ASSERT(MUTEX_HELD(&cpu_lock));
151
152 cyclic_resume();
153 kpreempt_enable();
154 }
155
156 dr_sr_handle_t *
dr_get_sr_handle(dr_handle_t * hp)157 dr_get_sr_handle(dr_handle_t *hp)
158 {
159 dr_sr_handle_t *srh;
160
161 srh = GETSTRUCT(dr_sr_handle_t, 1);
162 srh->sr_dr_handlep = hp;
163
164 return (srh);
165 }
166
167 void
dr_release_sr_handle(dr_sr_handle_t * srh)168 dr_release_sr_handle(dr_sr_handle_t *srh)
169 {
170 ASSERT(srh->sr_failed_dip == NULL);
171 FREESTRUCT(srh, dr_sr_handle_t, 1);
172 }
173
174 static int
dr_is_real_device(dev_info_t * dip)175 dr_is_real_device(dev_info_t *dip)
176 {
177 struct regspec *regbuf = NULL;
178 int length = 0;
179 int rc;
180
181 if (ddi_get_driver(dip) == NULL)
182 return (0);
183
184 if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
185 return (1);
186 if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
187 return (0);
188
189 /*
190 * now the general case
191 */
192 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
193 (caddr_t)®buf, &length);
194 ASSERT(rc != DDI_PROP_NO_MEMORY);
195 if (rc != DDI_PROP_SUCCESS) {
196 return (0);
197 } else {
198 if ((length > 0) && (regbuf != NULL))
199 kmem_free(regbuf, length);
200 return (1);
201 }
202 }
203
204 static int
dr_is_unsafe_major(major_t major)205 dr_is_unsafe_major(major_t major)
206 {
207 char *dname, **cpp;
208 int i, ndevs;
209
210 if ((dname = ddi_major_to_name(major)) == NULL) {
211 PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
212 return (0);
213 }
214
215 ndevs = dr_unsafe_devs.ndevs;
216 for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
217 if (strcmp(dname, *cpp++) == 0)
218 return (1);
219 }
220 return (0);
221 }
222
223 static int
dr_bypass_device(char * dname)224 dr_bypass_device(char *dname)
225 {
226 int i;
227 char **lname;
228
229 if (dname == NULL)
230 return (0);
231
232 /* check the bypass list */
233 for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
234 if (strcmp(dname, dr_bypass_list[i++]) == 0)
235 return (1);
236 }
237 return (0);
238 }
239
240 static int
dr_resolve_devname(dev_info_t * dip,char * buffer,char * alias)241 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
242 {
243 major_t devmajor;
244 char *aka, *name;
245
246 *buffer = *alias = 0;
247
248 if (dip == NULL)
249 return (-1);
250
251 if ((name = ddi_get_name(dip)) == NULL)
252 name = "<null name>";
253
254 aka = name;
255
256 if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE)
257 aka = ddi_major_to_name(devmajor);
258
259 (void) strcpy(buffer, name);
260
261 if (strcmp(name, aka))
262 (void) strcpy(alias, aka);
263 else
264 *alias = 0;
265
266 return (0);
267 }
268
269 struct dr_ref {
270 int *refcount;
271 int *refcount_non_gldv3;
272 uint64_t *arr;
273 int *idx;
274 int len;
275 };
276
277 /* ARGSUSED */
278 static int
dr_check_dip(dev_info_t * dip,void * arg,uint_t ref)279 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
280 {
281 major_t major;
282 char *dname;
283 struct dr_ref *rp = (struct dr_ref *)arg;
284
285 if (dip == NULL)
286 return (DDI_WALK_CONTINUE);
287
288 if (!dr_is_real_device(dip))
289 return (DDI_WALK_CONTINUE);
290
291 dname = ddi_binding_name(dip);
292
293 if (dr_bypass_device(dname))
294 return (DDI_WALK_CONTINUE);
295
296 if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
297 if (ref && rp->refcount) {
298 *rp->refcount += ref;
299 PR_QR("\n %s (major# %d) is referenced(%u)\n", dname,
300 major, ref);
301 }
302 if (ref && rp->refcount_non_gldv3) {
303 if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
304 *rp->refcount_non_gldv3 += ref;
305 }
306 if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
307 PR_QR("\n %s (major# %d) not hotpluggable\n", dname,
308 major);
309 if (rp->arr != NULL && rp->idx != NULL)
310 *rp->idx = dr_add_int(rp->arr, *rp->idx,
311 rp->len, (uint64_t)major);
312 }
313 }
314 return (DDI_WALK_CONTINUE);
315 }
316
317 static int
dr_check_unsafe_major(dev_info_t * dip,void * arg)318 dr_check_unsafe_major(dev_info_t *dip, void *arg)
319 {
320 return (dr_check_dip(dip, arg, 0));
321 }
322
323
324 /*ARGSUSED*/
325 void
dr_check_devices(dev_info_t * dip,int * refcount,dr_handle_t * handle,uint64_t * arr,int * idx,int len,int * refcount_non_gldv3)326 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
327 uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
328 {
329 struct dr_ref bref = {0};
330
331 if (dip == NULL)
332 return;
333
334 bref.refcount = refcount;
335 bref.refcount_non_gldv3 = refcount_non_gldv3;
336 bref.arr = arr;
337 bref.idx = idx;
338 bref.len = len;
339
340 ASSERT(e_ddi_branch_held(dip));
341 (void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
342 }
343
344 /*
345 * The "dip" argument's parent (if it exists) must be held busy.
346 */
347 static int
dr_suspend_devices(dev_info_t * dip,dr_sr_handle_t * srh)348 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
349 {
350 dr_handle_t *handle;
351 major_t major;
352 char *dname;
353
354 /*
355 * If dip is the root node, it has no siblings and it is
356 * always held. If dip is not the root node, dr_suspend_devices()
357 * will be invoked with the parent held busy.
358 */
359 for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
360 char d_name[40], d_alias[40], *d_info;
361
362 ndi_devi_enter(dip);
363 if (dr_suspend_devices(ddi_get_child(dip), srh)) {
364 ndi_devi_exit(dip);
365 return (ENXIO);
366 }
367 ndi_devi_exit(dip);
368
369 if (!dr_is_real_device(dip))
370 continue;
371
372 major = (major_t)-1;
373 if ((dname = ddi_binding_name(dip)) != NULL)
374 major = ddi_name_to_major(dname);
375
376 if (dr_bypass_device(dname)) {
377 PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
378 major);
379 continue;
380 }
381
382 if (drmach_verify_sr(dip, 1)) {
383 PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
384 major);
385 continue;
386 }
387
388 if ((d_info = ddi_get_name_addr(dip)) == NULL)
389 d_info = "<null>";
390
391 d_name[0] = 0;
392 if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
393 if (d_alias[0] != 0) {
394 prom_printf("\tsuspending %s@%s (aka %s)\n",
395 d_name, d_info, d_alias);
396 } else {
397 prom_printf("\tsuspending %s@%s\n", d_name,
398 d_info);
399 }
400 } else {
401 prom_printf("\tsuspending %s@%s\n", dname, d_info);
402 }
403
404 if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
405 prom_printf("\tFAILED to suspend %s@%s\n",
406 d_name[0] ? d_name : dname, d_info);
407
408 srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
409 srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
410
411 ndi_hold_devi(dip);
412 srh->sr_failed_dip = dip;
413
414 handle = srh->sr_dr_handlep;
415 dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
416 d_name[0] ? d_name : dname, d_info);
417
418 return (DDI_FAILURE);
419 }
420 }
421
422 return (DDI_SUCCESS);
423 }
424
425 static void
dr_resume_devices(dev_info_t * start,dr_sr_handle_t * srh)426 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
427 {
428 dr_handle_t *handle;
429 dev_info_t *dip, *next, *last = NULL;
430 major_t major;
431 char *bn;
432
433 major = (major_t)-1;
434
435 /* attach in reverse device tree order */
436 while (last != start) {
437 dip = start;
438 next = ddi_get_next_sibling(dip);
439 while (next != last && dip != srh->sr_failed_dip) {
440 dip = next;
441 next = ddi_get_next_sibling(dip);
442 }
443 if (dip == srh->sr_failed_dip) {
444 /* release hold acquired in dr_suspend_devices() */
445 srh->sr_failed_dip = NULL;
446 ndi_rele_devi(dip);
447 } else if (dr_is_real_device(dip) &&
448 srh->sr_failed_dip == NULL) {
449
450 if ((bn = ddi_binding_name(dip)) != NULL) {
451 major = ddi_name_to_major(bn);
452 } else {
453 bn = "<null>";
454 }
455 if (!dr_bypass_device(bn) &&
456 !drmach_verify_sr(dip, 0)) {
457 char d_name[40], d_alias[40], *d_info;
458
459 d_name[0] = 0;
460 d_info = ddi_get_name_addr(dip);
461 if (d_info == NULL)
462 d_info = "<null>";
463
464 if (!dr_resolve_devname(dip, d_name, d_alias)) {
465 if (d_alias[0] != 0) {
466 prom_printf("\tresuming "
467 "%s@%s (aka %s)\n", d_name,
468 d_info, d_alias);
469 } else {
470 prom_printf("\tresuming "
471 "%s@%s\n", d_name, d_info);
472 }
473 } else {
474 prom_printf("\tresuming %s@%s\n", bn,
475 d_info);
476 }
477
478 if (devi_attach(dip, DDI_RESUME) !=
479 DDI_SUCCESS) {
480 /*
481 * Print a console warning,
482 * set an e_code of ESBD_RESUME,
483 * and save the driver major
484 * number in the e_rsc.
485 */
486 prom_printf("\tFAILED to resume %s@%s",
487 d_name[0] ? d_name : bn, d_info);
488
489 srh->sr_err_idx =
490 dr_add_int(srh->sr_err_ints,
491 srh->sr_err_idx, DR_MAX_ERR_INT,
492 (uint64_t)major);
493
494 handle = srh->sr_dr_handlep;
495
496 dr_op_err(CE_IGNORE, handle,
497 ESBD_RESUME, "%s@%s",
498 d_name[0] ? d_name : bn, d_info);
499 }
500 }
501 }
502
503 /* Hold parent busy while walking its children */
504 ndi_devi_enter(dip);
505 dr_resume_devices(ddi_get_child(dip), srh);
506 ndi_devi_exit(dip);
507 last = dip;
508 }
509 }
510
/*
 * True if thread 't' is "virtually" stopped from the DR point of view
 * (similar to CPR_VSTOPPED): a user thread that is asleep on a wait
 * channel in the kernel with an AST pending and TP_CHKPT set. Once
 * such a thread completes in the kernel, it will process the stop
 * request and stop.
 */
#define	DR_VSTOPPED(t)			\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))
522
523 /* ARGSUSED */
524 static int
dr_stop_user_threads(dr_sr_handle_t * srh)525 dr_stop_user_threads(dr_sr_handle_t *srh)
526 {
527 int count;
528 int bailout;
529 dr_handle_t *handle = srh->sr_dr_handlep;
530 static fn_t f = "dr_stop_user_threads";
531 kthread_id_t tp;
532
533 extern void add_one_utstop();
534 extern void utstop_timedwait(clock_t);
535 extern void utstop_init(void);
536
537 #define DR_UTSTOP_RETRY 4
538 #define DR_UTSTOP_WAIT hz
539
540 if (dr_skip_user_threads)
541 return (DDI_SUCCESS);
542
543 utstop_init();
544
545 /* we need to try a few times to get past fork, etc. */
546 srh->sr_err_idx = 0;
547 for (count = 0; count < DR_UTSTOP_RETRY; count++) {
548 /* walk the entire threadlist */
549 mutex_enter(&pidlock);
550 for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
551 proc_t *p = ttoproc(tp);
552
553 /* handle kernel threads separately */
554 if (p->p_as == &kas || p->p_stat == SZOMB)
555 continue;
556
557 mutex_enter(&p->p_lock);
558 thread_lock(tp);
559
560 if (tp->t_state == TS_STOPPED) {
561 /* add another reason to stop this thread */
562 tp->t_schedflag &= ~TS_RESUME;
563 } else {
564 tp->t_proc_flag |= TP_CHKPT;
565
566 thread_unlock(tp);
567 mutex_exit(&p->p_lock);
568 add_one_utstop();
569 mutex_enter(&p->p_lock);
570 thread_lock(tp);
571
572 aston(tp);
573
574 if (ISWAKEABLE(tp) || ISWAITING(tp)) {
575 setrun_locked(tp);
576 }
577
578 }
579
580 /* grab thread if needed */
581 if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
582 poke_cpu(tp->t_cpu->cpu_id);
583
584
585 thread_unlock(tp);
586 mutex_exit(&p->p_lock);
587 }
588 mutex_exit(&pidlock);
589
590
591 /* let everything catch up */
592 utstop_timedwait(count * count * DR_UTSTOP_WAIT);
593
594
595 /* now, walk the threadlist again to see if we are done */
596 mutex_enter(&pidlock);
597 for (tp = curthread->t_next, bailout = 0;
598 tp != curthread; tp = tp->t_next) {
599 proc_t *p = ttoproc(tp);
600
601 /* handle kernel threads separately */
602 if (p->p_as == &kas || p->p_stat == SZOMB)
603 continue;
604
605 /*
606 * If this thread didn't stop, and we don't allow
607 * unstopped blocked threads, bail.
608 */
609 thread_lock(tp);
610 if (!CPR_ISTOPPED(tp) &&
611 !(dr_allow_blocked_threads &&
612 DR_VSTOPPED(tp))) {
613 bailout = 1;
614 if (count == DR_UTSTOP_RETRY - 1) {
615 /*
616 * save the pid for later reporting
617 */
618 srh->sr_err_idx =
619 dr_add_int(srh->sr_err_ints,
620 srh->sr_err_idx, DR_MAX_ERR_INT,
621 (uint64_t)p->p_pid);
622
623 cmn_err(CE_WARN, "%s: "
624 "failed to stop thread: "
625 "process=%s, pid=%d",
626 f, p->p_user.u_psargs, p->p_pid);
627
628 PR_QR("%s: failed to stop thread: "
629 "process=%s, pid=%d, t_id=0x%p, "
630 "t_state=0x%x, t_proc_flag=0x%x, "
631 "t_schedflag=0x%x\n",
632 f, p->p_user.u_psargs, p->p_pid,
633 (void *)tp, tp->t_state,
634 tp->t_proc_flag, tp->t_schedflag);
635 }
636
637 }
638 thread_unlock(tp);
639 }
640 mutex_exit(&pidlock);
641
642 /* were all the threads stopped? */
643 if (!bailout)
644 break;
645 }
646
647 /* were we unable to stop all threads after a few tries? */
648 if (bailout) {
649 handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
650 srh->sr_err_idx, 0);
651 return (ESRCH);
652 }
653
654 return (DDI_SUCCESS);
655 }
656
657 static void
dr_start_user_threads(void)658 dr_start_user_threads(void)
659 {
660 kthread_id_t tp;
661
662 mutex_enter(&pidlock);
663
664 /* walk all threads and release them */
665 for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
666 proc_t *p = ttoproc(tp);
667
668 /* skip kernel threads */
669 if (ttoproc(tp)->p_as == &kas)
670 continue;
671
672 mutex_enter(&p->p_lock);
673 tp->t_proc_flag &= ~TP_CHKPT;
674 mutex_exit(&p->p_lock);
675
676 thread_lock(tp);
677 if (CPR_ISTOPPED(tp)) {
678 /* back on the runq */
679 tp->t_schedflag |= TS_RESUME;
680 setrun_locked(tp);
681 }
682 thread_unlock(tp);
683 }
684
685 mutex_exit(&pidlock);
686 }
687
688 static void
dr_signal_user(int sig)689 dr_signal_user(int sig)
690 {
691 struct proc *p;
692
693 mutex_enter(&pidlock);
694
695 for (p = practive; p != NULL; p = p->p_next) {
696 /* only user threads */
697 if (p->p_exec == NULL || p->p_stat == SZOMB ||
698 p == proc_init || p == ttoproc(curthread))
699 continue;
700
701 mutex_enter(&p->p_lock);
702 sigtoproc(p, NULL, sig);
703 mutex_exit(&p->p_lock);
704 }
705
706 mutex_exit(&pidlock);
707
708 /* add a bit of delay */
709 delay(hz);
710 }
711
712 void
dr_resume(dr_sr_handle_t * srh)713 dr_resume(dr_sr_handle_t *srh)
714 {
715 switch (srh->sr_suspend_state) {
716 case DR_SRSTATE_FULL:
717
718 ASSERT(MUTEX_HELD(&cpu_lock));
719
720 /*
721 * Prevent false alarm in tod_validate() due to tod
722 * value change between suspend and resume
723 */
724 mutex_enter(&tod_lock);
725 tod_status_set(TOD_DR_RESUME_DONE);
726 mutex_exit(&tod_lock);
727
728 dr_enable_intr(); /* enable intr & clock */
729
730 start_cpus();
731 mutex_exit(&cpu_lock);
732
733 /*
734 * This should only be called if drmach_suspend_last()
735 * was called and state transitioned to DR_SRSTATE_FULL
736 * to prevent resume attempts on device instances that
737 * were not previously suspended.
738 */
739 drmach_resume_first();
740
741 /* FALLTHROUGH */
742
743 case DR_SRSTATE_DRIVER:
744 /*
745 * resume drivers
746 */
747 srh->sr_err_idx = 0;
748
749 /* no parent dip to hold busy */
750 dr_resume_devices(ddi_root_node(), srh);
751
752 if (srh->sr_err_idx && srh->sr_dr_handlep) {
753 (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
754 srh->sr_err_ints, srh->sr_err_idx, 1);
755 }
756
757 /*
758 * resume the lock manager
759 */
760 lm_cprresume();
761
762 /* FALLTHROUGH */
763
764 case DR_SRSTATE_USER:
765 /*
766 * finally, resume user threads
767 */
768 if (!dr_skip_user_threads) {
769 prom_printf("DR: resuming user threads...\n");
770 dr_start_user_threads();
771 }
772 /* FALLTHROUGH */
773
774 case DR_SRSTATE_BEGIN:
775 default:
776 /*
777 * let those who care know that we've just resumed
778 */
779 PR_QR("sending SIGTHAW...\n");
780 dr_signal_user(SIGTHAW);
781 break;
782 }
783
784 prom_printf("DR: resume COMPLETED\n");
785 }
786
787 int
dr_suspend(dr_sr_handle_t * srh)788 dr_suspend(dr_sr_handle_t *srh)
789 {
790 dr_handle_t *handle;
791 int force;
792 int dev_errs_idx;
793 uint64_t dev_errs[DR_MAX_ERR_INT];
794 int rc = DDI_SUCCESS;
795
796 handle = srh->sr_dr_handlep;
797
798 force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
799
800 prom_printf("\nDR: suspending user threads...\n");
801 srh->sr_suspend_state = DR_SRSTATE_USER;
802 if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
803 dr_check_user_stop_result) {
804 dr_resume(srh);
805 return (rc);
806 }
807
808 if (!force) {
809 struct dr_ref drc = {0};
810
811 prom_printf("\nDR: checking devices...\n");
812 dev_errs_idx = 0;
813
814 drc.arr = dev_errs;
815 drc.idx = &dev_errs_idx;
816 drc.len = DR_MAX_ERR_INT;
817
818 /*
819 * Since the root node can never go away, it
820 * doesn't have to be held.
821 */
822 ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
823 if (dev_errs_idx) {
824 handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
825 dev_errs_idx, 1);
826 dr_resume(srh);
827 return (DDI_FAILURE);
828 }
829 PR_QR("done\n");
830 } else {
831 prom_printf("\nDR: dr_suspend invoked with force flag\n");
832 }
833
834 #ifndef SKIP_SYNC
835 /*
836 * This sync swap out all user pages
837 */
838 vfs_sync(SYNC_ALL);
839 #endif
840
841 /*
842 * special treatment for lock manager
843 */
844 lm_cprsuspend();
845
846 #ifndef SKIP_SYNC
847 /*
848 * sync the file system in case we never make it back
849 */
850 sync();
851 #endif
852
853 /*
854 * now suspend drivers
855 */
856 prom_printf("DR: suspending drivers...\n");
857 srh->sr_suspend_state = DR_SRSTATE_DRIVER;
858 srh->sr_err_idx = 0;
859 /* No parent to hold busy */
860 if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
861 if (srh->sr_err_idx && srh->sr_dr_handlep) {
862 (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
863 srh->sr_err_ints, srh->sr_err_idx, 1);
864 }
865 dr_resume(srh);
866 return (rc);
867 }
868
869 drmach_suspend_last();
870
871 /*
872 * finally, grab all cpus
873 */
874 srh->sr_suspend_state = DR_SRSTATE_FULL;
875
876 mutex_enter(&cpu_lock);
877 pause_cpus(NULL, NULL);
878 dr_stop_intr();
879
880 return (rc);
881 }
882
883 int
dr_pt_test_suspend(dr_handle_t * hp)884 dr_pt_test_suspend(dr_handle_t *hp)
885 {
886 dr_sr_handle_t *srh;
887 int err;
888 uint_t psmerr;
889 static fn_t f = "dr_pt_test_suspend";
890
891 PR_QR("%s...\n", f);
892
893 srh = dr_get_sr_handle(hp);
894 if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
895 dr_resume(srh);
896 if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
897 PR_QR("%s: error on dr_resume()", f);
898 switch (psmerr) {
899 case ESBD_RESUME:
900 PR_QR("Couldn't resume devices: %s\n",
901 DR_GET_E_RSC(hp->h_err));
902 break;
903
904 case ESBD_KTHREAD:
905 PR_ALL("psmerr is ESBD_KTHREAD\n");
906 break;
907 default:
908 PR_ALL("Resume error unknown = %d\n", psmerr);
909 break;
910 }
911 }
912 } else {
913 PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
914 psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
915 switch (psmerr) {
916 case ESBD_UNSAFE:
917 PR_ALL("Unsafe devices (major #): %s\n",
918 DR_GET_E_RSC(hp->h_err));
919 break;
920
921 case ESBD_RTTHREAD:
922 PR_ALL("RT threads (PIDs): %s\n",
923 DR_GET_E_RSC(hp->h_err));
924 break;
925
926 case ESBD_UTHREAD:
927 PR_ALL("User threads (PIDs): %s\n",
928 DR_GET_E_RSC(hp->h_err));
929 break;
930
931 case ESBD_SUSPEND:
932 PR_ALL("Non-suspendable devices (major #): %s\n",
933 DR_GET_E_RSC(hp->h_err));
934 break;
935
936 case ESBD_RESUME:
937 PR_ALL("Could not resume devices (major #): %s\n",
938 DR_GET_E_RSC(hp->h_err));
939 break;
940
941 case ESBD_KTHREAD:
942 PR_ALL("psmerr is ESBD_KTHREAD\n");
943 break;
944
945 case ESBD_NOERROR:
946 PR_ALL("sbd_error_t error code not set\n");
947 break;
948
949 default:
950 PR_ALL("Unknown error psmerr = %d\n", psmerr);
951 break;
952 }
953 }
954 dr_release_sr_handle(srh);
955
956 return (0);
957 }
958
/*
 * Append 'val' to the first 'idx' entries of 'arr', which has room
 * for 'len' entries in total.
 *
 * Nothing is stored if the array is already full or if 'val' is
 * already present. Returns the resulting number of entries in use,
 * or 0 if 'arr' is NULL.
 */
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
	int pos = 0;

	if (arr == NULL)
		return (0);

	/* no room left: leave the array as it is */
	if (idx >= len)
		return (idx);

	/* look for an existing copy of the value */
	while (pos < idx && arr[pos] != val)
		pos++;
	if (pos < idx)
		return (idx);

	arr[idx] = val;

	return (idx + 1);
}
984
985 /*
986 * Construct an sbd_error_t featuring a string representation of an array of
987 * integers as its e_rsc.
988 */
989 static sbd_error_t *
drerr_int(int e_code,uint64_t * arr,int idx,int majors)990 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
991 {
992 int i, n, buf_len, buf_idx, buf_avail;
993 char *dname;
994 char *buf;
995 sbd_error_t *new_sbd_err;
996 static char s_ellipsis[] = "...";
997
998 if (arr == NULL || idx <= 0)
999 return (NULL);
1000
1001 /* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1002 buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1003
1004 /*
1005 * This is the total working area of the buffer. It must be computed
1006 * as the size of 'buf', minus reserved space for the null terminator
1007 * and the ellipsis string.
1008 */
1009 buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1010
1011 /* Construct a string representation of the array values */
1012 for (buf_idx = 0, i = 0; i < idx; i++) {
1013 buf_avail = buf_len - buf_idx;
1014 if (majors) {
1015 dname = ddi_major_to_name(arr[i]);
1016 if (dname) {
1017 n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1018 dname);
1019 } else {
1020 n = snprintf(&buf[buf_idx], buf_avail,
1021 "major %" PRIu64 ", ", arr[i]);
1022 }
1023 } else {
1024 n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ",
1025 arr[i]);
1026 }
1027
1028 /* An ellipsis gets appended when no more values fit */
1029 if (n >= buf_avail) {
1030 (void) strcpy(&buf[buf_idx], s_ellipsis);
1031 break;
1032 }
1033
1034 buf_idx += n;
1035 }
1036
1037 /* If all the contents fit, remove the trailing comma */
1038 if (n < buf_avail) {
1039 buf[--buf_idx] = '\0';
1040 buf[--buf_idx] = '\0';
1041 }
1042
1043 /* Return an sbd_error_t with the buffer and e_code */
1044 new_sbd_err = drerr_new(1, e_code, buf);
1045 kmem_free(buf, MAXPATHLEN);
1046 return (new_sbd_err);
1047 }
1048