xref: /illumos-gate/usr/src/cmd/svc/startd/fork.c (revision 5a8d3db1a9faa71e948f85f673574967cfdad948)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2012 Milan Jurik. All rights reserved.
25  */
26 
27 /*
28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29  */
30 
31 /*
32  * fork.c - safe forking for svc.startd
33  *
34  * fork_configd() and fork_sulogin() are related, special cases that handle the
35  * spawning of specific client processes for svc.startd.
36  */
37 
38 #include <sys/contract/process.h>
39 #include <sys/corectl.h>
40 #include <sys/ctfs.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/uio.h>
44 #include <sys/wait.h>
45 #include <assert.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <libcontract.h>
49 #include <libcontract_priv.h>
50 #include <libscf_priv.h>
51 #include <limits.h>
52 #include <poll.h>
53 #include <port.h>
54 #include <signal.h>
55 #include <stdarg.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <unistd.h>
60 #include <utmpx.h>
61 #include <spawn.h>
62 
63 #include "manifest_hash.h"
64 #include "configd_exit.h"
65 #include "protocol.h"
66 #include "startd.h"
67 
/*
 * Cursor over the utmpx database: holds the entry most recently returned by
 * getutxent() while fork_sulogin() scans for the process on the console line.
 */
static	struct	utmpx	*utmpp;	/* pointer for getutxent() */
69 
/*
 * pid_t startd_fork1(int *)
 *   fork1() wrapper that brackets the fork with the wait and utmpx
 *   prefork/postfork hooks.  If the fork fails and forkerr is non-NULL, the
 *   errno observed immediately after fork1() is stored through it.  Returns
 *   the value fork1() returned.
 */
pid_t
startd_fork1(int *forkerr)
{
	pid_t child;

	/* Prefork hooks: wait framework first, then utmpx. */
	wait_prefork();
	utmpx_prefork();

	child = fork1();

	/* Capture errno before the postfork hooks get a chance to change it. */
	if (forkerr != NULL && child == -1)
		*forkerr = errno;

	/* Postfork hooks run in the opposite order of the prefork hooks. */
	utmpx_postfork();
	wait_postfork(child);

	return (child);
}
94 
95 /*
96  * void fork_mount(char *, char *)
97  *   Run mount(1M) with the given options and mount point.  (mount(1M) has much
98  *   hidden knowledge; it's much less correct to reimplement that logic here to
99  *   save a fork(2)/exec(2) invocation.)
100  */
101 int
102 fork_mount(char *path, char *opts)
103 {
104 	pid_t pid;
105 	uint_t tries = 0;
106 	int status;
107 
108 	for (pid = fork1(); pid == -1; pid = fork1()) {
109 		if (++tries > MAX_MOUNT_RETRIES)
110 			return (-1);
111 
112 		(void) sleep(tries);
113 	}
114 
115 	if (pid != 0) {
116 		(void) waitpid(pid, &status, 0);
117 
118 		/*
119 		 * If our mount(1M) invocation exited by peculiar means, or with
120 		 * a non-zero status, our mount likelihood is low.
121 		 */
122 		if (!WIFEXITED(status) ||
123 		    WEXITSTATUS(status) != 0)
124 			return (-1);
125 
126 		return (0);
127 	}
128 
129 	(void) execl("/sbin/mount", "mount", "-o", opts, path, NULL);
130 
131 	return (-1);
132 }
133 
134 /*
135  * pid_t fork_common(...)
136  *   Common routine used by fork_sulogin, fork_emi, and fork_configd to
137  *   fork a process in a contract with the provided terms.  Invokes
138  *   fork_sulogin (with its no-fork argument set) on errors.
139  */
140 static pid_t
141 fork_common(const char *name, const char *svc_fmri, int retries, ctid_t *ctidp,
142     uint_t inf, uint_t crit, uint_t fatal, uint_t param, uint64_t cookie)
143 {
144 	uint_t tries = 0;
145 	int ctfd, err;
146 	pid_t pid;
147 
148 	/*
149 	 * Establish process contract terms.
150 	 */
151 	if ((ctfd = open64(CTFS_ROOT "/process/template", O_RDWR)) == -1) {
152 		fork_sulogin(B_TRUE, "Could not open process contract template "
153 		    "for %s: %s\n", name, strerror(errno));
154 		/* NOTREACHED */
155 	}
156 
157 	err = ct_tmpl_set_critical(ctfd, crit);
158 	err |= ct_pr_tmpl_set_fatal(ctfd, fatal);
159 	err |= ct_tmpl_set_informative(ctfd, inf);
160 	err |= ct_pr_tmpl_set_param(ctfd, param);
161 	err |= ct_tmpl_set_cookie(ctfd, cookie);
162 	err |= ct_pr_tmpl_set_svc_fmri(ctfd, svc_fmri);
163 	err |= ct_pr_tmpl_set_svc_aux(ctfd, name);
164 	if (err) {
165 		(void) close(ctfd);
166 		fork_sulogin(B_TRUE, "Could not set %s process contract "
167 		    "terms\n", name);
168 		/* NOTREACHED */
169 	}
170 
171 	if (err = ct_tmpl_activate(ctfd)) {
172 		(void) close(ctfd);
173 		fork_sulogin(B_TRUE, "Could not activate %s process contract "
174 		    "template: %s\n", name, strerror(err));
175 		/* NOTREACHED */
176 	}
177 
178 	utmpx_prefork();
179 
180 	/*
181 	 * Attempt to fork "retries" times.
182 	 */
183 	for (pid = fork1(); pid == -1; pid = fork1()) {
184 		if (++tries > retries) {
185 			/*
186 			 * When we exit the sulogin session, init(1M)
187 			 * will restart svc.startd(1M).
188 			 */
189 			err = errno;
190 			(void) ct_tmpl_clear(ctfd);
191 			(void) close(ctfd);
192 			utmpx_postfork();
193 			fork_sulogin(B_TRUE, "Could not fork to start %s: %s\n",
194 			    name, strerror(err));
195 			/* NOTREACHED */
196 		}
197 		(void) sleep(tries);
198 	}
199 
200 	utmpx_postfork();
201 
202 	/*
203 	 * Clean up, return pid and ctid.
204 	 */
205 	if (pid != 0 && (errno = contract_latest(ctidp)) != 0)
206 		uu_die("Could not get new contract id for %s\n", name);
207 	(void) ct_tmpl_clear(ctfd);
208 	(void) close(ctfd);
209 
210 	return (pid);
211 }
212 
213 /*
214  * void fork_sulogin(boolean_t, const char *, ...)
215  *   When we are invoked with the -s flag from boot (or run into an unfixable
216  *   situation), we run a private copy of sulogin.  When the sulogin session
217  *   is ended, we continue.  This is the last fallback action for system
218  *   maintenance.
219  *
220  *   If immediate is true, fork_sulogin() executes sulogin(1M) directly, without
221  *   forking.
222  *
223  *   Because fork_sulogin() is needed potentially before we daemonize, we leave
224  *   it outside the wait_register() framework.
225  */
226 /*PRINTFLIKE2*/
227 void
228 fork_sulogin(boolean_t immediate, const char *format, ...)
229 {
230 	va_list args;
231 	int fd_console;
232 
233 	(void) printf("Requesting System Maintenance Mode\n");
234 
235 	if (!booting_to_single_user)
236 		(void) printf("(See /lib/svc/share/README for more "
237 		    "information.)\n");
238 
239 	va_start(args, format);
240 	(void) vprintf(format, args);
241 	va_end(args);
242 
243 	if (!immediate) {
244 		ctid_t	ctid;
245 		pid_t	pid;
246 
247 		pid = fork_common("sulogin", SVC_SULOGIN_FMRI,
248 		    MAX_SULOGIN_RETRIES, &ctid, CT_PR_EV_HWERR, 0,
249 		    CT_PR_EV_HWERR, CT_PR_PGRPONLY, SULOGIN_COOKIE);
250 
251 		if (pid != 0) {
252 			(void) waitpid(pid, NULL, 0);
253 			contract_abandon(ctid);
254 			return;
255 		}
256 		/* close all inherited fds */
257 		closefrom(0);
258 	} else {
259 		(void) printf("Directly executing sulogin.\n");
260 		/*
261 		 * Can't call closefrom() in this MT section
262 		 * so safely close a minimum set of fds.
263 		 */
264 		(void) close(STDIN_FILENO);
265 		(void) close(STDOUT_FILENO);
266 		(void) close(STDERR_FILENO);
267 	}
268 
269 	(void) setpgrp();
270 
271 	/* open the console for sulogin */
272 	if ((fd_console = open("/dev/console", O_RDWR)) >= 0) {
273 		if (fd_console != STDIN_FILENO)
274 			while (dup2(fd_console, STDIN_FILENO) < 0 &&
275 			    errno == EINTR)
276 				;
277 		if (fd_console != STDOUT_FILENO)
278 			while (dup2(fd_console, STDOUT_FILENO) < 0 &&
279 			    errno == EINTR)
280 				;
281 		if (fd_console != STDERR_FILENO)
282 			while (dup2(fd_console, STDERR_FILENO) < 0 &&
283 			    errno == EINTR)
284 				;
285 		if (fd_console > STDERR_FILENO)
286 			(void) close(fd_console);
287 	}
288 
289 	setutxent();
290 	while ((utmpp = getutxent()) != NULL) {
291 		if (strcmp(utmpp->ut_user, "LOGIN") != 0) {
292 			if (strcmp(utmpp->ut_line, "console") == 0) {
293 				(void) kill(utmpp->ut_pid, 9);
294 				break;
295 			}
296 		}
297 	}
298 
299 	(void) execl("/sbin/sulogin", "sulogin", NULL);
300 
301 	uu_warn("Could not exec() sulogin");
302 
303 	exit(1);
304 }
305 
306 #define	CONFIGD_PATH	"/lib/svc/bin/svc.configd"
307 
308 /*
309  * void fork_configd(int status)
310  *   We are interested in exit events (since the parent's exiting means configd
311  *   is ready to run and since the child's exiting indicates an error case) and
312  *   in empty events.  This means we have a unique template for initiating
313  *   configd.
314  */
315 void
316 fork_configd(int exitstatus)
317 {
318 	pid_t pid;
319 	ctid_t ctid = -1;
320 	int err;
321 	char path[PATH_MAX];
322 
323 	/*
324 	 * Checking the existatus for the potential failure of the
325 	 * daemonized svc.configd.  If this is not the first time
326 	 * through, but a call from the svc.configd monitoring thread
327 	 * after a failure this is the status that is expected.  Other
328 	 * failures are exposed during initialization or are fixed
329 	 * by a restart (e.g door closings).
330 	 *
331 	 * If this is on-disk database corruption it will also be
332 	 * caught by a restart but could be cleared before the restart.
333 	 *
334 	 * Or this could be internal database corruption due to a
335 	 * rogue service that needs to be cleared before restart.
336 	 */
337 	if (WEXITSTATUS(exitstatus) == CONFIGD_EXIT_DATABASE_BAD) {
338 		fork_sulogin(B_FALSE, "svc.configd exited with database "
339 		    "corrupt error after initialization of the repository\n");
340 	}
341 
342 retry:
343 	log_framework(LOG_DEBUG, "fork_configd trying to start svc.configd\n");
344 
345 	/*
346 	 * If we're retrying, we will have an old contract lying around
347 	 * from the failure.  Since we're going to be creating a new
348 	 * contract shortly, we abandon the old one now.
349 	 */
350 	if (ctid != -1)
351 		contract_abandon(ctid);
352 	ctid = -1;
353 
354 	pid = fork_common("svc.configd", SCF_SERVICE_CONFIGD,
355 	    MAX_CONFIGD_RETRIES, &ctid, 0, CT_PR_EV_EXIT, 0,
356 	    CT_PR_INHERIT | CT_PR_REGENT, CONFIGD_COOKIE);
357 
358 	if (pid != 0) {
359 		int exitstatus;
360 
361 		st->st_configd_pid = pid;
362 
363 		if (waitpid(pid, &exitstatus, 0) == -1) {
364 			fork_sulogin(B_FALSE, "waitpid on svc.configd "
365 			    "failed: %s\n", strerror(errno));
366 		} else if (WIFEXITED(exitstatus)) {
367 			char *errstr;
368 
369 			/*
370 			 * Examine exitstatus.  This will eventually get more
371 			 * complicated, as we will want to teach startd how to
372 			 * invoke configd with alternate repositories, etc.
373 			 *
374 			 * Note that exec(2) failure results in an exit status
375 			 * of 1, resulting in the default clause below.
376 			 */
377 
378 			/*
379 			 * Assign readable strings to cases we don't handle, or
380 			 * have error outcomes that cannot be eliminated.
381 			 */
382 			switch (WEXITSTATUS(exitstatus)) {
383 			case CONFIGD_EXIT_BAD_ARGS:
384 				errstr = "bad arguments";
385 				break;
386 
387 			case CONFIGD_EXIT_DATABASE_BAD:
388 				errstr = "database corrupt";
389 				break;
390 
391 			case CONFIGD_EXIT_DATABASE_LOCKED:
392 				errstr = "database locked";
393 				break;
394 			case CONFIGD_EXIT_INIT_FAILED:
395 				errstr = "initialization failure";
396 				break;
397 			case CONFIGD_EXIT_DOOR_INIT_FAILED:
398 				errstr = "door initialization failure";
399 				break;
400 			case CONFIGD_EXIT_DATABASE_INIT_FAILED:
401 				errstr = "database initialization failure";
402 				break;
403 			case CONFIGD_EXIT_NO_THREADS:
404 				errstr = "no threads available";
405 				break;
406 			case CONFIGD_EXIT_LOST_MAIN_DOOR:
407 				errstr = "lost door server attachment";
408 				break;
409 			case 1:
410 				errstr = "execution failure";
411 				break;
412 			default:
413 				errstr = "unknown error";
414 				break;
415 			}
416 
417 			/*
418 			 * Remedial actions for various configd failures.
419 			 */
420 			switch (WEXITSTATUS(exitstatus)) {
421 			case CONFIGD_EXIT_OKAY:
422 				break;
423 
424 			case CONFIGD_EXIT_DATABASE_LOCKED:
425 				/* attempt remount of / read-write */
426 				if (fs_is_read_only("/", NULL) == 1) {
427 					if (fs_remount("/") == -1)
428 						fork_sulogin(B_FALSE,
429 						    "remount of root "
430 						    "filesystem failed\n");
431 
432 					goto retry;
433 				}
434 				break;
435 
436 			default:
437 				fork_sulogin(B_FALSE, "svc.configd exited "
438 				    "with status %d (%s)\n",
439 				    WEXITSTATUS(exitstatus), errstr);
440 				goto retry;
441 			}
442 		} else if (WIFSIGNALED(exitstatus)) {
443 			char signame[SIG2STR_MAX];
444 
445 			if (sig2str(WTERMSIG(exitstatus), signame))
446 				(void) snprintf(signame, SIG2STR_MAX,
447 				    "signum %d", WTERMSIG(exitstatus));
448 
449 			fork_sulogin(B_FALSE, "svc.configd signalled:"
450 			    " %s\n", signame);
451 
452 			goto retry;
453 		} else {
454 			fork_sulogin(B_FALSE, "svc.configd non-exit "
455 			    "condition: 0x%x\n", exitstatus);
456 
457 			goto retry;
458 		}
459 
460 		/*
461 		 * Announce that we have a valid svc.configd status.
462 		 */
463 		MUTEX_LOCK(&st->st_configd_live_lock);
464 		st->st_configd_lives = 1;
465 		err = pthread_cond_broadcast(&st->st_configd_live_cv);
466 		assert(err == 0);
467 		MUTEX_UNLOCK(&st->st_configd_live_lock);
468 
469 		log_framework(LOG_DEBUG, "fork_configd broadcasts configd is "
470 		    "live\n");
471 		return;
472 	}
473 
474 	/*
475 	 * Set our per-process core file path to leave core files in
476 	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
477 	 */
478 	(void) snprintf(path, sizeof (path),
479 	    "/etc/svc/volatile/core.configd.%%p");
480 
481 	(void) core_set_process_path(path, strlen(path) + 1, getpid());
482 
483 	log_framework(LOG_DEBUG, "executing svc.configd\n");
484 
485 	(void) execl(CONFIGD_PATH, CONFIGD_PATH, NULL);
486 
487 	/*
488 	 * Status code is used above to identify configd exec failure.
489 	 */
490 	exit(1);
491 }
492 
493 void *
494 fork_configd_thread(void *vctid)
495 {
496 	int fd, err;
497 	ctid_t configd_ctid = (ctid_t)vctid;
498 
499 	if (configd_ctid == -1) {
500 		log_framework(LOG_DEBUG,
501 		    "fork_configd_thread starting svc.configd\n");
502 		fork_configd(0);
503 	} else {
504 		/*
505 		 * configd_ctid is known:  we broadcast and continue.
506 		 * test contract for appropriate state by verifying that
507 		 * there is one or more processes within it?
508 		 */
509 		log_framework(LOG_DEBUG,
510 		    "fork_configd_thread accepting svc.configd with CTID %ld\n",
511 		    configd_ctid);
512 		MUTEX_LOCK(&st->st_configd_live_lock);
513 		st->st_configd_lives = 1;
514 		(void) pthread_cond_broadcast(&st->st_configd_live_cv);
515 		MUTEX_UNLOCK(&st->st_configd_live_lock);
516 	}
517 
518 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
519 	if (fd == -1)
520 		uu_die("process bundle open failed");
521 
522 	/*
523 	 * Make sure we get all events (including those generated by configd
524 	 * before this thread was started).
525 	 */
526 	err = ct_event_reset(fd);
527 	assert(err == 0);
528 
529 	for (;;) {
530 		int efd, sfd;
531 		ct_evthdl_t ev;
532 		uint32_t type;
533 		ctevid_t evid;
534 		ct_stathdl_t status;
535 		ctid_t ctid;
536 		uint64_t cookie;
537 		pid_t pid;
538 
539 		if (err = ct_event_read_critical(fd, &ev)) {
540 			assert(err != EINVAL && err != EAGAIN);
541 			log_error(LOG_WARNING,
542 			    "Error reading next contract event: %s",
543 			    strerror(err));
544 			continue;
545 		}
546 
547 		evid = ct_event_get_evid(ev);
548 		ctid = ct_event_get_ctid(ev);
549 		type = ct_event_get_type(ev);
550 
551 		/* Fetch cookie. */
552 		sfd = contract_open(ctid, "process", "status", O_RDONLY);
553 		if (sfd < 0) {
554 			ct_event_free(ev);
555 			continue;
556 		}
557 
558 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
559 			log_framework(LOG_WARNING, "Could not get status for "
560 			    "contract %ld: %s\n", ctid, strerror(err));
561 
562 			ct_event_free(ev);
563 			startd_close(sfd);
564 			continue;
565 		}
566 
567 		cookie = ct_status_get_cookie(status);
568 
569 		ct_status_free(status);
570 
571 		startd_close(sfd);
572 
573 		/*
574 		 * Don't process events from contracts we aren't interested in.
575 		 */
576 		if (cookie != CONFIGD_COOKIE) {
577 			ct_event_free(ev);
578 			continue;
579 		}
580 
581 		if (type == CT_PR_EV_EXIT) {
582 			int exitstatus;
583 
584 			(void) ct_pr_event_get_pid(ev, &pid);
585 			(void) ct_pr_event_get_exitstatus(ev,
586 			    &exitstatus);
587 
588 			if (st->st_configd_pid != pid) {
589 				/*
590 				 * This is the child exiting, so we
591 				 * abandon the contract and restart
592 				 * configd.
593 				 */
594 				contract_abandon(ctid);
595 				fork_configd(exitstatus);
596 			}
597 		}
598 
599 		efd = contract_open(ctid, "process", "ctl", O_WRONLY);
600 		if (efd != -1) {
601 			(void) ct_ctl_ack(efd, evid);
602 			startd_close(efd);
603 		}
604 
605 		ct_event_free(ev);
606 
607 	}
608 
609 	/*NOTREACHED*/
610 	return (NULL);
611 }
612 
613 void
614 fork_rc_script(char rl, const char *arg, boolean_t wait)
615 {
616 	pid_t pid;
617 	int tmpl, err, stat;
618 	char path[20] = "/sbin/rc.", log[20] = "rc..log", timebuf[20];
619 	time_t now;
620 	struct tm ltime;
621 	size_t sz;
622 	char *pathenv;
623 	char **nenv;
624 
625 	path[8] = rl;
626 
627 	tmpl = open64(CTFS_ROOT "/process/template", O_RDWR);
628 	if (tmpl >= 0) {
629 		err = ct_tmpl_set_critical(tmpl, 0);
630 		assert(err == 0);
631 
632 		err = ct_tmpl_set_informative(tmpl, 0);
633 		assert(err == 0);
634 
635 		err = ct_pr_tmpl_set_fatal(tmpl, 0);
636 		assert(err == 0);
637 
638 		err = ct_tmpl_activate(tmpl);
639 		assert(err == 0);
640 
641 		err = close(tmpl);
642 		assert(err == 0);
643 	} else {
644 		uu_warn("Could not create contract template for %s.\n", path);
645 	}
646 
647 	pid = startd_fork1(NULL);
648 	if (pid < 0) {
649 		return;
650 	} else if (pid != 0) {
651 		/* parent */
652 		if (wait) {
653 			do
654 				err = waitpid(pid, &stat, 0);
655 			while (err != 0 && errno == EINTR)
656 				;
657 
658 			if (!WIFEXITED(stat)) {
659 				log_framework(LOG_INFO,
660 				    "%s terminated with waitpid() status %d.\n",
661 				    path, stat);
662 			} else if (WEXITSTATUS(stat) != 0) {
663 				log_framework(LOG_INFO,
664 				    "%s failed with status %d.\n", path,
665 				    WEXITSTATUS(stat));
666 			}
667 		}
668 
669 		return;
670 	}
671 
672 	/* child */
673 
674 	log[2] = rl;
675 
676 	setlog(log);
677 
678 	now = time(NULL);
679 	sz = strftime(timebuf, sizeof (timebuf), "%b %e %T",
680 	    localtime_r(&now, &ltime));
681 	assert(sz != 0);
682 
683 	(void) fprintf(stderr, "%s Executing %s %s\n", timebuf, path, arg);
684 
685 	if (rl == 'S')
686 		pathenv = "PATH=/sbin:/usr/sbin:/usr/bin";
687 	else
688 		pathenv = "PATH=/usr/sbin:/usr/bin";
689 
690 	nenv = set_smf_env(NULL, 0, pathenv, NULL, NULL);
691 
692 	(void) execle(path, path, arg, 0, nenv);
693 
694 	perror("exec");
695 	exit(0);
696 }
697 
698 #define	SVCCFG_PATH	"/usr/sbin/svccfg"
699 #define	EMI_MFST	"/lib/svc/manifest/system/early-manifest-import.xml"
700 #define	EMI_PATH	"/lib/svc/method/manifest-import"
701 
702 /*
703  * Set Early Manifest Import service's state and log file.
704  */
705 static int
706 emi_set_state(restarter_instance_state_t state, boolean_t setlog)
707 {
708 	int r, ret = 1;
709 	instance_data_t idata;
710 	scf_handle_t *hndl = NULL;
711 	scf_instance_t *inst = NULL;
712 
713 retry:
714 	if (hndl == NULL)
715 		hndl = libscf_handle_create_bound(SCF_VERSION);
716 
717 	if (hndl == NULL) {
718 		/*
719 		 * In the case that we can't bind to the repository
720 		 * (which should have been started), we need to allow
721 		 * the user into maintenance mode to determine what's
722 		 * failed.
723 		 */
724 		fork_sulogin(B_FALSE, "Unable to bind a new repository"
725 		    " handle: %s\n", scf_strerror(scf_error()));
726 		goto retry;
727 	}
728 
729 	if (inst == NULL)
730 		inst = safe_scf_instance_create(hndl);
731 
732 	if (scf_handle_decode_fmri(hndl, SCF_INSTANCE_EMI, NULL, NULL,
733 	    inst, NULL, NULL, SCF_DECODE_FMRI_EXACT) == -1) {
734 		switch (scf_error()) {
735 		case SCF_ERROR_NOT_FOUND:
736 			goto out;
737 
738 		case SCF_ERROR_CONNECTION_BROKEN:
739 		case SCF_ERROR_NOT_BOUND:
740 			libscf_handle_rebind(hndl);
741 			goto retry;
742 
743 		default:
744 			fork_sulogin(B_FALSE, "Couldn't fetch %s service: "
745 			    "%s\n", SCF_INSTANCE_EMI,
746 			    scf_strerror(scf_error()));
747 			goto retry;
748 		}
749 	}
750 
751 	if (setlog) {
752 		(void) libscf_note_method_log(inst, st->st_log_prefix, EMI_LOG);
753 		log_framework(LOG_DEBUG,
754 		    "Set logfile property for %s\n", SCF_INSTANCE_EMI);
755 	}
756 
757 	idata.i_fmri = SCF_INSTANCE_EMI;
758 	idata.i_state =  RESTARTER_STATE_NONE;
759 	idata.i_next_state = RESTARTER_STATE_NONE;
760 	switch (r = _restarter_commit_states(hndl, &idata, state,
761 	    RESTARTER_STATE_NONE, NULL)) {
762 	case 0:
763 		break;
764 
765 	case ECONNABORTED:
766 		libscf_handle_rebind(hndl);
767 		goto retry;
768 
769 	case ENOMEM:
770 	case ENOENT:
771 	case EPERM:
772 	case EACCES:
773 	case EROFS:
774 		fork_sulogin(B_FALSE, "Could not set state of "
775 		    "%s: %s\n", SCF_INSTANCE_EMI, strerror(r));
776 		goto retry;
777 
778 	case EINVAL:
779 	default:
780 		bad_error("_restarter_commit_states", r);
781 	}
782 	ret = 0;
783 
784 out:
785 	scf_instance_destroy(inst);
786 	scf_handle_destroy(hndl);
787 	return (ret);
788 }
789 
790 /*
791  * It is possible that the early-manifest-import service is disabled.  This
792  * would not be the normal case for Solaris, but it may happen on dedicated
793  * systems.  So this function checks the state of the general/enabled
794  * property for Early Manifest Import.
795  *
796  * It is also possible that the early-manifest-import service does not yet
797  * have a repository representation when this function runs.  This happens
798  * if non-Early Manifest Import system is upgraded to an Early Manifest
799  * Import based system.  Thus, the non-existence of general/enabled is not
800  * an error.
801  *
802  * Returns 1 if Early Manifest Import is disabled and 0 otherwise.
803  */
804 static int
805 emi_is_disabled()
806 {
807 	int disabled = 0;
808 	int disconnected = 1;
809 	int enabled;
810 	scf_handle_t *hndl = NULL;
811 	scf_instance_t *inst = NULL;
812 	uchar_t stored_hash[MHASH_SIZE];
813 	char *pname;
814 	int hashash, r;
815 
816 	while (hndl == NULL) {
817 		hndl = libscf_handle_create_bound(SCF_VERSION);
818 
819 		if (hndl == NULL) {
820 			/*
821 			 * In the case that we can't bind to the repository
822 			 * (which should have been started), we need to
823 			 * allow the user into maintenance mode to
824 			 * determine what's failed.
825 			 */
826 			fork_sulogin(B_FALSE, "Unable to bind a new repository "
827 			    "handle: %s\n", scf_strerror(scf_error()));
828 		}
829 	}
830 
831 	while (disconnected) {
832 		r = libscf_fmri_get_instance(hndl, SCF_INSTANCE_EMI, &inst);
833 		if (r != 0) {
834 			switch (r) {
835 			case ECONNABORTED:
836 				libscf_handle_rebind(hndl);
837 				continue;
838 
839 			case ENOENT:
840 				/*
841 				 * Early Manifest Import service is not in
842 				 * the repository. Check the manifest file
843 				 * and service's hash in smf/manifest to
844 				 * figure out whether Early Manifest Import
845 				 * service was deleted. If Early Manifest Import
846 				 * service was deleted, treat that as a disable
847 				 * and don't run early import.
848 				 */
849 
850 				if (access(EMI_MFST, F_OK)) {
851 					/*
852 					 * Manifest isn't found, so service is
853 					 * properly removed.
854 					 */
855 					disabled = 1;
856 				} else {
857 					/*
858 					 * If manifest exists and we have the
859 					 * hash, the service was improperly
860 					 * deleted, generate a warning and treat
861 					 * this as a disable.
862 					 */
863 
864 					if ((pname = mhash_filename_to_propname(
865 					    EMI_MFST, B_TRUE)) == NULL) {
866 						/*
867 						 * Treat failure to get propname
868 						 * as a disable.
869 						 */
870 						disabled = 1;
871 						uu_warn("Failed to get propname"
872 						    " for %s.\n",
873 						    SCF_INSTANCE_EMI);
874 					} else {
875 						hashash = mhash_retrieve_entry(
876 						    hndl, pname,
877 						    stored_hash,
878 						    NULL) == 0;
879 						uu_free(pname);
880 
881 						if (hashash) {
882 							disabled = 1;
883 							uu_warn("%s service is "
884 							    "deleted \n",
885 							    SCF_INSTANCE_EMI);
886 						}
887 					}
888 
889 				}
890 
891 				disconnected = 0;
892 				continue;
893 
894 			default:
895 				bad_error("libscf_fmri_get_instance",
896 				    scf_error());
897 			}
898 		}
899 		r = libscf_get_basic_instance_data(hndl, inst, SCF_INSTANCE_EMI,
900 		    &enabled, NULL, NULL);
901 		if (r == 0) {
902 			/*
903 			 * enabled can be returned as -1, which indicates
904 			 * that the enabled property was not found.  To us
905 			 * that means that the service was not disabled.
906 			 */
907 			if (enabled == 0)
908 				disabled = 1;
909 		} else {
910 			switch (r) {
911 			case ECONNABORTED:
912 				libscf_handle_rebind(hndl);
913 				continue;
914 
915 			case ECANCELED:
916 			case ENOENT:
917 				break;
918 			default:
919 				bad_error("libscf_get_basic_instance_data", r);
920 			}
921 		}
922 		disconnected = 0;
923 	}
924 
925 out:
926 	if (inst != NULL)
927 		scf_instance_destroy(inst);
928 	scf_handle_destroy(hndl);
929 	return (disabled);
930 }
931 
932 void
933 fork_emi()
934 {
935 	pid_t pid;
936 	ctid_t ctid = -1;
937 	char **envp, **np;
938 	char *emipath;
939 	char corepath[PATH_MAX];
940 	char *svc_state;
941 	int setemilog;
942 	int sz;
943 
944 	if (emi_is_disabled()) {
945 		log_framework(LOG_NOTICE, "%s is  disabled and will "
946 		    "not be run.\n", SCF_INSTANCE_EMI);
947 		return;
948 	}
949 
950 	/*
951 	 * Early Manifest Import should run only once, at boot. If svc.startd
952 	 * is some how restarted, Early Manifest Import  should not run again.
953 	 * Use the Early Manifest Import service's state to figure out whether
954 	 * Early Manifest Import has successfully completed earlier and bail
955 	 * out if it did.
956 	 */
957 	if (svc_state = smf_get_state(SCF_INSTANCE_EMI)) {
958 		if (strcmp(svc_state, SCF_STATE_STRING_ONLINE) == 0) {
959 			free(svc_state);
960 			return;
961 		}
962 		free(svc_state);
963 	}
964 
965 	/*
966 	 * Attempt to set Early Manifest Import service's state and log file.
967 	 * If emi_set_state fails, set log file again in the next call to
968 	 * emi_set_state.
969 	 */
970 	setemilog = emi_set_state(RESTARTER_STATE_OFFLINE, B_TRUE);
971 
972 	/* Don't go further if /usr isn't available */
973 	if (access(SVCCFG_PATH, F_OK)) {
974 		log_framework(LOG_NOTICE, "Early Manifest Import is not "
975 		    "supported on systems with a separate /usr filesystem.\n");
976 		return;
977 	}
978 
979 fork_retry:
980 	log_framework(LOG_DEBUG, "Starting Early Manifest Import\n");
981 
982 	/*
983 	 * If we're retrying, we will have an old contract lying around
984 	 * from the failure.  Since we're going to be creating a new
985 	 * contract shortly, we abandon the old one now.
986 	 */
987 	if (ctid != -1)
988 		contract_abandon(ctid);
989 	ctid = -1;
990 
991 	pid = fork_common(SCF_INSTANCE_EMI, SCF_INSTANCE_EMI,
992 	    MAX_EMI_RETRIES, &ctid, 0, 0, 0, 0, EMI_COOKIE);
993 
994 	if (pid != 0) {
995 		int exitstatus;
996 
997 		if (waitpid(pid, &exitstatus, 0) == -1) {
998 			fork_sulogin(B_FALSE, "waitpid on %s failed: "
999 			    "%s\n", SCF_INSTANCE_EMI, strerror(errno));
1000 		} else if (WIFEXITED(exitstatus)) {
1001 			if (WEXITSTATUS(exitstatus)) {
1002 				fork_sulogin(B_FALSE, "%s exited with status "
1003 				    "%d \n", SCF_INSTANCE_EMI,
1004 				    WEXITSTATUS(exitstatus));
1005 				goto fork_retry;
1006 			}
1007 		} else if (WIFSIGNALED(exitstatus)) {
1008 			char signame[SIG2STR_MAX];
1009 
1010 			if (sig2str(WTERMSIG(exitstatus), signame))
1011 				(void) snprintf(signame, SIG2STR_MAX,
1012 				    "signum %d", WTERMSIG(exitstatus));
1013 
1014 			fork_sulogin(B_FALSE, "%s signalled: %s\n",
1015 			    SCF_INSTANCE_EMI, signame);
1016 			goto fork_retry;
1017 		} else {
1018 			fork_sulogin(B_FALSE, "%s non-exit condition: 0x%x\n",
1019 			    SCF_INSTANCE_EMI, exitstatus);
1020 			goto fork_retry;
1021 		}
1022 
1023 		log_framework(LOG_DEBUG, "%s completed successfully\n",
1024 		    SCF_INSTANCE_EMI);
1025 
1026 		/*
1027 		 * Once Early Manifest Import completed, the Early Manifest
1028 		 * Import service must have been imported so set log file and
1029 		 * state properties. Since this information is required for
1030 		 * late manifest import and common admin operations, failing to
1031 		 * set these properties should result in su login so admin can
1032 		 * correct the problem.
1033 		 */
1034 		(void) emi_set_state(RESTARTER_STATE_ONLINE,
1035 		    setemilog ? B_TRUE : B_FALSE);
1036 
1037 		return;
1038 	}
1039 
1040 	/* child */
1041 
1042 	/*
1043 	 * Set our per-process core file path to leave core files in
1044 	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
1045 	 */
1046 	(void) snprintf(corepath, sizeof (corepath),
1047 	    "/etc/svc/volatile/core.emi.%%p");
1048 	(void) core_set_process_path(corepath, strlen(corepath) + 1, getpid());
1049 
1050 	/*
1051 	 * Similar to running legacy services, we need to manually set
1052 	 * log files here and environment variables.
1053 	 */
1054 	setlog(EMI_LOG);
1055 
1056 	envp = startd_zalloc(sizeof (char *) * 3);
1057 	np = envp;
1058 
1059 	sz = sizeof ("SMF_FMRI=") + strlen(SCF_INSTANCE_EMI);
1060 	*np = startd_zalloc(sz);
1061 	(void) strlcpy(*np, "SMF_FMRI=", sz);
1062 	(void) strncat(*np, SCF_INSTANCE_EMI, sz);
1063 	np++;
1064 
1065 	emipath = getenv("PATH");
1066 	if (emipath == NULL)
1067 		emipath = strdup("/usr/sbin:/usr/bin");
1068 
1069 	sz = sizeof ("PATH=") + strlen(emipath);
1070 	*np = startd_zalloc(sz);
1071 	(void) strlcpy(*np, "PATH=", sz);
1072 	(void) strncat(*np, emipath, sz);
1073 
1074 	log_framework(LOG_DEBUG, "executing Early Manifest Import\n");
1075 	(void) execle(EMI_PATH, EMI_PATH, NULL, envp);
1076 
1077 	/*
1078 	 * Status code is used above to identify Early Manifest Import
1079 	 * exec failure.
1080 	 */
1081 	exit(1);
1082 }
1083 
1084 extern char **environ;
1085 
1086 /*
1087  * A local variation on system(3c) which accepts a timeout argument.  This
1088  * allows us to better ensure that the system will actually shut down.
1089  *
1090  * gracetime specifies an amount of time in seconds which the routine must wait
1091  * after the command exits, to allow for asynchronous effects (like sent
1092  * signals) to take effect.  This can be zero.
1093  */
1094 void
1095 fork_with_timeout(const char *cmd, uint_t gracetime, uint_t timeout)
1096 {
1097 	int err = 0;
1098 	pid_t pid;
1099 	char *argv[4];
1100 	posix_spawnattr_t attr;
1101 	posix_spawn_file_actions_t factions;
1102 
1103 	sigset_t mask, savemask;
1104 	uint_t msec_timeout;
1105 	uint_t msec_spent = 0;
1106 	uint_t msec_gracetime;
1107 	int status;
1108 
1109 	msec_timeout = timeout * 1000;
1110 	msec_gracetime = gracetime * 1000;
1111 
1112 	/*
1113 	 * See also system(3c) in libc.  This is very similar, except
1114 	 * that we avoid some unneeded complexity.
1115 	 */
1116 	err = posix_spawnattr_init(&attr);
1117 	if (err == 0)
1118 		err = posix_spawnattr_setflags(&attr,
1119 		    POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF |
1120 		    POSIX_SPAWN_NOSIGCHLD_NP | POSIX_SPAWN_WAITPID_NP |
1121 		    POSIX_SPAWN_NOEXECERR_NP);
1122 
1123 	/*
1124 	 * We choose to close fd's above 2, a deviation from system.
1125 	 */
1126 	if (err == 0)
1127 		err = posix_spawn_file_actions_init(&factions);
1128 	if (err == 0)
1129 		err = posix_spawn_file_actions_addclosefrom_np(&factions,
1130 		    STDERR_FILENO + 1);
1131 
1132 	(void) sigemptyset(&mask);
1133 	(void) sigaddset(&mask, SIGCHLD);
1134 	(void) thr_sigsetmask(SIG_BLOCK, &mask, &savemask);
1135 
1136 	argv[0] = "/bin/sh";
1137 	argv[1] = "-c";
1138 	argv[2] = (char *)cmd;
1139 	argv[3] = NULL;
1140 
1141 	if (err == 0)
1142 		err = posix_spawn(&pid, "/bin/sh", &factions, &attr,
1143 		    (char *const *)argv, (char *const *)environ);
1144 
1145 	(void) posix_spawnattr_destroy(&attr);
1146 	(void) posix_spawn_file_actions_destroy(&factions);
1147 
1148 	if (err) {
1149 		uu_warn("Failed to spawn %s: %s\n", cmd, strerror(err));
1150 	} else {
1151 		for (;;) {
1152 			int w;
1153 			w = waitpid(pid, &status, WNOHANG);
1154 			if (w == -1 && errno != EINTR)
1155 				break;
1156 			if (w > 0) {
1157 				/*
1158 				 * Command succeeded, so give it gracetime
1159 				 * seconds for it to have an effect.
1160 				 */
1161 				if (status == 0 && msec_gracetime != 0)
1162 					(void) poll(NULL, 0, msec_gracetime);
1163 				break;
1164 			}
1165 
1166 			(void) poll(NULL, 0, 100);
1167 			msec_spent += 100;
1168 			/*
1169 			 * If we timed out, kill off the process, then try to
1170 			 * wait for it-- it's possible that we could accumulate
1171 			 * a zombie here since we don't allow waitpid to hang,
1172 			 * but it's better to let that happen and continue to
1173 			 * make progress.
1174 			 */
1175 			if (msec_spent >= msec_timeout) {
1176 				uu_warn("'%s' timed out after %d "
1177 				    "seconds.  Killing.\n", cmd,
1178 				    timeout);
1179 				(void) kill(pid, SIGTERM);
1180 				(void) poll(NULL, 0, 100);
1181 				(void) kill(pid, SIGKILL);
1182 				(void) poll(NULL, 0, 100);
1183 				(void) waitpid(pid, &status, WNOHANG);
1184 				break;
1185 			}
1186 		}
1187 	}
1188 	(void) thr_sigsetmask(SIG_BLOCK, &savemask, NULL);
1189 }
1190