xref: /titanic_50/usr/src/cmd/svc/startd/fork.c (revision 49f9b365248ee858ee91baa36eab27c5200f6dca)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2012 Milan Jurik. All rights reserved.
25  */
26 
27 /*
28  * fork.c - safe forking for svc.startd
29  *
30  * fork_configd() and fork_sulogin() are related, special cases that handle the
31  * spawning of specific client processes for svc.startd.
32  */
33 
#include <sys/contract/process.h>
#include <sys/corectl.h>
#include <sys/ctfs.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <libcontract.h>
#include <libcontract_priv.h>
#include <libscf_priv.h>
#include <limits.h>
#include <poll.h>
#include <port.h>
#include <signal.h>
#include <spawn.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <utmpx.h>
58 
59 #include "manifest_hash.h"
60 #include "configd_exit.h"
61 #include "protocol.h"
62 #include "startd.h"
63 
static	struct	utmpx	*utmpp;	/* current getutxent() entry; used by fork_sulogin() */
65 
/*
 * pid_t startd_fork1(int *)
 *   fork1() bracketed by the wait_prefork()/wait_postfork() hooks.  The
 *   result of fork1() is returned unchanged.  If the fork fails and forkerr
 *   is non-NULL, the errno left by fork1() is stored through it.
 */
pid_t
startd_fork1(int *forkerr)
{
	pid_t child;

	/* Run the pre-fork hook before creating the child. */
	wait_prefork();

	child = fork1();
	if (child == -1) {
		/* Capture errno before the post-fork hook can disturb it. */
		if (forkerr != NULL)
			*forkerr = errno;
	}

	/* The post-fork hook is handed fork1()'s result, failure included. */
	wait_postfork(child);

	return (child);
}
88 
89 /*
90  * void fork_mount(char *, char *)
91  *   Run mount(1M) with the given options and mount point.  (mount(1M) has much
92  *   hidden knowledge; it's much less correct to reimplement that logic here to
93  *   save a fork(2)/exec(2) invocation.)
94  */
95 int
96 fork_mount(char *path, char *opts)
97 {
98 	pid_t pid;
99 	uint_t tries = 0;
100 	int status;
101 
102 	for (pid = fork1(); pid == -1; pid = fork1()) {
103 		if (++tries > MAX_MOUNT_RETRIES)
104 			return (-1);
105 
106 		(void) sleep(tries);
107 	}
108 
109 	if (pid != 0) {
110 		(void) waitpid(pid, &status, 0);
111 
112 		/*
113 		 * If our mount(1M) invocation exited by peculiar means, or with
114 		 * a non-zero status, our mount likelihood is low.
115 		 */
116 		if (!WIFEXITED(status) ||
117 		    WEXITSTATUS(status) != 0)
118 			return (-1);
119 
120 		return (0);
121 	}
122 
123 	(void) execl("/sbin/mount", "mount", "-o", opts, path, NULL);
124 
125 	return (-1);
126 }
127 
128 /*
129  * pid_t fork_common(...)
130  *   Common routine used by fork_sulogin, fork_emi, and fork_configd to
131  *   fork a process in a contract with the provided terms.  Invokes
132  *   fork_sulogin (with its no-fork argument set) on errors.
133  */
134 static pid_t
135 fork_common(const char *name, const char *svc_fmri, int retries, ctid_t *ctidp,
136     uint_t inf, uint_t crit, uint_t fatal, uint_t param, uint64_t cookie)
137 {
138 	uint_t tries = 0;
139 	int ctfd, err;
140 	pid_t pid;
141 
142 	/*
143 	 * Establish process contract terms.
144 	 */
145 	if ((ctfd = open64(CTFS_ROOT "/process/template", O_RDWR)) == -1) {
146 		fork_sulogin(B_TRUE, "Could not open process contract template "
147 		    "for %s: %s\n", name, strerror(errno));
148 		/* NOTREACHED */
149 	}
150 
151 	err = ct_tmpl_set_critical(ctfd, crit);
152 	err |= ct_pr_tmpl_set_fatal(ctfd, fatal);
153 	err |= ct_tmpl_set_informative(ctfd, inf);
154 	err |= ct_pr_tmpl_set_param(ctfd, param);
155 	err |= ct_tmpl_set_cookie(ctfd, cookie);
156 	err |= ct_pr_tmpl_set_svc_fmri(ctfd, svc_fmri);
157 	err |= ct_pr_tmpl_set_svc_aux(ctfd, name);
158 	if (err) {
159 		(void) close(ctfd);
160 		fork_sulogin(B_TRUE, "Could not set %s process contract "
161 		    "terms\n", name);
162 		/* NOTREACHED */
163 	}
164 
165 	if (err = ct_tmpl_activate(ctfd)) {
166 		(void) close(ctfd);
167 		fork_sulogin(B_TRUE, "Could not activate %s process contract "
168 		    "template: %s\n", name, strerror(err));
169 		/* NOTREACHED */
170 	}
171 
172 	/*
173 	 * Attempt to fork "retries" times.
174 	 */
175 	for (pid = fork1(); pid == -1; pid = fork1()) {
176 		if (++tries > retries) {
177 			/*
178 			 * When we exit the sulogin session, init(1M)
179 			 * will restart svc.startd(1M).
180 			 */
181 			err = errno;
182 			(void) ct_tmpl_clear(ctfd);
183 			(void) close(ctfd);
184 			fork_sulogin(B_TRUE, "Could not fork to start %s: %s\n",
185 			    name, strerror(err));
186 			/* NOTREACHED */
187 		}
188 		(void) sleep(tries);
189 	}
190 
191 	/*
192 	 * Clean up, return pid and ctid.
193 	 */
194 	if (pid != 0 && (errno = contract_latest(ctidp)) != 0)
195 		uu_die("Could not get new contract id for %s\n", name);
196 	(void) ct_tmpl_clear(ctfd);
197 	(void) close(ctfd);
198 
199 	return (pid);
200 }
201 
202 /*
203  * void fork_sulogin(boolean_t, const char *, ...)
204  *   When we are invoked with the -s flag from boot (or run into an unfixable
205  *   situation), we run a private copy of sulogin.  When the sulogin session
206  *   is ended, we continue.  This is the last fallback action for system
207  *   maintenance.
208  *
209  *   If immediate is true, fork_sulogin() executes sulogin(1M) directly, without
210  *   forking.
211  *
212  *   Because fork_sulogin() is needed potentially before we daemonize, we leave
213  *   it outside the wait_register() framework.
214  */
215 /*PRINTFLIKE2*/
216 void
217 fork_sulogin(boolean_t immediate, const char *format, ...)
218 {
219 	va_list args;
220 	int fd_console;
221 
222 	(void) printf("Requesting System Maintenance Mode\n");
223 
224 	if (!booting_to_single_user)
225 		(void) printf("(See /lib/svc/share/README for more "
226 		    "information.)\n");
227 
228 	va_start(args, format);
229 	(void) vprintf(format, args);
230 	va_end(args);
231 
232 	if (!immediate) {
233 		ctid_t	ctid;
234 		pid_t	pid;
235 
236 		pid = fork_common("sulogin", SVC_SULOGIN_FMRI,
237 		    MAX_SULOGIN_RETRIES, &ctid, CT_PR_EV_HWERR, 0,
238 		    CT_PR_EV_HWERR, CT_PR_PGRPONLY, SULOGIN_COOKIE);
239 
240 		if (pid != 0) {
241 			(void) waitpid(pid, NULL, 0);
242 			contract_abandon(ctid);
243 			return;
244 		}
245 		/* close all inherited fds */
246 		closefrom(0);
247 	} else {
248 		(void) printf("Directly executing sulogin.\n");
249 		/*
250 		 * Can't call closefrom() in this MT section
251 		 * so safely close a minimum set of fds.
252 		 */
253 		(void) close(STDIN_FILENO);
254 		(void) close(STDOUT_FILENO);
255 		(void) close(STDERR_FILENO);
256 	}
257 
258 	(void) setpgrp();
259 
260 	/* open the console for sulogin */
261 	if ((fd_console = open("/dev/console", O_RDWR)) >= 0) {
262 		if (fd_console != STDIN_FILENO)
263 			while (dup2(fd_console, STDIN_FILENO) < 0 &&
264 			    errno == EINTR)
265 				;
266 		if (fd_console != STDOUT_FILENO)
267 			while (dup2(fd_console, STDOUT_FILENO) < 0 &&
268 			    errno == EINTR)
269 				;
270 		if (fd_console != STDERR_FILENO)
271 			while (dup2(fd_console, STDERR_FILENO) < 0 &&
272 			    errno == EINTR)
273 				;
274 		if (fd_console > STDERR_FILENO)
275 			(void) close(fd_console);
276 	}
277 
278 	setutxent();
279 	while ((utmpp = getutxent()) != NULL) {
280 		if (strcmp(utmpp->ut_user, "LOGIN") != 0) {
281 			if (strcmp(utmpp->ut_line, "console") == 0) {
282 				(void) kill(utmpp->ut_pid, 9);
283 				break;
284 			}
285 		}
286 	}
287 
288 	(void) execl("/sbin/sulogin", "sulogin", NULL);
289 
290 	uu_warn("Could not exec() sulogin");
291 
292 	exit(1);
293 }
294 
295 #define	CONFIGD_PATH	"/lib/svc/bin/svc.configd"
296 
297 /*
298  * void fork_configd(int status)
299  *   We are interested in exit events (since the parent's exiting means configd
300  *   is ready to run and since the child's exiting indicates an error case) and
301  *   in empty events.  This means we have a unique template for initiating
302  *   configd.
303  */
304 void
305 fork_configd(int exitstatus)
306 {
307 	pid_t pid;
308 	ctid_t ctid = -1;
309 	int err;
310 	char path[PATH_MAX];
311 
312 	/*
313 	 * Checking the existatus for the potential failure of the
314 	 * daemonized svc.configd.  If this is not the first time
315 	 * through, but a call from the svc.configd monitoring thread
316 	 * after a failure this is the status that is expected.  Other
317 	 * failures are exposed during initialization or are fixed
318 	 * by a restart (e.g door closings).
319 	 *
320 	 * If this is on-disk database corruption it will also be
321 	 * caught by a restart but could be cleared before the restart.
322 	 *
323 	 * Or this could be internal database corruption due to a
324 	 * rogue service that needs to be cleared before restart.
325 	 */
326 	if (WEXITSTATUS(exitstatus) == CONFIGD_EXIT_DATABASE_BAD) {
327 		fork_sulogin(B_FALSE, "svc.configd exited with database "
328 		    "corrupt error after initialization of the repository\n");
329 	}
330 
331 retry:
332 	log_framework(LOG_DEBUG, "fork_configd trying to start svc.configd\n");
333 
334 	/*
335 	 * If we're retrying, we will have an old contract lying around
336 	 * from the failure.  Since we're going to be creating a new
337 	 * contract shortly, we abandon the old one now.
338 	 */
339 	if (ctid != -1)
340 		contract_abandon(ctid);
341 	ctid = -1;
342 
343 	pid = fork_common("svc.configd", SCF_SERVICE_CONFIGD,
344 	    MAX_CONFIGD_RETRIES, &ctid, 0, CT_PR_EV_EXIT, 0,
345 	    CT_PR_INHERIT | CT_PR_REGENT, CONFIGD_COOKIE);
346 
347 	if (pid != 0) {
348 		int exitstatus;
349 
350 		st->st_configd_pid = pid;
351 
352 		if (waitpid(pid, &exitstatus, 0) == -1) {
353 			fork_sulogin(B_FALSE, "waitpid on svc.configd "
354 			    "failed: %s\n", strerror(errno));
355 		} else if (WIFEXITED(exitstatus)) {
356 			char *errstr;
357 
358 			/*
359 			 * Examine exitstatus.  This will eventually get more
360 			 * complicated, as we will want to teach startd how to
361 			 * invoke configd with alternate repositories, etc.
362 			 *
363 			 * Note that exec(2) failure results in an exit status
364 			 * of 1, resulting in the default clause below.
365 			 */
366 
367 			/*
368 			 * Assign readable strings to cases we don't handle, or
369 			 * have error outcomes that cannot be eliminated.
370 			 */
371 			switch (WEXITSTATUS(exitstatus)) {
372 			case CONFIGD_EXIT_BAD_ARGS:
373 				errstr = "bad arguments";
374 				break;
375 
376 			case CONFIGD_EXIT_DATABASE_BAD:
377 				errstr = "database corrupt";
378 				break;
379 
380 			case CONFIGD_EXIT_DATABASE_LOCKED:
381 				errstr = "database locked";
382 				break;
383 			case CONFIGD_EXIT_INIT_FAILED:
384 				errstr = "initialization failure";
385 				break;
386 			case CONFIGD_EXIT_DOOR_INIT_FAILED:
387 				errstr = "door initialization failure";
388 				break;
389 			case CONFIGD_EXIT_DATABASE_INIT_FAILED:
390 				errstr = "database initialization failure";
391 				break;
392 			case CONFIGD_EXIT_NO_THREADS:
393 				errstr = "no threads available";
394 				break;
395 			case CONFIGD_EXIT_LOST_MAIN_DOOR:
396 				errstr = "lost door server attachment";
397 				break;
398 			case 1:
399 				errstr = "execution failure";
400 				break;
401 			default:
402 				errstr = "unknown error";
403 				break;
404 			}
405 
406 			/*
407 			 * Remedial actions for various configd failures.
408 			 */
409 			switch (WEXITSTATUS(exitstatus)) {
410 			case CONFIGD_EXIT_OKAY:
411 				break;
412 
413 			case CONFIGD_EXIT_DATABASE_LOCKED:
414 				/* attempt remount of / read-write */
415 				if (fs_is_read_only("/", NULL) == 1) {
416 					if (fs_remount("/") == -1)
417 						fork_sulogin(B_FALSE,
418 						    "remount of root "
419 						    "filesystem failed\n");
420 
421 					goto retry;
422 				}
423 				break;
424 
425 			default:
426 				fork_sulogin(B_FALSE, "svc.configd exited "
427 				    "with status %d (%s)\n",
428 				    WEXITSTATUS(exitstatus), errstr);
429 				goto retry;
430 			}
431 		} else if (WIFSIGNALED(exitstatus)) {
432 			char signame[SIG2STR_MAX];
433 
434 			if (sig2str(WTERMSIG(exitstatus), signame))
435 				(void) snprintf(signame, SIG2STR_MAX,
436 				    "signum %d", WTERMSIG(exitstatus));
437 
438 			fork_sulogin(B_FALSE, "svc.configd signalled:"
439 			    " %s\n", signame);
440 
441 			goto retry;
442 		} else {
443 			fork_sulogin(B_FALSE, "svc.configd non-exit "
444 			    "condition: 0x%x\n", exitstatus);
445 
446 			goto retry;
447 		}
448 
449 		/*
450 		 * Announce that we have a valid svc.configd status.
451 		 */
452 		MUTEX_LOCK(&st->st_configd_live_lock);
453 		st->st_configd_lives = 1;
454 		err = pthread_cond_broadcast(&st->st_configd_live_cv);
455 		assert(err == 0);
456 		MUTEX_UNLOCK(&st->st_configd_live_lock);
457 
458 		log_framework(LOG_DEBUG, "fork_configd broadcasts configd is "
459 		    "live\n");
460 		return;
461 	}
462 
463 	/*
464 	 * Set our per-process core file path to leave core files in
465 	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
466 	 */
467 	(void) snprintf(path, sizeof (path),
468 	    "/etc/svc/volatile/core.configd.%%p");
469 
470 	(void) core_set_process_path(path, strlen(path) + 1, getpid());
471 
472 	log_framework(LOG_DEBUG, "executing svc.configd\n");
473 
474 	(void) execl(CONFIGD_PATH, CONFIGD_PATH, NULL);
475 
476 	/*
477 	 * Status code is used above to identify configd exec failure.
478 	 */
479 	exit(1);
480 }
481 
482 void *
483 fork_configd_thread(void *vctid)
484 {
485 	int fd, err;
486 	ctid_t configd_ctid = (ctid_t)vctid;
487 
488 	if (configd_ctid == -1) {
489 		log_framework(LOG_DEBUG,
490 		    "fork_configd_thread starting svc.configd\n");
491 		fork_configd(0);
492 	} else {
493 		/*
494 		 * configd_ctid is known:  we broadcast and continue.
495 		 * test contract for appropriate state by verifying that
496 		 * there is one or more processes within it?
497 		 */
498 		log_framework(LOG_DEBUG,
499 		    "fork_configd_thread accepting svc.configd with CTID %ld\n",
500 		    configd_ctid);
501 		MUTEX_LOCK(&st->st_configd_live_lock);
502 		st->st_configd_lives = 1;
503 		(void) pthread_cond_broadcast(&st->st_configd_live_cv);
504 		MUTEX_UNLOCK(&st->st_configd_live_lock);
505 	}
506 
507 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
508 	if (fd == -1)
509 		uu_die("process bundle open failed");
510 
511 	/*
512 	 * Make sure we get all events (including those generated by configd
513 	 * before this thread was started).
514 	 */
515 	err = ct_event_reset(fd);
516 	assert(err == 0);
517 
518 	for (;;) {
519 		int efd, sfd;
520 		ct_evthdl_t ev;
521 		uint32_t type;
522 		ctevid_t evid;
523 		ct_stathdl_t status;
524 		ctid_t ctid;
525 		uint64_t cookie;
526 		pid_t pid;
527 
528 		if (err = ct_event_read_critical(fd, &ev)) {
529 			assert(err != EINVAL && err != EAGAIN);
530 			log_error(LOG_WARNING,
531 			    "Error reading next contract event: %s",
532 			    strerror(err));
533 			continue;
534 		}
535 
536 		evid = ct_event_get_evid(ev);
537 		ctid = ct_event_get_ctid(ev);
538 		type = ct_event_get_type(ev);
539 
540 		/* Fetch cookie. */
541 		sfd = contract_open(ctid, "process", "status", O_RDONLY);
542 		if (sfd < 0) {
543 			ct_event_free(ev);
544 			continue;
545 		}
546 
547 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
548 			log_framework(LOG_WARNING, "Could not get status for "
549 			    "contract %ld: %s\n", ctid, strerror(err));
550 
551 			ct_event_free(ev);
552 			startd_close(sfd);
553 			continue;
554 		}
555 
556 		cookie = ct_status_get_cookie(status);
557 
558 		ct_status_free(status);
559 
560 		startd_close(sfd);
561 
562 		/*
563 		 * Don't process events from contracts we aren't interested in.
564 		 */
565 		if (cookie != CONFIGD_COOKIE) {
566 			ct_event_free(ev);
567 			continue;
568 		}
569 
570 		if (type == CT_PR_EV_EXIT) {
571 			int exitstatus;
572 
573 			(void) ct_pr_event_get_pid(ev, &pid);
574 			(void) ct_pr_event_get_exitstatus(ev,
575 			    &exitstatus);
576 
577 			if (st->st_configd_pid != pid) {
578 				/*
579 				 * This is the child exiting, so we
580 				 * abandon the contract and restart
581 				 * configd.
582 				 */
583 				contract_abandon(ctid);
584 				fork_configd(exitstatus);
585 			}
586 		}
587 
588 		efd = contract_open(ctid, "process", "ctl", O_WRONLY);
589 		if (efd != -1) {
590 			(void) ct_ctl_ack(efd, evid);
591 			startd_close(efd);
592 		}
593 
594 		ct_event_free(ev);
595 
596 	}
597 
598 	/*NOTREACHED*/
599 	return (NULL);
600 }
601 
602 void
603 fork_rc_script(char rl, const char *arg, boolean_t wait)
604 {
605 	pid_t pid;
606 	int tmpl, err, stat;
607 	char path[20] = "/sbin/rc.", log[20] = "rc..log", timebuf[20];
608 	time_t now;
609 	struct tm ltime;
610 	size_t sz;
611 	char *pathenv;
612 	char **nenv;
613 
614 	path[8] = rl;
615 
616 	tmpl = open64(CTFS_ROOT "/process/template", O_RDWR);
617 	if (tmpl >= 0) {
618 		err = ct_tmpl_set_critical(tmpl, 0);
619 		assert(err == 0);
620 
621 		err = ct_tmpl_set_informative(tmpl, 0);
622 		assert(err == 0);
623 
624 		err = ct_pr_tmpl_set_fatal(tmpl, 0);
625 		assert(err == 0);
626 
627 		err = ct_tmpl_activate(tmpl);
628 		assert(err == 0);
629 
630 		err = close(tmpl);
631 		assert(err == 0);
632 	} else {
633 		uu_warn("Could not create contract template for %s.\n", path);
634 	}
635 
636 	pid = startd_fork1(NULL);
637 	if (pid < 0) {
638 		return;
639 	} else if (pid != 0) {
640 		/* parent */
641 		if (wait) {
642 			do
643 				err = waitpid(pid, &stat, 0);
644 			while (err != 0 && errno == EINTR)
645 				;
646 
647 			if (!WIFEXITED(stat)) {
648 				log_framework(LOG_INFO,
649 				    "%s terminated with waitpid() status %d.\n",
650 				    path, stat);
651 			} else if (WEXITSTATUS(stat) != 0) {
652 				log_framework(LOG_INFO,
653 				    "%s failed with status %d.\n", path,
654 				    WEXITSTATUS(stat));
655 			}
656 		}
657 
658 		return;
659 	}
660 
661 	/* child */
662 
663 	log[2] = rl;
664 
665 	setlog(log);
666 
667 	now = time(NULL);
668 	sz = strftime(timebuf, sizeof (timebuf), "%b %e %T",
669 	    localtime_r(&now, &ltime));
670 	assert(sz != 0);
671 
672 	(void) fprintf(stderr, "%s Executing %s %s\n", timebuf, path, arg);
673 
674 	if (rl == 'S')
675 		pathenv = "PATH=/sbin:/usr/sbin:/usr/bin";
676 	else
677 		pathenv = "PATH=/usr/sbin:/usr/bin";
678 
679 	nenv = set_smf_env(NULL, 0, pathenv, NULL, NULL);
680 
681 	(void) execle(path, path, arg, 0, nenv);
682 
683 	perror("exec");
684 	exit(0);
685 }
686 
687 #define	SVCCFG_PATH	"/usr/sbin/svccfg"
688 #define	EMI_MFST	"/lib/svc/manifest/system/early-manifest-import.xml"
689 #define	EMI_PATH	"/lib/svc/method/manifest-import"
690 
691 /*
692  * Set Early Manifest Import service's state and log file.
693  */
694 static int
695 emi_set_state(restarter_instance_state_t state, boolean_t setlog)
696 {
697 	int r, ret = 1;
698 	instance_data_t idata;
699 	scf_handle_t *hndl = NULL;
700 	scf_instance_t *inst = NULL;
701 
702 retry:
703 	if (hndl == NULL)
704 		hndl = libscf_handle_create_bound(SCF_VERSION);
705 
706 	if (hndl == NULL) {
707 		/*
708 		 * In the case that we can't bind to the repository
709 		 * (which should have been started), we need to allow
710 		 * the user into maintenance mode to determine what's
711 		 * failed.
712 		 */
713 		fork_sulogin(B_FALSE, "Unable to bind a new repository"
714 		    " handle: %s\n", scf_strerror(scf_error()));
715 		goto retry;
716 	}
717 
718 	if (inst == NULL)
719 		inst = safe_scf_instance_create(hndl);
720 
721 	if (scf_handle_decode_fmri(hndl, SCF_INSTANCE_EMI, NULL, NULL,
722 	    inst, NULL, NULL, SCF_DECODE_FMRI_EXACT) == -1) {
723 		switch (scf_error()) {
724 		case SCF_ERROR_NOT_FOUND:
725 			goto out;
726 
727 		case SCF_ERROR_CONNECTION_BROKEN:
728 		case SCF_ERROR_NOT_BOUND:
729 			libscf_handle_rebind(hndl);
730 			goto retry;
731 
732 		default:
733 			fork_sulogin(B_FALSE, "Couldn't fetch %s service: "
734 			    "%s\n", SCF_INSTANCE_EMI,
735 			    scf_strerror(scf_error()));
736 			goto retry;
737 		}
738 	}
739 
740 	if (setlog) {
741 		(void) libscf_note_method_log(inst, st->st_log_prefix, EMI_LOG);
742 		log_framework(LOG_DEBUG,
743 		    "Set logfile property for %s\n", SCF_INSTANCE_EMI);
744 	}
745 
746 	idata.i_fmri = SCF_INSTANCE_EMI;
747 	idata.i_state =  RESTARTER_STATE_NONE;
748 	idata.i_next_state = RESTARTER_STATE_NONE;
749 	switch (r = _restarter_commit_states(hndl, &idata, state,
750 	    RESTARTER_STATE_NONE, NULL)) {
751 	case 0:
752 		break;
753 
754 	case ECONNABORTED:
755 		libscf_handle_rebind(hndl);
756 		goto retry;
757 
758 	case ENOMEM:
759 	case ENOENT:
760 	case EPERM:
761 	case EACCES:
762 	case EROFS:
763 		fork_sulogin(B_FALSE, "Could not set state of "
764 		    "%s: %s\n", SCF_INSTANCE_EMI, strerror(r));
765 		goto retry;
766 
767 	case EINVAL:
768 	default:
769 		bad_error("_restarter_commit_states", r);
770 	}
771 	ret = 0;
772 
773 out:
774 	scf_instance_destroy(inst);
775 	scf_handle_destroy(hndl);
776 	return (ret);
777 }
778 
779 /*
780  * It is possible that the early-manifest-import service is disabled.  This
781  * would not be the normal case for Solaris, but it may happen on dedicated
782  * systems.  So this function checks the state of the general/enabled
783  * property for Early Manifest Import.
784  *
785  * It is also possible that the early-manifest-import service does not yet
786  * have a repository representation when this function runs.  This happens
787  * if non-Early Manifest Import system is upgraded to an Early Manifest
788  * Import based system.  Thus, the non-existence of general/enabled is not
789  * an error.
790  *
791  * Returns 1 if Early Manifest Import is disabled and 0 otherwise.
792  */
793 static int
794 emi_is_disabled()
795 {
796 	int disabled = 0;
797 	int disconnected = 1;
798 	int enabled;
799 	scf_handle_t *hndl = NULL;
800 	scf_instance_t *inst = NULL;
801 	uchar_t stored_hash[MHASH_SIZE];
802 	char *pname;
803 	int hashash, r;
804 
805 	while (hndl == NULL) {
806 		hndl = libscf_handle_create_bound(SCF_VERSION);
807 
808 		if (hndl == NULL) {
809 			/*
810 			 * In the case that we can't bind to the repository
811 			 * (which should have been started), we need to
812 			 * allow the user into maintenance mode to
813 			 * determine what's failed.
814 			 */
815 			fork_sulogin(B_FALSE, "Unable to bind a new repository "
816 			    "handle: %s\n", scf_strerror(scf_error()));
817 		}
818 	}
819 
820 	while (disconnected) {
821 		r = libscf_fmri_get_instance(hndl, SCF_INSTANCE_EMI, &inst);
822 		if (r != 0) {
823 			switch (r) {
824 			case ECONNABORTED:
825 				libscf_handle_rebind(hndl);
826 				continue;
827 
828 			case ENOENT:
829 				/*
830 				 * Early Manifest Import service is not in
831 				 * the repository. Check the manifest file
832 				 * and service's hash in smf/manifest to
833 				 * figure out whether Early Manifest Import
834 				 * service was deleted. If Early Manifest Import
835 				 * service was deleted, treat that as a disable
836 				 * and don't run early import.
837 				 */
838 
839 				if (access(EMI_MFST, F_OK)) {
840 					/*
841 					 * Manifest isn't found, so service is
842 					 * properly removed.
843 					 */
844 					disabled = 1;
845 				} else {
846 					/*
847 					 * If manifest exists and we have the
848 					 * hash, the service was improperly
849 					 * deleted, generate a warning and treat
850 					 * this as a disable.
851 					 */
852 
853 					if ((pname = mhash_filename_to_propname(
854 					    EMI_MFST, B_TRUE)) == NULL) {
855 						/*
856 						 * Treat failure to get propname
857 						 * as a disable.
858 						 */
859 						disabled = 1;
860 						uu_warn("Failed to get propname"
861 						    " for %s.\n",
862 						    SCF_INSTANCE_EMI);
863 					} else {
864 						hashash = mhash_retrieve_entry(
865 						    hndl, pname,
866 						    stored_hash,
867 						    NULL) == 0;
868 						uu_free(pname);
869 
870 						if (hashash) {
871 							disabled = 1;
872 							uu_warn("%s service is "
873 							    "deleted \n",
874 							    SCF_INSTANCE_EMI);
875 						}
876 					}
877 
878 				}
879 
880 				disconnected = 0;
881 				continue;
882 
883 			default:
884 				bad_error("libscf_fmri_get_instance",
885 				    scf_error());
886 			}
887 		}
888 		r = libscf_get_basic_instance_data(hndl, inst, SCF_INSTANCE_EMI,
889 		    &enabled, NULL, NULL);
890 		if (r == 0) {
891 			/*
892 			 * enabled can be returned as -1, which indicates
893 			 * that the enabled property was not found.  To us
894 			 * that means that the service was not disabled.
895 			 */
896 			if (enabled == 0)
897 				disabled = 1;
898 		} else {
899 			switch (r) {
900 			case ECONNABORTED:
901 				libscf_handle_rebind(hndl);
902 				continue;
903 
904 			case ECANCELED:
905 			case ENOENT:
906 				break;
907 			default:
908 				bad_error("libscf_get_basic_instance_data", r);
909 			}
910 		}
911 		disconnected = 0;
912 	}
913 
914 out:
915 	if (inst != NULL)
916 		scf_instance_destroy(inst);
917 	scf_handle_destroy(hndl);
918 	return (disabled);
919 }
920 
921 void
922 fork_emi()
923 {
924 	pid_t pid;
925 	ctid_t ctid = -1;
926 	char **envp, **np;
927 	char *emipath;
928 	char corepath[PATH_MAX];
929 	char *svc_state;
930 	int setemilog;
931 	int sz;
932 
933 	if (emi_is_disabled()) {
934 		log_framework(LOG_NOTICE, "%s is  disabled and will "
935 		    "not be run.\n", SCF_INSTANCE_EMI);
936 		return;
937 	}
938 
939 	/*
940 	 * Early Manifest Import should run only once, at boot. If svc.startd
941 	 * is some how restarted, Early Manifest Import  should not run again.
942 	 * Use the Early Manifest Import service's state to figure out whether
943 	 * Early Manifest Import has successfully completed earlier and bail
944 	 * out if it did.
945 	 */
946 	if (svc_state = smf_get_state(SCF_INSTANCE_EMI)) {
947 		if (strcmp(svc_state, SCF_STATE_STRING_ONLINE) == 0) {
948 			free(svc_state);
949 			return;
950 		}
951 		free(svc_state);
952 	}
953 
954 	/*
955 	 * Attempt to set Early Manifest Import service's state and log file.
956 	 * If emi_set_state fails, set log file again in the next call to
957 	 * emi_set_state.
958 	 */
959 	setemilog = emi_set_state(RESTARTER_STATE_OFFLINE, B_TRUE);
960 
961 	/* Don't go further if /usr isn't available */
962 	if (access(SVCCFG_PATH, F_OK)) {
963 		log_framework(LOG_NOTICE, "Early Manifest Import is not "
964 		    "supported on systems with a separate /usr filesystem.\n");
965 		return;
966 	}
967 
968 fork_retry:
969 	log_framework(LOG_DEBUG, "Starting Early Manifest Import\n");
970 
971 	/*
972 	 * If we're retrying, we will have an old contract lying around
973 	 * from the failure.  Since we're going to be creating a new
974 	 * contract shortly, we abandon the old one now.
975 	 */
976 	if (ctid != -1)
977 		contract_abandon(ctid);
978 	ctid = -1;
979 
980 	pid = fork_common(SCF_INSTANCE_EMI, SCF_INSTANCE_EMI,
981 	    MAX_EMI_RETRIES, &ctid, 0, 0, 0, 0, EMI_COOKIE);
982 
983 	if (pid != 0) {
984 		int exitstatus;
985 
986 		if (waitpid(pid, &exitstatus, 0) == -1) {
987 			fork_sulogin(B_FALSE, "waitpid on %s failed: "
988 			    "%s\n", SCF_INSTANCE_EMI, strerror(errno));
989 		} else if (WIFEXITED(exitstatus)) {
990 			if (WEXITSTATUS(exitstatus)) {
991 				fork_sulogin(B_FALSE, "%s exited with status "
992 				    "%d \n", SCF_INSTANCE_EMI,
993 				    WEXITSTATUS(exitstatus));
994 				goto fork_retry;
995 			}
996 		} else if (WIFSIGNALED(exitstatus)) {
997 			char signame[SIG2STR_MAX];
998 
999 			if (sig2str(WTERMSIG(exitstatus), signame))
1000 				(void) snprintf(signame, SIG2STR_MAX,
1001 				    "signum %d", WTERMSIG(exitstatus));
1002 
1003 			fork_sulogin(B_FALSE, "%s signalled: %s\n",
1004 			    SCF_INSTANCE_EMI, signame);
1005 			goto fork_retry;
1006 		} else {
1007 			fork_sulogin(B_FALSE, "%s non-exit condition: 0x%x\n",
1008 			    SCF_INSTANCE_EMI, exitstatus);
1009 			goto fork_retry;
1010 		}
1011 
1012 		log_framework(LOG_DEBUG, "%s completed successfully\n",
1013 		    SCF_INSTANCE_EMI);
1014 
1015 		/*
1016 		 * Once Early Manifest Import completed, the Early Manifest
1017 		 * Import service must have been imported so set log file and
1018 		 * state properties. Since this information is required for
1019 		 * late manifest import and common admin operations, failing to
1020 		 * set these properties should result in su login so admin can
1021 		 * correct the problem.
1022 		 */
1023 		(void) emi_set_state(RESTARTER_STATE_ONLINE,
1024 		    setemilog ? B_TRUE : B_FALSE);
1025 
1026 		return;
1027 	}
1028 
1029 	/* child */
1030 
1031 	/*
1032 	 * Set our per-process core file path to leave core files in
1033 	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
1034 	 */
1035 	(void) snprintf(corepath, sizeof (corepath),
1036 	    "/etc/svc/volatile/core.emi.%%p");
1037 	(void) core_set_process_path(corepath, strlen(corepath) + 1, getpid());
1038 
1039 	/*
1040 	 * Similar to running legacy services, we need to manually set
1041 	 * log files here and environment variables.
1042 	 */
1043 	setlog(EMI_LOG);
1044 
1045 	envp = startd_zalloc(sizeof (char *) * 3);
1046 	np = envp;
1047 
1048 	sz = sizeof ("SMF_FMRI=") + strlen(SCF_INSTANCE_EMI);
1049 	*np = startd_zalloc(sz);
1050 	(void) strlcpy(*np, "SMF_FMRI=", sz);
1051 	(void) strncat(*np, SCF_INSTANCE_EMI, sz);
1052 	np++;
1053 
1054 	emipath = getenv("PATH");
1055 	if (emipath == NULL)
1056 		emipath = strdup("/usr/sbin:/usr/bin");
1057 
1058 	sz = sizeof ("PATH=") + strlen(emipath);
1059 	*np = startd_zalloc(sz);
1060 	(void) strlcpy(*np, "PATH=", sz);
1061 	(void) strncat(*np, emipath, sz);
1062 
1063 	log_framework(LOG_DEBUG, "executing Early Manifest Import\n");
1064 	(void) execle(EMI_PATH, EMI_PATH, NULL, envp);
1065 
1066 	/*
1067 	 * Status code is used above to identify Early Manifest Import
1068 	 * exec failure.
1069 	 */
1070 	exit(1);
1071 }
1072 
1073 extern char **environ;
1074 
1075 /*
1076  * A local variation on system(3c) which accepts a timeout argument.  This
1077  * allows us to better ensure that the system will actually shut down.
1078  *
1079  * gracetime specifies an amount of time in seconds which the routine must wait
1080  * after the command exits, to allow for asynchronous effects (like sent
1081  * signals) to take effect.  This can be zero.
1082  */
1083 void
1084 fork_with_timeout(const char *cmd, uint_t gracetime, uint_t timeout)
1085 {
1086 	int err = 0;
1087 	pid_t pid;
1088 	char *argv[4];
1089 	posix_spawnattr_t attr;
1090 	posix_spawn_file_actions_t factions;
1091 
1092 	sigset_t mask, savemask;
1093 	uint_t msec_timeout;
1094 	uint_t msec_spent = 0;
1095 	uint_t msec_gracetime;
1096 	int status;
1097 
1098 	msec_timeout = timeout * 1000;
1099 	msec_gracetime = gracetime * 1000;
1100 
1101 	/*
1102 	 * See also system(3c) in libc.  This is very similar, except
1103 	 * that we avoid some unneeded complexity.
1104 	 */
1105 	err = posix_spawnattr_init(&attr);
1106 	if (err == 0)
1107 		err = posix_spawnattr_setflags(&attr,
1108 		    POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF |
1109 		    POSIX_SPAWN_NOSIGCHLD_NP | POSIX_SPAWN_WAITPID_NP |
1110 		    POSIX_SPAWN_NOEXECERR_NP);
1111 
1112 	/*
1113 	 * We choose to close fd's above 2, a deviation from system.
1114 	 */
1115 	if (err == 0)
1116 		err = posix_spawn_file_actions_init(&factions);
1117 	if (err == 0)
1118 		err = posix_spawn_file_actions_addclosefrom_np(&factions,
1119 		    STDERR_FILENO + 1);
1120 
1121 	(void) sigemptyset(&mask);
1122 	(void) sigaddset(&mask, SIGCHLD);
1123 	(void) thr_sigsetmask(SIG_BLOCK, &mask, &savemask);
1124 
1125 	argv[0] = "/bin/sh";
1126 	argv[1] = "-c";
1127 	argv[2] = (char *)cmd;
1128 	argv[3] = NULL;
1129 
1130 	if (err == 0)
1131 		err = posix_spawn(&pid, "/bin/sh", &factions, &attr,
1132 		    (char *const *)argv, (char *const *)environ);
1133 
1134 	(void) posix_spawnattr_destroy(&attr);
1135 	(void) posix_spawn_file_actions_destroy(&factions);
1136 
1137 	if (err) {
1138 		uu_warn("Failed to spawn %s: %s\n", cmd, strerror(err));
1139 	} else {
1140 		for (;;) {
1141 			int w;
1142 			w = waitpid(pid, &status, WNOHANG);
1143 			if (w == -1 && errno != EINTR)
1144 				break;
1145 			if (w > 0) {
1146 				/*
1147 				 * Command succeeded, so give it gracetime
1148 				 * seconds for it to have an effect.
1149 				 */
1150 				if (status == 0 && msec_gracetime != 0)
1151 					(void) poll(NULL, 0, msec_gracetime);
1152 				break;
1153 			}
1154 
1155 			(void) poll(NULL, 0, 100);
1156 			msec_spent += 100;
1157 			/*
1158 			 * If we timed out, kill off the process, then try to
1159 			 * wait for it-- it's possible that we could accumulate
1160 			 * a zombie here since we don't allow waitpid to hang,
1161 			 * but it's better to let that happen and continue to
1162 			 * make progress.
1163 			 */
1164 			if (msec_spent >= msec_timeout) {
1165 				uu_warn("'%s' timed out after %d "
1166 				    "seconds.  Killing.\n", cmd,
1167 				    timeout);
1168 				(void) kill(pid, SIGTERM);
1169 				(void) poll(NULL, 0, 100);
1170 				(void) kill(pid, SIGKILL);
1171 				(void) poll(NULL, 0, 100);
1172 				(void) waitpid(pid, &status, WNOHANG);
1173 				break;
1174 			}
1175 		}
1176 	}
1177 	(void) thr_sigsetmask(SIG_BLOCK, &savemask, NULL);
1178 }
1179