xref: /titanic_41/usr/src/cmd/svc/startd/fork.c (revision c586600796766c83eb9485c446886fd9ed2359a9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * fork.c - safe forking for svc.startd
28  *
29  * fork_configd() and fork_sulogin() are related, special cases that handle the
30  * spawning of specific client processes for svc.startd.
31  */
32 
33 #include <sys/contract/process.h>
34 #include <sys/corectl.h>
35 #include <sys/ctfs.h>
36 #include <sys/stat.h>
37 #include <sys/types.h>
38 #include <sys/uio.h>
39 #include <sys/wait.h>
40 #include <assert.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <libcontract.h>
44 #include <libcontract_priv.h>
45 #include <libscf_priv.h>
46 #include <limits.h>
47 #include <poll.h>
48 #include <port.h>
49 #include <signal.h>
50 #include <stdarg.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 #include <utmpx.h>
56 #include <spawn.h>
57 
58 #include "manifest_hash.h"
59 #include "configd_exit.h"
60 #include "protocol.h"
61 #include "startd.h"
62 
/*
 * Holds the utmpx entry most recently returned by getutxent().  It is
 * assigned by fork_sulogin() while that function walks the utmpx database
 * looking for the entry whose line is "console".
 */
static	struct	utmpx	*utmpp;	/* pointer for getutxent() */
64 
/*
 * pid_t startd_fork1(int *)
 *   fork1() bracketed by the wait_prefork()/wait_postfork() hooks.  The
 *   return value is whatever fork1() returned.  When fork1() fails and
 *   forkerr is non-NULL, the errno from the failed fork1() is stored in
 *   *forkerr.  wait_postfork() is invoked with the fork1() result on every
 *   path (parent, child, and failure).
 */
pid_t
startd_fork1(int *forkerr)
{
	pid_t child;

	/* prefork hook */
	wait_prefork();

	child = fork1();
	if (child == -1) {
		/* Capture errno before the postfork hook can disturb it. */
		if (forkerr != NULL)
			*forkerr = errno;
	}

	/* postfork hook */
	wait_postfork(child);

	return (child);
}
87 
88 /*
89  * void fork_mount(char *, char *)
90  *   Run mount(1M) with the given options and mount point.  (mount(1M) has much
91  *   hidden knowledge; it's much less correct to reimplement that logic here to
92  *   save a fork(2)/exec(2) invocation.)
93  */
94 int
95 fork_mount(char *path, char *opts)
96 {
97 	pid_t pid;
98 	uint_t tries = 0;
99 	int status;
100 
101 	for (pid = fork1(); pid == -1; pid = fork1()) {
102 		if (++tries > MAX_MOUNT_RETRIES)
103 			return (-1);
104 
105 		(void) sleep(tries);
106 	}
107 
108 	if (pid != 0) {
109 		(void) waitpid(pid, &status, 0);
110 
111 		/*
112 		 * If our mount(1M) invocation exited by peculiar means, or with
113 		 * a non-zero status, our mount likelihood is low.
114 		 */
115 		if (!WIFEXITED(status) ||
116 		    WEXITSTATUS(status) != 0)
117 			return (-1);
118 
119 		return (0);
120 	}
121 
122 	(void) execl("/sbin/mount", "mount", "-o", opts, path, NULL);
123 
124 	return (-1);
125 }
126 
127 /*
128  * pid_t fork_common(...)
129  *   Common routine used by fork_sulogin, fork_emi, and fork_configd to
130  *   fork a process in a contract with the provided terms.  Invokes
131  *   fork_sulogin (with its no-fork argument set) on errors.
132  */
133 static pid_t
134 fork_common(const char *name, const char *svc_fmri, int retries, ctid_t *ctidp,
135     uint_t inf, uint_t crit, uint_t fatal, uint_t param, uint64_t cookie)
136 {
137 	uint_t tries = 0;
138 	int ctfd, err;
139 	pid_t pid;
140 
141 	/*
142 	 * Establish process contract terms.
143 	 */
144 	if ((ctfd = open64(CTFS_ROOT "/process/template", O_RDWR)) == -1) {
145 		fork_sulogin(B_TRUE, "Could not open process contract template "
146 		    "for %s: %s\n", name, strerror(errno));
147 		/* NOTREACHED */
148 	}
149 
150 	err = ct_tmpl_set_critical(ctfd, crit);
151 	err |= ct_pr_tmpl_set_fatal(ctfd, fatal);
152 	err |= ct_tmpl_set_informative(ctfd, inf);
153 	err |= ct_pr_tmpl_set_param(ctfd, param);
154 	err |= ct_tmpl_set_cookie(ctfd, cookie);
155 	err |= ct_pr_tmpl_set_svc_fmri(ctfd, svc_fmri);
156 	err |= ct_pr_tmpl_set_svc_aux(ctfd, name);
157 	if (err) {
158 		(void) close(ctfd);
159 		fork_sulogin(B_TRUE, "Could not set %s process contract "
160 		    "terms\n", name);
161 		/* NOTREACHED */
162 	}
163 
164 	if (err = ct_tmpl_activate(ctfd)) {
165 		(void) close(ctfd);
166 		fork_sulogin(B_TRUE, "Could not activate %s process contract "
167 		    "template: %s\n", name, strerror(err));
168 		/* NOTREACHED */
169 	}
170 
171 	/*
172 	 * Attempt to fork "retries" times.
173 	 */
174 	for (pid = fork1(); pid == -1; pid = fork1()) {
175 		if (++tries > retries) {
176 			/*
177 			 * When we exit the sulogin session, init(1M)
178 			 * will restart svc.startd(1M).
179 			 */
180 			err = errno;
181 			(void) ct_tmpl_clear(ctfd);
182 			(void) close(ctfd);
183 			fork_sulogin(B_TRUE, "Could not fork to start %s: %s\n",
184 			    name, strerror(err));
185 			/* NOTREACHED */
186 		}
187 		(void) sleep(tries);
188 	}
189 
190 	/*
191 	 * Clean up, return pid and ctid.
192 	 */
193 	if (pid != 0 && (errno = contract_latest(ctidp)) != 0)
194 		uu_die("Could not get new contract id for %s\n", name);
195 	(void) ct_tmpl_clear(ctfd);
196 	(void) close(ctfd);
197 
198 	return (pid);
199 }
200 
201 /*
202  * void fork_sulogin(boolean_t, const char *, ...)
203  *   When we are invoked with the -s flag from boot (or run into an unfixable
204  *   situation), we run a private copy of sulogin.  When the sulogin session
205  *   is ended, we continue.  This is the last fallback action for system
206  *   maintenance.
207  *
208  *   If immediate is true, fork_sulogin() executes sulogin(1M) directly, without
209  *   forking.
210  *
211  *   Because fork_sulogin() is needed potentially before we daemonize, we leave
212  *   it outside the wait_register() framework.
213  */
214 /*PRINTFLIKE2*/
215 void
216 fork_sulogin(boolean_t immediate, const char *format, ...)
217 {
218 	va_list args;
219 	int fd_console;
220 
221 	(void) printf("Requesting System Maintenance Mode\n");
222 
223 	if (!booting_to_single_user)
224 		(void) printf("(See /lib/svc/share/README for more "
225 		    "information.)\n");
226 
227 	va_start(args, format);
228 	(void) vprintf(format, args);
229 	va_end(args);
230 
231 	if (!immediate) {
232 		ctid_t	ctid;
233 		pid_t	pid;
234 
235 		pid = fork_common("sulogin", SVC_SULOGIN_FMRI,
236 		    MAX_SULOGIN_RETRIES, &ctid, CT_PR_EV_HWERR, 0,
237 		    CT_PR_EV_HWERR, CT_PR_PGRPONLY, SULOGIN_COOKIE);
238 
239 		if (pid != 0) {
240 			(void) waitpid(pid, NULL, 0);
241 			contract_abandon(ctid);
242 			return;
243 		}
244 		/* close all inherited fds */
245 		closefrom(0);
246 	} else {
247 		(void) printf("Directly executing sulogin.\n");
248 		/*
249 		 * Can't call closefrom() in this MT section
250 		 * so safely close a minimum set of fds.
251 		 */
252 		(void) close(STDIN_FILENO);
253 		(void) close(STDOUT_FILENO);
254 		(void) close(STDERR_FILENO);
255 	}
256 
257 	(void) setpgrp();
258 
259 	/* open the console for sulogin */
260 	if ((fd_console = open("/dev/console", O_RDWR)) >= 0) {
261 		if (fd_console != STDIN_FILENO)
262 			while (dup2(fd_console, STDIN_FILENO) < 0 &&
263 			    errno == EINTR)
264 				;
265 		if (fd_console != STDOUT_FILENO)
266 			while (dup2(fd_console, STDOUT_FILENO) < 0 &&
267 			    errno == EINTR)
268 				;
269 		if (fd_console != STDERR_FILENO)
270 			while (dup2(fd_console, STDERR_FILENO) < 0 &&
271 			    errno == EINTR)
272 				;
273 		if (fd_console > STDERR_FILENO)
274 			(void) close(fd_console);
275 	}
276 
277 	setutxent();
278 	while ((utmpp = getutxent()) != NULL) {
279 		if (strcmp(utmpp->ut_user, "LOGIN") != 0) {
280 			if (strcmp(utmpp->ut_line, "console") == 0) {
281 				(void) kill(utmpp->ut_pid, 9);
282 				break;
283 			}
284 		}
285 	}
286 
287 	(void) execl("/sbin/sulogin", "sulogin", NULL);
288 
289 	uu_warn("Could not exec() sulogin");
290 
291 	exit(1);
292 }
293 
294 #define	CONFIGD_PATH	"/lib/svc/bin/svc.configd"
295 
296 /*
297  * void fork_configd(int status)
298  *   We are interested in exit events (since the parent's exiting means configd
299  *   is ready to run and since the child's exiting indicates an error case) and
300  *   in empty events.  This means we have a unique template for initiating
301  *   configd.
302  */
303 void
304 fork_configd(int exitstatus)
305 {
306 	pid_t pid;
307 	ctid_t ctid = -1;
308 	int err;
309 	char path[PATH_MAX];
310 
311 	/*
312 	 * Checking the existatus for the potential failure of the
313 	 * daemonized svc.configd.  If this is not the first time
314 	 * through, but a call from the svc.configd monitoring thread
315 	 * after a failure this is the status that is expected.  Other
316 	 * failures are exposed during initialization or are fixed
317 	 * by a restart (e.g door closings).
318 	 *
319 	 * If this is on-disk database corruption it will also be
320 	 * caught by a restart but could be cleared before the restart.
321 	 *
322 	 * Or this could be internal database corruption due to a
323 	 * rogue service that needs to be cleared before restart.
324 	 */
325 	if (WEXITSTATUS(exitstatus) == CONFIGD_EXIT_DATABASE_BAD) {
326 		fork_sulogin(B_FALSE, "svc.configd exited with database "
327 		    "corrupt error after initialization of the repository\n");
328 	}
329 
330 retry:
331 	log_framework(LOG_DEBUG, "fork_configd trying to start svc.configd\n");
332 
333 	/*
334 	 * If we're retrying, we will have an old contract lying around
335 	 * from the failure.  Since we're going to be creating a new
336 	 * contract shortly, we abandon the old one now.
337 	 */
338 	if (ctid != -1)
339 		contract_abandon(ctid);
340 	ctid = -1;
341 
342 	pid = fork_common("svc.configd", SCF_SERVICE_CONFIGD,
343 	    MAX_CONFIGD_RETRIES, &ctid, 0, CT_PR_EV_EXIT, 0,
344 	    CT_PR_INHERIT | CT_PR_REGENT, CONFIGD_COOKIE);
345 
346 	if (pid != 0) {
347 		int exitstatus;
348 
349 		st->st_configd_pid = pid;
350 
351 		if (waitpid(pid, &exitstatus, 0) == -1) {
352 			fork_sulogin(B_FALSE, "waitpid on svc.configd "
353 			    "failed: %s\n", strerror(errno));
354 		} else if (WIFEXITED(exitstatus)) {
355 			char *errstr;
356 
357 			/*
358 			 * Examine exitstatus.  This will eventually get more
359 			 * complicated, as we will want to teach startd how to
360 			 * invoke configd with alternate repositories, etc.
361 			 *
362 			 * Note that exec(2) failure results in an exit status
363 			 * of 1, resulting in the default clause below.
364 			 */
365 
366 			/*
367 			 * Assign readable strings to cases we don't handle, or
368 			 * have error outcomes that cannot be eliminated.
369 			 */
370 			switch (WEXITSTATUS(exitstatus)) {
371 			case CONFIGD_EXIT_BAD_ARGS:
372 				errstr = "bad arguments";
373 				break;
374 
375 			case CONFIGD_EXIT_DATABASE_BAD:
376 				errstr = "database corrupt";
377 				break;
378 
379 			case CONFIGD_EXIT_DATABASE_LOCKED:
380 				errstr = "database locked";
381 				break;
382 			case CONFIGD_EXIT_INIT_FAILED:
383 				errstr = "initialization failure";
384 				break;
385 			case CONFIGD_EXIT_DOOR_INIT_FAILED:
386 				errstr = "door initialization failure";
387 				break;
388 			case CONFIGD_EXIT_DATABASE_INIT_FAILED:
389 				errstr = "database initialization failure";
390 				break;
391 			case CONFIGD_EXIT_NO_THREADS:
392 				errstr = "no threads available";
393 				break;
394 			case CONFIGD_EXIT_LOST_MAIN_DOOR:
395 				errstr = "lost door server attachment";
396 				break;
397 			case 1:
398 				errstr = "execution failure";
399 				break;
400 			default:
401 				errstr = "unknown error";
402 				break;
403 			}
404 
405 			/*
406 			 * Remedial actions for various configd failures.
407 			 */
408 			switch (WEXITSTATUS(exitstatus)) {
409 			case CONFIGD_EXIT_OKAY:
410 				break;
411 
412 			case CONFIGD_EXIT_DATABASE_LOCKED:
413 				/* attempt remount of / read-write */
414 				if (fs_is_read_only("/", NULL) == 1) {
415 					if (fs_remount("/") == -1)
416 						fork_sulogin(B_FALSE,
417 						    "remount of root "
418 						    "filesystem failed\n");
419 
420 					goto retry;
421 				}
422 				break;
423 
424 			default:
425 				fork_sulogin(B_FALSE, "svc.configd exited "
426 				    "with status %d (%s)\n",
427 				    WEXITSTATUS(exitstatus), errstr);
428 				goto retry;
429 			}
430 		} else if (WIFSIGNALED(exitstatus)) {
431 			char signame[SIG2STR_MAX];
432 
433 			if (sig2str(WTERMSIG(exitstatus), signame))
434 				(void) snprintf(signame, SIG2STR_MAX,
435 				    "signum %d", WTERMSIG(exitstatus));
436 
437 			fork_sulogin(B_FALSE, "svc.configd signalled:"
438 			    " %s\n", signame);
439 
440 			goto retry;
441 		} else {
442 			fork_sulogin(B_FALSE, "svc.configd non-exit "
443 			    "condition: 0x%x\n", exitstatus);
444 
445 			goto retry;
446 		}
447 
448 		/*
449 		 * Announce that we have a valid svc.configd status.
450 		 */
451 		MUTEX_LOCK(&st->st_configd_live_lock);
452 		st->st_configd_lives = 1;
453 		err = pthread_cond_broadcast(&st->st_configd_live_cv);
454 		assert(err == 0);
455 		MUTEX_UNLOCK(&st->st_configd_live_lock);
456 
457 		log_framework(LOG_DEBUG, "fork_configd broadcasts configd is "
458 		    "live\n");
459 		return;
460 	}
461 
462 	/*
463 	 * Set our per-process core file path to leave core files in
464 	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
465 	 */
466 	(void) snprintf(path, sizeof (path),
467 	    "/etc/svc/volatile/core.configd.%%p");
468 
469 	(void) core_set_process_path(path, strlen(path) + 1, getpid());
470 
471 	log_framework(LOG_DEBUG, "executing svc.configd\n");
472 
473 	(void) execl(CONFIGD_PATH, CONFIGD_PATH, NULL);
474 
475 	/*
476 	 * Status code is used above to identify configd exec failure.
477 	 */
478 	exit(1);
479 }
480 
481 void *
482 fork_configd_thread(void *vctid)
483 {
484 	int fd, err;
485 	ctid_t configd_ctid = (ctid_t)vctid;
486 
487 	if (configd_ctid == -1) {
488 		log_framework(LOG_DEBUG,
489 		    "fork_configd_thread starting svc.configd\n");
490 		fork_configd(0);
491 	} else {
492 		/*
493 		 * configd_ctid is known:  we broadcast and continue.
494 		 * test contract for appropriate state by verifying that
495 		 * there is one or more processes within it?
496 		 */
497 		log_framework(LOG_DEBUG,
498 		    "fork_configd_thread accepting svc.configd with CTID %ld\n",
499 		    configd_ctid);
500 		MUTEX_LOCK(&st->st_configd_live_lock);
501 		st->st_configd_lives = 1;
502 		(void) pthread_cond_broadcast(&st->st_configd_live_cv);
503 		MUTEX_UNLOCK(&st->st_configd_live_lock);
504 	}
505 
506 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
507 	if (fd == -1)
508 		uu_die("process bundle open failed");
509 
510 	/*
511 	 * Make sure we get all events (including those generated by configd
512 	 * before this thread was started).
513 	 */
514 	err = ct_event_reset(fd);
515 	assert(err == 0);
516 
517 	for (;;) {
518 		int efd, sfd;
519 		ct_evthdl_t ev;
520 		uint32_t type;
521 		ctevid_t evid;
522 		ct_stathdl_t status;
523 		ctid_t ctid;
524 		uint64_t cookie;
525 		pid_t pid;
526 
527 		if (err = ct_event_read_critical(fd, &ev)) {
528 			assert(err != EINVAL && err != EAGAIN);
529 			log_error(LOG_WARNING,
530 			    "Error reading next contract event: %s",
531 			    strerror(err));
532 			continue;
533 		}
534 
535 		evid = ct_event_get_evid(ev);
536 		ctid = ct_event_get_ctid(ev);
537 		type = ct_event_get_type(ev);
538 
539 		/* Fetch cookie. */
540 		sfd = contract_open(ctid, "process", "status", O_RDONLY);
541 		if (sfd < 0) {
542 			ct_event_free(ev);
543 			continue;
544 		}
545 
546 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
547 			log_framework(LOG_WARNING, "Could not get status for "
548 			    "contract %ld: %s\n", ctid, strerror(err));
549 
550 			ct_event_free(ev);
551 			startd_close(sfd);
552 			continue;
553 		}
554 
555 		cookie = ct_status_get_cookie(status);
556 
557 		ct_status_free(status);
558 
559 		startd_close(sfd);
560 
561 		/*
562 		 * Don't process events from contracts we aren't interested in.
563 		 */
564 		if (cookie != CONFIGD_COOKIE) {
565 			ct_event_free(ev);
566 			continue;
567 		}
568 
569 		if (type == CT_PR_EV_EXIT) {
570 			int exitstatus;
571 
572 			(void) ct_pr_event_get_pid(ev, &pid);
573 			(void) ct_pr_event_get_exitstatus(ev,
574 			    &exitstatus);
575 
576 			if (st->st_configd_pid != pid) {
577 				/*
578 				 * This is the child exiting, so we
579 				 * abandon the contract and restart
580 				 * configd.
581 				 */
582 				contract_abandon(ctid);
583 				fork_configd(exitstatus);
584 			}
585 		}
586 
587 		efd = contract_open(ctid, "process", "ctl", O_WRONLY);
588 		if (efd != -1) {
589 			(void) ct_ctl_ack(efd, evid);
590 			startd_close(efd);
591 		}
592 
593 		ct_event_free(ev);
594 
595 	}
596 
597 	/*NOTREACHED*/
598 	return (NULL);
599 }
600 
601 void
602 fork_rc_script(char rl, const char *arg, boolean_t wait)
603 {
604 	pid_t pid;
605 	int tmpl, err, stat;
606 	char path[20] = "/sbin/rc.", log[20] = "rc..log", timebuf[20];
607 	time_t now;
608 	struct tm ltime;
609 	size_t sz;
610 	char *pathenv;
611 	char **nenv;
612 
613 	path[8] = rl;
614 
615 	tmpl = open64(CTFS_ROOT "/process/template", O_RDWR);
616 	if (tmpl >= 0) {
617 		err = ct_tmpl_set_critical(tmpl, 0);
618 		assert(err == 0);
619 
620 		err = ct_tmpl_set_informative(tmpl, 0);
621 		assert(err == 0);
622 
623 		err = ct_pr_tmpl_set_fatal(tmpl, 0);
624 		assert(err == 0);
625 
626 		err = ct_tmpl_activate(tmpl);
627 		assert(err == 0);
628 
629 		err = close(tmpl);
630 		assert(err == 0);
631 	} else {
632 		uu_warn("Could not create contract template for %s.\n", path);
633 	}
634 
635 	pid = startd_fork1(NULL);
636 	if (pid < 0) {
637 		return;
638 	} else if (pid != 0) {
639 		/* parent */
640 		if (wait) {
641 			do
642 				err = waitpid(pid, &stat, 0);
643 			while (err != 0 && errno == EINTR)
644 				;
645 
646 			if (!WIFEXITED(stat)) {
647 				log_framework(LOG_INFO,
648 				    "%s terminated with waitpid() status %d.\n",
649 				    path, stat);
650 			} else if (WEXITSTATUS(stat) != 0) {
651 				log_framework(LOG_INFO,
652 				    "%s failed with status %d.\n", path,
653 				    WEXITSTATUS(stat));
654 			}
655 		}
656 
657 		return;
658 	}
659 
660 	/* child */
661 
662 	log[2] = rl;
663 
664 	setlog(log);
665 
666 	now = time(NULL);
667 	sz = strftime(timebuf, sizeof (timebuf), "%b %e %T",
668 	    localtime_r(&now, &ltime));
669 	assert(sz != 0);
670 
671 	(void) fprintf(stderr, "%s Executing %s %s\n", timebuf, path, arg);
672 
673 	if (rl == 'S')
674 		pathenv = "PATH=/sbin:/usr/sbin:/usr/bin";
675 	else
676 		pathenv = "PATH=/usr/sbin:/usr/bin";
677 
678 	nenv = set_smf_env(NULL, 0, pathenv, NULL, NULL);
679 
680 	(void) execle(path, path, arg, 0, nenv);
681 
682 	perror("exec");
683 	exit(0);
684 }
685 
/*
 * Paths used by the Early Manifest Import (EMI) code in this file:
 *   SVCCFG_PATH - fork_emi() checks that this file is accessible before
 *                 attempting the import.
 *   EMI_MFST    - manifest whose presence emi_is_disabled() checks when the
 *                 EMI instance is missing from the repository.
 *   EMI_PATH    - program that the fork_emi() child executes.
 */
#define	SVCCFG_PATH	"/usr/sbin/svccfg"
#define	EMI_MFST	"/lib/svc/manifest/system/early-manifest-import.xml"
#define	EMI_PATH	"/lib/svc/method/manifest-import"
689 
690 /*
691  * Set Early Manifest Import service's state and log file.
692  */
693 static int
694 emi_set_state(restarter_instance_state_t state, boolean_t setlog)
695 {
696 	int r, ret = 1;
697 	instance_data_t idata;
698 	scf_handle_t *hndl = NULL;
699 	scf_instance_t *inst = NULL;
700 
701 retry:
702 	if (hndl == NULL)
703 		hndl = libscf_handle_create_bound(SCF_VERSION);
704 
705 	if (hndl == NULL) {
706 		/*
707 		 * In the case that we can't bind to the repository
708 		 * (which should have been started), we need to allow
709 		 * the user into maintenance mode to determine what's
710 		 * failed.
711 		 */
712 		fork_sulogin(B_FALSE, "Unable to bind a new repository"
713 		    " handle: %s\n", scf_strerror(scf_error()));
714 		goto retry;
715 	}
716 
717 	if (inst == NULL)
718 		inst = safe_scf_instance_create(hndl);
719 
720 	if (scf_handle_decode_fmri(hndl, SCF_INSTANCE_EMI, NULL, NULL,
721 	    inst, NULL, NULL, SCF_DECODE_FMRI_EXACT) == -1) {
722 		switch (scf_error()) {
723 		case SCF_ERROR_NOT_FOUND:
724 			goto out;
725 
726 		case SCF_ERROR_CONNECTION_BROKEN:
727 		case SCF_ERROR_NOT_BOUND:
728 			libscf_handle_rebind(hndl);
729 			goto retry;
730 
731 		default:
732 			fork_sulogin(B_FALSE, "Couldn't fetch %s service: "
733 			    "%s\n", SCF_INSTANCE_EMI,
734 			    scf_strerror(scf_error()));
735 			goto retry;
736 		}
737 	}
738 
739 	if (setlog) {
740 		(void) libscf_note_method_log(inst, st->st_log_prefix, EMI_LOG);
741 		log_framework(LOG_DEBUG,
742 		    "Set logfile property for %s\n", SCF_INSTANCE_EMI);
743 	}
744 
745 	idata.i_fmri = SCF_INSTANCE_EMI;
746 	idata.i_state =  RESTARTER_STATE_NONE;
747 	idata.i_next_state = RESTARTER_STATE_NONE;
748 	switch (r = _restarter_commit_states(hndl, &idata, state,
749 	    RESTARTER_STATE_NONE, NULL)) {
750 	case 0:
751 		break;
752 
753 	case ECONNABORTED:
754 		libscf_handle_rebind(hndl);
755 		goto retry;
756 
757 	case ENOMEM:
758 	case ENOENT:
759 	case EPERM:
760 	case EACCES:
761 	case EROFS:
762 		fork_sulogin(B_FALSE, "Could not set state of "
763 		    "%s: %s\n", SCF_INSTANCE_EMI, strerror(r));
764 		goto retry;
765 		break;
766 
767 	case EINVAL:
768 	default:
769 		bad_error("_restarter_commit_states", r);
770 	}
771 	ret = 0;
772 
773 out:
774 	scf_instance_destroy(inst);
775 	scf_handle_destroy(hndl);
776 	return (ret);
777 }
778 
779 /*
780  * It is possible that the early-manifest-import service is disabled.  This
781  * would not be the normal case for Solaris, but it may happen on dedicated
782  * systems.  So this function checks the state of the general/enabled
783  * property for Early Manifest Import.
784  *
785  * It is also possible that the early-manifest-import service does not yet
786  * have a repository representation when this function runs.  This happens
787  * if non-Early Manifest Import system is upgraded to an Early Manifest
788  * Import based system.  Thus, the non-existence of general/enabled is not
789  * an error.
790  *
791  * Returns 1 if Early Manifest Import is disabled and 0 otherwise.
792  */
793 static int
794 emi_is_disabled()
795 {
796 	int disabled = 0;
797 	int disconnected = 1;
798 	int enabled;
799 	scf_handle_t *hndl = NULL;
800 	scf_instance_t *inst = NULL;
801 	uchar_t stored_hash[MHASH_SIZE];
802 	char *pname;
803 	int hashash, r;
804 
805 	while (hndl == NULL) {
806 		hndl = libscf_handle_create_bound(SCF_VERSION);
807 
808 		if (hndl == NULL) {
809 			/*
810 			 * In the case that we can't bind to the repository
811 			 * (which should have been started), we need to
812 			 * allow the user into maintenance mode to
813 			 * determine what's failed.
814 			 */
815 			fork_sulogin(B_FALSE, "Unable to bind a new repository "
816 			    "handle: %s\n", scf_strerror(scf_error()));
817 		}
818 	}
819 
820 	while (disconnected) {
821 		r = libscf_fmri_get_instance(hndl, SCF_INSTANCE_EMI, &inst);
822 		if (r != 0) {
823 			switch (r) {
824 			case ECONNABORTED:
825 				libscf_handle_rebind(hndl);
826 				continue;
827 
828 			case ENOENT:
829 				/*
830 				 * Early Manifest Import service is not in
831 				 * the repository. Check the manifest file
832 				 * and service's hash in smf/manifest to
833 				 * figure out whether Early Manifest Import
834 				 * service was deleted. If Early Manifest Import
835 				 * service was deleted, treat that as a disable
836 				 * and don't run early import.
837 				 */
838 
839 				if (access(EMI_MFST, F_OK)) {
840 					/*
841 					 * Manifest isn't found, so service is
842 					 * properly removed.
843 					 */
844 					disabled = 1;
845 				} else {
846 					/*
847 					 * If manifest exists and we have the
848 					 * hash, the service was improperly
849 					 * deleted, generate a warning and treat
850 					 * this as a disable.
851 					 */
852 
853 					if ((pname = mhash_filename_to_propname(
854 					    EMI_MFST, B_TRUE)) == NULL) {
855 						/*
856 						 * Treat failure to get propname
857 						 * as a disable.
858 						 */
859 						disabled = 1;
860 						uu_warn("Failed to get propname"
861 						    " for %s.\n",
862 						    SCF_INSTANCE_EMI);
863 					} else {
864 						hashash = mhash_retrieve_entry(
865 						    hndl, pname,
866 						    stored_hash,
867 						    NULL) == 0;
868 						uu_free(pname);
869 
870 						if (hashash) {
871 							disabled = 1;
872 							uu_warn("%s service is "
873 							    "deleted \n",
874 							    SCF_INSTANCE_EMI);
875 						}
876 					}
877 
878 				}
879 
880 				disconnected = 0;
881 				continue;
882 
883 			default:
884 				bad_error("libscf_fmri_get_instance",
885 				    scf_error());
886 			}
887 		}
888 		r = libscf_get_basic_instance_data(hndl, inst, SCF_INSTANCE_EMI,
889 		    &enabled, NULL, NULL);
890 		if (r == 0) {
891 			/*
892 			 * enabled can be returned as -1, which indicates
893 			 * that the enabled property was not found.  To us
894 			 * that means that the service was not disabled.
895 			 */
896 			if (enabled == 0)
897 				disabled = 1;
898 		} else {
899 			switch (r) {
900 			case ECONNABORTED:
901 				libscf_handle_rebind(hndl);
902 				continue;
903 
904 			case ECANCELED:
905 			case ENOENT:
906 				break;
907 			default:
908 				bad_error("libscf_get_basic_instance_data", r);
909 			}
910 		}
911 		disconnected = 0;
912 	}
913 
914 out:
915 	if (inst != NULL)
916 		scf_instance_destroy(inst);
917 	scf_handle_destroy(hndl);
918 	return (disabled);
919 }
920 
921 void
922 fork_emi()
923 {
924 	pid_t pid;
925 	ctid_t ctid = -1;
926 	char **envp, **np;
927 	char *emipath;
928 	char corepath[PATH_MAX];
929 	char *svc_state;
930 	int setemilog;
931 	int sz;
932 
933 	if (emi_is_disabled()) {
934 		log_framework(LOG_NOTICE, "%s is  disabled and will "
935 		    "not be run.\n", SCF_INSTANCE_EMI);
936 		return;
937 	}
938 
939 	/*
940 	 * Early Manifest Import should run only once, at boot. If svc.startd
941 	 * is some how restarted, Early Manifest Import  should not run again.
942 	 * Use the Early Manifest Import service's state to figure out whether
943 	 * Early Manifest Import has successfully completed earlier and bail
944 	 * out if it did.
945 	 */
946 	if (svc_state = smf_get_state(SCF_INSTANCE_EMI)) {
947 		if (strcmp(svc_state, SCF_STATE_STRING_ONLINE) == 0) {
948 			free(svc_state);
949 			return;
950 		}
951 		free(svc_state);
952 	}
953 
954 	/*
955 	 * Attempt to set Early Manifest Import service's state and log file.
956 	 * If emi_set_state fails, set log file again in the next call to
957 	 * emi_set_state.
958 	 */
959 	setemilog = emi_set_state(RESTARTER_STATE_OFFLINE, B_TRUE);
960 
961 	/* Don't go further if /usr isn't available */
962 	if (access(SVCCFG_PATH, F_OK)) {
963 		log_framework(LOG_NOTICE, "Early Manifest Import is not "
964 		    "supported on systems with a separate /usr filesystem.\n");
965 		return;
966 	}
967 
968 fork_retry:
969 	log_framework(LOG_DEBUG, "Starting Early Manifest Import\n");
970 
971 	/*
972 	 * If we're retrying, we will have an old contract lying around
973 	 * from the failure.  Since we're going to be creating a new
974 	 * contract shortly, we abandon the old one now.
975 	 */
976 	if (ctid != -1)
977 		contract_abandon(ctid);
978 	ctid = -1;
979 
980 	pid = fork_common(SCF_INSTANCE_EMI, SCF_INSTANCE_EMI,
981 	    MAX_EMI_RETRIES, &ctid, 0, 0, 0, 0, EMI_COOKIE);
982 
983 	if (pid != 0) {
984 		int exitstatus;
985 
986 		if (waitpid(pid, &exitstatus, 0) == -1) {
987 			fork_sulogin(B_FALSE, "waitpid on %s failed: "
988 			    "%s\n", SCF_INSTANCE_EMI, strerror(errno));
989 		} else if (WIFEXITED(exitstatus)) {
990 			if (WEXITSTATUS(exitstatus)) {
991 				fork_sulogin(B_FALSE, "%s exited with status "
992 				    "%d \n", SCF_INSTANCE_EMI,
993 				    WEXITSTATUS(exitstatus));
994 				goto fork_retry;
995 			}
996 		} else if (WIFSIGNALED(exitstatus)) {
997 			char signame[SIG2STR_MAX];
998 
999 			if (sig2str(WTERMSIG(exitstatus), signame))
1000 				(void) snprintf(signame, SIG2STR_MAX,
1001 				    "signum %d", WTERMSIG(exitstatus));
1002 
1003 			fork_sulogin(B_FALSE, "%s signalled: %s\n",
1004 			    SCF_INSTANCE_EMI, signame);
1005 			goto fork_retry;
1006 		} else {
1007 			fork_sulogin(B_FALSE, "%s non-exit condition: 0x%x\n",
1008 			    SCF_INSTANCE_EMI, exitstatus);
1009 			goto fork_retry;
1010 		}
1011 
1012 		log_framework(LOG_DEBUG, "%s completed successfully\n",
1013 		    SCF_INSTANCE_EMI);
1014 
1015 		/*
1016 		 * Once Early Manifest Import completed, the Early Manifest
1017 		 * Import service must have been imported so set log file and
1018 		 * state properties. Since this information is required for
1019 		 * late manifest import and common admin operations, failing to
1020 		 * set these properties should result in su login so admin can
1021 		 * correct the problem.
1022 		 */
1023 		(void) emi_set_state(RESTARTER_STATE_ONLINE,
1024 		    setemilog ? B_TRUE : B_FALSE);
1025 
1026 		return;
1027 	}
1028 
1029 	/* child */
1030 
1031 	/*
1032 	 * Set our per-process core file path to leave core files in
1033 	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
1034 	 */
1035 	(void) snprintf(corepath, sizeof (corepath),
1036 	    "/etc/svc/volatile/core.emi.%%p");
1037 	(void) core_set_process_path(corepath, strlen(corepath) + 1, getpid());
1038 
1039 	/*
1040 	 * Similar to running legacy services, we need to manually set
1041 	 * log files here and environment variables.
1042 	 */
1043 	setlog(EMI_LOG);
1044 
1045 	envp = startd_zalloc(sizeof (char *) * 3);
1046 	np = envp;
1047 
1048 	sz = sizeof ("SMF_FMRI=") + strlen(SCF_INSTANCE_EMI);
1049 	*np = startd_zalloc(sz);
1050 	(void) strlcpy(*np, "SMF_FMRI=", sz);
1051 	(void) strncat(*np, SCF_INSTANCE_EMI, sz);
1052 	np++;
1053 
1054 	emipath = getenv("PATH");
1055 	if (emipath == NULL)
1056 		emipath = strdup("/usr/sbin:/usr/bin");
1057 
1058 	sz = sizeof ("PATH=") + strlen(emipath);
1059 	*np = startd_zalloc(sz);
1060 	(void) strlcpy(*np, "PATH=", sz);
1061 	(void) strncat(*np, emipath, sz);
1062 
1063 	log_framework(LOG_DEBUG, "executing Early Manifest Import\n");
1064 	(void) execle(EMI_PATH, EMI_PATH, NULL, envp);
1065 
1066 	/*
1067 	 * Status code is used above to identify Early Manifest Import
1068 	 * exec failure.
1069 	 */
1070 	exit(1);
1071 }
1072 
1073 extern char **environ;
1074 
1075 /*
1076  * A local variation on system(3c) which accepts a timeout argument.  This
1077  * allows us to better ensure that the system will actually shut down.
1078  *
1079  * gracetime specifies an amount of time in seconds which the routine must wait
1080  * after the command exits, to allow for asynchronous effects (like sent
1081  * signals) to take effect.  This can be zero.
1082  */
1083 void
1084 fork_with_timeout(const char *cmd, uint_t gracetime, uint_t timeout)
1085 {
1086 	int err = 0;
1087 	pid_t pid;
1088 	char *argv[4];
1089 	posix_spawnattr_t attr;
1090 	posix_spawn_file_actions_t factions;
1091 
1092 	sigset_t mask, savemask;
1093 	uint_t msec_timeout;
1094 	uint_t msec_spent = 0;
1095 	uint_t msec_gracetime;
1096 	int status;
1097 
1098 	msec_timeout = timeout * 1000;
1099 	msec_gracetime = gracetime * 1000;
1100 
1101 	/*
1102 	 * See also system(3c) in libc.  This is very similar, except
1103 	 * that we avoid some unneeded complexity.
1104 	 */
1105 	err = posix_spawnattr_init(&attr);
1106 	if (err == 0)
1107 		err = posix_spawnattr_setflags(&attr,
1108 		    POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF |
1109 		    POSIX_SPAWN_NOSIGCHLD_NP | POSIX_SPAWN_WAITPID_NP |
1110 		    POSIX_SPAWN_NOEXECERR_NP);
1111 
1112 	/*
1113 	 * We choose to close fd's above 2, a deviation from system.
1114 	 */
1115 	if (err == 0)
1116 		err = posix_spawn_file_actions_init(&factions);
1117 	if (err == 0)
1118 		err = posix_spawn_file_actions_addclosefrom_np(&factions,
1119 		    STDERR_FILENO + 1);
1120 
1121 	(void) sigemptyset(&mask);
1122 	(void) sigaddset(&mask, SIGCHLD);
1123 	(void) thr_sigsetmask(SIG_BLOCK, &mask, &savemask);
1124 
1125 	argv[0] = "/bin/sh";
1126 	argv[1] = "-c";
1127 	argv[2] = (char *)cmd;
1128 	argv[3] = NULL;
1129 
1130 	if (err == 0)
1131 		err = posix_spawn(&pid, "/bin/sh", &factions, &attr,
1132 		    (char *const *)argv, (char *const *)environ);
1133 
1134 	(void) posix_spawnattr_destroy(&attr);
1135 	(void) posix_spawn_file_actions_destroy(&factions);
1136 
1137 	if (err) {
1138 		uu_warn("Failed to spawn %s: %s\n", cmd, strerror(err));
1139 	} else {
1140 		for (;;) {
1141 			int w;
1142 			w = waitpid(pid, &status, WNOHANG);
1143 			if (w == -1 && errno != EINTR)
1144 				break;
1145 			if (w > 0) {
1146 				/*
1147 				 * Command succeeded, so give it gracetime
1148 				 * seconds for it to have an effect.
1149 				 */
1150 				if (status == 0 && msec_gracetime != 0)
1151 					(void) poll(NULL, 0, msec_gracetime);
1152 				break;
1153 			}
1154 
1155 			(void) poll(NULL, 0, 100);
1156 			msec_spent += 100;
1157 			/*
1158 			 * If we timed out, kill off the process, then try to
1159 			 * wait for it-- it's possible that we could accumulate
1160 			 * a zombie here since we don't allow waitpid to hang,
1161 			 * but it's better to let that happen and continue to
1162 			 * make progress.
1163 			 */
1164 			if (msec_spent >= msec_timeout) {
1165 				uu_warn("'%s' timed out after %d "
1166 				    "seconds.  Killing.\n", cmd,
1167 				    timeout);
1168 				(void) kill(pid, SIGTERM);
1169 				(void) poll(NULL, 0, 100);
1170 				(void) kill(pid, SIGKILL);
1171 				(void) poll(NULL, 0, 100);
1172 				(void) waitpid(pid, &status, WNOHANG);
1173 				break;
1174 			}
1175 		}
1176 	}
1177 	(void) thr_sigsetmask(SIG_BLOCK, &savemask, NULL);
1178 }
1179