xref: /illumos-gate/usr/src/cmd/init/init.c (revision a92282e44f968185a6bba094d1e5fece2da819cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2020 Oxide Computer Company
24  * Copyright (c) 2013 Gary Mills
25  *
26  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
27  */
28 
29 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30 /*	  All Rights Reserved  	*/
31 
32 /*
33  * University Copyright- Copyright (c) 1982, 1986, 1988
34  * The Regents of the University of California
35  * All Rights Reserved
36  *
37  * University Acknowledgment- Portions of this document are derived from
38  * software developed by the University of California, Berkeley, and its
39  * contributors.
40  */
41 
42 /*
43  * init(1M) is the general process spawning program.  Its primary job is to
44  * start and restart svc.startd for smf(5).  For backwards-compatibility it also
45  * spawns and respawns processes according to /etc/inittab and the current
46  * run-level.  It reads /etc/default/inittab for general configuration.
47  *
48  * To change run-levels the system administrator runs init from the command
49  * line with a level name.  init signals svc.startd via libscf and directs the
50  * zone's init (pid 1 in the global zone) what to do by sending it a signal;
51  * these signal numbers are commonly referred to in the code as 'states'.  Valid
52  * run-levels are [sS0123456].  Additionally, init can be given directives
53  * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
54  *
55  * When init processes inittab entries, it finds processes that are to be
56  * spawned at various run-levels.  inittab contains the set of the levels for
57  * which each inittab entry is valid.
58  *
59  * State File and Restartability
60  *   Premature exit by init(1M) is handled as a special case by the kernel:
61  *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
62  *   1 in the global zone.)  To track the processes it has previously spawned,
63  *   as well as other mutable state, init(1M) regularly updates a state file
64  *   such that its subsequent invocations have knowledge of its various
65  *   dependent processes and duties.
66  *
67  * Process Contracts
68  *   We start svc.startd(1M) in a contract and transfer inherited contracts when
69  *   restarting it.  Everything else is started using the legacy contract
70  *   template, and the created contracts are abandoned when they become empty.
71  *
72  * utmpx Entry Handling
73  *   Because init(1M) no longer governs the startup process, its knowledge of
74  *   when utmpx becomes writable is indirect.  However, spawned processes
75  *   expect to be constructed with valid utmpx entries.  As a result, attempts
76  *   to write normal entries will be retried until successful.
77  *
78  * Maintenance Mode
79  *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
80  *   which it invokes sulogin(1M) to allow the operator an opportunity to
81  *   repair the system.  Normally, this operation is performed as a
82  *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
83  *   diagnosis to be completed.  In the cases that fork(2) requests themselves
84  *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
85  *   restart init(1M) on exit from the operator session.
86  *
87  *   One scenario where init(1M) enters its maintenance mode is when
88  *   svc.startd(1M) begins to fail rapidly, defined as when the average time
89  *   between recent failures drops below a given threshold.
90  */
91 
92 #include <sys/contract/process.h>
93 #include <sys/ctfs.h>
94 #include <sys/stat.h>
95 #include <sys/statvfs.h>
96 #include <sys/stropts.h>
97 #include <sys/systeminfo.h>
98 #include <sys/time.h>
99 #include <sys/termios.h>
100 #include <sys/tty.h>
101 #include <sys/types.h>
102 #include <sys/utsname.h>
103 #include <sys/bootbanner.h>
104 
105 #include <bsm/adt_event.h>
106 #include <bsm/libbsm.h>
107 #include <security/pam_appl.h>
108 
109 #include <assert.h>
110 #include <ctype.h>
111 #include <dirent.h>
112 #include <errno.h>
113 #include <fcntl.h>
114 #include <libcontract.h>
115 #include <libcontract_priv.h>
116 #include <libintl.h>
117 #include <libscf.h>
118 #include <libscf_priv.h>
119 #include <poll.h>
120 #include <procfs.h>
121 #include <signal.h>
122 #include <stdarg.h>
123 #include <stdio.h>
124 #include <stdio_ext.h>
125 #include <stdlib.h>
126 #include <string.h>
127 #include <strings.h>
128 #include <syslog.h>
129 #include <time.h>
130 #include <ulimit.h>
131 #include <unistd.h>
132 #include <utmpx.h>
133 #include <wait.h>
134 #include <zone.h>
135 #include <ucontext.h>
136 
137 #undef	sleep
138 
/*
 * fioctl() issues an ioctl(2) on the file descriptor underlying a stdio
 * stream; its second and third arguments are handed to ioctl() unchanged and
 * in the same order.
 *
 * min() yields the smaller of its two arguments.  Either argument may be
 * evaluated twice, so do not pass expressions with side effects.
 */
#define	fioctl(stream, arg2, arg3)	ioctl(fileno(stream), arg2, arg3)
#define	min(x, y)			((x) < (y) ? (x) : (y))
141 
142 #define	TRUE	1
143 #define	FALSE	0
144 #define	FAILURE	-1
145 
146 #define	UT_USER_SZ	32	/* Size of a utmpx ut_user field */
147 #define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */
148 
149 /*
150  * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
151  *		nothing else requires this "init" wakeup.
152  */
153 #define	SLEEPTIME	(5 * 60)
154 
155 /*
156  * MAXCMDL	The maximum length of a command string in inittab.
157  */
158 #define	MAXCMDL	512
159 
160 /*
161  * EXEC		The length of the prefix string added to all commands
162  *		found in inittab.
163  */
164 #define	EXEC	(sizeof ("exec ") - 1)
165 
166 /*
167  * TWARN	The amount of time between warning signal, SIGTERM,
168  *		and the fatal kill signal, SIGKILL.
169  */
170 #define	TWARN	5
171 
/*
 * id_eq(x, y) compares two four-character inittab entry ids, which need not
 * be NUL-terminated.  It evaluates to 1 (TRUE) when all four characters
 * match and to 0 (FALSE) otherwise.
 *
 * Both arguments are fully parenthesized so that expressions such as
 * "ptr + 1" may be passed safely; each argument may be evaluated up to
 * four times, so avoid side effects.
 */
#define	id_eq(x, y)	((x)[0] == (y)[0] && (x)[1] == (y)[1] && \
			(x)[2] == (y)[2] && (x)[3] == (y)[3])
174 
175 /*
176  * The kernel's default umask is 022 these days; since some processes inherit
177  * their umask from init, init will set it from CMASK in /etc/default/init.
178  * init gets the default umask from the kernel; it sets the umask to 022
179  * whenever it wants to create a file and reverts to CMASK afterwards.
180  */
181 
182 static int cmask;
183 
184 /*
185  * The following definitions, concluding with the 'lvls' array, provide a
186  * common mapping between level-name (like 'S'), signal number (state),
187  * run-level mask, and specific properties associated with a run-level.
188  * This array should be accessed using the routines lvlname_to_state(),
189  * lvlname_to_mask(), state_to_mask(), and state_to_flags().
190  */
191 
192 /*
193  * Correspondence of signals to init actions.
194  */
195 #define	LVLQ		SIGHUP
196 #define	LVL0		SIGINT
197 #define	LVL1		SIGQUIT
198 #define	LVL2		SIGILL
199 #define	LVL3		SIGTRAP
200 #define	LVL4		SIGIOT
201 #define	LVL5		SIGEMT
202 #define	LVL6		SIGFPE
203 #define	SINGLE_USER	SIGBUS
204 #define	LVLa		SIGSEGV
205 #define	LVLb		SIGSYS
206 #define	LVLc		SIGPIPE
207 
208 /*
209  * Bit Mask for each level.  Used to determine legal levels.
210  */
211 #define	MASK0	0x0001
212 #define	MASK1	0x0002
213 #define	MASK2	0x0004
214 #define	MASK3	0x0008
215 #define	MASK4	0x0010
216 #define	MASK5	0x0020
217 #define	MASK6	0x0040
218 #define	MASKSU	0x0080
219 #define	MASKa	0x0100
220 #define	MASKb	0x0200
221 #define	MASKc	0x0400
222 
223 #define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
224 #define	MASK_abc (MASKa | MASKb | MASKc)
225 
226 /*
227  * Flags to indicate properties of various states.
228  */
229 #define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */
230 
231 typedef struct lvl {
232 	int	lvl_state;
233 	int	lvl_mask;
234 	char	lvl_name;
235 	int	lvl_flags;
236 } lvl_t;
237 
238 static lvl_t lvls[] = {
239 	{ LVLQ,		0,	'Q', 0					},
240 	{ LVLQ,		0,	'q', 0					},
241 	{ LVL0,		MASK0,	'0', LSEL_RUNLEVEL			},
242 	{ LVL1, 	MASK1,	'1', LSEL_RUNLEVEL			},
243 	{ LVL2, 	MASK2,	'2', LSEL_RUNLEVEL			},
244 	{ LVL3, 	MASK3,	'3', LSEL_RUNLEVEL			},
245 	{ LVL4, 	MASK4,	'4', LSEL_RUNLEVEL			},
246 	{ LVL5, 	MASK5,	'5', LSEL_RUNLEVEL			},
247 	{ LVL6, 	MASK6, 	'6', LSEL_RUNLEVEL			},
248 	{ SINGLE_USER, 	MASKSU, 'S', LSEL_RUNLEVEL			},
249 	{ SINGLE_USER, 	MASKSU, 's', LSEL_RUNLEVEL			},
250 	{ LVLa,		MASKa,	'a', 0					},
251 	{ LVLb,		MASKb,	'b', 0					},
252 	{ LVLc,		MASKc,	'c', 0					}
253 };
254 
255 #define	LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
256 
257 /*
258  * Legal action field values.
259  */
260 #define	OFF		0	/* Kill process if on, else ignore */
261 #define	RESPAWN		1	/* Continuously restart process when it dies */
262 #define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
263 #define	ONCE		2	/* Start process, do not respawn when dead */
264 #define	WAIT		3	/* Perform once and wait to complete */
265 #define	BOOT		4	/* Start at boot time only */
266 #define	BOOTWAIT	5	/* Start at boot time and wait to complete */
267 #define	POWERFAIL	6	/* Start on powerfail */
268 #define	POWERWAIT	7	/* Start and wait for complete on powerfail */
269 #define	INITDEFAULT	8	/* Default level "init" should start at */
270 #define	SYSINIT		9	/* Actions performed before init speaks */
271 
272 #define	M_OFF		0001
273 #define	M_RESPAWN	0002
274 #define	M_ONDEMAND	M_RESPAWN
275 #define	M_ONCE		0004
276 #define	M_WAIT		0010
277 #define	M_BOOT		0020
278 #define	M_BOOTWAIT	0040
279 #define	M_PF		0100
280 #define	M_PWAIT		0200
281 #define	M_INITDEFAULT	0400
282 #define	M_SYSINIT	01000
283 
284 /* States for the inittab parser in getcmd(). */
285 #define	ID	1
286 #define	LEVELS	2
287 #define	ACTION	3
288 #define	COMMAND	4
289 #define	COMMENT	5
290 
291 /*
292  * inittab entry id constants
293  */
294 #define	INITTAB_ENTRY_ID_SIZE 4
295 #define	INITTAB_ENTRY_ID_STR_FORMAT "%.4s"	/* if INITTAB_ENTRY_ID_SIZE */
296 						/* changes, this should */
297 						/* change accordingly */
298 
299 /*
300  * Init can be in any of three main states, "normal" mode where it is
301  * processing entries for the lines file in a normal fashion, "boot" mode,
302  * where it is only interested in the boot actions, and "powerfail" mode,
303  * where it is only interested in powerfail related actions. The following
304  * masks declare the legal actions for each mode.
305  */
306 #define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
307 #define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
308 #define	PF_MODES	(M_PF | M_PWAIT)
309 
310 struct PROC_TABLE {
311 	char	p_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
312 						/* process */
313 	pid_t	p_pid;		/* Process id */
314 	short	p_count;	/* How many respawns of this command in */
315 				/*   the current series */
316 	long	p_time;		/* Start time for a series of respawns */
317 	short	p_flags;
318 	short	p_exit;		/* Exit status of a process which died */
319 };
320 
321 /*
322  * Flags for the "p_flags" word of a PROC_TABLE entry:
323  *
324  *	OCCUPIED	This slot in init's proc table is in use.
325  *
326  *	LIVING		Process is alive.
327  *
328  *	NOCLEANUP	efork() is not allowed to cleanup this entry even
329  *			if process is dead.
330  *
331  *	NAMED		This process has a name, i.e. came from inittab.
332  *
333  *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
334  *			formed this way are respawnable and immune to level
335  *			changes as long as their entry exists in inittab.
336  *
337  *	TOUCHED		Flag used by remv() to determine whether it has looked
338  *			at an entry while checking for processes to be killed.
339  *
340  *	WARNED		Flag used by remv() to mark processes that have been
341  *			sent the SIGTERM signal.  If they don't die in 5
342  *			seconds, they are sent the SIGKILL signal.
343  *
344  *	KILLED		Flag used by remv() to mark procs that have been sent
345  *			the SIGTERM and SIGKILL signals.
346  *
347  *	PF_MASK		Bitwise or of legal flags, for sanity checking.
348  */
349 #define	OCCUPIED	01
350 #define	LIVING		02
351 #define	NOCLEANUP	04
352 #define	NAMED		010
353 #define	DEMANDREQUEST	020
354 #define	TOUCHED		040
355 #define	WARNED		0100
356 #define	KILLED		0200
357 #define	PF_MASK		0377
358 
359 /*
360  * Respawn limits for processes that are to be respawned:
361  *
362  *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
363  *			respawn a process SPAWN_LIMIT times before it gets mad.
364  *
365  *	SPAWN_LIMIT	The number of respawns "init" will attempt in
366  *			SPAWN_INTERVAL seconds before it generates an
367  *			error message and inhibits further tries for
368  *			INHIBIT seconds.
369  *
370  *	INHIBIT		The number of seconds "init" ignores an entry it had
371  *			trouble spawning unless a "telinit Q" is received.
372  */
373 
374 #define	SPAWN_INTERVAL	(2*60)
375 #define	SPAWN_LIMIT	10
376 #define	INHIBIT		(5*60)
377 
378 /*
379  * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
380  */
381 #define	ID_MAX_STR_LEN	10
382 
383 #define	NULLPROC	((struct PROC_TABLE *)(0))
384 #define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
385 
386 struct CMD_LINE {
387 	char c_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
388 						/* process to be affected by */
389 						/* action */
390 	short c_levels;	/* Mask of legal levels for process */
391 	short c_action;	/* Mask for type of action required */
392 	char *c_command; /* Pointer to init command */
393 };
394 
/*
 * Command types carried in the pd_type member of a struct pidrec.
 */
#define	ADDPID	1
#define	REMPID	2

/*
 * A request to add or remove a pid.
 */
struct pidrec {
	int	pd_type;	/* ADDPID or REMPID */
	pid_t	pd_pid;		/* the pid the request applies to */
};
405 
/*
 * Node of a singly linked list of watched pids.
 */
struct pidlist {
	pid_t		pl_pid;		/* pid to watch for */
	int		pl_dflag;	/* set on SIGCLD from this pid */
	short		pl_exit;	/* exit status of the process */
	struct pidlist	*pl_next;	/* next node in the list */
};

/* List heads; by their names, the in-use list and the free list. */
static struct pidlist	*Plhead;
static struct pidlist	*Plfree;
412 
413 /*
414  * The following structure contains a set of modes for /dev/syscon
415  * and should match the default contents of /etc/ioctl.syscon.
416  */
417 static struct termios	dflt_termios = {
418 	.c_iflag = BRKINT|ICRNL|IXON|IMAXBEL,
419 	.c_oflag = OPOST|ONLCR|TAB3,
420 	.c_cflag = CS8|CREAD|B9600,
421 	.c_lflag = ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN,
422 	.c_cc = { CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
423 	    CSTART, CSTOP, CSWTCH, CDSUSP, CRPRNT, CFLUSH, CWERASE, CLNEXT,
424 	    CSTATUS, CERASE2, 0
425 	}
426 };
427 
428 static struct termios	stored_syscon_termios;
429 static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */
430 
/*
 * Individual reasons for init to wake up.
 */
struct WAKEFLAGS {
	unsigned w_usersignal : 1;	/* User sent signal to "init" */
	unsigned w_childdeath : 1;	/* An "init" child died */
	unsigned w_powerhit : 1;	/* OS experienced powerfail */
};

/*
 * The wakeup reasons, overlaid with an int so that all of them can be
 * tested or cleared in a single operation through w_mask.
 */
static union WAKEUP {
	struct WAKEFLAGS	w_flags;
	int			w_mask;
} wakeup;
439 
440 
441 struct init_state {
442 	int			ist_runlevel;
443 	int			ist_num_proc;
444 	int			ist_utmpx_ok;
445 	struct PROC_TABLE	ist_proc_table[1];
446 };
447 
448 #define	cur_state	(g_state->ist_runlevel)
449 #define	num_proc	(g_state->ist_num_proc)
450 #define	proc_table	(g_state->ist_proc_table)
451 #define	utmpx_ok	(g_state->ist_utmpx_ok)
452 
453 /* Contract cookies. */
454 #define	ORDINARY_COOKIE		0
455 #define	STARTD_COOKIE		1
456 
457 
/*
 * bad_error(func, err) reports that the function named by the string `func'
 * failed with the unexpected error number `err', then abort()s.  When NDEBUG
 * is defined the message is omitted and only abort() is called.
 *
 * The debug variant is wrapped in do { } while (0) so that, like the NDEBUG
 * variant, it behaves as a single statement and requires a trailing
 * semicolon; this keeps "if (...) bad_error(...); else ..." well-formed.
 */
#ifndef NDEBUG
#define	bad_error(func, err)	do {					\
	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
	abort();							\
} while (0)
#else
#define	bad_error(func, err)	abort()
#endif
467 
468 
469 /*
470  * Useful file and device names.
471  */
472 static char *CONSOLE	  = "/dev/console";	/* Real system console */
473 static char *INITPIPE_DIR = "/var/run";
474 static char *INITPIPE	  = "/var/run/initpipe";
475 
476 #define	INIT_STATE_DIR "/etc/svc/volatile"
477 static const char * const init_state_file = INIT_STATE_DIR "/init.state";
478 static const char * const init_next_state_file =
479 	INIT_STATE_DIR "/init-next.state";
480 
481 static const int init_num_proc = 20;	/* Initial size of process table. */
482 
483 static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
484 static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
485 static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
486 static char *SYSTTY	 = "/dev/systty";	/* System Console */
487 static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
488 static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
489 static char *ENVFILE	 = "/etc/default/init";	/* Default env. */
490 static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
491 static char *SH	= "/sbin/sh";		/* Standard shell */
492 
493 /*
494  * Default Path.  /sbin is included in path only during sysinit phase
495  */
496 #define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
497 #define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"
498 
499 static int	prior_state;
500 static int	prev_state;	/* State "init" was in last time it woke */
501 static int	new_state;	/* State user wants "init" to go to. */
502 static int	lvlq_received;	/* Explicit request to examine state */
503 static int	op_modes = BOOT_MODES; /* Current state of "init" */
504 static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
505 				/*   childeath() and cleared in cleanaux() */
506 static int	Pfd = -1;	/* fd to receive pids thru */
507 static unsigned int	spawncnt, pausecnt;
508 static int	rsflag;		/* Set if a respawn has taken place */
509 static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
510 				/* routine each time an alarm interrupt */
511 				/* takes place. */
512 static int	sflg = 0;	/* Set if we were booted -s to single user */
513 static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
514 static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
515 static pid_t	init_pid;	/* PID of "one true" init for current zone */
516 
517 static struct init_state *g_state = NULL;
518 static size_t	g_state_sz;
519 static int	booting = 1;	/* Set while we're booting. */
520 
521 /*
522  * Array for default global environment.
523  */
524 #define	MAXENVENT	24	/* Max number of default env variables + 1 */
525 				/* init can use three itself, so this leaves */
526 				/* 20 for the administrator in ENVFILE. */
527 static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
528 static int	glob_envn;		/* Number of environment strings */
529 
530 
531 static struct pollfd	poll_fds[1];
532 static int		poll_nfds = 0;	/* poll_fds is uninitialized */
533 
534 /*
535  * Contracts constants
536  */
537 #define	SVC_INIT_PREFIX "init:/"
538 #define	SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
539 #define	SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)
540 
541 static int	legacy_tmpl = -1;	/* fd for legacy contract template */
542 static int	startd_tmpl = -1;	/* fd for svc.startd's template */
543 static char	startd_svc_aux[SVC_AUX_SIZE];
544 
545 static char	startd_cline[256] = "";	/* svc.startd's command line */
546 static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
547 static char	*smf_options = NULL;	/* Options to give to startd. */
548 static int	smf_debug = 0;		/* Messages for debugging smf(5) */
549 static time_t	init_boot_time;		/* Substitute for kernel boot time. */
550 
551 #define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
552 #define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */
553 
554 static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
555 static uint_t	startd_failure_index;
556 
557 
558 static char	*prog_name(char *);
559 static int	state_to_mask(int);
560 static int	lvlname_to_mask(char, int *);
561 static void	lscf_set_runlevel(char);
562 static int	state_to_flags(int);
563 static char	state_to_name(int);
564 static int	lvlname_to_state(char);
565 static int	getcmd(struct CMD_LINE *, char *);
566 static int	realcon();
567 static int	spawn_processes();
568 static int	get_ioctl_syscon();
569 static int	account(short, struct PROC_TABLE *, char *);
570 static void	alarmclk();
571 static void	childeath(int);
572 static void	cleanaux();
573 static void	clearent(pid_t, short);
574 static void	console(boolean_t, char *, ...);
575 static void	init_signals(void);
576 static void	setup_pipe();
577 static void	killproc(pid_t);
578 static void	init_env();
579 static void	boot_init();
580 static void	powerfail();
581 static void	remv();
582 static void	write_ioctl_syscon();
583 static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
584 static void	setimer(int);
585 static void	siglvl(int, siginfo_t *, ucontext_t *);
586 static void	sigpoll(int);
587 static void	enter_maintenance(void);
588 static void	timer(int);
589 static void	userinit(int, char **);
590 static void	notify_pam_dead(struct utmpx *);
591 static long	waitproc(struct PROC_TABLE *);
592 static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
593 static struct PROC_TABLE *findpslot(struct CMD_LINE *);
594 static void	increase_proc_table_size();
595 static void	st_init();
596 static void	st_write();
597 static void	contracts_init();
598 static void	contract_event(struct pollfd *);
599 static int	startd_run(const char *, int, ctid_t);
600 static void	startd_record_failure();
601 static int	startd_failure_rate_critical();
602 static char	*audit_boot_msg();
603 static int	audit_put_record(int, int, char *);
604 static void	update_boot_archive(int new_state);
605 static void	init_bootbanner_print(const char *, uint_t);
606 
607 int
608 main(int argc, char *argv[])
609 {
610 	int	chg_lvl_flag = FALSE, print_banner = FALSE;
611 	int	may_need_audit = 1;
612 	int	c;
613 	char	*msg;
614 
615 	/* Get a timestamp for use as boot time, if needed. */
616 	(void) time(&init_boot_time);
617 
618 	/* Get the default umask */
619 	cmask = umask(022);
620 	(void) umask(cmask);
621 
622 	/* Parse the arguments to init. Check for single user */
623 	opterr = 0;
624 	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
625 		switch (c) {
626 		case 'b':
627 			rflg = 0;
628 			bflg = 1;
629 			if (!sflg)
630 				sflg++;
631 			break;
632 		case 'r':
633 			bflg = 0;
634 			rflg++;
635 			break;
636 		case 's':
637 			if (!bflg)
638 				sflg++;
639 			break;
640 		case 'm':
641 			smf_options = optarg;
642 			smf_debug = (strstr(smf_options, "debug") != NULL);
643 			break;
644 		}
645 	}
646 
647 	/*
648 	 * Determine if we are the main init, or a user invoked init, whose job
649 	 * it is to inform init to change levels or perform some other action.
650 	 */
651 	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
652 	    sizeof (init_pid)) != sizeof (init_pid)) {
653 		(void) fprintf(stderr, "could not get pid for init\n");
654 		return (1);
655 	}
656 
657 	/*
658 	 * If this PID is not the same as the "true" init for the zone, then we
659 	 * must be in 'user' mode.
660 	 */
661 	if (getpid() != init_pid) {
662 		userinit(argc, argv);
663 	}
664 
665 	if (getzoneid() != GLOBAL_ZONEID) {
666 		print_banner = TRUE;
667 	}
668 
669 	/*
670 	 * Initialize state (and set "booting").
671 	 */
672 	st_init();
673 
674 	if (booting && print_banner) {
675 		/*
676 		 * We want to print the boot banner as soon as
677 		 * possible.  In the global zone, the kernel does it,
678 		 * but we do not have that luxury in non-global zones,
679 		 * so we will print it here.
680 		 */
681 #ifdef	LEGACY_BANNER
682 		struct utsname un;
683 		char buf[BUFSIZ];
684 		const char *bits;
685 		int r;
686 
687 		(void) uname(&un);
688 		if ((r = sysinfo(SI_ADDRESS_WIDTH, buf, sizeof (buf))) > 0 &&
689 		    r < sizeof (buf)) {
690 			bits = buf;
691 		} else {
692 			bits = "64";
693 		}
694 
695 		console(B_FALSE,
696 		    "\n\n%s Release %s Version %s %s-bit\r\n",
697 		    un.sysname, un.release, un.version, bits);
698 		console(B_FALSE,
699 		    "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
700 		    " All rights reserved.\r\n");
701 #else
702 		bootbanner_print(init_bootbanner_print, 0);
703 #endif
704 	}
705 
706 	/*
707 	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
708 	 * so that it can be brought up in the state it was in when the
709 	 * system went down; or set to defaults if ioctl.syscon isn't
710 	 * valid.
711 	 *
712 	 * This needs to be done even if we're restarting so reset_modes()
713 	 * will work in case we need to go down to single user mode.
714 	 */
715 	write_ioctl = get_ioctl_syscon();
716 
717 	/*
718 	 * Set up all signals to be caught or ignored as appropriate.
719 	 */
720 	init_signals();
721 
722 	/* Load glob_envp from ENVFILE. */
723 	init_env();
724 
725 	contracts_init();
726 
727 	if (!booting) {
728 		/* cur_state should have been read in. */
729 
730 		op_modes = NORMAL_MODES;
731 
732 		/* Rewrite the ioctl file if it was bad. */
733 		if (write_ioctl)
734 			write_ioctl_syscon();
735 	} else {
736 		/*
737 		 * It's fine to boot up with state as zero, because
738 		 * startd will later tell us the real state.
739 		 */
740 		cur_state = 0;
741 		op_modes = BOOT_MODES;
742 
743 		boot_init();
744 	}
745 
746 	prev_state = prior_state = cur_state;
747 
748 	setup_pipe();
749 
750 	/*
751 	 * Here is the beginning of the main process loop.
752 	 */
753 	for (;;) {
754 		if (lvlq_received) {
755 			setup_pipe();
756 			lvlq_received = B_FALSE;
757 		}
758 
759 		/*
760 		 * Clean up any accounting records for dead "godchildren".
761 		 */
762 		if (Gchild)
763 			cleanaux();
764 
765 		/*
766 		 * If in "normal" mode, check all living processes and initiate
767 		 * kill sequence on those that should not be there anymore.
768 		 */
769 		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
770 		    cur_state != LVLb && cur_state != LVLc)
771 			remv();
772 
773 		/*
774 		 * If a change in run levels is the reason we awoke, now do
775 		 * the accounting to report the change in the utmp file.
776 		 * Also report the change on the system console.
777 		 */
778 		if (chg_lvl_flag) {
779 			chg_lvl_flag = FALSE;
780 
781 			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
782 				char rl = state_to_name(cur_state);
783 
784 				if (rl != -1)
785 					lscf_set_runlevel(rl);
786 			}
787 
788 			may_need_audit = 1;
789 		}
790 
791 		/*
792 		 * Scan the inittab file and spawn and respawn processes that
793 		 * should be alive in the current state. If inittab does not
794 		 * exist default to  single user mode.
795 		 */
796 		if (spawn_processes() == FAILURE) {
797 			prior_state = prev_state;
798 			cur_state = SINGLE_USER;
799 		}
800 
801 		/* If any respawns occurred, take note. */
802 		if (rsflag) {
803 			rsflag = 0;
804 			spawncnt++;
805 		}
806 
807 		/*
808 		 * If a powerfail signal was received during the last
809 		 * sequence, set mode to powerfail.  When spawn_processes() is
810 		 * entered the first thing it does is to check "powerhit".  If
811 		 * it is in PF_MODES then it clears "powerhit" and does
812 		 * a powerfail sequence.  If it is not in PF_MODES, then it
813 		 * puts itself in PF_MODES and then clears "powerhit".  Should
814 		 * "powerhit" get set again while spawn_processes() is working
815 		 * on a powerfail sequence, the following code  will see that
816 		 * spawn_processes() tries to execute the powerfail sequence
817 		 * again.  This guarantees that the powerfail sequence will be
818 		 * successfully completed before further processing takes
819 		 * place.
820 		 */
821 		if (wakeup.w_flags.w_powerhit) {
822 			op_modes = PF_MODES;
823 			/*
824 			 * Make sure that cur_state != prev_state so that
825 			 * ONCE and WAIT types work.
826 			 */
827 			prev_state = 0;
828 		} else if (op_modes != NORMAL_MODES) {
829 			/*
830 			 * If spawn_processes() was not just called while in
831 			 * normal mode, we set the mode to normal and it will
832 			 * be called again to check normal modes.  If we have
833 			 * just finished a powerfail sequence with prev_state
834 			 * equal to zero, we set prev_state equal to cur_state
835 			 * before the next pass through.
836 			 */
837 			if (op_modes == PF_MODES)
838 				prev_state = cur_state;
839 			op_modes = NORMAL_MODES;
840 		} else if (cur_state == LVLa || cur_state == LVLb ||
841 		    cur_state == LVLc) {
842 			/*
843 			 * If it was a change of levels that awakened us and the
844 			 * new level is one of the demand levels then reset
845 			 * cur_state to the previous state and do another scan
846 			 * to take care of the usual respawn actions.
847 			 */
848 			cur_state = prior_state;
849 			prior_state = prev_state;
850 			prev_state = cur_state;
851 		} else {
852 			prev_state = cur_state;
853 
854 			if (wakeup.w_mask == 0) {
855 				int ret;
856 
857 				if (may_need_audit && (cur_state == LVL3)) {
858 					msg = audit_boot_msg();
859 
860 					may_need_audit = 0;
861 					(void) audit_put_record(ADT_SUCCESS,
862 					    ADT_SUCCESS, msg);
863 					free(msg);
864 				}
865 
866 				/*
867 				 * "init" is finished with all actions for
868 				 * the current wakeup.
869 				 */
870 				ret = poll(poll_fds, poll_nfds,
871 				    SLEEPTIME * MILLISEC);
872 				pausecnt++;
873 				if (ret > 0)
874 					contract_event(&poll_fds[0]);
875 				else if (ret < 0 && errno != EINTR)
876 					console(B_TRUE, "poll() error: %s\n",
877 					    strerror(errno));
878 			}
879 
880 			if (wakeup.w_flags.w_usersignal) {
881 				/*
882 				 * Install the new level.  This could be a real
883 				 * change in levels  or a telinit [Q|a|b|c] or
884 				 * just a telinit to the same level at which
885 				 * we are running.
886 				 */
887 				if (new_state != cur_state) {
888 					if (new_state == LVLa ||
889 					    new_state == LVLb ||
890 					    new_state == LVLc) {
891 						prev_state = prior_state;
892 						prior_state = cur_state;
893 						cur_state = new_state;
894 					} else {
895 						prev_state = cur_state;
896 						if (cur_state >= 0)
897 							prior_state = cur_state;
898 						cur_state = new_state;
899 						chg_lvl_flag = TRUE;
900 					}
901 				}
902 
903 				new_state = 0;
904 			}
905 
906 			if (wakeup.w_flags.w_powerhit)
907 				op_modes = PF_MODES;
908 
909 			/*
910 			 * Clear all wakeup reasons.
911 			 */
912 			wakeup.w_mask = 0;
913 		}
914 	}
915 
916 	/*NOTREACHED*/
917 }
918 
919 static void
920 init_bootbanner_print(const char *line, uint_t num)
921 {
922 	const char *pfx = (num == 0) ? "\n\n" : "";
923 
924 	console(B_FALSE, "%s%s\r\n", pfx, line);
925 }
926 
927 static void
928 update_boot_archive(int new_state)
929 {
930 	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
931 		return;
932 
933 	if (getzoneid() != GLOBAL_ZONEID)
934 		return;
935 
936 	(void) system("/sbin/bootadm -ea update_all");
937 }
938 
939 /*
940  * void enter_maintenance()
941  *   A simple invocation of sulogin(1M), with no baggage, in the case that we
942  *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
943  *   we wait for it to exit.
944  */
945 static void
946 enter_maintenance()
947 {
948 	struct PROC_TABLE	*su_process;
949 
950 	console(B_FALSE, "Requesting maintenance mode\n"
951 	    "(See /lib/svc/share/README for additional information.)\n");
952 	(void) sighold(SIGCLD);
953 	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
954 		(void) pause();
955 	(void) sigrelse(SIGCLD);
956 	if (su_process == NULLPROC) {
957 		int fd;
958 
959 		(void) fclose(stdin);
960 		(void) fclose(stdout);
961 		(void) fclose(stderr);
962 		closefrom(0);
963 
964 		fd = open(SYSCON, O_RDWR | O_NOCTTY);
965 		if (fd >= 0) {
966 			(void) dup2(fd, 1);
967 			(void) dup2(fd, 2);
968 		} else {
969 			/*
970 			 * Need to issue an error message somewhere.
971 			 */
972 			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
973 			    getpid(), SYSCON, strerror(errno));
974 		}
975 
976 		/*
977 		 * Execute the "su" program.
978 		 */
979 		(void) execle(SU, SU, "-", (char *)0, glob_envp);
980 		console(B_TRUE, "execle of %s failed: %s\n", SU,
981 		    strerror(errno));
982 		timer(5);
983 		exit(1);
984 	}
985 
986 	/*
987 	 * If we are the parent, wait around for the child to die
988 	 * or for "init" to be signaled to change levels.
989 	 */
990 	while (waitproc(su_process) == FAILURE) {
991 		/*
992 		 * All other reasons for waking are ignored when in
993 		 * single-user mode.  The only child we are interested
994 		 * in is being waited for explicitly by waitproc().
995 		 */
996 		wakeup.w_mask = 0;
997 	}
998 }
999 
1000 /*
1001  * remv() scans through "proc_table" and performs cleanup.  If
1002  * there is a process in the table, which shouldn't be here at
1003  * the current run level, then remv() kills the process.
1004  */
1005 static void
1006 remv()
1007 {
1008 	struct PROC_TABLE	*process;
1009 	struct CMD_LINE		cmd;
1010 	char			cmd_string[MAXCMDL];
1011 	int			change_level;
1012 
1013 	change_level = (cur_state != prev_state ? TRUE : FALSE);
1014 
1015 	/*
1016 	 * Clear the TOUCHED flag on all entries so that when we have
1017 	 * finished scanning inittab, we will be able to tell if we
1018 	 * have any processes for which there is no entry in inittab.
1019 	 */
1020 	for (process = proc_table;
1021 	    (process < proc_table + num_proc); process++) {
1022 		process->p_flags &= ~TOUCHED;
1023 	}
1024 
1025 	/*
1026 	 * Scan all inittab entries.
1027 	 */
1028 	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
1029 		/* Scan for process which goes with this entry in inittab. */
1030 		for (process = proc_table;
1031 		    (process < proc_table + num_proc); process++) {
1032 			if ((process->p_flags & OCCUPIED) == 0 ||
1033 			    !id_eq(process->p_id, cmd.c_id))
1034 				continue;
1035 
1036 			/*
1037 			 * This slot contains the process we are looking for.
1038 			 */
1039 
1040 			/*
1041 			 * Is the cur_state SINGLE_USER or is this process
1042 			 * marked as "off" or was this proc started by some
1043 			 * mechanism other than LVL{a|b|c} and the current level
1044 			 * does not support this process?
1045 			 */
1046 			if (cur_state == SINGLE_USER ||
1047 			    cmd.c_action == M_OFF ||
1048 			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1049 			    (process->p_flags & DEMANDREQUEST) == 0)) {
1050 				if (process->p_flags & LIVING) {
1051 					/*
1052 					 * Touch this entry so we know we have
1053 					 * treated it.  Note that procs which
1054 					 * are already dead at this point and
1055 					 * should not be restarted are left
1056 					 * untouched.  This causes their slot to
1057 					 * be freed later after dead accounting
1058 					 * is done.
1059 					 */
1060 					process->p_flags |= TOUCHED;
1061 
1062 					if ((process->p_flags & KILLED) == 0) {
1063 						if (change_level) {
1064 							process->p_flags
1065 							    |= WARNED;
1066 							(void) kill(
1067 							    process->p_pid,
1068 							    SIGTERM);
1069 						} else {
1070 							/*
1071 							 * Fork a killing proc
1072 							 * so "init" can
1073 							 * continue without
1074 							 * having to pause for
1075 							 * TWARN seconds.
1076 							 */
1077 							killproc(
1078 							    process->p_pid);
1079 						}
1080 						process->p_flags |= KILLED;
1081 					}
1082 				}
1083 			} else {
1084 				/*
1085 				 * Process can exist at current level.  If it is
1086 				 * still alive or a DEMANDREQUEST we touch it so
1087 				 * it will be left alone.  Otherwise we leave it
1088 				 * untouched so it will be accounted for and
1089 				 * cleaned up later in remv().  Dead
1090 				 * DEMANDREQUESTs will be accounted but not
1091 				 * freed.
1092 				 */
1093 				if (process->p_flags &
1094 				    (LIVING|NOCLEANUP|DEMANDREQUEST))
1095 					process->p_flags |= TOUCHED;
1096 			}
1097 
1098 			break;
1099 		}
1100 	}
1101 
1102 	st_write();
1103 
1104 	/*
1105 	 * If this was a change of levels call, scan through the
1106 	 * process table for processes that were warned to die.  If any
1107 	 * are found that haven't left yet, sleep for TWARN seconds and
1108 	 * then send final terminations to any that haven't died yet.
1109 	 */
1110 	if (change_level) {
1111 
1112 		/*
1113 		 * Set the alarm for TWARN seconds on the assumption
1114 		 * that there will be some that need to be waited for.
1115 		 * This won't harm anything except we are guaranteed to
1116 		 * wakeup in TWARN seconds whether we need to or not.
1117 		 */
1118 		setimer(TWARN);
1119 
1120 		/*
1121 		 * Scan for processes which should be dying.  We hope they
1122 		 * will die without having to be sent a SIGKILL signal.
1123 		 */
1124 		for (process = proc_table;
1125 		    (process < proc_table + num_proc); process++) {
1126 			/*
1127 			 * If this process should die, hasn't yet, and the
1128 			 * TWARN time hasn't expired yet, wait for process
1129 			 * to die or for timer to expire.
1130 			 */
1131 			while (time_up == FALSE &&
1132 			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1133 			    (WARNED|LIVING|OCCUPIED))
1134 				(void) pause();
1135 
1136 			if (time_up == TRUE)
1137 				break;
1138 		}
1139 
1140 		/*
1141 		 * If we reached the end of the table without the timer
1142 		 * expiring, then there are no procs which will have to be
1143 		 * sent the SIGKILL signal.  If the timer has expired, then
1144 		 * it is necessary to scan the table again and send signals
1145 		 * to all processes which aren't going away nicely.
1146 		 */
1147 		if (time_up == TRUE) {
1148 			for (process = proc_table;
1149 			    (process < proc_table + num_proc); process++) {
1150 				if ((process->p_flags &
1151 				    (WARNED|LIVING|OCCUPIED)) ==
1152 				    (WARNED|LIVING|OCCUPIED))
1153 					(void) kill(process->p_pid, SIGKILL);
1154 			}
1155 		}
1156 		setimer(0);
1157 	}
1158 
1159 	/*
1160 	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
1161 	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1162 	 * by the above scanning), and haven't been sent kill signals, and
1163 	 * those entries marked not LIVING, NAMED.  The former procs are killed.
1164 	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
1165 	 */
1166 	for (process = proc_table;
1167 	    (process < proc_table + num_proc); process++) {
1168 		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1169 		    == (LIVING|NAMED|OCCUPIED)) {
1170 			killproc(process->p_pid);
1171 			process->p_flags |= KILLED;
1172 		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1173 		    (NAMED|OCCUPIED)) {
1174 			(void) account(DEAD_PROCESS, process, NULL);
1175 			/*
1176 			 * If this named proc hasn't been TOUCHED, then free the
1177 			 * space. It has either died of it's own accord, but
1178 			 * isn't respawnable or it was killed because it
1179 			 * shouldn't exist at this level.
1180 			 */
1181 			if ((process->p_flags & TOUCHED) == 0)
1182 				process->p_flags = 0;
1183 		}
1184 	}
1185 
1186 	st_write();
1187 }
1188 
1189 /*
1190  * Extract the svc.startd command line and whether to restart it from its
1191  * inittab entry.
1192  */
1193 /*ARGSUSED*/
1194 static void
1195 process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1196 {
1197 	size_t sz;
1198 
1199 	/* Save the command line. */
1200 	if (sflg || rflg) {
1201 		/* Also append -r or -s. */
1202 		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1203 		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
1204 		if (sflg)
1205 			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1206 		if (rflg)
1207 			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1208 	} else {
1209 		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1210 	}
1211 
1212 	if (sz >= sizeof (startd_cline)) {
1213 		console(B_TRUE,
1214 		    "svc.startd command line too long.  Ignoring.\n");
1215 		startd_cline[0] = '\0';
1216 		return;
1217 	}
1218 }
1219 
1220 /*
1221  * spawn_processes() scans inittab for entries which should be run at this
1222  * mode.  Processes which should be running but are not, are started.
1223  */
1224 static int
1225 spawn_processes()
1226 {
1227 	struct PROC_TABLE		*pp;
1228 	struct CMD_LINE			cmd;
1229 	char				cmd_string[MAXCMDL];
1230 	short				lvl_mask;
1231 	int				status;
1232 
1233 	/*
1234 	 * First check the "powerhit" flag.  If it is set, make sure the modes
1235 	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1236 	 * on the "powerhit" flag by disallowing a new powerfail interrupt
1237 	 * between the test of the powerhit flag and the clearing of it.
1238 	 */
1239 	if (wakeup.w_flags.w_powerhit) {
1240 		wakeup.w_flags.w_powerhit = 0;
1241 		op_modes = PF_MODES;
1242 	}
1243 	lvl_mask = state_to_mask(cur_state);
1244 
1245 	/*
1246 	 * Scan through all the entries in inittab.
1247 	 */
1248 	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1249 		if (id_eq(cmd.c_id, "smf")) {
1250 			process_startd_line(&cmd, cmd_string);
1251 			continue;
1252 		}
1253 
1254 retry_for_proc_slot:
1255 
1256 		/*
1257 		 * Find out if there is a process slot for this entry already.
1258 		 */
1259 		if ((pp = findpslot(&cmd)) == NULLPROC) {
1260 			/*
1261 			 * we've run out of proc table entries
1262 			 * increase proc_table.
1263 			 */
1264 			increase_proc_table_size();
1265 
1266 			/*
1267 			 * Retry now as we have an empty proc slot.
1268 			 * In case increase_proc_table_size() fails,
1269 			 * we will keep retrying.
1270 			 */
1271 			goto retry_for_proc_slot;
1272 		}
1273 
1274 		/*
1275 		 * If there is an entry, and it is marked as DEMANDREQUEST,
1276 		 * one of the levels a, b, or c is in its levels mask, and
1277 		 * the action field is ONDEMAND and ONDEMAND is a permissable
1278 		 * mode, and the process is dead, then respawn it.
1279 		 */
1280 		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1281 		    (cmd.c_levels & MASK_abc) &&
1282 		    (cmd.c_action & op_modes) == M_ONDEMAND) {
1283 			spawn(pp, &cmd);
1284 			continue;
1285 		}
1286 
1287 		/*
1288 		 * If the action is not an action we are interested in,
1289 		 * skip the entry.
1290 		 */
1291 		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1292 		    (cmd.c_levels & lvl_mask) == 0)
1293 			continue;
1294 
1295 		/*
1296 		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1297 		 * ONDEMAND) and the action field is either OFF or the action
1298 		 * field is ONCE or WAIT and the current level is the same as
1299 		 * the last level, then skip this entry.  ONCE and WAIT only
1300 		 * get run when the level changes.
1301 		 */
1302 		if (op_modes == NORMAL_MODES &&
1303 		    (cmd.c_action == M_OFF ||
1304 		    (cmd.c_action & (M_ONCE|M_WAIT)) &&
1305 		    cur_state == prev_state))
1306 			continue;
1307 
1308 		/*
1309 		 * At this point we are interested in performing the action for
1310 		 * this entry.  Actions fall into two categories, spinning off
1311 		 * a process and not waiting, and spinning off a process and
1312 		 * waiting for it to die.  If the action is ONCE, RESPAWN,
1313 		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1314 		 * to die, for all other actions we do wait.
1315 		 */
1316 		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1317 			spawn(pp, &cmd);
1318 
1319 		} else {
1320 			spawn(pp, &cmd);
1321 			while (waitproc(pp) == FAILURE)
1322 				;
1323 			(void) account(DEAD_PROCESS, pp, NULL);
1324 			pp->p_flags = 0;
1325 		}
1326 	}
1327 	return (status);
1328 }
1329 
1330 /*
1331  * spawn() spawns a shell, inserts the information about the process
1332  * process into the proc_table, and does the startup accounting.
1333  */
1334 static void
1335 spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1336 {
1337 	int		i;
1338 	int		modes, maxfiles;
1339 	time_t		now;
1340 	struct PROC_TABLE tmproc, *oprocess;
1341 
1342 	/*
1343 	 * The modes to be sent to efork() are 0 unless we are
1344 	 * spawning a LVLa, LVLb, or LVLc entry or we will be
1345 	 * waiting for the death of the child before continuing.
1346 	 */
1347 	modes = NAMED;
1348 	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1349 	    cur_state == LVLb || cur_state == LVLc)
1350 		modes |= DEMANDREQUEST;
1351 	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1352 		modes |= NOCLEANUP;
1353 
1354 	/*
1355 	 * If this is a respawnable process, check the threshold
1356 	 * information to avoid excessive respawns.
1357 	 */
1358 	if (cmd->c_action & M_RESPAWN) {
1359 		/*
1360 		 * Add NOCLEANUP to all respawnable commands so that the
1361 		 * information about the frequency of respawns isn't lost.
1362 		 */
1363 		modes |= NOCLEANUP;
1364 		(void) time(&now);
1365 
1366 		/*
1367 		 * If no time is assigned, then this is the first time
1368 		 * this command is being processed in this series.  Assign
1369 		 * the current time.
1370 		 */
1371 		if (process->p_time == 0L)
1372 			process->p_time = now;
1373 
1374 		if (process->p_count++ == SPAWN_LIMIT) {
1375 
1376 			if ((now - process->p_time) < SPAWN_INTERVAL) {
1377 				/*
1378 				 * Process is respawning too rapidly.  Print
1379 				 * message and refuse to respawn it for now.
1380 				 */
1381 				console(B_TRUE, "Command is respawning too "
1382 				    "rapidly. Check for possible errors.\n"
1383 				    "id:%4s \"%s\"\n",
1384 				    &cmd->c_id[0], &cmd->c_command[EXEC]);
1385 				return;
1386 			}
1387 			process->p_time = now;
1388 			process->p_count = 0;
1389 
1390 		} else if (process->p_count > SPAWN_LIMIT) {
1391 			/*
1392 			 * If process has been respawning too rapidly and
1393 			 * the inhibit time limit hasn't expired yet, we
1394 			 * refuse to respawn.
1395 			 */
1396 			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1397 				return;
1398 			process->p_time = now;
1399 			process->p_count = 0;
1400 		}
1401 		rsflag = TRUE;
1402 	}
1403 
1404 	/*
1405 	 * Spawn a child process to execute this command.
1406 	 */
1407 	(void) sighold(SIGCLD);
1408 	oprocess = process;
1409 	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1410 		(void) pause();
1411 
1412 	if (process == NULLPROC) {
1413 
1414 		/*
1415 		 * We are the child.  We must make sure we get a different
1416 		 * file pointer for our references to utmpx.  Otherwise our
1417 		 * seeks and reads will compete with those of the parent.
1418 		 */
1419 		endutxent();
1420 
1421 		/*
1422 		 * Perform the accounting for the beginning of a process.
1423 		 * Note that all processes are initially "INIT_PROCESS"es.
1424 		 */
1425 		tmproc.p_id[0] = cmd->c_id[0];
1426 		tmproc.p_id[1] = cmd->c_id[1];
1427 		tmproc.p_id[2] = cmd->c_id[2];
1428 		tmproc.p_id[3] = cmd->c_id[3];
1429 		tmproc.p_pid = getpid();
1430 		tmproc.p_exit = 0;
1431 		(void) account(INIT_PROCESS, &tmproc,
1432 		    prog_name(&cmd->c_command[EXEC]));
1433 		maxfiles = ulimit(UL_GDESLIM, 0);
1434 		for (i = 0; i < maxfiles; i++)
1435 			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
1436 
1437 		/*
1438 		 * Now exec a shell with the -c option and the command
1439 		 * from inittab.
1440 		 */
1441 		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1442 		    glob_envp);
1443 		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1444 		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
1445 
1446 		/*
1447 		 * Don't come back so quickly that "init" doesn't have a
1448 		 * chance to finish putting this child in "proc_table".
1449 		 */
1450 		timer(20);
1451 		exit(1);
1452 
1453 	}
1454 
1455 	/*
1456 	 * We are the parent.  Insert the necessary
1457 	 * information in the proc_table.
1458 	 */
1459 	process->p_id[0] = cmd->c_id[0];
1460 	process->p_id[1] = cmd->c_id[1];
1461 	process->p_id[2] = cmd->c_id[2];
1462 	process->p_id[3] = cmd->c_id[3];
1463 
1464 	st_write();
1465 
1466 	(void) sigrelse(SIGCLD);
1467 }
1468 
1469 /*
1470  * findpslot() finds the old slot in the process table for the
1471  * command with the same id, or it finds an empty slot.
1472  */
1473 static struct PROC_TABLE *
1474 findpslot(struct CMD_LINE *cmd)
1475 {
1476 	struct PROC_TABLE	*process;
1477 	struct PROC_TABLE	*empty = NULLPROC;
1478 
1479 	for (process = proc_table;
1480 	    (process < proc_table + num_proc); process++) {
1481 		if (process->p_flags & OCCUPIED &&
1482 		    id_eq(process->p_id, cmd->c_id))
1483 			break;
1484 
1485 		/*
1486 		 * If the entry is totally empty and "empty" is still 0,
1487 		 * remember where this hole is and make sure the slot is
1488 		 * zeroed out.
1489 		 */
1490 		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1491 			empty = process;
1492 			process->p_id[0] = '\0';
1493 			process->p_id[1] = '\0';
1494 			process->p_id[2] = '\0';
1495 			process->p_id[3] = '\0';
1496 			process->p_pid = 0;
1497 			process->p_time = 0L;
1498 			process->p_count = 0;
1499 			process->p_flags = 0;
1500 			process->p_exit = 0;
1501 		}
1502 	}
1503 
1504 	/*
1505 	 * If there is no entry for this slot, then there should be an
1506 	 * empty slot.  If there is no empty slot, then we've run out
1507 	 * of proc_table space.  If the latter is true, empty will be
1508 	 * NULL and the caller will have to complain.
1509 	 */
1510 	if (process == (proc_table + num_proc))
1511 		process = empty;
1512 
1513 	return (process);
1514 }
1515 
1516 /*
1517  * getcmd() parses lines from inittab.  Each time it finds a command line
1518  * it will return TRUE as well as fill the passed CMD_LINE structure and
1519  * the shell command string.  When the end of inittab is reached, FALSE
1520  * is returned inittab is automatically opened if it is not currently open
1521  * and is closed when the end of the file is reached.
1522  */
1523 static FILE *fp_inittab = NULL;
1524 
1525 static int
1526 getcmd(struct CMD_LINE *cmd, char *shcmd)
1527 {
1528 	char	*ptr;
1529 	int	c, lastc, state;
1530 	char 	*ptr1;
1531 	int	answer, i, proceed;
1532 	struct	stat	sbuf;
1533 	static char *actions[] = {
1534 		"off", "respawn", "ondemand", "once", "wait", "boot",
1535 		"bootwait", "powerfail", "powerwait", "initdefault",
1536 		"sysinit",
1537 	};
1538 	static short act_masks[] = {
1539 		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1540 		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1541 	};
1542 	/*
1543 	 * Only these actions will be allowed for entries which
1544 	 * are specified for single-user mode.
1545 	 */
1546 	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1547 
1548 	if (fp_inittab == NULL) {
1549 		/*
1550 		 * Before attempting to open inittab we stat it to make
1551 		 * sure it currently exists and is not empty.  We try
1552 		 * several times because someone may have temporarily
1553 		 * unlinked or truncated the file.
1554 		 */
1555 		for (i = 0; i < 3; i++) {
1556 			if (stat(INITTAB, &sbuf) == -1) {
1557 				if (i == 2) {
1558 					console(B_TRUE,
1559 					    "Cannot stat %s, errno: %d\n",
1560 					    INITTAB, errno);
1561 					return (FAILURE);
1562 				} else {
1563 					timer(3);
1564 				}
1565 			} else if (sbuf.st_size < 10) {
1566 				if (i == 2) {
1567 					console(B_TRUE,
1568 					    "%s truncated or corrupted\n",
1569 					    INITTAB);
1570 					return (FAILURE);
1571 				} else {
1572 					timer(3);
1573 				}
1574 			} else {
1575 				break;
1576 			}
1577 		}
1578 
1579 		/*
1580 		 * If unable to open inittab, print error message and
1581 		 * return FAILURE to caller.
1582 		 */
1583 		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1584 			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1585 			    errno);
1586 			return (FAILURE);
1587 		}
1588 	}
1589 
1590 	/*
1591 	 * Keep getting commands from inittab until you find a
1592 	 * good one or run out of file.
1593 	 */
1594 	for (answer = FALSE; answer == FALSE; ) {
1595 		/*
1596 		 * Zero out the cmd itself before trying next line.
1597 		 */
1598 		bzero(cmd, sizeof (struct CMD_LINE));
1599 
1600 		/*
1601 		 * Read in lines of inittab, parsing at colons, until a line is
1602 		 * read in which doesn't end with a backslash.  Do not start if
1603 		 * the first character read is an EOF.  Note that this means
1604 		 * that lines which don't end in a newline are still processed,
1605 		 * since the "for" will terminate normally once started,
1606 		 * regardless of whether line terminates with a newline or EOF.
1607 		 */
1608 		state = FAILURE;
1609 		if ((c = fgetc(fp_inittab)) == EOF) {
1610 			answer = FALSE;
1611 			(void) fclose(fp_inittab);
1612 			fp_inittab = NULL;
1613 			break;
1614 		}
1615 
1616 		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1617 		    proceed && c != EOF;
1618 		    lastc = c, c = fgetc(fp_inittab)) {
1619 			/* If we're not in the FAILURE state and haven't */
1620 			/* yet reached the shell command field, process	 */
1621 			/* the line, otherwise just look for a real end	 */
1622 			/* of line.					 */
1623 			if (state != FAILURE && state != COMMAND) {
1624 			/*
1625 			 * Squeeze out spaces and tabs.
1626 			 */
1627 			if (c == ' ' || c == '\t')
1628 				continue;
1629 
1630 			/*
1631 			 * Ignore characters in a comment, except for the \n.
1632 			 */
1633 			if (state == COMMENT) {
1634 				if (c == '\n') {
1635 					lastc = ' ';
1636 					break;
1637 				} else {
1638 					continue;
1639 				}
1640 			}
1641 
1642 			/*
1643 			 * Detect comments (lines whose first non-whitespace
1644 			 * character is '#') by checking that we're at the
1645 			 * beginning of a line, have seen a '#', and haven't
1646 			 * yet accumulated any characters.
1647 			 */
1648 			if (state == ID && c == '#' && ptr == shcmd) {
1649 				state = COMMENT;
1650 				continue;
1651 			}
1652 
1653 			/*
1654 			 * If the character is a ':', then check the
1655 			 * previous field for correctness and advance
1656 			 * to the next field.
1657 			 */
1658 			if (c == ':') {
1659 				switch (state) {
1660 
1661 				case ID :
1662 				/*
1663 				 * Check to see that there are only
1664 				 * 1 to 4 characters for the id.
1665 				 */
1666 				if ((i = ptr - shcmd) < 1 || i > 4) {
1667 					state = FAILURE;
1668 				} else {
1669 					bcopy(shcmd, &cmd->c_id[0], i);
1670 					ptr = shcmd;
1671 					state = LEVELS;
1672 				}
1673 				break;
1674 
1675 				case LEVELS :
1676 				/*
1677 				 * Build a mask for all the levels for
1678 				 * which this command will be legal.
1679 				 */
1680 				for (cmd->c_levels = 0, ptr1 = shcmd;
1681 				    ptr1 < ptr; ptr1++) {
1682 					int mask;
1683 					if (lvlname_to_mask(*ptr1,
1684 					    &mask) == -1) {
1685 						state = FAILURE;
1686 						break;
1687 					}
1688 					cmd->c_levels |= mask;
1689 				}
1690 				if (state != FAILURE) {
1691 					state = ACTION;
1692 					ptr = shcmd;	/* Reset the buffer */
1693 				}
1694 				break;
1695 
1696 				case ACTION :
1697 				/*
1698 				 * Null terminate the string in shcmd buffer and
1699 				 * then try to match against legal actions.  If
1700 				 * the field is of length 0, then the default of
1701 				 * "RESPAWN" is used if the id is numeric,
1702 				 * otherwise the default is "OFF".
1703 				 */
1704 				if (ptr == shcmd) {
1705 					if (isdigit(cmd->c_id[0]) &&
1706 					    (cmd->c_id[1] == '\0' ||
1707 					    isdigit(cmd->c_id[1])) &&
1708 					    (cmd->c_id[2] == '\0' ||
1709 					    isdigit(cmd->c_id[2])) &&
1710 					    (cmd->c_id[3] == '\0' ||
1711 					    isdigit(cmd->c_id[3])))
1712 						cmd->c_action = M_RESPAWN;
1713 					else
1714 						cmd->c_action = M_OFF;
1715 				} else {
1716 					for (cmd->c_action = 0, i = 0,
1717 					    *ptr = '\0';
1718 					    i <
1719 					    sizeof (actions)/sizeof (char *);
1720 					    i++) {
1721 					if (strcmp(shcmd, actions[i]) == 0) {
1722 						if ((cmd->c_levels & MASKSU) &&
1723 						    !(act_masks[i] & su_acts))
1724 							cmd->c_action = 0;
1725 						else
1726 							cmd->c_action =
1727 							    act_masks[i];
1728 						break;
1729 					}
1730 					}
1731 				}
1732 
1733 				/*
1734 				 * If the action didn't match any legal action,
1735 				 * set state to FAILURE.
1736 				 */
1737 				if (cmd->c_action == 0) {
1738 					state = FAILURE;
1739 				} else {
1740 					state = COMMAND;
1741 					(void) strcpy(shcmd, "exec ");
1742 				}
1743 				ptr = shcmd + EXEC;
1744 				break;
1745 				}
1746 				continue;
1747 			}
1748 		}
1749 
1750 		/* If the character is a '\n', then this is the end of a */
1751 		/* line.  If the '\n' wasn't preceded by a backslash, */
1752 		/* it is also the end of an inittab command.  If it was */
1753 		/* preceded by a backslash then the next line is a */
1754 		/* continuation.  Note that the continuation '\n' falls */
1755 		/* through and is treated like other characters and is */
1756 		/* stored in the shell command line. */
1757 		if (c == '\n' && lastc != '\\') {
1758 			proceed = FALSE;
1759 			*ptr = '\0';
1760 			break;
1761 		}
1762 
1763 		/* For all other characters just stuff them into the */
1764 		/* command as long as there aren't too many of them. */
1765 		/* Make sure there is room for a terminating '\0' also. */
1766 		if (ptr >= shcmd + MAXCMDL - 1)
1767 			state = FAILURE;
1768 		else
1769 			*ptr++ = (char)c;
1770 
1771 		/* If the character we just stored was a quoted	*/
1772 		/* backslash, then change "c" to '\0', so that this	*/
1773 		/* backslash will not cause a subsequent '\n' to appear */
1774 		/* quoted.  In otherwords '\' '\' '\n' is the real end */
1775 		/* of a command, while '\' '\n' is a continuation. */
1776 		if (c == '\\' && lastc == '\\')
1777 			c = '\0';
1778 		}
1779 
1780 		/*
1781 		 * Make sure all the fields are properly specified
1782 		 * for a good command line.
1783 		 */
1784 		if (state == COMMAND) {
1785 			answer = TRUE;
1786 			cmd->c_command = shcmd;
1787 
1788 			/*
1789 			 * If no default level was supplied, insert
1790 			 * all numerical levels.
1791 			 */
1792 			if (cmd->c_levels == 0)
1793 				cmd->c_levels = MASK_NUMERIC;
1794 
1795 			/*
1796 			 * If no action has been supplied, declare this
1797 			 * entry to be OFF.
1798 			 */
1799 			if (cmd->c_action == 0)
1800 				cmd->c_action = M_OFF;
1801 
1802 			/*
1803 			 * If no shell command has been supplied, make sure
1804 			 * there is a null string in the command field.
1805 			 */
1806 			if (ptr == shcmd + EXEC)
1807 				*shcmd = '\0';
1808 		} else
1809 			answer = FALSE;
1810 
1811 		/*
1812 		 * If we have reached the end of inittab, then close it
1813 		 * and quit trying to find a good command line.
1814 		 */
1815 		if (c == EOF) {
1816 			(void) fclose(fp_inittab);
1817 			fp_inittab = NULL;
1818 			break;
1819 		}
1820 	}
1821 	return (answer);
1822 }
1823 
1824 /*
1825  * lvlname_to_state(): convert the character name of a state to its level
1826  * (its corresponding signal number).
1827  */
1828 static int
1829 lvlname_to_state(char name)
1830 {
1831 	int i;
1832 	for (i = 0; i < LVL_NELEMS; i++) {
1833 		if (lvls[i].lvl_name == name)
1834 			return (lvls[i].lvl_state);
1835 	}
1836 	return (-1);
1837 }
1838 
1839 /*
1840  * state_to_name(): convert the level to the character name.
1841  */
1842 static char
1843 state_to_name(int state)
1844 {
1845 	int i;
1846 	for (i = 0; i < LVL_NELEMS; i++) {
1847 		if (lvls[i].lvl_state == state)
1848 			return (lvls[i].lvl_name);
1849 	}
1850 	return (-1);
1851 }
1852 
1853 /*
1854  * state_to_mask(): return the mask corresponding to a signal number
1855  */
1856 static int
1857 state_to_mask(int state)
1858 {
1859 	int i;
1860 	for (i = 0; i < LVL_NELEMS; i++) {
1861 		if (lvls[i].lvl_state == state)
1862 			return (lvls[i].lvl_mask);
1863 	}
1864 	return (0);	/* return 0, since that represents an empty mask */
1865 }
1866 
1867 /*
1868  * lvlname_to_mask(): return the mask corresponding to a levels character name
1869  */
1870 static int
1871 lvlname_to_mask(char name, int *mask)
1872 {
1873 	int i;
1874 	for (i = 0; i < LVL_NELEMS; i++) {
1875 		if (lvls[i].lvl_name == name) {
1876 			*mask = lvls[i].lvl_mask;
1877 			return (0);
1878 		}
1879 	}
1880 	return (-1);
1881 }
1882 
1883 /*
1884  * state_to_flags(): return the flags corresponding to a runlevel.  These
1885  * indicate properties of that runlevel.
1886  */
1887 static int
1888 state_to_flags(int state)
1889 {
1890 	int i;
1891 	for (i = 0; i < LVL_NELEMS; i++) {
1892 		if (lvls[i].lvl_state == state)
1893 			return (lvls[i].lvl_flags);
1894 	}
1895 	return (0);
1896 }
1897 
1898 /*
1899  * killproc() creates a child which kills the process specified by pid.
1900  */
1901 void
1902 killproc(pid_t pid)
1903 {
1904 	struct PROC_TABLE	*process;
1905 
1906 	(void) sighold(SIGCLD);
1907 	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1908 		(void) pause();
1909 	(void) sigrelse(SIGCLD);
1910 
1911 	if (process == NULLPROC) {
1912 		/*
1913 		 * efork() sets all signal handlers to the default, so reset
1914 		 * the ALRM handler to make timer() work as expected.
1915 		 */
1916 		(void) sigset(SIGALRM, alarmclk);
1917 
1918 		/*
1919 		 * We are the child.  Try to terminate the process nicely
1920 		 * first using SIGTERM and if it refuses to die in TWARN
1921 		 * seconds kill it with SIGKILL.
1922 		 */
1923 		(void) kill(pid, SIGTERM);
1924 		(void) timer(TWARN);
1925 		(void) kill(pid, SIGKILL);
1926 		(void) exit(0);
1927 	}
1928 }
1929 
1930 /*
1931  * Set up the default environment for all procs to be forked from init.
1932  * Read the values from the /etc/default/init file, except for PATH.  If
1933  * there's not enough room in the environment array, the environment
1934  * lines that don't fit are silently discarded.
1935  */
1936 void
1937 init_env()
1938 {
1939 	char	line[MAXCMDL];
1940 	FILE	*fp;
1941 	int	inquotes, length, wslength;
1942 	char	*tokp, *cp1, *cp2;
1943 
1944 	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1945 	(void) strcpy(glob_envp[0], DEF_PATH);
1946 	glob_envn = 1;
1947 
1948 	if (rflg) {
1949 		glob_envp[1] =
1950 		    malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1951 		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1952 		++glob_envn;
1953 	} else if (bflg == 1) {
1954 		glob_envp[1] =
1955 		    malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1956 		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1957 		++glob_envn;
1958 	}
1959 
1960 	if ((fp = fopen(ENVFILE, "r")) == NULL) {
1961 		console(B_TRUE,
1962 		    "Cannot open %s. Environment not initialized.\n",
1963 		    ENVFILE);
1964 	} else {
1965 		while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1966 		    glob_envn < MAXENVENT - 2) {
1967 			/*
1968 			 * Toss newline
1969 			 */
1970 			length = strlen(line);
1971 			if (line[length - 1] == '\n')
1972 				line[length - 1] = '\0';
1973 
1974 			/*
1975 			 * Ignore blank or comment lines.
1976 			 */
1977 			if (line[0] == '#' || line[0] == '\0' ||
1978 			    (wslength = strspn(line, " \t\n")) ==
1979 			    strlen(line) ||
1980 			    strchr(line, '#') == line + wslength)
1981 				continue;
1982 
1983 			/*
1984 			 * First make a pass through the line and change
1985 			 * any non-quoted semi-colons to blanks so they
1986 			 * will be treated as token separators below.
1987 			 */
1988 			inquotes = 0;
1989 			for (cp1 = line; *cp1 != '\0'; cp1++) {
1990 				if (*cp1 == '"') {
1991 					if (inquotes == 0)
1992 						inquotes = 1;
1993 					else
1994 						inquotes = 0;
1995 				} else if (*cp1 == ';') {
1996 					if (inquotes == 0)
1997 						*cp1 = ' ';
1998 				}
1999 			}
2000 
2001 			/*
2002 			 * Tokens within the line are separated by blanks
2003 			 *  and tabs.  For each token in the line which
2004 			 * contains a '=' we strip out any quotes and then
2005 			 * stick the token in the environment array.
2006 			 */
2007 			if ((tokp = strtok(line, " \t")) == NULL)
2008 				continue;
2009 			do {
2010 				if (strchr(tokp, '=') == NULL)
2011 					continue;
2012 				length = strlen(tokp);
2013 				while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
2014 					for (cp2 = cp1;
2015 					    cp2 < &tokp[length]; cp2++)
2016 						*cp2 = *(cp2 + 1);
2017 					length--;
2018 				}
2019 
2020 				if (strncmp(tokp, "CMASK=",
2021 				    sizeof ("CMASK=") - 1) == 0) {
2022 					long t;
2023 
2024 					/* We know there's an = */
2025 					t = strtol(strchr(tokp, '=') + 1, NULL,
2026 					    8);
2027 
2028 					/* Sanity */
2029 					if (t <= 077 && t >= 0)
2030 						cmask = (int)t;
2031 					(void) umask(cmask);
2032 					continue;
2033 				}
2034 				glob_envp[glob_envn] =
2035 				    malloc((unsigned)(length + 1));
2036 				(void) strcpy(glob_envp[glob_envn], tokp);
2037 				if (++glob_envn >= MAXENVENT - 1)
2038 					break;
2039 			} while ((tokp = strtok(NULL, " \t")) != NULL);
2040 		}
2041 
2042 		/*
2043 		 * Append a null pointer to the environment array
2044 		 * to mark its end.
2045 		 */
2046 		glob_envp[glob_envn] = NULL;
2047 		(void) fclose(fp);
2048 	}
2049 }
2050 
2051 /*
2052  * boot_init(): Do initialization things that should be done at boot.
2053  */
2054 void
2055 boot_init()
2056 {
2057 	int i;
2058 	struct PROC_TABLE *process, *oprocess;
2059 	struct CMD_LINE	cmd;
2060 	char	line[MAXCMDL];
2061 	char	svc_aux[SVC_AUX_SIZE];
2062 	char	init_svc_fmri[SVC_FMRI_SIZE];
2063 	char *old_path;
2064 	int maxfiles;
2065 
2066 	/* Use INIT_PATH for sysinit cmds */
2067 	old_path = glob_envp[0];
2068 	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2069 	(void) strcpy(glob_envp[0], INIT_PATH);
2070 
2071 	/*
2072 	 * Scan inittab(4) and process the special svc.startd entry, initdefault
2073 	 * and sysinit entries.
2074 	 */
2075 	while (getcmd(&cmd, &line[0]) == TRUE) {
2076 		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
2077 			process_startd_line(&cmd, line);
2078 			(void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
2079 			    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2080 		} else if (cmd.c_action == M_INITDEFAULT) {
2081 			/*
2082 			 * initdefault is no longer meaningful, as the SMF
2083 			 * milestone controls what (legacy) run level we
2084 			 * boot to.
2085 			 */
2086 			console(B_TRUE,
2087 			    "Ignoring legacy \"initdefault\" entry.\n");
2088 		} else if (cmd.c_action == M_SYSINIT) {
2089 			/*
2090 			 * Execute the "sysinit" entry and wait for it to
2091 			 * complete.  No bookkeeping is performed on these
2092 			 * entries because we avoid writing to the file system
2093 			 * until after there has been an chance to check it.
2094 			 */
2095 			if (process = findpslot(&cmd)) {
2096 				(void) sighold(SIGCLD);
2097 				(void) snprintf(svc_aux, SVC_AUX_SIZE,
2098 				    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2099 				(void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
2100 				    SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
2101 				    cmd.c_id);
2102 				if (legacy_tmpl >= 0) {
2103 					(void) ct_pr_tmpl_set_svc_fmri(
2104 					    legacy_tmpl, init_svc_fmri);
2105 					(void) ct_pr_tmpl_set_svc_aux(
2106 					    legacy_tmpl, svc_aux);
2107 				}
2108 
2109 				for (oprocess = process;
2110 				    (process = efork(M_OFF, oprocess,
2111 				    (NAMED|NOCLEANUP))) == NO_ROOM;
2112 				    /* CSTYLED */)
2113 					;
2114 				(void) sigrelse(SIGCLD);
2115 
2116 				if (process == NULLPROC) {
2117 					maxfiles = ulimit(UL_GDESLIM, 0);
2118 
2119 					for (i = 0; i < maxfiles; i++)
2120 						(void) fcntl(i, F_SETFD,
2121 						    FD_CLOEXEC);
2122 					(void) execle(SH, "INITSH", "-c",
2123 					    cmd.c_command,
2124 					    (char *)0, glob_envp);
2125 					console(B_TRUE,
2126 "Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2127 					    cmd.c_command, errno);
2128 					exit(1);
2129 				} else
2130 					while (waitproc(process) == FAILURE)
2131 						;
2132 				process->p_flags = 0;
2133 				st_write();
2134 			}
2135 		}
2136 	}
2137 
2138 	/* Restore the path. */
2139 	free(glob_envp[0]);
2140 	glob_envp[0] = old_path;
2141 
2142 	/*
2143 	 * This will enable st_write() to complain about init_state_file.
2144 	 */
2145 	booting = 0;
2146 
2147 	/*
2148 	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2149 	 * out a correct version.
2150 	 */
2151 	if (write_ioctl)
2152 		write_ioctl_syscon();
2153 
2154 	/*
2155 	 * Start svc.startd(1M), which does most of the work.
2156 	 */
2157 	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2158 		/* Start svc.startd. */
2159 		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2160 			cur_state = SINGLE_USER;
2161 	} else {
2162 		console(B_TRUE, "Absent svc.startd entry or bad "
2163 		    "contract template.  Not starting svc.startd.\n");
2164 		enter_maintenance();
2165 	}
2166 }
2167 
2168 /*
2169  * init_signals(): Initialize all signals to either be caught or ignored.
2170  */
2171 void
2172 init_signals(void)
2173 {
2174 	struct sigaction act;
2175 	int i;
2176 
2177 	/*
2178 	 * Start by ignoring all signals, then selectively re-enable some.
2179 	 * The SIG_IGN disposition will only affect asynchronous signals:
2180 	 * any signal that we trigger synchronously that doesn't end up
2181 	 * being handled by siglvl() will be forcibly delivered by the kernel.
2182 	 */
2183 	for (i = SIGHUP; i <= SIGRTMAX; i++)
2184 		(void) sigset(i, SIG_IGN);
2185 
2186 	/*
2187 	 * Handle all level-changing signals using siglvl() and set sa_mask so
2188 	 * that all level-changing signals are blocked while in siglvl().
2189 	 */
2190 	act.sa_handler = siglvl;
2191 	act.sa_flags = SA_SIGINFO;
2192 	(void) sigemptyset(&act.sa_mask);
2193 
2194 	(void) sigaddset(&act.sa_mask, LVLQ);
2195 	(void) sigaddset(&act.sa_mask, LVL0);
2196 	(void) sigaddset(&act.sa_mask, LVL1);
2197 	(void) sigaddset(&act.sa_mask, LVL2);
2198 	(void) sigaddset(&act.sa_mask, LVL3);
2199 	(void) sigaddset(&act.sa_mask, LVL4);
2200 	(void) sigaddset(&act.sa_mask, LVL5);
2201 	(void) sigaddset(&act.sa_mask, LVL6);
2202 	(void) sigaddset(&act.sa_mask, SINGLE_USER);
2203 	(void) sigaddset(&act.sa_mask, LVLa);
2204 	(void) sigaddset(&act.sa_mask, LVLb);
2205 	(void) sigaddset(&act.sa_mask, LVLc);
2206 
2207 	(void) sigaction(LVLQ, &act, NULL);
2208 	(void) sigaction(LVL0, &act, NULL);
2209 	(void) sigaction(LVL1, &act, NULL);
2210 	(void) sigaction(LVL2, &act, NULL);
2211 	(void) sigaction(LVL3, &act, NULL);
2212 	(void) sigaction(LVL4, &act, NULL);
2213 	(void) sigaction(LVL5, &act, NULL);
2214 	(void) sigaction(LVL6, &act, NULL);
2215 	(void) sigaction(SINGLE_USER, &act, NULL);
2216 	(void) sigaction(LVLa, &act, NULL);
2217 	(void) sigaction(LVLb, &act, NULL);
2218 	(void) sigaction(LVLc, &act, NULL);
2219 
2220 	(void) sigset(SIGALRM, alarmclk);
2221 	alarmclk();
2222 
2223 	(void) sigset(SIGCLD, childeath);
2224 	(void) sigset(SIGPWR, powerfail);
2225 }
2226 
2227 /*
2228  * Set up pipe for "godchildren". If the file exists and is a pipe just open
2229  * it. Else, if the file system is r/w create it.  Otherwise, defer its
2230  * creation and open until after /var/run has been mounted.  This function is
2231  * only called on startup and when explicitly requested via LVLQ.
2232  */
2233 void
2234 setup_pipe()
2235 {
2236 	struct stat stat_buf;
2237 	struct statvfs statvfs_buf;
2238 	struct sigaction act;
2239 
2240 	/*
2241 	 * Always close the previous pipe descriptor as the mounted filesystems
2242 	 * may have changed.
2243 	 */
2244 	if (Pfd >= 0)
2245 		(void) close(Pfd);
2246 
2247 	if ((stat(INITPIPE, &stat_buf) == 0) &&
2248 	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2249 		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2250 	else
2251 		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2252 		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2253 			(void) unlink(INITPIPE);
2254 			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2255 			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2256 		}
2257 
2258 	if (Pfd >= 0) {
2259 		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
2260 		/*
2261 		 * Read pipe in message discard mode.
2262 		 */
2263 		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
2264 
2265 		act.sa_handler = sigpoll;
2266 		act.sa_flags = 0;
2267 		(void) sigemptyset(&act.sa_mask);
2268 		(void) sigaddset(&act.sa_mask, SIGCLD);
2269 		(void) sigaction(SIGPOLL, &act, NULL);
2270 	}
2271 }
2272 
2273 /*
2274  * siglvl - handle an asynchronous signal from init(1M) telling us that we
2275  * should change the current run level.  We set new_state accordingly.
2276  */
2277 void
2278 siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2279 {
2280 	struct PROC_TABLE *process;
2281 	struct sigaction act;
2282 
2283 	/*
2284 	 * If the signal was from the kernel (rather than init(1M)) then init
2285 	 * itself tripped the signal.  That is, we might have a bug and tripped
2286 	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2287 	 * such a case we reset the disposition to SIG_DFL, block all signals
2288 	 * in uc_mask but the current one, and return to the interrupted ucp
2289 	 * to effect an appropriate death.  The kernel will then restart us.
2290 	 *
2291 	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2292 	 * the kernel can send us when it wants to effect an orderly reboot.
2293 	 * For this case we must also verify si_code is zero, rather than a
2294 	 * code such as FPE_INTDIV which a bug might have triggered.
2295 	 */
2296 	if (sip != NULL && SI_FROMKERNEL(sip) &&
2297 	    (sig != SIGFPE || sip->si_code == 0)) {
2298 
2299 		(void) sigemptyset(&act.sa_mask);
2300 		act.sa_handler = SIG_DFL;
2301 		act.sa_flags = 0;
2302 		(void) sigaction(sig, &act, NULL);
2303 
2304 		(void) sigfillset(&ucp->uc_sigmask);
2305 		(void) sigdelset(&ucp->uc_sigmask, sig);
2306 		ucp->uc_flags |= UC_SIGMASK;
2307 
2308 		(void) setcontext(ucp);
2309 	}
2310 
2311 	/*
2312 	 * If the signal received is a LVLQ signal, do not really
2313 	 * change levels, just restate the current level.  If the
2314 	 * signal is not a LVLQ, set the new level to the signal
2315 	 * received.
2316 	 */
2317 	if (sig == LVLQ) {
2318 		new_state = cur_state;
2319 		lvlq_received = B_TRUE;
2320 	} else {
2321 		new_state = sig;
2322 	}
2323 
2324 	/*
2325 	 * Clear all times and repeat counts in the process table
2326 	 * since either the level is changing or the user has editted
2327 	 * the inittab file and wants us to look at it again.
2328 	 * If the user has fixed a typo, we don't want residual timing
2329 	 * data preventing the fixed command line from executing.
2330 	 */
2331 	for (process = proc_table;
2332 	    (process < proc_table + num_proc); process++) {
2333 		process->p_time = 0L;
2334 		process->p_count = 0;
2335 	}
2336 
2337 	/*
2338 	 * Set the flag to indicate that a "user signal" was received.
2339 	 */
2340 	wakeup.w_flags.w_usersignal = 1;
2341 }
2342 
2343 
/*
 * alarmclk() records that the timer has run out by setting time_up to TRUE.
 * init_signals() installs it as the SIGALRM handler; init_signals() and
 * setimer() also call it directly.
 */
static void
alarmclk()
{
	time_up = TRUE;
}
2352 
2353 /*
2354  * childeath_single():
2355  *
2356  * This used to be the SIGCLD handler and it was set with signal()
2357  * (as opposed to sigset()).  When a child exited we'd come to the
2358  * handler, wait for the child, and reenable the handler with
2359  * signal() just before returning.  The implementation of signal()
2360  * checks with waitid() for waitable children and sends a SIGCLD
2361  * if there are some.  If children are exiting faster than the
2362  * handler can run we keep sending signals and the handler never
2363  * gets to return and eventually the stack runs out and init dies.
2364  * To prevent that we set the handler with sigset() so the handler
2365  * doesn't need to be reset, and in childeath() (see below) we
2366  * call childeath_single() as long as there are children to be
2367  * waited for.  If a child exits while init is in the handler a
2368  * SIGCLD will be pending and delivered on return from the handler.
2369  * If the child was already waited for the handler will have nothing
2370  * to do and return, otherwise the child will be waited for.
2371  */
2372 static void
2373 childeath_single(pid_t pid, int status)
2374 {
2375 	struct PROC_TABLE	*process;
2376 	struct pidlist		*pp;
2377 
2378 	/*
2379 	 * Scan the process table to see if we are interested in this process.
2380 	 */
2381 	for (process = proc_table;
2382 	    (process < proc_table + num_proc); process++) {
2383 		if ((process->p_flags & (LIVING|OCCUPIED)) ==
2384 		    (LIVING|OCCUPIED) && process->p_pid == pid) {
2385 
2386 			/*
2387 			 * Mark this process as having died and store the exit
2388 			 * status.  Also set the wakeup flag for a dead child
2389 			 * and break out of the loop.
2390 			 */
2391 			process->p_flags &= ~LIVING;
2392 			process->p_exit = (short)status;
2393 			wakeup.w_flags.w_childdeath = 1;
2394 
2395 			return;
2396 		}
2397 	}
2398 
2399 	/*
2400 	 * No process was found above, look through auxiliary list.
2401 	 */
2402 	(void) sighold(SIGPOLL);
2403 	pp = Plhead;
2404 	while (pp) {
2405 		if (pid > pp->pl_pid) {
2406 			/*
2407 			 * Keep on looking.
2408 			 */
2409 			pp = pp->pl_next;
2410 			continue;
2411 		} else if (pid < pp->pl_pid) {
2412 			/*
2413 			 * Not in the list.
2414 			 */
2415 			break;
2416 		} else {
2417 			/*
2418 			 * This is a dead "godchild".
2419 			 */
2420 			pp->pl_dflag = 1;
2421 			pp->pl_exit = (short)status;
2422 			wakeup.w_flags.w_childdeath = 1;
2423 			Gchild = 1;	/* Notice to call cleanaux(). */
2424 			break;
2425 		}
2426 	}
2427 
2428 	(void) sigrelse(SIGPOLL);
2429 }
2430 
/*
 * childeath() is the SIGCLD handler.  It reaps every child that can be
 * waited for without blocking and passes each pid and status to
 * childeath_single().
 */
/* ARGSUSED */
static void
childeath(int signo)
{
	int	wstat;
	pid_t	reaped;

	for (;;) {
		reaped = waitpid(-1, &wstat, WNOHANG);
		if (reaped <= 0)
			break;
		childeath_single(reaped, wstat);
	}
}
2441 
/*
 * powerfail() is installed by init_signals() as the SIGPWR handler.  It
 * lowers init's nice value by 19 and sets the w_powerhit wakeup flag.
 */
static void
powerfail()
{
	(void) nice(-19);
	wakeup.w_flags.w_powerhit = 1;
}
2448 
2449 /*
2450  * efork() forks a child and the parent inserts the process in its table
2451  * of processes that are directly a result of forks that it has performed.
2452  * The child just changes the "global" with the process id for this process
2453  * to it's new value.
2454  * If efork() is called with a pointer into the proc_table it uses that slot,
2455  * otherwise it searches for a free slot.  Regardless of how it was called,
2456  * it returns the pointer to the proc_table entry
2457  *
2458  * The SIGCLD signal is blocked (held) before calling efork()
2459  * and is unblocked (released) after efork() returns.
2460  *
2461  * Ideally, this should be rewritten to use modern signal semantics.
2462  */
2463 static struct PROC_TABLE *
2464 efork(int action, struct PROC_TABLE *process, int modes)
2465 {
2466 	pid_t	childpid;
2467 	struct PROC_TABLE *proc;
2468 	int		i;
2469 	/*
2470 	 * Freshen up the proc_table, removing any entries for dead processes
2471 	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
2472 	 */
2473 	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2474 		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2475 		    (OCCUPIED)) {
2476 			/*
2477 			 * Is this a named process?
2478 			 * If so, do the necessary bookkeeping.
2479 			 */
2480 			if (proc->p_flags & NAMED)
2481 				(void) account(DEAD_PROCESS, proc, NULL);
2482 
2483 			/*
2484 			 * Free this entry for new usage.
2485 			 */
2486 			proc->p_flags = 0;
2487 		}
2488 	}
2489 
2490 	while ((childpid = fork()) == FAILURE) {
2491 		/*
2492 		 * Shorten the alarm timer in case someone else's child dies
2493 		 * and free up a slot in the process table.
2494 		 */
2495 		setimer(5);
2496 
2497 		/*
2498 		 * Wait for some children to die.  Since efork()
2499 		 * is always called with SIGCLD blocked, unblock
2500 		 * it here so that child death signals can come in.
2501 		 */
2502 		(void) sigrelse(SIGCLD);
2503 		(void) pause();
2504 		(void) sighold(SIGCLD);
2505 		setimer(0);
2506 	}
2507 
2508 	if (childpid != 0) {
2509 
2510 		if (process == NULLPROC) {
2511 			/*
2512 			 * No proc table pointer specified so search
2513 			 * for a free slot.
2514 			 */
2515 			for (process = proc_table;  process->p_flags != 0 &&
2516 			    (process < proc_table + num_proc); process++)
2517 					;
2518 
2519 			if (process == (proc_table + num_proc)) {
2520 				int old_proc_table_size = num_proc;
2521 
2522 				/* Increase the process table size */
2523 				increase_proc_table_size();
2524 				if (old_proc_table_size == num_proc) {
2525 					/* didn't grow: memory failure */
2526 					return (NO_ROOM);
2527 				} else {
2528 					process =
2529 					    proc_table + old_proc_table_size;
2530 				}
2531 			}
2532 
2533 			process->p_time = 0L;
2534 			process->p_count = 0;
2535 		}
2536 		process->p_id[0] = '\0';
2537 		process->p_id[1] = '\0';
2538 		process->p_id[2] = '\0';
2539 		process->p_id[3] = '\0';
2540 		process->p_pid = childpid;
2541 		process->p_flags = (LIVING | OCCUPIED | modes);
2542 		process->p_exit = 0;
2543 
2544 		st_write();
2545 	} else {
2546 		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2547 			(void) setpgrp();
2548 
2549 		process = NULLPROC;
2550 
2551 		/*
2552 		 * Reset all signals to the system defaults.
2553 		 */
2554 		for (i = SIGHUP; i <= SIGRTMAX; i++)
2555 			(void) sigset(i, SIG_DFL);
2556 
2557 		/*
2558 		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
2559 		 * SIGTTIN, and SIGTSTP to SIG_IGN.
2560 		 *
2561 		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2562 		 * for backward compatibility.
2563 		 */
2564 		(void) sigset(SIGTTIN, SIG_IGN);
2565 		(void) sigset(SIGTTOU, SIG_IGN);
2566 		(void) sigset(SIGTSTP, SIG_IGN);
2567 		(void) sigset(SIGXCPU, SIG_IGN);
2568 		(void) sigset(SIGXFSZ, SIG_IGN);
2569 	}
2570 	return (process);
2571 }
2572 
2573 
2574 /*
2575  * waitproc() waits for a specified process to die.  For this function to
2576  * work, the specified process must already in the proc_table.  waitproc()
2577  * returns the exit status of the specified process when it dies.
2578  */
2579 static long
2580 waitproc(struct PROC_TABLE *process)
2581 {
2582 	int		answer;
2583 	sigset_t	oldmask, newmask, zeromask;
2584 
2585 	(void) sigemptyset(&zeromask);
2586 	(void) sigemptyset(&newmask);
2587 
2588 	(void) sigaddset(&newmask, SIGCLD);
2589 
2590 	/* Block SIGCLD and save the current signal mask */
2591 	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2592 		perror("SIG_BLOCK error");
2593 
2594 	/*
2595 	 * Wait around until the process dies.
2596 	 */
2597 	if (process->p_flags & LIVING)
2598 		(void) sigsuspend(&zeromask);
2599 
2600 	/* Reset signal mask to unblock SIGCLD */
2601 	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2602 		perror("SIG_SETMASK error");
2603 
2604 	if (process->p_flags & LIVING)
2605 		return (FAILURE);
2606 
2607 	/*
2608 	 * Make sure to only return 16 bits so that answer will always
2609 	 * be positive whenever the process of interest really died.
2610 	 */
2611 	answer = (process->p_exit & 0xffff);
2612 
2613 	/*
2614 	 * Free the slot in the proc_table.
2615 	 */
2616 	process->p_flags = 0;
2617 	return (answer);
2618 }
2619 
2620 /*
2621  * notify_pam_dead(): calls into the PAM framework to close the given session.
2622  */
2623 static void
2624 notify_pam_dead(struct utmpx *up)
2625 {
2626 	pam_handle_t *pamh;
2627 	char user[sizeof (up->ut_user) + 1];
2628 	char ttyn[sizeof (up->ut_line) + 1];
2629 	char host[sizeof (up->ut_host) + 1];
2630 
2631 	/*
2632 	 * PAM does not take care of updating utmpx/wtmpx.
2633 	 */
2634 	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
2635 	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2636 	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
2637 
2638 	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2639 		(void) pam_set_item(pamh, PAM_TTY, ttyn);
2640 		(void) pam_set_item(pamh, PAM_RHOST, host);
2641 		(void) pam_close_session(pamh, 0);
2642 		(void) pam_end(pamh, PAM_SUCCESS);
2643 	}
2644 }
2645 
2646 /*
2647  * Check you can access utmpx (As / may be read-only and
2648  * /var may not be mounted yet).
2649  */
2650 static int
2651 access_utmpx(void)
2652 {
2653 	do {
2654 		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2655 	} while (!utmpx_ok && errno == EINTR);
2656 
2657 	return (utmpx_ok);
2658 }
2659 
/*
 * account() updates entries in utmpx and appends new entries to the end of
 * wtmpx (assuming they exist).  The program argument indicates the name of
 * program if INIT_PROCESS, otherwise should be NULL.
 *
 * state is stored as the ut_type of the record written; process supplies
 * the id, pid and exit status for it.
 *
 * account() only blocks for INIT_PROCESS requests.
 *
 * Returns 0 on success, or -1 if utmpx is not accessible or the utmpx
 * entry could not be written.
 */
static int
account(short state, struct PROC_TABLE *process, char *program)
{
	struct utmpx utmpbuf, *u, *oldu;
	int tmplen;
	char fail_buf[UT_LINE_SZ];
	sigset_t block, unblock;

	/* Re-probe utmpx only when the last probe said it was unusable. */
	if (!utmpx_ok && !access_utmpx()) {
		return (-1);
	}

	/*
	 * Set up the prototype for the utmp structure we want to write.
	 */
	u = &utmpbuf;
	(void) memset(u, 0, sizeof (struct utmpx));

	/*
	 * Fill in the various fields of the utmp structure.
	 */
	u->ut_id[0] = process->p_id[0];
	u->ut_id[1] = process->p_id[1];
	u->ut_id[2] = process->p_id[2];
	u->ut_id[3] = process->p_id[3];
	u->ut_pid = process->p_pid;

	/*
	 * Fill the "ut_exit" structure.
	 */
	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
	u->ut_type = state;

	(void) time(&u->ut_tv.tv_sec);

	/*
	 * Block signals for utmp update.  The previous mask is saved in
	 * "unblock" and restored on every return path below.
	 */
	(void) sigfillset(&block);
	(void) sigprocmask(SIG_BLOCK, &block, &unblock);

	/*
	 * See if there already is such an entry in the "utmpx" file.
	 */
	setutxent();	/* Start at beginning of utmpx file. */

	if ((oldu = getutxid(u)) != NULL) {
		/*
		 * Copy in the old "user", "line" and "host" fields
		 * to our new structure.
		 */
		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
		/*
		 * ut_syslen is the host name length including its NUL,
		 * capped at the field size, or 0 when there is no host.
		 */
		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
		    min(tmplen + 1, sizeof (u->ut_host)) : 0;

		/* A user session that has just died is closed with PAM. */
		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
			notify_pam_dead(oldu);
		}
	}

	/*
	 * Perform special accounting. Insert the special string into the
	 * ut_line array. For INIT_PROCESSes put in the name of the
	 * program in the "ut_user" field.  fail_buf holds the label used
	 * in the console message if the utmpx write fails.
	 */
	switch (state) {
	case INIT_PROCESS:
		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
		(void) strcpy(fail_buf, "INIT_PROCESS");
		break;

	default:
		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
		break;
	}

	/*
	 * Write out the updated entry to utmpx file.
	 */
	if (pututxline(u) == NULL) {
		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
		    fail_buf, strerror(errno));
		endutxent();
		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
		return (-1);
	}

	/*
	 * If we're able to write to utmpx, then attempt to add to the
	 * end of the wtmpx file.
	 */
	updwtmpx(WTMPX, u);

	endutxent();

	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);

	return (0);
}
2771 
2772 static void
2773 clearent(pid_t pid, short status)
2774 {
2775 	struct utmpx *up;
2776 	sigset_t block, unblock;
2777 
2778 	/*
2779 	 * Block signals for utmp update.
2780 	 */
2781 	(void) sigfillset(&block);
2782 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2783 
2784 	/*
2785 	 * No error checking for now.
2786 	 */
2787 
2788 	setutxent();
2789 	while (up = getutxent()) {
2790 		if (up->ut_pid == pid) {
2791 			if (up->ut_type == DEAD_PROCESS) {
2792 				/*
2793 				 * Cleaned up elsewhere.
2794 				 */
2795 				continue;
2796 			}
2797 
2798 			notify_pam_dead(up);
2799 
2800 			up->ut_type = DEAD_PROCESS;
2801 			up->ut_exit.e_termination = WTERMSIG(status);
2802 			up->ut_exit.e_exit = WEXITSTATUS(status);
2803 			(void) time(&up->ut_tv.tv_sec);
2804 
2805 			(void) pututxline(up);
2806 			/*
2807 			 * Now attempt to add to the end of the
2808 			 * wtmp and wtmpx files.  Do not create
2809 			 * if they don't already exist.
2810 			 */
2811 			updwtmpx(WTMPX, up);
2812 
2813 			break;
2814 		}
2815 	}
2816 
2817 	endutxent();
2818 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2819 }
2820 
/*
 * prog_name() picks the first word (or unix path name) out of "string" and
 * returns the last component of it, truncated to UT_USER_SZ characters.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call.  If the first character after any leading blanks and tabs cannot
 * start a word, a pointer to an empty string is returned instead.
 */
static char *
prog_name(char *string)
{
	static char word[UT_USER_SZ + 1];
	char	*base, *end, *scan;
	size_t	len;
	char	first;

	/* Step over leading spaces and tabs. */
	string += strspn(string, " \t");

	/*
	 * A word may only start with '.', '/', '_', a letter or a digit.
	 */
	first = *string;
	if (!(first == '.' || first == '/' || first == '_' ||
	    (first >= 'a' && first <= 'z') ||
	    (first >= 'A' && first <= 'Z') ||
	    (first >= '0' && first <= '9')))
		return ("");

	/*
	 * The word runs up to the first ' ', '\t', '\n' or the end of the
	 * string.  Within it, "base" ends up just past the last '/', i.e.
	 * at the start of the final path component.
	 */
	end = string + strcspn(string, " \t\n");
	base = string;
	for (scan = string; scan < end; scan++) {
		if (*scan == '/')
			base = scan + 1;
	}

	/*
	 * Copy at most UT_USER_SZ characters of that component and
	 * terminate the result.
	 */
	len = (size_t)(end - base);
	if (len > UT_USER_SZ)
		len = UT_USER_SZ;
	(void) memcpy(word, base, len);
	word[len] = '\0';

	return (word);
}
2871 
2872 
2873 /*
2874  * realcon() returns a nonzero value if there is a character device
2875  * associated with SYSCON that has the same device number as CONSOLE.
2876  */
2877 static int
2878 realcon()
2879 {
2880 	struct stat sconbuf, conbuf;
2881 
2882 	if (stat(SYSCON, &sconbuf) != -1 &&
2883 	    stat(CONSOLE, &conbuf) != -1 &&
2884 	    S_ISCHR(sconbuf.st_mode) &&
2885 	    S_ISCHR(conbuf.st_mode) &&
2886 	    sconbuf.st_rdev == conbuf.st_rdev) {
2887 		return (1);
2888 	} else {
2889 		return (0);
2890 	}
2891 }
2892 
2893 
2894 /*
2895  * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2896  * Returns true if the IOCTLSYSCON file needs to be written (with
2897  * write_ioctl_syscon() below)
2898  */
2899 static int
2900 get_ioctl_syscon()
2901 {
2902 	FILE	*fp;
2903 	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
2904 	int		i, valid_format = 0;
2905 
2906 	/*
2907 	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
2908 	 */
2909 	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2910 		stored_syscon_termios = dflt_termios;
2911 		console(B_TRUE,
2912 		    "warning:%s does not exist, default settings assumed\n",
2913 		    IOCTLSYSCON);
2914 	} else {
2915 
2916 		i = fscanf(fp,
2917 	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2918 		    &iflags, &oflags, &cflags, &lflags,
2919 		    &cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2920 		    &cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2921 		    &cc[14], &cc[15], &cc[16], &cc[17]);
2922 
2923 		if (i == 22) {
2924 			stored_syscon_termios.c_iflag = iflags;
2925 			stored_syscon_termios.c_oflag = oflags;
2926 			stored_syscon_termios.c_cflag = cflags;
2927 			stored_syscon_termios.c_lflag = lflags;
2928 			for (i = 0; i < 18; i++)
2929 				stored_syscon_termios.c_cc[i] = (char)cc[i];
2930 			valid_format = 1;
2931 		} else if (i == 13) {
2932 		rewind(fp);
2933 		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2934 		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2935 		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2936 
2937 		/*
2938 		 * If the file is formatted properly, use the values to
2939 		 * initialize the console terminal condition.
2940 		 */
2941 		stored_syscon_termios.c_iflag = (ushort_t)iflags;
2942 		stored_syscon_termios.c_oflag = (ushort_t)oflags;
2943 		stored_syscon_termios.c_cflag = (ushort_t)cflags;
2944 		stored_syscon_termios.c_lflag = (ushort_t)lflags;
2945 		for (i = 0; i < 8; i++)
2946 			stored_syscon_termios.c_cc[i] = (char)cc[i];
2947 		valid_format = 1;
2948 		}
2949 		(void) fclose(fp);
2950 
2951 		/* If the file is badly formatted, use the default settings. */
2952 		if (!valid_format)
2953 			stored_syscon_termios = dflt_termios;
2954 	}
2955 
2956 	/* If the file had a bad format, rewrite it later. */
2957 	return (!valid_format);
2958 }
2959 
2960 
2961 static void
2962 write_ioctl_syscon()
2963 {
2964 	FILE *fp;
2965 	int i;
2966 
2967 	(void) unlink(SYSCON);
2968 	(void) link(SYSTTY, SYSCON);
2969 	(void) umask(022);
2970 	fp = fopen(IOCTLSYSCON, "w");
2971 
2972 	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2973 	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2974 	    stored_syscon_termios.c_lflag);
2975 	for (i = 0; i < 8; ++i)
2976 		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2977 	(void) putc('\n', fp);
2978 
2979 	(void) fflush(fp);
2980 	(void) fsync(fileno(fp));
2981 	(void) fclose(fp);
2982 	(void) umask(cmask);
2983 }
2984 
2985 
2986 /*
2987  * void console(boolean_t, char *, ...)
2988  *   Outputs the requested message to the system console.  Note that the number
2989  *   of arguments passed to console() should be determined by the print format.
2990  *
2991  *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2992  *   message.
2993  *
2994  *   To make sure we write to the console in a sane fashion, we use the modes
2995  *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2996  *   Afterwards we restore whatever modes were already there.
2997  */
2998 /* PRINTFLIKE2 */
2999 static void
3000 console(boolean_t prefix, char *format, ...)
3001 {
3002 	char	outbuf[BUFSIZ];
3003 	va_list	args;
3004 	int fd, getret;
3005 	struct termios old_syscon_termios;
3006 	FILE *f;
3007 
3008 	/*
3009 	 * We open SYSCON anew each time in case it has changed (see
3010 	 * userinit()).
3011 	 */
3012 	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
3013 	    (f = fdopen(fd, "r+")) == NULL) {
3014 		if (prefix)
3015 			syslog(LOG_WARNING, "INIT: ");
3016 		va_start(args, format);
3017 		vsyslog(LOG_WARNING, format, args);
3018 		va_end(args);
3019 		if (fd >= 0)
3020 			(void) close(fd);
3021 		return;
3022 	}
3023 	setbuf(f, &outbuf[0]);
3024 
3025 	getret = tcgetattr(fd, &old_syscon_termios);
3026 	old_syscon_termios.c_cflag &= ~HUPCL;
3027 	if (realcon())
3028 		/* Don't overwrite cflag of real console. */
3029 		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
3030 
3031 	stored_syscon_termios.c_cflag &= ~HUPCL;
3032 
3033 	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
3034 
3035 	if (prefix)
3036 		(void) fprintf(f, "\nINIT: ");
3037 	va_start(args, format);
3038 	(void) vfprintf(f, format, args);
3039 	va_end(args);
3040 
3041 	if (getret == 0)
3042 		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
3043 
3044 	(void) fclose(f);
3045 }
3046 
3047 /*
3048  * timer() is a substitute for sleep() which uses alarm() and pause().
3049  */
3050 static void
3051 timer(int waitime)
3052 {
3053 	setimer(waitime);
3054 	while (time_up == FALSE)
3055 		(void) pause();
3056 }
3057 
3058 static void
3059 setimer(int timelimit)
3060 {
3061 	alarmclk();
3062 	(void) alarm(timelimit);
3063 	time_up = (timelimit ? FALSE : TRUE);
3064 }
3065 
3066 /*
3067  * Fails with
3068  *   ENOMEM - out of memory
3069  *   ECONNABORTED - repository connection broken
3070  *   EPERM - permission denied
3071  *   EACCES - backend access denied
3072  *   EROFS - backend readonly
3073  */
3074 static int
3075 get_or_add_startd(scf_instance_t *inst)
3076 {
3077 	scf_handle_t *h;
3078 	scf_scope_t *scope = NULL;
3079 	scf_service_t *svc = NULL;
3080 	int ret = 0;
3081 
3082 	h = scf_instance_handle(inst);
3083 
3084 	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
3085 	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
3086 		return (0);
3087 
3088 	switch (scf_error()) {
3089 	case SCF_ERROR_CONNECTION_BROKEN:
3090 		return (ECONNABORTED);
3091 
3092 	case SCF_ERROR_NOT_FOUND:
3093 		break;
3094 
3095 	case SCF_ERROR_HANDLE_MISMATCH:
3096 	case SCF_ERROR_INVALID_ARGUMENT:
3097 	case SCF_ERROR_CONSTRAINT_VIOLATED:
3098 	default:
3099 		bad_error("scf_handle_decode_fmri", scf_error());
3100 	}
3101 
3102 	/* Make sure we're right, since we're adding piece-by-piece. */
3103 	assert(strcmp(SCF_SERVICE_STARTD,
3104 	    "svc:/system/svc/restarter:default") == 0);
3105 
3106 	if ((scope = scf_scope_create(h)) == NULL ||
3107 	    (svc = scf_service_create(h)) == NULL) {
3108 		ret = ENOMEM;
3109 		goto out;
3110 	}
3111 
3112 get_scope:
3113 	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
3114 		switch (scf_error()) {
3115 		case SCF_ERROR_CONNECTION_BROKEN:
3116 			ret = ECONNABORTED;
3117 			goto out;
3118 
3119 		case SCF_ERROR_NOT_FOUND:
3120 			(void) fputs(gettext(
3121 			    "smf(5) repository missing local scope.\n"),
3122 			    stderr);
3123 			exit(1);
3124 			/* NOTREACHED */
3125 
3126 		case SCF_ERROR_HANDLE_MISMATCH:
3127 		case SCF_ERROR_INVALID_ARGUMENT:
3128 		default:
3129 			bad_error("scf_handle_get_scope", scf_error());
3130 		}
3131 	}
3132 
3133 get_svc:
3134 	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
3135 		switch (scf_error()) {
3136 		case SCF_ERROR_CONNECTION_BROKEN:
3137 			ret = ECONNABORTED;
3138 			goto out;
3139 
3140 		case SCF_ERROR_DELETED:
3141 			goto get_scope;
3142 
3143 		case SCF_ERROR_NOT_FOUND:
3144 			break;
3145 
3146 		case SCF_ERROR_HANDLE_MISMATCH:
3147 		case SCF_ERROR_INVALID_ARGUMENT:
3148 		case SCF_ERROR_NOT_SET:
3149 		default:
3150 			bad_error("scf_scope_get_service", scf_error());
3151 		}
3152 
3153 add_svc:
3154 		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
3155 		    0) {
3156 			switch (scf_error()) {
3157 			case SCF_ERROR_CONNECTION_BROKEN:
3158 				ret = ECONNABORTED;
3159 				goto out;
3160 
3161 			case SCF_ERROR_EXISTS:
3162 				goto get_svc;
3163 
3164 			case SCF_ERROR_PERMISSION_DENIED:
3165 				ret = EPERM;
3166 				goto out;
3167 
3168 			case SCF_ERROR_BACKEND_ACCESS:
3169 				ret = EACCES;
3170 				goto out;
3171 
3172 			case SCF_ERROR_BACKEND_READONLY:
3173 				ret = EROFS;
3174 				goto out;
3175 
3176 			case SCF_ERROR_HANDLE_MISMATCH:
3177 			case SCF_ERROR_INVALID_ARGUMENT:
3178 			case SCF_ERROR_NOT_SET:
3179 			default:
3180 				bad_error("scf_scope_add_service", scf_error());
3181 			}
3182 		}
3183 	}
3184 
3185 get_inst:
3186 	if (scf_service_get_instance(svc, "default", inst) != 0) {
3187 		switch (scf_error()) {
3188 		case SCF_ERROR_CONNECTION_BROKEN:
3189 			ret = ECONNABORTED;
3190 			goto out;
3191 
3192 		case SCF_ERROR_DELETED:
3193 			goto add_svc;
3194 
3195 		case SCF_ERROR_NOT_FOUND:
3196 			break;
3197 
3198 		case SCF_ERROR_HANDLE_MISMATCH:
3199 		case SCF_ERROR_INVALID_ARGUMENT:
3200 		case SCF_ERROR_NOT_SET:
3201 		default:
3202 			bad_error("scf_service_get_instance", scf_error());
3203 		}
3204 
3205 		if (scf_service_add_instance(svc, "default", inst) !=
3206 		    0) {
3207 			switch (scf_error()) {
3208 			case SCF_ERROR_CONNECTION_BROKEN:
3209 				ret = ECONNABORTED;
3210 				goto out;
3211 
3212 			case SCF_ERROR_DELETED:
3213 				goto add_svc;
3214 
3215 			case SCF_ERROR_EXISTS:
3216 				goto get_inst;
3217 
3218 			case SCF_ERROR_PERMISSION_DENIED:
3219 				ret = EPERM;
3220 				goto out;
3221 
3222 			case SCF_ERROR_BACKEND_ACCESS:
3223 				ret = EACCES;
3224 				goto out;
3225 
3226 			case SCF_ERROR_BACKEND_READONLY:
3227 				ret = EROFS;
3228 				goto out;
3229 
3230 			case SCF_ERROR_HANDLE_MISMATCH:
3231 			case SCF_ERROR_INVALID_ARGUMENT:
3232 			case SCF_ERROR_NOT_SET:
3233 			default:
3234 				bad_error("scf_service_add_instance",
3235 				    scf_error());
3236 			}
3237 		}
3238 	}
3239 
3240 	ret = 0;
3241 
3242 out:
3243 	scf_service_destroy(svc);
3244 	scf_scope_destroy(scope);
3245 	return (ret);
3246 }
3247 
3248 /*
3249  * Fails with
3250  *   ECONNABORTED - repository connection broken
3251  *   ECANCELED - the transaction's property group was deleted
3252  */
3253 static int
3254 transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3255     const char *pname, scf_type_t type)
3256 {
3257 change_type:
3258 	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3259 		return (0);
3260 
3261 	switch (scf_error()) {
3262 	case SCF_ERROR_CONNECTION_BROKEN:
3263 		return (ECONNABORTED);
3264 
3265 	case SCF_ERROR_DELETED:
3266 		return (ECANCELED);
3267 
3268 	case SCF_ERROR_NOT_FOUND:
3269 		goto new;
3270 
3271 	case SCF_ERROR_HANDLE_MISMATCH:
3272 	case SCF_ERROR_INVALID_ARGUMENT:
3273 	case SCF_ERROR_NOT_BOUND:
3274 	case SCF_ERROR_NOT_SET:
3275 	default:
3276 		bad_error("scf_transaction_property_change_type", scf_error());
3277 	}
3278 
3279 new:
3280 	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3281 		return (0);
3282 
3283 	switch (scf_error()) {
3284 	case SCF_ERROR_CONNECTION_BROKEN:
3285 		return (ECONNABORTED);
3286 
3287 	case SCF_ERROR_DELETED:
3288 		return (ECANCELED);
3289 
3290 	case SCF_ERROR_EXISTS:
3291 		goto change_type;
3292 
3293 	case SCF_ERROR_HANDLE_MISMATCH:
3294 	case SCF_ERROR_INVALID_ARGUMENT:
3295 	case SCF_ERROR_NOT_BOUND:
3296 	case SCF_ERROR_NOT_SET:
3297 	default:
3298 		bad_error("scf_transaction_property_new", scf_error());
3299 		/* NOTREACHED */
3300 	}
3301 }
3302 
3303 static void
3304 scferr(void)
3305 {
3306 	switch (scf_error()) {
3307 	case SCF_ERROR_NO_MEMORY:
3308 		console(B_TRUE, gettext("Out of memory.\n"));
3309 		break;
3310 
3311 	case SCF_ERROR_CONNECTION_BROKEN:
3312 		console(B_TRUE, gettext(
3313 		    "Connection to smf(5) repository server broken.\n"));
3314 		break;
3315 
3316 	case SCF_ERROR_NO_RESOURCES:
3317 		console(B_TRUE, gettext(
3318 		    "smf(5) repository server is out of memory.\n"));
3319 		break;
3320 
3321 	case SCF_ERROR_PERMISSION_DENIED:
3322 		console(B_TRUE, gettext("Insufficient privileges.\n"));
3323 		break;
3324 
3325 	default:
3326 		console(B_TRUE, gettext("libscf error: %s\n"),
3327 		    scf_strerror(scf_error()));
3328 	}
3329 }
3330 
3331 static void
3332 lscf_set_runlevel(char rl)
3333 {
3334 	scf_handle_t *h;
3335 	scf_instance_t *inst = NULL;
3336 	scf_propertygroup_t *pg = NULL;
3337 	scf_transaction_t *tx = NULL;
3338 	scf_transaction_entry_t *ent = NULL;
3339 	scf_value_t *val = NULL;
3340 	char buf[2];
3341 	int r;
3342 
3343 	h = scf_handle_create(SCF_VERSION);
3344 	if (h == NULL) {
3345 		scferr();
3346 		return;
3347 	}
3348 
3349 	if (scf_handle_bind(h) != 0) {
3350 		switch (scf_error()) {
3351 		case SCF_ERROR_NO_SERVER:
3352 			console(B_TRUE,
3353 			    gettext("smf(5) repository server not running.\n"));
3354 			goto bail;
3355 
3356 		default:
3357 			scferr();
3358 			goto bail;
3359 		}
3360 	}
3361 
3362 	if ((inst = scf_instance_create(h)) == NULL ||
3363 	    (pg = scf_pg_create(h)) == NULL ||
3364 	    (val = scf_value_create(h)) == NULL ||
3365 	    (tx = scf_transaction_create(h)) == NULL ||
3366 	    (ent = scf_entry_create(h)) == NULL) {
3367 		scferr();
3368 		goto bail;
3369 	}
3370 
3371 get_inst:
3372 	r = get_or_add_startd(inst);
3373 	switch (r) {
3374 	case 0:
3375 		break;
3376 
3377 	case ENOMEM:
3378 	case ECONNABORTED:
3379 	case EPERM:
3380 	case EACCES:
3381 	case EROFS:
3382 		scferr();
3383 		goto bail;
3384 	default:
3385 		bad_error("get_or_add_startd", r);
3386 	}
3387 
3388 get_pg:
3389 	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
3390 		switch (scf_error()) {
3391 		case SCF_ERROR_CONNECTION_BROKEN:
3392 			scferr();
3393 			goto bail;
3394 
3395 		case SCF_ERROR_DELETED:
3396 			goto get_inst;
3397 
3398 		case SCF_ERROR_NOT_FOUND:
3399 			break;
3400 
3401 		case SCF_ERROR_HANDLE_MISMATCH:
3402 		case SCF_ERROR_INVALID_ARGUMENT:
3403 		case SCF_ERROR_NOT_SET:
3404 		default:
3405 			bad_error("scf_instance_get_pg", scf_error());
3406 		}
3407 
3408 add_pg:
3409 		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
3410 		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
3411 		    0) {
3412 			switch (scf_error()) {
3413 			case SCF_ERROR_CONNECTION_BROKEN:
3414 			case SCF_ERROR_PERMISSION_DENIED:
3415 			case SCF_ERROR_BACKEND_ACCESS:
3416 				scferr();
3417 				goto bail;
3418 
3419 			case SCF_ERROR_DELETED:
3420 				goto get_inst;
3421 
3422 			case SCF_ERROR_EXISTS:
3423 				goto get_pg;
3424 
3425 			case SCF_ERROR_HANDLE_MISMATCH:
3426 			case SCF_ERROR_INVALID_ARGUMENT:
3427 			case SCF_ERROR_NOT_SET:
3428 			default:
3429 				bad_error("scf_instance_add_pg", scf_error());
3430 			}
3431 		}
3432 	}
3433 
3434 	buf[0] = rl;
3435 	buf[1] = '\0';
3436 	r = scf_value_set_astring(val, buf);
3437 	assert(r == 0);
3438 
3439 	for (;;) {
3440 		if (scf_transaction_start(tx, pg) != 0) {
3441 			switch (scf_error()) {
3442 			case SCF_ERROR_CONNECTION_BROKEN:
3443 			case SCF_ERROR_PERMISSION_DENIED:
3444 			case SCF_ERROR_BACKEND_ACCESS:
3445 				scferr();
3446 				goto bail;
3447 
3448 			case SCF_ERROR_DELETED:
3449 				goto add_pg;
3450 
3451 			case SCF_ERROR_HANDLE_MISMATCH:
3452 			case SCF_ERROR_NOT_BOUND:
3453 			case SCF_ERROR_IN_USE:
3454 			case SCF_ERROR_NOT_SET:
3455 			default:
3456 				bad_error("scf_transaction_start", scf_error());
3457 			}
3458 		}
3459 
3460 		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
3461 		switch (r) {
3462 		case 0:
3463 			break;
3464 
3465 		case ECONNABORTED:
3466 			scferr();
3467 			goto bail;
3468 
3469 		case ECANCELED:
3470 			scf_transaction_reset(tx);
3471 			goto add_pg;
3472 
3473 		default:
3474 			bad_error("transaction_add_set", r);
3475 		}
3476 
3477 		r = scf_entry_add_value(ent, val);
3478 		assert(r == 0);
3479 
3480 		r = scf_transaction_commit(tx);
3481 		if (r == 1)
3482 			break;
3483 
3484 		if (r != 0) {
3485 			switch (scf_error()) {
3486 			case SCF_ERROR_CONNECTION_BROKEN:
3487 			case SCF_ERROR_PERMISSION_DENIED:
3488 			case SCF_ERROR_BACKEND_ACCESS:
3489 			case SCF_ERROR_BACKEND_READONLY:
3490 				scferr();
3491 				goto bail;
3492 
3493 			case SCF_ERROR_DELETED:
3494 				scf_transaction_reset(tx);
3495 				goto add_pg;
3496 
3497 			case SCF_ERROR_INVALID_ARGUMENT:
3498 			case SCF_ERROR_NOT_BOUND:
3499 			case SCF_ERROR_NOT_SET:
3500 			default:
3501 				bad_error("scf_transaction_commit",
3502 				    scf_error());
3503 			}
3504 		}
3505 
3506 		scf_transaction_reset(tx);
3507 		(void) scf_pg_update(pg);
3508 	}
3509 
3510 bail:
3511 	scf_transaction_destroy(tx);
3512 	scf_entry_destroy(ent);
3513 	scf_value_destroy(val);
3514 	scf_pg_destroy(pg);
3515 	scf_instance_destroy(inst);
3516 
3517 	(void) scf_handle_unbind(h);
3518 	scf_handle_destroy(h);
3519 }
3520 
3521 /*
3522  * Function to handle requests from users to main init running as process 1.
3523  */
3524 static void
3525 userinit(int argc, char **argv)
3526 {
3527 	FILE	*fp;
3528 	char	*ln;
3529 	int	init_signal;
3530 	struct stat	sconbuf, conbuf;
3531 	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3532 
3533 	/*
3534 	 * We are a user invoked init.  Is there an argument and is it
3535 	 * a single character?  If not, print usage message and quit.
3536 	 */
3537 	if (argc != 2 || argv[1][1] != '\0') {
3538 		(void) fprintf(stderr, usage_msg);
3539 		exit(0);
3540 	}
3541 
3542 	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3543 		(void) fprintf(stderr, usage_msg);
3544 		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3545 		    argv[1]);
3546 		exit(1);
3547 	}
3548 
3549 	if (init_signal == SINGLE_USER) {
3550 		/*
3551 		 * Make sure this process is talking to a legal tty line
3552 		 * and that /dev/syscon is linked to this line.
3553 		 */
3554 		ln = ttyname(0);	/* Get the name of tty */
3555 		if (ln == NULL) {
3556 			(void) fprintf(stderr,
3557 			    "Standard input not a tty line\n");
3558 			(void) audit_put_record(ADT_FAILURE,
3559 			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3560 			exit(1);
3561 		}
3562 
3563 		if ((stat(ln, &sconbuf) != -1) &&
3564 		    (stat(SYSCON, &conbuf) == -1 ||
3565 		    sconbuf.st_rdev != conbuf.st_rdev)) {
3566 			/*
3567 			 * /dev/syscon needs to change.
3568 			 * Unlink /dev/syscon and relink it to the current line.
3569 			 */
3570 			if (lstat(SYSCON, &conbuf) != -1 &&
3571 			    unlink(SYSCON) == FAILURE) {
3572 				perror("Can't unlink /dev/syscon");
3573 				(void) fprintf(stderr,
3574 				    "Run command on the system console.\n");
3575 				(void) audit_put_record(ADT_FAILURE,
3576 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3577 				exit(1);
3578 			}
3579 			if (symlink(ln, SYSCON) == FAILURE) {
3580 				(void) fprintf(stderr,
3581 				    "Can't symlink /dev/syscon to %s: %s", ln,
3582 				    strerror(errno));
3583 
3584 				/* Try to leave a syscon */
3585 				(void) link(SYSTTY, SYSCON);
3586 				(void) audit_put_record(ADT_FAILURE,
3587 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3588 				exit(1);
3589 			}
3590 
3591 			/*
3592 			 * Try to leave a message on system console saying where
3593 			 * /dev/syscon is currently connected.
3594 			 */
3595 			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3596 				(void) fprintf(fp,
3597 				    "\n****	SYSCON CHANGED TO %s	****\n",
3598 				    ln);
3599 				(void) fclose(fp);
3600 			}
3601 		}
3602 	}
3603 
3604 	update_boot_archive(init_signal);
3605 
3606 	(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3607 
3608 	/*
3609 	 * Signal init; init will take care of telling svc.startd.
3610 	 */
3611 	if (kill(init_pid, init_signal) == FAILURE) {
3612 		(void) fprintf(stderr, "Must be super-user\n");
3613 		(void) audit_put_record(ADT_FAILURE,
3614 		    ADT_FAIL_VALUE_AUTH, argv[1]);
3615 		exit(1);
3616 	}
3617 
3618 	exit(0);
3619 }
3620 
3621 
3622 #define	DELTA	25	/* Number of pidlist elements to allocate at a time */
3623 
3624 /* ARGSUSED */
3625 void
3626 sigpoll(int n)
3627 {
3628 	struct pidrec prec;
3629 	struct pidrec *p = &prec;
3630 	struct pidlist *plp;
3631 	struct pidlist *tp, *savetp;
3632 	int i;
3633 
3634 	if (Pfd < 0) {
3635 		return;
3636 	}
3637 
3638 	for (;;) {
3639 		/*
3640 		 * Important Note: Either read will really fail (in which case
3641 		 * return is all we can do) or will get EAGAIN (Pfd was opened
3642 		 * O_NDELAY), in which case we also want to return.
3643 		 * Always return from here!
3644 		 */
3645 		if (read(Pfd, p, sizeof (struct pidrec)) !=
3646 						sizeof (struct pidrec)) {
3647 			return;
3648 		}
3649 		switch (p->pd_type) {
3650 
3651 		case ADDPID:
3652 			/*
3653 			 * New "godchild", add to list.
3654 			 */
3655 			if (Plfree == NULL) {
3656 				plp = (struct pidlist *)calloc(DELTA,
3657 				    sizeof (struct pidlist));
3658 				if (plp == NULL) {
3659 					/* Can't save pid */
3660 					break;
3661 				}
3662 				/*
3663 				 * Point at 2nd record allocated, we'll use plp.
3664 				 */
3665 				tp = plp + 1;
3666 				/*
3667 				 * Link them into a chain.
3668 				 */
3669 				Plfree = tp;
3670 				for (i = 0; i < DELTA - 2; i++) {
3671 					tp->pl_next = tp + 1;
3672 					tp++;
3673 				}
3674 			} else {
3675 				plp = Plfree;
3676 				Plfree = plp->pl_next;
3677 			}
3678 			plp->pl_pid = p->pd_pid;
3679 			plp->pl_dflag = 0;
3680 			plp->pl_next = NULL;
3681 			/*
3682 			 * Note - pid list is kept in increasing order of pids.
3683 			 */
3684 			if (Plhead == NULL) {
3685 				Plhead = plp;
3686 				/* Back up to read next record */
3687 				break;
3688 			} else {
3689 				savetp = tp = Plhead;
3690 				while (tp) {
3691 					if (plp->pl_pid > tp->pl_pid) {
3692 						savetp = tp;
3693 						tp = tp->pl_next;
3694 						continue;
3695 					} else if (plp->pl_pid < tp->pl_pid) {
3696 						if (tp == Plhead) {
3697 							plp->pl_next = Plhead;
3698 							Plhead = plp;
3699 						} else {
3700 							plp->pl_next =
3701 							    savetp->pl_next;
3702 							savetp->pl_next = plp;
3703 						}
3704 						break;
3705 					} else {
3706 						/* Already in list! */
3707 						plp->pl_next = Plfree;
3708 						Plfree = plp;
3709 						break;
3710 					}
3711 				}
3712 				if (tp == NULL) {
3713 					/* Add to end of list */
3714 					savetp->pl_next = plp;
3715 				}
3716 			}
3717 			/* Back up to read next record. */
3718 			break;
3719 
3720 		case REMPID:
3721 			/*
3722 			 * This one was handled by someone else,
3723 			 * purge it from the list.
3724 			 */
3725 			if (Plhead == NULL) {
3726 				/* Back up to read next record. */
3727 				break;
3728 			}
3729 			savetp = tp = Plhead;
3730 			while (tp) {
3731 				if (p->pd_pid > tp->pl_pid) {
3732 					/* Keep on looking. */
3733 					savetp = tp;
3734 					tp = tp->pl_next;
3735 					continue;
3736 				} else if (p->pd_pid < tp->pl_pid) {
3737 					/* Not in list. */
3738 					break;
3739 				} else {
3740 					/* Found it. */
3741 					if (tp == Plhead)
3742 						Plhead = tp->pl_next;
3743 					else
3744 						savetp->pl_next = tp->pl_next;
3745 					tp->pl_next = Plfree;
3746 					Plfree = tp;
3747 					break;
3748 				}
3749 			}
3750 			/* Back up to read next record. */
3751 			break;
3752 		default:
3753 			console(B_TRUE, "Bad message on initpipe\n");
3754 			break;
3755 		}
3756 	}
3757 }
3758 
3759 
3760 static void
3761 cleanaux()
3762 {
3763 	struct pidlist *savep, *p;
3764 	pid_t	pid;
3765 	short	status;
3766 
3767 	(void) sighold(SIGCLD);
3768 	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
3769 	(void) sighold(SIGPOLL);
3770 	savep = p = Plhead;
3771 	while (p) {
3772 		if (p->pl_dflag) {
3773 			/*
3774 			 * Found an entry to delete,
3775 			 * remove it from list first.
3776 			 */
3777 			pid = p->pl_pid;
3778 			status = p->pl_exit;
3779 			if (p == Plhead) {
3780 				Plhead = p->pl_next;
3781 				p->pl_next = Plfree;
3782 				Plfree = p;
3783 				savep = p = Plhead;
3784 			} else {
3785 				savep->pl_next = p->pl_next;
3786 				p->pl_next = Plfree;
3787 				Plfree = p;
3788 				p = savep->pl_next;
3789 			}
3790 			clearent(pid, status);
3791 			continue;
3792 		}
3793 		savep = p;
3794 		p = p->pl_next;
3795 	}
3796 	(void) sigrelse(SIGPOLL);
3797 	(void) sigrelse(SIGCLD);
3798 }
3799 
3800 
3801 /*
3802  * /etc/inittab has more entries and we have run out of room in the proc_table
3803  * array. Double the size of proc_table to accomodate the extra entries.
3804  */
3805 static void
3806 increase_proc_table_size()
3807 {
3808 	sigset_t block, unblock;
3809 	void *ptr;
3810 	size_t delta = num_proc * sizeof (struct PROC_TABLE);
3811 
3812 
3813 	/*
3814 	 * Block signals for realloc.
3815 	 */
3816 	(void) sigfillset(&block);
3817 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
3818 
3819 
3820 	/*
3821 	 * On failure we just return because callers of this function check
3822 	 * for failure.
3823 	 */
3824 	do
3825 		ptr = realloc(g_state, g_state_sz + delta);
3826 	while (ptr == NULL && errno == EAGAIN)
3827 		;
3828 
3829 	if (ptr != NULL) {
3830 		/* ensure that the new part is initialized to zero */
3831 		bzero((caddr_t)ptr + g_state_sz, delta);
3832 
3833 		g_state = ptr;
3834 		g_state_sz += delta;
3835 		num_proc <<= 1;
3836 	}
3837 
3838 
3839 	/* unblock our signals before returning */
3840 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3841 }
3842 
3843 
3844 
3845 /*
3846  * Sanity check g_state.
3847  */
3848 static int
3849 st_sane()
3850 {
3851 	int i;
3852 	struct PROC_TABLE *ptp;
3853 
3854 
3855 	/* Note: cur_state is encoded as a signal number */
3856 	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3857 		return (0);
3858 
3859 	/* Check num_proc */
3860 	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3861 	    sizeof (struct PROC_TABLE))
3862 		return (0);
3863 
3864 	/* Check proc_table */
3865 	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3866 		/* skip unoccupied entries */
3867 		if (!(ptp->p_flags & OCCUPIED))
3868 			continue;
3869 
3870 		/* p_flags has no bits outside of PF_MASK */
3871 		if (ptp->p_flags & ~(PF_MASK))
3872 			return (0);
3873 
3874 		/* 5 <= pid <= MAXPID */
3875 		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3876 			return (0);
3877 
3878 		/* p_count >= 0 */
3879 		if (ptp->p_count < 0)
3880 			return (0);
3881 
3882 		/* p_time >= 0 */
3883 		if (ptp->p_time < 0)
3884 			return (0);
3885 	}
3886 
3887 	return (1);
3888 }
3889 
3890 /*
3891  * Initialize our state.
3892  *
3893  * If the system just booted, then init_state_file, which is located on an
3894  * everpresent tmpfs filesystem, should not exist.
3895  *
3896  * If we were restarted, then init_state_file should exist, in
3897  * which case we'll read it in, sanity check it, and use it.
3898  *
3899  * Note: You can't call console() until proc_table is ready.
3900  */
3901 void
3902 st_init()
3903 {
3904 	struct stat stb;
3905 	int ret, st_fd, insane = 0;
3906 	size_t to_be_read;
3907 	char *ptr;
3908 
3909 
3910 	booting = 1;
3911 
3912 	do {
3913 		/*
3914 		 * If we can exclusively create the file, then we're the
3915 		 * initial invocation of init(1M).
3916 		 */
3917 		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3918 		    S_IRUSR | S_IWUSR);
3919 	} while (st_fd == -1 && errno == EINTR);
3920 	if (st_fd != -1)
3921 		goto new_state;
3922 
3923 	booting = 0;
3924 
3925 	do {
3926 		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3927 	} while (st_fd == -1 && errno == EINTR);
3928 	if (st_fd == -1)
3929 		goto new_state;
3930 
3931 	/* Get the size of the file. */
3932 	do
3933 		ret = fstat(st_fd, &stb);
3934 	while (ret == -1 && errno == EINTR)
3935 		;
3936 	if (ret == -1)
3937 		goto new_state;
3938 
3939 	do
3940 		g_state = malloc(stb.st_size);
3941 	while (g_state == NULL && errno == EAGAIN)
3942 		;
3943 	if (g_state == NULL)
3944 		goto new_state;
3945 
3946 	to_be_read = stb.st_size;
3947 	ptr = (char *)g_state;
3948 	while (to_be_read > 0) {
3949 		ssize_t read_ret;
3950 
3951 		read_ret = read(st_fd, ptr, to_be_read);
3952 		if (read_ret < 0) {
3953 			if (errno == EINTR)
3954 				continue;
3955 
3956 			goto new_state;
3957 		}
3958 
3959 		to_be_read -= read_ret;
3960 		ptr += read_ret;
3961 	}
3962 
3963 	(void) close(st_fd);
3964 
3965 	g_state_sz = stb.st_size;
3966 
3967 	if (st_sane()) {
3968 		console(B_TRUE, "Restarting.\n");
3969 		return;
3970 	}
3971 
3972 	insane = 1;
3973 
3974 new_state:
3975 	if (st_fd >= 0)
3976 		(void) close(st_fd);
3977 	else
3978 		(void) unlink(init_state_file);
3979 
3980 	if (g_state != NULL)
3981 		free(g_state);
3982 
3983 	/* Something went wrong, so allocate new state. */
3984 	g_state_sz = sizeof (struct init_state) +
3985 	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3986 	do
3987 		g_state = calloc(1, g_state_sz);
3988 	while (g_state == NULL && errno == EAGAIN)
3989 		;
3990 	if (g_state == NULL) {
3991 		/* Fatal error! */
3992 		exit(errno);
3993 	}
3994 
3995 	g_state->ist_runlevel = -1;
3996 	num_proc = init_num_proc;
3997 
3998 	if (!booting) {
3999 		console(B_TRUE, "Restarting.\n");
4000 
4001 		/* Overwrite the bad state file. */
4002 		st_write();
4003 
4004 		if (!insane) {
4005 			console(B_TRUE,
4006 			    "Error accessing persistent state file `%s'.  "
4007 			    "Ignored.\n", init_state_file);
4008 		} else {
4009 			console(B_TRUE,
4010 			    "Persistent state file `%s' is invalid and was "
4011 			    "ignored.\n", init_state_file);
4012 		}
4013 	}
4014 }
4015 
4016 /*
4017  * Write g_state out to the state file.
4018  */
4019 void
4020 st_write()
4021 {
4022 	static int complained = 0;
4023 
4024 	int st_fd;
4025 	char *cp;
4026 	size_t sz;
4027 	ssize_t ret;
4028 
4029 
4030 	do {
4031 		st_fd = open(init_next_state_file,
4032 		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
4033 	} while (st_fd < 0 && errno == EINTR);
4034 	if (st_fd < 0)
4035 		goto err;
4036 
4037 	cp = (char *)g_state;
4038 	sz = g_state_sz;
4039 	while (sz > 0) {
4040 		ret = write(st_fd, cp, sz);
4041 		if (ret < 0) {
4042 			if (errno == EINTR)
4043 				continue;
4044 
4045 			goto err;
4046 		}
4047 
4048 		sz -= ret;
4049 		cp += ret;
4050 	}
4051 
4052 	(void) close(st_fd);
4053 	st_fd = -1;
4054 	if (rename(init_next_state_file, init_state_file)) {
4055 		(void) unlink(init_next_state_file);
4056 		goto err;
4057 	}
4058 	complained = 0;
4059 
4060 	return;
4061 
4062 err:
4063 	if (st_fd >= 0)
4064 		(void) close(st_fd);
4065 
4066 	if (!booting && !complained) {
4067 		/*
4068 		 * Only complain after the filesystem should have come up.
4069 		 * And only do it once so we don't loop between console()
4070 		 * & efork().
4071 		 */
4072 		complained = 1;
4073 		if (st_fd)
4074 			console(B_TRUE, "Couldn't write persistent state "
4075 			    "file `%s'.\n", init_state_file);
4076 		else
4077 			console(B_TRUE, "Couldn't move persistent state "
4078 			    "file `%s' to `%s'.\n", init_next_state_file,
4079 			    init_state_file);
4080 	}
4081 }
4082 
4083 /*
4084  * Create a contract with these parameters.
4085  */
4086 static int
4087 contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4088     uint64_t cookie)
4089 {
4090 	int fd, err;
4091 
4092 	char *ioctl_tset_emsg =
4093 	    "Couldn't set \"%s\" contract template parameter: %s.\n";
4094 
4095 	do
4096 		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4097 	while (fd < 0 && errno == EINTR)
4098 		;
4099 	if (fd < 0) {
4100 		console(B_TRUE, "Couldn't create process template: %s.\n",
4101 		    strerror(errno));
4102 		return (-1);
4103 	}
4104 
4105 	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4106 		console(B_TRUE, "Contract set template inherit, regent "
4107 		    "failed: %s.\n", strerror(err));
4108 
4109 	/*
4110 	 * These errors result in a misconfigured template, which is better
4111 	 * than no template at all, so warn but don't abort.
4112 	 */
4113 	if (err = ct_tmpl_set_informative(fd, info))
4114 		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4115 
4116 	if (err = ct_tmpl_set_critical(fd, critical))
4117 		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4118 
4119 	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4120 		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4121 
4122 	if (err = ct_tmpl_set_cookie(fd, cookie))
4123 		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4124 
4125 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4126 
4127 	return (fd);
4128 }
4129 
4130 /*
4131  * Create the templates and open an event file descriptor.  We use dup2(2) to
4132  * get these descriptors away from the stdin/stdout/stderr group.
4133  */
4134 static void
4135 contracts_init()
4136 {
4137 	int err, fd;
4138 
4139 	/*
4140 	 * Create & configure a legacy template.  We only want empty events so
4141 	 * we know when to abandon them.
4142 	 */
4143 	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4144 	    ORDINARY_COOKIE);
4145 	if (legacy_tmpl >= 0) {
4146 		err = ct_tmpl_activate(legacy_tmpl);
4147 		if (err != 0) {
4148 			(void) close(legacy_tmpl);
4149 			legacy_tmpl = -1;
4150 			console(B_TRUE,
4151 			    "Couldn't activate legacy template (%s); "
4152 			    "legacy services will be in init's contract.\n",
4153 			    strerror(err));
4154 		}
4155 	} else
4156 		console(B_TRUE,
4157 		    "Legacy services will be in init's contract.\n");
4158 
4159 	if (dup2(legacy_tmpl, 255) == -1) {
4160 		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4161 		    strerror(errno));
4162 	} else {
4163 		(void) close(legacy_tmpl);
4164 		legacy_tmpl = 255;
4165 	}
4166 
4167 	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4168 
4169 	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4170 	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4171 
4172 	if (dup2(startd_tmpl, 254) == -1) {
4173 		console(B_TRUE, "Could not duplicate startd template: %s.\n",
4174 		    strerror(errno));
4175 	} else {
4176 		(void) close(startd_tmpl);
4177 		startd_tmpl = 254;
4178 	}
4179 
4180 	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4181 
4182 	if (legacy_tmpl < 0 && startd_tmpl < 0) {
4183 		/* The creation errors have already been reported. */
4184 		console(B_TRUE,
4185 		    "Ignoring contract events.  Core smf(5) services will not "
4186 		    "be restarted.\n");
4187 		return;
4188 	}
4189 
4190 	/*
4191 	 * Open an event endpoint.
4192 	 */
4193 	do
4194 		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4195 	while (fd < 0 && errno == EINTR)
4196 		;
4197 	if (fd < 0) {
4198 		console(B_TRUE,
4199 		    "Couldn't open process pbundle: %s.  Core smf(5) services "
4200 		    "will not be restarted.\n", strerror(errno));
4201 		return;
4202 	}
4203 
4204 	if (dup2(fd, 253) == -1) {
4205 		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4206 		    strerror(errno));
4207 	} else {
4208 		(void) close(fd);
4209 		fd = 253;
4210 	}
4211 
4212 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4213 
4214 	/* Reset in case we've been restarted. */
4215 	(void) ct_event_reset(fd);
4216 
4217 	poll_fds[0].fd = fd;
4218 	poll_fds[0].events = POLLIN;
4219 	poll_nfds = 1;
4220 }
4221 
4222 static int
4223 contract_getfile(ctid_t id, const char *name, int oflag)
4224 {
4225 	int fd;
4226 
4227 	do
4228 		fd = contract_open(id, "process", name, oflag);
4229 	while (fd < 0 && errno == EINTR)
4230 		;
4231 
4232 	if (fd < 0)
4233 		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4234 		    name, id, strerror(errno));
4235 
4236 	return (fd);
4237 }
4238 
4239 static int
4240 contract_cookie(ctid_t id, uint64_t *cp)
4241 {
4242 	int fd, err;
4243 	ct_stathdl_t sh;
4244 
4245 	fd = contract_getfile(id, "status", O_RDONLY);
4246 	if (fd < 0)
4247 		return (-1);
4248 
4249 	err = ct_status_read(fd, CTD_COMMON, &sh);
4250 	if (err != 0) {
4251 		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4252 		    id, strerror(err));
4253 		(void) close(fd);
4254 		return (-1);
4255 	}
4256 
4257 	(void) close(fd);
4258 
4259 	*cp = ct_status_get_cookie(sh);
4260 
4261 	ct_status_free(sh);
4262 	return (0);
4263 }
4264 
4265 static void
4266 contract_ack(ct_evthdl_t e)
4267 {
4268 	int fd;
4269 
4270 	if (ct_event_get_flags(e) & CTE_INFO)
4271 		return;
4272 
4273 	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4274 	if (fd < 0)
4275 		return;
4276 
4277 	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
4278 	(void) close(fd);
4279 }
4280 
4281 /*
4282  * Process a contract event.
4283  */
4284 static void
4285 contract_event(struct pollfd *poll)
4286 {
4287 	ct_evthdl_t e;
4288 	int err;
4289 	ctid_t ctid;
4290 
4291 	if (!(poll->revents & POLLIN)) {
4292 		if (poll->revents & POLLERR)
4293 			console(B_TRUE,
4294 			    "Unknown poll error on my process contract "
4295 			    "pbundle.\n");
4296 		return;
4297 	}
4298 
4299 	err = ct_event_read(poll->fd, &e);
4300 	if (err != 0) {
4301 		console(B_TRUE, "Error retrieving contract event: %s.\n",
4302 		    strerror(err));
4303 		return;
4304 	}
4305 
4306 	ctid = ct_event_get_ctid(e);
4307 
4308 	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4309 		uint64_t cookie;
4310 		int ret, abandon = 1;
4311 
4312 		/* If it's svc.startd, restart it.  Else, abandon. */
4313 		ret = contract_cookie(ctid, &cookie);
4314 
4315 		if (ret == 0) {
4316 			if (cookie == STARTD_COOKIE &&
4317 			    do_restart_startd) {
4318 				if (smf_debug)
4319 					console(B_TRUE, "Restarting "
4320 					    "svc.startd.\n");
4321 
4322 				/*
4323 				 * Account for the failure.  If the failure rate
4324 				 * exceeds a threshold, then drop to maintenance
4325 				 * mode.
4326 				 */
4327 				startd_record_failure();
4328 				if (startd_failure_rate_critical())
4329 					enter_maintenance();
4330 
4331 				if (startd_tmpl < 0)
4332 					console(B_TRUE,
4333 					    "Restarting svc.startd in "
4334 					    "improper contract (bad "
4335 					    "template).\n");
4336 
4337 				(void) startd_run(startd_cline, startd_tmpl,
4338 				    ctid);
4339 
4340 				abandon = 0;
4341 			}
4342 		}
4343 
4344 		if (abandon && (err = contract_abandon_id(ctid))) {
4345 			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4346 			    ctid, strerror(err));
4347 		}
4348 
4349 		/*
4350 		 * No need to acknowledge the event since either way the
4351 		 * originating contract should be abandoned.
4352 		 */
4353 	} else {
4354 		console(B_TRUE,
4355 		    "Received contract event of unexpected type %d from "
4356 		    "contract %ld.\n", ct_event_get_type(e), ctid);
4357 
4358 		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4359 			/* Allow unexpected critical events to be released. */
4360 			contract_ack(e);
4361 	}
4362 
4363 	ct_event_free(e);
4364 }
4365 
4366 /*
4367  * svc.startd(1M) Management
4368  */
4369 
4370 /*
4371  * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
4372  * contract, or 0 if we're starting it for the first time.  If wait is true
4373  * we'll wait for and return the exit value of the child.
4374  */
4375 static int
4376 startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4377 {
4378 	int err, i, ret, did_activate;
4379 	pid_t pid;
4380 	struct stat sb;
4381 
4382 	if (cline[0] == '\0')
4383 		return (-1);
4384 
4385 	/*
4386 	 * Don't restart startd if the system is rebooting or shutting down.
4387 	 */
4388 	do {
4389 		ret = stat("/etc/svc/volatile/resetting", &sb);
4390 	} while (ret == -1 && errno == EINTR);
4391 
4392 	if (ret == 0) {
4393 		if (smf_debug)
4394 			console(B_TRUE, "Quiescing for reboot.\n");
4395 		(void) pause();
4396 		return (-1);
4397 	}
4398 
4399 	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4400 	if (err == EINVAL) {
4401 		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4402 		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4403 		    CT_PR_EV_HWERR, STARTD_COOKIE);
4404 
4405 		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4406 	}
4407 	if (err != 0) {
4408 		console(B_TRUE,
4409 		    "Couldn't set transfer parameter of contract template: "
4410 		    "%s.\n", strerror(err));
4411 	}
4412 
4413 	if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
4414 	    SCF_SERVICE_STARTD)) != 0)
4415 		console(B_TRUE,
4416 		    "Can not set svc_fmri in contract template: %s\n",
4417 		    strerror(err));
4418 	if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
4419 	    startd_svc_aux)) != 0)
4420 		console(B_TRUE,
4421 		    "Can not set svc_aux in contract template: %s\n",
4422 		    strerror(err));
4423 	did_activate = !(ct_tmpl_activate(tmpl));
4424 	if (!did_activate)
4425 		console(B_TRUE,
4426 		    "Template activation failed; not starting \"%s\" in "
4427 		    "proper contract.\n", cline);
4428 
4429 	/* Hold SIGCLD so we can wait if necessary. */
4430 	(void) sighold(SIGCLD);
4431 
4432 	while ((pid = fork()) < 0) {
4433 		if (errno == EPERM) {
4434 			console(B_TRUE, "Insufficient permission to fork.\n");
4435 
4436 			/* Now that's a doozy. */
4437 			exit(1);
4438 		}
4439 
4440 		console(B_TRUE,
4441 		    "fork() for svc.startd failed: %s.  Will retry in 1 "
4442 		    "second...\n", strerror(errno));
4443 
4444 		(void) sleep(1);
4445 
4446 		/* Eventually give up? */
4447 	}
4448 
4449 	if (pid == 0) {
4450 		/* child */
4451 
4452 		/* See the comment in efork() */
4453 		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4454 			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4455 				(void) sigset(i, SIG_IGN);
4456 			else
4457 				(void) sigset(i, SIG_DFL);
4458 		}
4459 
4460 		if (smf_options != NULL) {
4461 			/* Put smf_options in the environment. */
4462 			glob_envp[glob_envn] =
4463 			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
4464 			    strlen(smf_options) + 1);
4465 
4466 			if (glob_envp[glob_envn] != NULL) {
4467 				/* LINTED */
4468 				(void) sprintf(glob_envp[glob_envn],
4469 				    "SMF_OPTIONS=%s", smf_options);
4470 				glob_envp[glob_envn+1] = NULL;
4471 			} else {
4472 				console(B_TRUE,
4473 				    "Could not set SMF_OPTIONS (%s).\n",
4474 				    strerror(errno));
4475 			}
4476 		}
4477 
4478 		if (smf_debug)
4479 			console(B_TRUE, "Executing svc.startd\n");
4480 
4481 		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4482 
4483 		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4484 		    strerror(errno));
4485 
4486 		exit(1);
4487 	}
4488 
4489 	/* parent */
4490 
4491 	if (did_activate) {
4492 		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4493 			(void) ct_tmpl_clear(tmpl);
4494 	}
4495 
4496 	/* Clear the old_ctid reference so the kernel can reclaim it. */
4497 	if (old_ctid != 0)
4498 		(void) ct_pr_tmpl_set_transfer(tmpl, 0);
4499 
4500 	(void) sigrelse(SIGCLD);
4501 
4502 	return (0);
4503 }
4504 
4505 /*
4506  * void startd_record_failure(void)
4507  *   Place the current time in our circular array of svc.startd failures.
4508  */
4509 void
4510 startd_record_failure()
4511 {
4512 	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4513 
4514 	startd_failure_time[index] = gethrtime();
4515 }
4516 
4517 /*
4518  * int startd_failure_rate_critical(void)
4519  *   Return true if the average failure interval is less than the permitted
4520  *   interval.  Implicit success if insufficient measurements for an average
4521  *   exist.
4522  */
4523 int
4524 startd_failure_rate_critical()
4525 {
4526 	int n = startd_failure_index;
4527 	hrtime_t avg_ns = 0;
4528 
4529 	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4530 		return (0);
4531 
4532 	avg_ns =
4533 	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4534 	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4535 	    NSTARTD_FAILURE_TIMES;
4536 
4537 	return (avg_ns < STARTD_FAILURE_RATE_NS);
4538 }
4539 
4540 /*
4541  * returns string that must be free'd
4542  */
4543 
4544 static char
4545 *audit_boot_msg()
4546 {
4547 	char		*b, *p;
4548 	char		desc[] = "booted";
4549 	zoneid_t	zid = getzoneid();
4550 
4551 	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4552 	if (b == NULL)
4553 		return (b);
4554 
4555 	p = b;
4556 	p += strlcpy(p, desc, sizeof (desc));
4557 	if (zid != GLOBAL_ZONEID) {
4558 		p += strlcpy(p, ": ", 3);
4559 		(void) getzonenamebyid(zid, p, MAXNAMELEN);
4560 	}
4561 	return (b);
4562 }
4563 
4564 /*
4565  * Generate AUE_init_solaris audit record.  Return 1 if
4566  * auditing is enabled in case the caller cares.
4567  *
4568  * In the case of userint() or a local zone invocation of
4569  * one_true_init, the process initially contains the audit
4570  * characteristics of the process that invoked init.  The first pass
4571  * through here uses those characteristics then for the case of
4572  * one_true_init in a local zone, clears them so subsequent system
4573  * state changes won't be attributed to the person who booted the
4574  * zone.
4575  */
4576 static int
4577 audit_put_record(int pass_fail, int status, char *msg)
4578 {
4579 	adt_session_data_t	*ah;
4580 	adt_event_data_t	*event;
4581 
4582 	if (!adt_audit_enabled())
4583 		return (0);
4584 
4585 	/*
4586 	 * the PROC_DATA picks up the context to tell whether this is
4587 	 * an attributed record (auid = -2 is unattributed)
4588 	 */
4589 	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4590 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4591 		return (1);
4592 	}
4593 	event = adt_alloc_event(ah, ADT_init_solaris);
4594 	if (event == NULL) {
4595 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4596 		(void) adt_end_session(ah);
4597 		return (1);
4598 	}
4599 	event->adt_init_solaris.info = msg;	/* NULL is ok here */
4600 
4601 	if (adt_put_event(event, pass_fail, status)) {
4602 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4603 		(void) adt_end_session(ah);
4604 		return (1);
4605 	}
4606 	adt_free_event(event);
4607 
4608 	(void) adt_end_session(ah);
4609 
4610 	return (1);
4611 }
4612