xref: /illumos-gate/usr/src/cmd/init/init.c (revision fea9cb91bd8e12d84069b4dab1268363668b4bff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
41 
42 /*
43  * init(1M) is the general process spawning program.  Its primary job is to
44  * start and restart svc.startd for smf(5).  For backwards-compatibility it also
45  * spawns and respawns processes according to /etc/inittab and the current
46  * run-level.  It reads /etc/default/inittab for general configuration.
47  *
48  * To change run-levels the system administrator runs init from the command
49  * line with a level name.  init signals svc.startd via libscf and directs the
50  * zone's init (pid 1 in the global zone) what to do by sending it a signal;
51  * these signal numbers are commonly referred to in the code as 'states'.  Valid
52  * run-levels are [sS0123456].  Additionally, init can be given directives
53  * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
54  *
55  * When init processes inittab entries, it finds processes that are to be
56  * spawned at various run-levels.  inittab contains the set of the levels for
57  * which each inittab entry is valid.
58  *
59  * State File and Restartability
60  *   Premature exit by init(1M) is handled as a special case by the kernel:
61  *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
62  *   1 in the global zone.)  To track the processes it has previously spawned,
63  *   as well as other mutable state, init(1M) regularly updates a state file
64  *   such that its subsequent invocations have knowledge of its various
65  *   dependent processes and duties.
66  *
67  * Process Contracts
68  *   We start svc.startd(1M) in a contract and transfer inherited contracts when
69  *   restarting it.  Everything else is started using the legacy contract
70  *   template, and the created contracts are abandoned when they become empty.
71  *
72  * utmpx Entry Handling
73  *   Because init(1M) no longer governs the startup process, its knowledge of
74  *   when utmpx becomes writable is indirect.  However, spawned processes
75  *   expect to be constructed with valid utmpx entries.  As a result, attempts
76  *   to write normal entries will be retried until successful.
77  *
78  * Maintenance Mode
79  *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
80  *   which it invokes sulogin(1M) to allow the operator an opportunity to
81  *   repair the system.  Normally, this operation is performed as a
82  *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
83  *   diagnosis to be completed.  In the cases that fork(2) requests themselves
84  *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
85  *   restart init(1M) on exit from the operator session.
86  *
87  *   One scenario where init(1M) enters its maintenance mode is when
88  *   svc.startd(1M) begins to fail rapidly, defined as when the average time
89  *   between recent failures drops below a given threshold.
90  */
91 
92 #include <sys/contract/process.h>
93 #include <sys/ctfs.h>
94 #include <sys/stat.h>
95 #include <sys/statvfs.h>
96 #include <sys/stropts.h>
97 #include <sys/systeminfo.h>
98 #include <sys/time.h>
99 #include <sys/termios.h>
100 #include <sys/tty.h>
101 #include <sys/types.h>
102 #include <sys/utsname.h>
103 
104 #include <bsm/adt_event.h>
105 #include <bsm/libbsm.h>
106 #include <security/pam_appl.h>
107 
108 #include <assert.h>
109 #include <ctype.h>
110 #include <dirent.h>
111 #include <errno.h>
112 #include <fcntl.h>
113 #include <libcontract.h>
114 #include <libcontract_priv.h>
115 #include <libintl.h>
116 #include <libscf.h>
117 #include <libscf_priv.h>
118 #include <poll.h>
119 #include <procfs.h>
120 #include <signal.h>
121 #include <stdarg.h>
122 #include <stdio.h>
123 #include <stdio_ext.h>
124 #include <stdlib.h>
125 #include <string.h>
126 #include <strings.h>
127 #include <syslog.h>
128 #include <time.h>
129 #include <ulimit.h>
130 #include <unistd.h>
131 #include <utmpx.h>
132 #include <wait.h>
133 #include <zone.h>
134 #include <ucontext.h>
135 
136 #undef	sleep
137 
138 #define	fioctl(p, sptr, cmd)	ioctl(fileno(p), sptr, cmd)
139 #define	min(a, b)		(((a) < (b)) ? (a) : (b))
140 
141 #define	TRUE	1
142 #define	FALSE	0
143 #define	FAILURE	-1
144 
145 #define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */
146 
147 /*
148  * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
149  *		nothing else requires this "init" wakeup.
150  */
151 #define	SLEEPTIME	(5 * 60)
152 
153 /*
154  * MAXCMDL	The maximum length of a command string in inittab.
155  */
156 #define	MAXCMDL	512
157 
158 /*
159  * EXEC		The length of the prefix string added to all commands
160  *		found in inittab.
161  */
162 #define	EXEC	(sizeof ("exec ") - 1)
163 
164 /*
165  * TWARN	The amount of time between warning signal, SIGTERM,
166  *		and the fatal kill signal, SIGKILL.
167  */
168 #define	TWARN	5
169 
/*
 * id_eq(x, y)	Compare two four-character ids (such as the p_id field of a
 *		PROC_TABLE entry and the c_id field of a CMD_LINE).  Evaluates
 *		to TRUE when all four characters match and FALSE otherwise.
 *		Exactly four characters are compared; no terminator is
 *		looked for.
 *
 *		Each argument is parenthesized so that expressions such as
 *		"p + 1" may be passed, but each is still evaluated several
 *		times and therefore must be free of side effects.
 */
#define	id_eq(x, y)	(((x)[0] == (y)[0] && (x)[1] == (y)[1] && \
			(x)[2] == (y)[2] && (x)[3] == (y)[3]) ? TRUE : FALSE)
172 
173 /*
174  * The kernel's default umask is 022 these days; since some processes inherit
175  * their umask from init, init will set it from CMASK in /etc/default/init.
176  * init gets the default umask from the kernel; it sets it to 022 whenever
177  * it wants to create a file and reverts to CMASK afterwards.
178  */
179 
180 static int cmask;
181 
182 /*
183  * The following definitions, concluding with the 'lvls' array, provide a
184  * common mapping between level-name (like 'S'), signal number (state),
185  * run-level mask, and specific properties associated with a run-level.
186  * This array should be accessed using the routines lvlname_to_state(),
187  * lvlname_to_mask(), state_to_mask(), and state_to_flags().
188  */
189 
190 /*
191  * Correspondence of signals to init actions.
192  */
193 #define	LVLQ		SIGHUP
194 #define	LVL0		SIGINT
195 #define	LVL1		SIGQUIT
196 #define	LVL2		SIGILL
197 #define	LVL3		SIGTRAP
198 #define	LVL4		SIGIOT
199 #define	LVL5		SIGEMT
200 #define	LVL6		SIGFPE
201 #define	SINGLE_USER	SIGBUS
202 #define	LVLa		SIGSEGV
203 #define	LVLb		SIGSYS
204 #define	LVLc		SIGPIPE
205 
206 /*
207  * Bit Mask for each level.  Used to determine legal levels.
208  */
209 #define	MASK0	0x0001
210 #define	MASK1	0x0002
211 #define	MASK2	0x0004
212 #define	MASK3	0x0008
213 #define	MASK4	0x0010
214 #define	MASK5	0x0020
215 #define	MASK6	0x0040
216 #define	MASKSU	0x0080
217 #define	MASKa	0x0100
218 #define	MASKb	0x0200
219 #define	MASKc	0x0400
220 
221 #define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
222 #define	MASK_abc (MASKa | MASKb | MASKc)
223 
224 /*
225  * Flags to indicate properties of various states.
226  */
227 #define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */
228 #define	LSEL_NOAUDIT	0x0002	/* levels with auditing disabled */
229 
/*
 * One row of the 'lvls' table: ties a level name to the signal number
 * (state) used to request it, its run-level mask bit, and its LSEL_* flags.
 */
230 typedef struct lvl {
231 	int	lvl_state;	/* signal number ("state"), e.g. LVL3 */
232 	int	lvl_mask;	/* MASK* bit for the level, 0 if it has none */
233 	char	lvl_name;	/* level name character, e.g. 'S' or '3' */
234 	int	lvl_flags;	/* bitwise OR of LSEL_* property flags */
235 } lvl_t;
236 
/*
 * The level table.  Each row is { state, mask, name, flags }.  'Q'/'q' and
 * 'S'/'s' each appear twice so that both spellings of the name map to the
 * same state.  Only the entries flagged LSEL_RUNLEVEL are run levels that
 * can be transitioned to; Q, a, b and c are directives and carry no flags.
 */
237 static lvl_t lvls[] = {
238 	{ LVLQ,		0,	'Q', 0					},
239 	{ LVLQ,		0,	'q', 0					},
240 	{ LVL0,		MASK0,	'0', LSEL_RUNLEVEL | LSEL_NOAUDIT	},
241 	{ LVL1, 	MASK1,	'1', LSEL_RUNLEVEL | LSEL_NOAUDIT	},
242 	{ LVL2, 	MASK2,	'2', LSEL_RUNLEVEL			},
243 	{ LVL3, 	MASK3,	'3', LSEL_RUNLEVEL			},
244 	{ LVL4, 	MASK4,	'4', LSEL_RUNLEVEL			},
245 	{ LVL5, 	MASK5,	'5', LSEL_RUNLEVEL | LSEL_NOAUDIT	},
246 	{ LVL6, 	MASK6, 	'6', LSEL_RUNLEVEL | LSEL_NOAUDIT	},
247 	{ SINGLE_USER, 	MASKSU, 'S', LSEL_RUNLEVEL | LSEL_NOAUDIT	},
248 	{ SINGLE_USER, 	MASKSU, 's', LSEL_RUNLEVEL | LSEL_NOAUDIT	},
249 	{ LVLa,		MASKa,	'a', 0					},
250 	{ LVLb,		MASKb,	'b', 0					},
251 	{ LVLc,		MASKc,	'c', 0					}
252 };
253 
/* Number of rows in lvls[]. */
254 #define	LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
255 
256 /*
257  * Legal action field values.
258  */
259 #define	OFF		0	/* Kill process if on, else ignore */
260 #define	RESPAWN		1	/* Continuously restart process when it dies */
261 #define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
262 #define	ONCE		2	/* Start process, do not respawn when dead */
263 #define	WAIT		3	/* Perform once and wait to complete */
264 #define	BOOT		4	/* Start at boot time only */
265 #define	BOOTWAIT	5	/* Start at boot time and wait to complete */
266 #define	POWERFAIL	6	/* Start on powerfail */
267 #define	POWERWAIT	7	/* Start and wait for complete on powerfail */
268 #define	INITDEFAULT	8	/* Default level "init" should start at */
269 #define	SYSINIT		9	/* Actions performed before init speaks */
270 
271 #define	M_OFF		0001
272 #define	M_RESPAWN	0002
273 #define	M_ONDEMAND	M_RESPAWN
274 #define	M_ONCE		0004
275 #define	M_WAIT		0010
276 #define	M_BOOT		0020
277 #define	M_BOOTWAIT	0040
278 #define	M_PF		0100
279 #define	M_PWAIT		0200
280 #define	M_INITDEFAULT	0400
281 #define	M_SYSINIT	01000
282 
283 /* States for the inittab parser in getcmd(). */
284 #define	ID	1
285 #define	LEVELS	2
286 #define	ACTION	3
287 #define	COMMAND	4
288 #define	COMMENT	5
289 
290 /*
291  * Init can be in any of three main states, "normal" mode where it is
292  * processing entries for the lines file in a normal fashion, "boot" mode,
293  * where it is only interested in the boot actions, and "powerfail" mode,
294  * where it is only interested in powerfail related actions. The following
295  * masks declare the legal actions for each mode.
296  */
297 #define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
298 #define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
299 #define	PF_MODES	(M_PF | M_PWAIT)
300 
/*
 * One slot of init's process table (see proc_table).  A slot is in use
 * only when OCCUPIED is set in p_flags.
 */
301 struct PROC_TABLE {
302 	char	p_id[4];	/* Four letter unique id of process */
303 	pid_t	p_pid;		/* Process id */
304 	short	p_count;	/* How many respawns of this command in */
305 				/*   the current series */
306 	long	p_time;		/* Start time for a series of respawns */
307 	short	p_flags;	/* OCCUPIED, LIVING, ... bits (see below) */
308 	short	p_exit;		/* Exit status of a process which died */
309 };
310 
311 /*
312  * Flags for the "p_flags" word of a PROC_TABLE entry:
313  *
314  *	OCCUPIED	This slot in init's proc table is in use.
315  *
316  *	LIVING		Process is alive.
317  *
318  *	NOCLEANUP	efork() is not allowed to cleanup this entry even
319  *			if process is dead.
320  *
321  *	NAMED		This process has a name, i.e. came from inittab.
322  *
323  *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
324  *			formed this way are respawnable and immune to level
325  *			changes as long as their entry exists in inittab.
326  *
327  *	TOUCHED		Flag used by remv() to determine whether it has looked
328  *			at an entry while checking for processes to be killed.
329  *
330  *	WARNED		Flag used by remv() to mark processes that have been
331  *			sent the SIGTERM signal.  If they don't die in 5
332  *			seconds, they are sent the SIGKILL signal.
333  *
334  *	KILLED		Flag used by remv() to mark procs that have been sent
335  *			the SIGTERM and SIGKILL signals.
336  *
337  *	PF_MASK		Bitwise or of legal flags, for sanity checking.
338  */
339 #define	OCCUPIED	01
340 #define	LIVING		02
341 #define	NOCLEANUP	04
342 #define	NAMED		010
343 #define	DEMANDREQUEST	020
344 #define	TOUCHED		040
345 #define	WARNED		0100
346 #define	KILLED		0200
347 #define	PF_MASK		0377
348 
349 /*
350  * Respawn limits for processes that are to be respawned:
351  *
352  *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
353  *			respawn a process SPAWN_LIMIT times before it gets mad.
354  *
355  *	SPAWN_LIMIT	The number of respawns "init" will attempt in
356  *			SPAWN_INTERVAL seconds before it generates an
357  *			error message and inhibits further tries for
358  *			INHIBIT seconds.
359  *
360  *	INHIBIT		The number of seconds "init" ignores an entry it had
361  *			trouble spawning unless a "telinit Q" is received.
362  */
363 
364 #define	SPAWN_INTERVAL	(2*60)
365 #define	SPAWN_LIMIT	10
366 #define	INHIBIT		(5*60)
367 
368 /*
369  * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
370  */
371 #define	ID_MAX_STR_LEN	10
372 
373 #define	NULLPROC	((struct PROC_TABLE *)(0))
374 #define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
375 
376 struct CMD_LINE {
377 	char c_id[4];	/* Four letter unique id of process to be */
378 			/*   affected by action */
379 	short c_levels;	/* Mask of legal levels for process */
380 	short c_action;	/* Mask for type of action required */
381 	char *c_command; /* Pointer to init command */
382 };
383 
384 struct	pidrec {
385 	int	pd_type;	/* Command type */
386 	pid_t	pd_pid;		/* pid to add or remove */
387 };
388 
389 /*
390  * pd_type's
391  */
392 #define	ADDPID	1
393 #define	REMPID	2
394 
395 static struct	pidlist {
396 	pid_t	pl_pid;		/* pid to watch for */
397 	int	pl_dflag;	/* Flag indicating SIGCLD from this pid */
398 	short	pl_exit;	/* Exit status of proc */
399 	struct	pidlist	*pl_next; /* Next in list */
400 } *Plhead, *Plfree;
401 
402 /*
403  * The following structure contains a set of modes for /dev/syscon
404  * and should match the default contents of /etc/ioctl.syscon.
405  */
406 static struct termios	dflt_termios = {
407 	BRKINT|ICRNL|IXON|IMAXBEL,			/* iflag */
408 	OPOST|ONLCR|TAB3,				/* oflag */
409 	CS8|CREAD|B9600,				/* cflag */
410 	ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN, /* lflag */
411 	CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
412 	0, 0, 0, 0, 0, 0, 0, 0,
413 	0, 0, 0
414 };
415 
416 static struct termios	stored_syscon_termios;
417 static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */
418 
/*
 * Reasons for init to wake up.  The individual reasons are the bit-fields
 * in w_flags; w_mask overlays them so that main() can test whether any
 * reason is pending (w_mask == 0 means none) and clear all of them with a
 * single assignment (w_mask = 0).
 */
419 static union WAKEUP {
420 	struct WAKEFLAGS {
421 		unsigned w_usersignal : 1;	/* User sent signal to "init" */
422 		unsigned w_childdeath : 1;	/* An "init" child died */
423 		unsigned w_powerhit : 1;	/* OS experienced powerfail */
424 	}	w_flags;
425 	int w_mask;			/* all of w_flags viewed as one int */
426 } wakeup;
427 
428 
/*
 * init's mutable state, reached through the g_state pointer.
 *
 * ist_proc_table is declared with a single element but is walked from
 * proc_table up to proc_table + num_proc (see remv()), so the structure is
 * presumably over-allocated (see g_state_sz) to hold the whole table --
 * TODO confirm against st_init() and increase_proc_table_size().
 */
429 struct init_state {
430 	int			ist_runlevel;	/* accessed as cur_state */
431 	int			ist_num_proc;	/* accessed as num_proc */
432 	int			ist_utmpx_ok;	/* accessed as utmpx_ok */
433 	struct PROC_TABLE	ist_proc_table[1]; /* accessed as proc_table */
434 };
435 
/*
 * Shorthand accessors that let the rest of the file treat the members of
 * *g_state as if they were ordinary global variables.
 */
436 #define	cur_state	(g_state->ist_runlevel)
437 #define	num_proc	(g_state->ist_num_proc)
438 #define	proc_table	(g_state->ist_proc_table)
439 #define	utmpx_ok	(g_state->ist_utmpx_ok)
440 
441 /* Contract cookies. */
442 #define	ORDINARY_COOKIE		0
443 #define	STARTD_COOKIE		1
444 
445 
/*
 * bad_error(func, err)
 *   Report that the routine named by the string `func' failed with the
 *   unexpected error code `err', then abort().  When NDEBUG is defined the
 *   message is omitted and the macro simply aborts.
 *
 *   Both variants expand to a single statement that needs a trailing
 *   semicolon, so the macro can be used as the body of an unbraced
 *   if/else without swallowing the else.
 */
#ifndef NDEBUG
#define	bad_error(func, err)	do {					\
	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
	abort();							\
} while (0)
#else
#define	bad_error(func, err)	abort()
#endif
455 
456 
457 /*
458  * Useful file and device names.
459  */
460 static char *CONSOLE	  = "/dev/console";	/* Real system console */
461 static char *INITPIPE_DIR = "/etc";
462 static char *INITPIPE	  = "/etc/initpipe";
463 
464 #define	INIT_STATE_DIR "/etc/svc/volatile"
465 static const char * const init_state_file = INIT_STATE_DIR "/init.state";
466 static const char * const init_next_state_file =
467 	INIT_STATE_DIR "/init-next.state";
468 
469 static const int init_num_proc = 20;	/* Initial size of process table. */
470 
471 static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
472 static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
473 static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
474 static char *SYSTTY	 = "/dev/systty";	/* System Console */
475 static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
476 static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
477 static char *ENVFILE	 = "/etc/default/init";	/* Default env. */
478 static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
479 static char *SH	= "/sbin/sh";		/* Standard shell */
480 
481 /*
482  * Default Path.  /sbin is included in path only during sysinit phase
483  */
484 #define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
485 #define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"
486 
487 static int	prior_state;
488 static int	prev_state;	/* State "init" was in last time it woke */
489 static int	new_state;	/* State user wants "init" to go to. */
490 static int	op_modes = BOOT_MODES; /* Current state of "init" */
491 static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
492 				/*   childeath() and cleared in cleanaux() */
493 static int	Pfd = -1;	/* fd to receive pids thru */
494 static unsigned int	spawncnt, pausecnt;
495 static int	rsflag;		/* Set if a respawn has taken place */
496 static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
497 				/* routine each time an alarm interrupt */
498 				/* takes place. */
499 static int	sflg = 0;	/* Set if we were booted -s to single user */
500 static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
501 static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
502 static pid_t	init_pid;	/* PID of "one true" init for current zone */
503 
504 static struct init_state *g_state = NULL;
505 static size_t	g_state_sz;
506 static int	booting = 1;	/* Set while we're booting. */
507 
508 /*
509  * Array for default global environment.
510  */
511 #define	MAXENVENT	24	/* Max number of default env variables + 1 */
512 				/* init can use three itself, so this leaves */
513 				/* 20 for the administrator in ENVFILE. */
514 static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
515 static int	glob_envn;		/* Number of environment strings */
516 
517 
518 static struct pollfd	poll_fds[1];
519 static int		poll_nfds = 0;	/* poll_fds is uninitialized */
520 
521 static int	legacy_tmpl = -1;	/* fd for legacy contract template */
522 static int	startd_tmpl = -1;	/* fd for svc.startd's template */
523 
524 static char	startd_cline[256] = "";	/* svc.startd's command line */
525 static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
526 static char	*smf_options = NULL;	/* Options to give to startd. */
527 static int	smf_debug = 0;		/* Messages for debugging smf(5) */
528 static time_t	init_boot_time;		/* Substitute for kernel boot time. */
529 
530 #define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
531 #define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */
532 
533 static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
534 static uint_t	startd_failure_index;
535 
536 
537 static char	*prog_name(char *);
538 static int	state_to_mask(int);
539 static int	lvlname_to_mask(char, int *);
540 static void	lscf_set_runlevel(char);
541 static int	state_to_flags(int);
542 static char	state_to_name(int);
543 static int	lvlname_to_state(char);
544 static int	getcmd(struct CMD_LINE *, char *);
545 static int	realcon();
546 static int	spawn_processes();
547 static int	get_ioctl_syscon();
548 static int	account(short, struct PROC_TABLE *, char *);
549 static void	alarmclk();
550 static void	childeath(int);
551 static void	cleanaux();
552 static void	clearent(pid_t, short);
553 static void	console(boolean_t, char *, ...);
554 static void	init_signals(void);
555 static void	setup_pipe();
556 static void	killproc(pid_t);
557 static void	init_env();
558 static void	boot_init();
559 static void	powerfail();
560 static void	remv();
561 static void	write_ioctl_syscon();
562 static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
563 static void	setimer(int);
564 static void	siglvl(int, siginfo_t *, ucontext_t *);
565 static void	sigpoll(int);
566 static void	enter_maintenance(void);
567 static void	timer(int);
568 static void	userinit(int, char **);
569 static void	notify_pam_dead(struct utmpx *);
570 static long	waitproc(struct PROC_TABLE *);
571 static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
572 static struct PROC_TABLE *findpslot(struct CMD_LINE *);
573 static void	increase_proc_table_size();
574 static void	st_init();
575 static void	st_write();
576 static void	contracts_init();
577 static void	contract_event(struct pollfd *);
578 static int	startd_run(const char *, int, ctid_t);
579 static void	startd_record_failure();
580 static int	startd_failure_rate_critical();
581 static char	*audit_boot_msg();
582 static int	audit_put_record(int, int, char *);
583 static void	update_boot_archive(int new_state);
584 
/*
 * main()
 *   Entry point for both the zone's "one true" init and for init invoked
 *   by a user to deliver a request to it.
 *
 *   Options (unrecognized options are silently ignored):
 *	-b	set bflg, clear rflg, and set sflg if it is not already set
 *	-r	set rflg and clear bflg
 *	-s	set sflg, unless -b has already been seen
 *	-m arg	remember arg in smf_options; smf_debug is turned on when
 *		arg contains the substring "debug"
 *
 *   After the options are parsed, the pid of the zone's init is looked up.
 *   If this process is not that pid, the work is handed to userinit().
 *   Otherwise state is initialized with st_init(), the boot banner is
 *   printed when booting in a non-global zone, the syscon ioctl settings,
 *   signals, environment and contracts are set up, and the main loop is
 *   entered.
 *
 *   Each pass of the main loop sets up the pipe if needed, cleans up after
 *   dead "godchildren", removes processes that no longer belong (remv()),
 *   records a run-level change, and calls spawn_processes().  It then
 *   decides what the next pass does based on powerfail state, the current
 *   op_modes, and whether the current state is one of the demand levels
 *   a, b or c; only when nothing is pending does it sleep in poll().
 *
 *   Returns 1 if the pid of the zone's init cannot be determined; the main
 *   loop itself never exits.
 */
585 int
586 main(int argc, char *argv[])
587 {
588 	int	chg_lvl_flag = FALSE, print_banner = FALSE;
589 	int	may_need_audit = 1;
590 	int	c;
591 	char	*msg;
592 
593 	/* Get a timestamp for use as boot time, if needed. */
594 	(void) time(&init_boot_time);
595 
596 	/* Get the default umask */
597 	cmask = umask(022);
598 	(void) umask(cmask);
599 
600 	/* Parse the arguments to init. Check for single user */
	/*
	 * opterr is cleared and the switch has no default case, so options
	 * other than b, r, s and m are skipped without any diagnostic.
	 */
601 	opterr = 0;
602 	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
603 		switch (c) {
604 		case 'b':
605 			rflg = 0;
606 			bflg = 1;
607 			if (!sflg)
608 				sflg++;
609 			break;
610 		case 'r':
611 			bflg = 0;
612 			rflg++;
613 			break;
614 		case 's':
615 			if (!bflg)
616 				sflg++;
617 			break;
618 		case 'm':
619 			smf_options = optarg;
620 			smf_debug = (strstr(smf_options, "debug") != NULL);
621 			break;
622 		}
623 	}
624 
625 	/*
626 	 * Determine if we are the main init, or a user invoked init, whose job
627 	 * it is to inform init to change levels or perform some other action.
628 	 */
629 	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
630 	    sizeof (init_pid)) != sizeof (init_pid)) {
631 		(void) fprintf(stderr, "could not get pid for init\n");
632 		return (1);
633 	}
634 
635 	/*
636 	 * If this PID is not the same as the "true" init for the zone, then we
637 	 * must be in 'user' mode.
638 	 */
	/*
	 * NOTE(review): everything after this point assumes we are the real
	 * init, so userinit() is presumably expected not to return -- confirm.
	 */
639 	if (getpid() != init_pid) {
640 		userinit(argc, argv);
641 	}
642 
643 	if (getzoneid() != GLOBAL_ZONEID) {
644 		print_banner = TRUE;
645 	}
646 
647 	/*
648 	 * Initialize state (and set "booting").
649 	 */
650 	st_init();
651 
652 	if (booting && print_banner) {
653 		struct utsname un;
654 		char buf[BUFSIZ], *isa;
655 		long ret;
656 		int bits = 32;
657 
658 		/*
659 		 * We want to print the boot banner as soon as
660 		 * possible.  In the global zone, the kernel does it,
661 		 * but we do not have that luxury in non-global zones,
662 		 * so we will print it here.
663 		 */
664 		(void) uname(&un);
		/*
		 * Walk the space-separated ISA list looking for a 64-bit
		 * ISA name; "bits" stays at 32 if none is found or if the
		 * list could not be fetched into buf.
		 */
665 		ret = sysinfo(SI_ISALIST, buf, sizeof (buf));
666 		if (ret != -1L && ret <= sizeof (buf)) {
667 			for (isa = strtok(buf, " "); isa;
668 			    isa = strtok(NULL, " ")) {
669 				if (strcmp(isa, "sparcv9") == 0 ||
670 				    strcmp(isa, "amd64") == 0) {
671 					bits = 64;
672 					break;
673 				}
674 			}
675 		}
676 
677 		console(B_FALSE,
678 		    "\n\n%s Release %s Version %s %d-bit\r\n",
679 		    un.sysname, un.release, un.version, bits);
680 		console(B_FALSE,
681 		    "Copyright 1983-2005 Sun Microsystems, Inc. "
682 		    " All rights reserved.\r\n");
683 		console(B_FALSE,
684 		    "Use is subject to license terms.\r\n");
685 	}
686 
687 	/*
688 	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
689 	 * so that it can be brought up in the state it was in when the
690 	 * system went down; or set to defaults if ioctl.syscon isn't
691 	 * valid.
692 	 *
693 	 * This needs to be done even if we're restarting so reset_modes()
694 	 * will work in case we need to go down to single user mode.
695 	 */
696 	write_ioctl = get_ioctl_syscon();
697 
698 	/*
699 	 * Set up all signals to be caught or ignored as appropriate.
700 	 */
701 	init_signals();
702 
703 	/* Load glob_envp from ENVFILE. */
704 	init_env();
705 
706 	contracts_init();
707 
708 	if (!booting) {
709 		/* cur_state should have been read in. */
710 
711 		op_modes = NORMAL_MODES;
712 
713 		/* Rewrite the ioctl file if it was bad. */
714 		if (write_ioctl)
715 			write_ioctl_syscon();
716 	} else {
717 		/*
718 		 * It's fine to boot up with state as zero, because
719 		 * startd will later tell us the real state.
720 		 */
721 		cur_state = 0;
722 		op_modes = BOOT_MODES;
723 
724 		boot_init();
725 	}
726 
727 	prev_state = prior_state = cur_state;
728 
729 	/*
730 	 * Here is the beginning of the main process loop.
731 	 */
732 	for (;;) {
733 		if (Pfd < 0)
734 			setup_pipe();
735 
736 		/*
737 		 * Clean up any accounting records for dead "godchildren".
738 		 */
739 		if (Gchild)
740 			cleanaux();
741 
742 		/*
743 		 * If in "normal" mode, check all living processes and initiate
744 		 * kill sequence on those that should not be there anymore.
745 		 */
746 		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
747 		    cur_state != LVLb && cur_state != LVLc)
748 			remv();
749 
750 		/*
751 		 * If a change in run levels is the reason we awoke, now do
752 		 * the accounting to report the change in the utmp file.
753 		 * Also report the change on the system console.
754 		 */
755 		if (chg_lvl_flag) {
756 			chg_lvl_flag = FALSE;
757 
758 			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
759 				char rl = state_to_name(cur_state);
760 
				/*
				 * NOTE(review): state_to_name() presumably
				 * returns -1 for an unknown state; this test
				 * only works where plain char is signed --
				 * confirm.
				 */
761 				if (rl != -1)
762 					lscf_set_runlevel(rl);
763 			}
764 
765 			may_need_audit = 1;
766 		}
767 
768 		/*
769 		 * Scan the inittab file and spawn and respawn processes that
770 		 * should be alive in the current state. If inittab does not
771 		 * exist default to  single user mode.
772 		 */
773 		if (spawn_processes() == FAILURE) {
774 			prior_state = prev_state;
775 			cur_state = SINGLE_USER;
776 		}
777 
778 		/* If any respawns occurred, take note. */
779 		if (rsflag) {
780 			rsflag = 0;
781 			spawncnt++;
782 		}
783 
784 		/*
785 		 * If a powerfail signal was received during the last
786 		 * sequence, set mode to powerfail.  When spawn_processes() is
787 		 * entered the first thing it does is to check "powerhit".  If
788 		 * it is in PF_MODES then it clears "powerhit" and does
789 		 * a powerfail sequence.  If it is not in PF_MODES, then it
790 		 * puts itself in PF_MODES and then clears "powerhit".  Should
791 		 * "powerhit" get set again while spawn_processes() is working
792 		 * on a powerfail sequence, the following code  will see that
793 		 * spawn_processes() tries to execute the powerfail sequence
794 		 * again.  This guarantees that the powerfail sequence will be
795 		 * successfully completed before further processing takes
796 		 * place.
797 		 */
798 		if (wakeup.w_flags.w_powerhit) {
799 			op_modes = PF_MODES;
800 			/*
801 			 * Make sure that cur_state != prev_state so that
802 			 * ONCE and WAIT types work.
803 			 */
804 			prev_state = 0;
805 		} else if (op_modes != NORMAL_MODES) {
806 			/*
807 			 * If spawn_processes() was not just called while in
808 			 * normal mode, we set the mode to normal and it will
809 			 * be called again to check normal modes.  If we have
810 			 * just finished a powerfail sequence with prev_state
811 			 * equal to zero, we set prev_state equal to cur_state
812 			 * before the next pass through.
813 			 */
814 			if (op_modes == PF_MODES)
815 				prev_state = cur_state;
816 			op_modes = NORMAL_MODES;
817 		} else if (cur_state == LVLa || cur_state == LVLb ||
818 		    cur_state == LVLc) {
819 			/*
820 			 * If it was a change of levels that awakened us and the
821 			 * new level is one of the demand levels then reset
822 			 * cur_state to the previous state and do another scan
823 			 * to take care of the usual respawn actions.
824 			 */
825 			cur_state = prior_state;
826 			prior_state = prev_state;
827 			prev_state = cur_state;
828 		} else {
829 			prev_state = cur_state;
830 
			/* Nothing is pending: audit if needed, then sleep. */
831 			if (wakeup.w_mask == 0) {
832 				int ret;
833 
834 				if (may_need_audit && (cur_state == LVL3)) {
835 					msg = audit_boot_msg();
836 
837 					may_need_audit = 0;
838 					(void) audit_put_record(ADT_SUCCESS,
839 					    ADT_SUCCESS, msg);
840 					free(msg);
841 				}
842 
843 				/*
844 				 * "init" is finished with all actions for
845 				 * the current wakeup.
846 				 */
				/*
				 * A positive return is passed on to
				 * contract_event() for poll_fds[0].  A
				 * failure with EINTR is not reported;
				 * presumably it means a signal woke us.
				 */
847 				ret = poll(poll_fds, poll_nfds,
848 				    SLEEPTIME * MILLISEC);
849 				pausecnt++;
850 				if (ret > 0)
851 					contract_event(&poll_fds[0]);
852 				else if (ret < 0 && errno != EINTR)
853 					console(B_TRUE, "poll() error: %s\n",
854 					    strerror(errno));
855 			}
856 
857 			if (wakeup.w_flags.w_usersignal) {
858 				/*
859 				 * Install the new level.  This could be a real
860 				 * change in levels  or a telinit [Q|a|b|c] or
861 				 * just a telinit to the same level at which
862 				 * we are running.
863 				 */
864 				if (new_state != cur_state) {
865 					if (new_state == LVLa ||
866 					    new_state == LVLb ||
867 					    new_state == LVLc) {
868 						prev_state = prior_state;
869 						prior_state = cur_state;
870 						cur_state = new_state;
871 					} else {
872 						prev_state = cur_state;
873 						if (cur_state >= 0)
874 							prior_state = cur_state;
875 						cur_state = new_state;
876 						chg_lvl_flag = TRUE;
877 					}
878 				}
879 
880 				new_state = 0;
881 			}
882 
883 			if (wakeup.w_flags.w_powerhit)
884 				op_modes = PF_MODES;
885 
886 			/*
887 			 * Clear all wakeup reasons.
888 			 */
889 			wakeup.w_mask = 0;
890 		}
891 	}
892 
893 	/*NOTREACHED*/
894 }
895 
896 static void
897 update_boot_archive(int new_state)
898 {
899 	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
900 		return;
901 
902 	if (getzoneid() != GLOBAL_ZONEID)
903 		return;
904 
905 	(void) system("/sbin/bootadm -a update_all");
906 }
907 
908 /*
909  * void enter_maintenance()
910  *   A simple invocation of sulogin(1M), with no baggage, in the case that we
911  *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
912  *   we wait for it to exit.
913  */
914 static void
915 enter_maintenance()
916 {
917 	struct PROC_TABLE	*su_process;
918 
919 	console(B_FALSE, "Requesting maintenance mode\n"
920 	    "(See /lib/svc/share/README for additional information.)\n");
921 	(void) sigset(SIGCLD, SIG_DFL);
922 	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
923 		(void) pause();
924 	(void) sigset(SIGCLD, childeath);
925 	if (su_process == NULLPROC) {
926 		int fd;
927 
928 		(void) fclose(stdin);
929 		(void) fclose(stdout);
930 		(void) fclose(stderr);
931 		closefrom(0);
932 
933 		fd = open(SYSCON, O_RDWR | O_NOCTTY);
934 		if (fd >= 0) {
935 			(void) dup2(fd, 1);
936 			(void) dup2(fd, 2);
937 		} else {
938 			/*
939 			 * Need to issue an error message somewhere.
940 			 */
941 			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
942 			    getpid(), SYSCON, strerror(errno));
943 		}
944 
945 		/*
946 		 * Execute the "su" program.
947 		 */
948 		(void) execle(SU, SU, "-", (char *)0, glob_envp);
949 		console(B_TRUE, "execle of %s failed: %s\n", SU,
950 		    strerror(errno));
951 		timer(5);
952 		exit(1);
953 	}
954 
955 	/*
956 	 * If we are the parent, wait around for the child to die
957 	 * or for "init" to be signaled to change levels.
958 	 */
959 	while (waitproc(su_process) == FAILURE) {
960 		/*
961 		 * All other reasons for waking are ignored when in
962 		 * single-user mode.  The only child we are interested
963 		 * in is being waited for explicitly by waitproc().
964 		 */
965 		wakeup.w_mask = 0;
966 	}
967 }
968 
969 /*
970  * remv() scans through "proc_table" and performs cleanup.  If
971  * there is a process in the table, which shouldn't be here at
972  * the current run level, then remv() kills the process.
973  */
974 static void
975 remv()
976 {
977 	struct PROC_TABLE	*process;
978 	struct CMD_LINE		cmd;
979 	char			cmd_string[MAXCMDL];
980 	int			change_level;
981 
982 	change_level = (cur_state != prev_state ? TRUE : FALSE);
983 
984 	/*
985 	 * Clear the TOUCHED flag on all entries so that when we have
986 	 * finished scanning inittab, we will be able to tell if we
987 	 * have any processes for which there is no entry in inittab.
988 	 */
989 	for (process = proc_table;
990 	    (process < proc_table + num_proc); process++) {
991 		process->p_flags &= ~TOUCHED;
992 	}
993 
994 	/*
995 	 * Scan all inittab entries.
996 	 */
997 	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
998 		/* Scan for process which goes with this entry in inittab. */
999 		for (process = proc_table;
1000 		    (process < proc_table + num_proc); process++) {
1001 			if ((process->p_flags & OCCUPIED) == 0 ||
1002 			    !id_eq(process->p_id, cmd.c_id))
1003 				continue;
1004 
1005 			/*
1006 			 * This slot contains the process we are looking for.
1007 			 */
1008 
1009 			/*
1010 			 * Is the cur_state SINGLE_USER or is this process
1011 			 * marked as "off" or was this proc started by some
1012 			 * mechanism other than LVL{a|b|c} and the current level
1013 			 * does not support this process?
1014 			 */
1015 			if (cur_state == SINGLE_USER ||
1016 			    cmd.c_action == M_OFF ||
1017 			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1018 			    (process->p_flags & DEMANDREQUEST) == 0)) {
1019 				if (process->p_flags & LIVING) {
1020 					/*
1021 					 * Touch this entry so we know we have
1022 					 * treated it.  Note that procs which
1023 					 * are already dead at this point and
1024 					 * should not be restarted are left
1025 					 * untouched.  This causes their slot to
1026 					 * be freed later after dead accounting
1027 					 * is done.
1028 					 */
1029 					process->p_flags |= TOUCHED;
1030 
1031 					if ((process->p_flags & KILLED) == 0) {
1032 						if (change_level) {
1033 							process->p_flags
1034 							    |= WARNED;
1035 							(void) kill(
1036 							    process->p_pid,
1037 							    SIGTERM);
1038 						} else {
1039 							/*
1040 							 * Fork a killing proc
1041 							 * so "init" can
1042 							 * continue without
1043 							 * having to pause for
1044 							 * TWARN seconds.
1045 							 */
1046 							killproc(
1047 							    process->p_pid);
1048 						}
1049 						process->p_flags |= KILLED;
1050 					}
1051 				}
1052 			} else {
1053 				/*
1054 				 * Process can exist at current level.  If it is
1055 				 * still alive or a DEMANDREQUEST we touch it so
1056 				 * it will be left alone.  Otherwise we leave it
1057 				 * untouched so it will be accounted for and
1058 				 * cleaned up later in remv().  Dead
1059 				 * DEMANDREQUESTs will be accounted but not
1060 				 * freed.
1061 				 */
1062 				if (process->p_flags &
1063 				    (LIVING|NOCLEANUP|DEMANDREQUEST))
1064 					process->p_flags |= TOUCHED;
1065 			}
1066 
1067 			break;
1068 		}
1069 	}
1070 
1071 	st_write();
1072 
1073 	/*
1074 	 * If this was a change of levels call, scan through the
1075 	 * process table for processes that were warned to die.  If any
1076 	 * are found that haven't left yet, sleep for TWARN seconds and
1077 	 * then send final terminations to any that haven't died yet.
1078 	 */
1079 	if (change_level) {
1080 
1081 		/*
1082 		 * Set the alarm for TWARN seconds on the assumption
1083 		 * that there will be some that need to be waited for.
1084 		 * This won't harm anything except we are guaranteed to
1085 		 * wakeup in TWARN seconds whether we need to or not.
1086 		 */
1087 		setimer(TWARN);
1088 
1089 		/*
1090 		 * Scan for processes which should be dying.  We hope they
1091 		 * will die without having to be sent a SIGKILL signal.
1092 		 */
1093 		for (process = proc_table;
1094 		    (process < proc_table + num_proc); process++) {
1095 			/*
1096 			 * If this process should die, hasn't yet, and the
1097 			 * TWARN time hasn't expired yet, wait for process
1098 			 * to die or for timer to expire.
1099 			 */
1100 			while (time_up == FALSE &&
1101 			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1102 			    (WARNED|LIVING|OCCUPIED))
1103 				(void) pause();
1104 
1105 			if (time_up == TRUE)
1106 				break;
1107 		}
1108 
1109 		/*
1110 		 * If we reached the end of the table without the timer
1111 		 * expiring, then there are no procs which will have to be
1112 		 * sent the SIGKILL signal.  If the timer has expired, then
1113 		 * it is necessary to scan the table again and send signals
1114 		 * to all processes which aren't going away nicely.
1115 		 */
1116 		if (time_up == TRUE) {
1117 			for (process = proc_table;
1118 			    (process < proc_table + num_proc); process++) {
1119 				if ((process->p_flags &
1120 				    (WARNED|LIVING|OCCUPIED)) ==
1121 				    (WARNED|LIVING|OCCUPIED))
1122 					(void) kill(process->p_pid, SIGKILL);
1123 			}
1124 		}
1125 		setimer(0);
1126 	}
1127 
1128 	/*
1129 	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
1130 	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1131 	 * by the above scanning), and haven't been sent kill signals, and
1132 	 * those entries marked not LIVING, NAMED.  The former procs are killed.
1133 	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
1134 	 */
1135 	for (process = proc_table;
1136 	    (process < proc_table + num_proc); process++) {
1137 		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1138 		    == (LIVING|NAMED|OCCUPIED)) {
1139 			killproc(process->p_pid);
1140 			process->p_flags |= KILLED;
1141 		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1142 		    (NAMED|OCCUPIED)) {
1143 			(void) account(DEAD_PROCESS, process, NULL);
1144 			/*
1145 			 * If this named proc hasn't been TOUCHED, then free the
1146 			 * space. It has either died of it's own accord, but
1147 			 * isn't respawnable or it was killed because it
1148 			 * shouldn't exist at this level.
1149 			 */
1150 			if ((process->p_flags & TOUCHED) == 0)
1151 				process->p_flags = 0;
1152 		}
1153 	}
1154 
1155 	st_write();
1156 }
1157 
1158 /*
1159  * Extract the svc.startd command line and whether to restart it from its
1160  * inittab entry.
1161  */
1162 /*ARGSUSED*/
1163 static void
1164 process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1165 {
1166 	size_t sz;
1167 
1168 	/* Save the command line. */
1169 	if (sflg || rflg) {
1170 		/* Also append -r or -s. */
1171 		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1172 		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
1173 		if (sflg)
1174 			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1175 		if (rflg)
1176 			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1177 	} else {
1178 		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1179 	}
1180 
1181 	if (sz >= sizeof (startd_cline)) {
1182 		console(B_TRUE,
1183 		    "svc.startd command line too long.  Ignoring.\n");
1184 		startd_cline[0] = '\0';
1185 		return;
1186 	}
1187 }
1188 
1189 /*
1190  * spawn_processes() scans inittab for entries which should be run at this
1191  * mode.  Processes which should be running but are not, are started.
1192  */
1193 static int
1194 spawn_processes()
1195 {
1196 	struct PROC_TABLE		*pp;
1197 	struct CMD_LINE			cmd;
1198 	char				cmd_string[MAXCMDL];
1199 	short				lvl_mask;
1200 	int				status;
1201 
1202 	/*
1203 	 * First check the "powerhit" flag.  If it is set, make sure the modes
1204 	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1205 	 * on the "powerhit" flag by disallowing a new powerfail interrupt
1206 	 * between the test of the powerhit flag and the clearing of it.
1207 	 */
1208 	if (wakeup.w_flags.w_powerhit) {
1209 		wakeup.w_flags.w_powerhit = 0;
1210 		op_modes = PF_MODES;
1211 	}
1212 	lvl_mask = state_to_mask(cur_state);
1213 
1214 	/*
1215 	 * Scan through all the entries in inittab.
1216 	 */
1217 	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1218 		if (id_eq(cmd.c_id, "smf")) {
1219 			process_startd_line(&cmd, cmd_string);
1220 			continue;
1221 		}
1222 
1223 retry_for_proc_slot:
1224 
1225 		/*
1226 		 * Find out if there is a process slot for this entry already.
1227 		 */
1228 		if ((pp = findpslot(&cmd)) == NULLPROC) {
1229 			/*
1230 			 * we've run out of proc table entries
1231 			 * increase proc_table.
1232 			 */
1233 			increase_proc_table_size();
1234 
1235 			/*
1236 			 * Retry now as we have an empty proc slot.
1237 			 * In case increase_proc_table_size() fails,
1238 			 * we will keep retrying.
1239 			 */
1240 			goto retry_for_proc_slot;
1241 		}
1242 
1243 		/*
1244 		 * If there is an entry, and it is marked as DEMANDREQUEST,
1245 		 * one of the levels a, b, or c is in its levels mask, and
1246 		 * the action field is ONDEMAND and ONDEMAND is a permissable
1247 		 * mode, and the process is dead, then respawn it.
1248 		 */
1249 		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1250 		    (cmd.c_levels & MASK_abc) &&
1251 		    (cmd.c_action & op_modes) == M_ONDEMAND) {
1252 			spawn(pp, &cmd);
1253 			continue;
1254 		}
1255 
1256 		/*
1257 		 * If the action is not an action we are interested in,
1258 		 * skip the entry.
1259 		 */
1260 		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1261 		    (cmd.c_levels & lvl_mask) == 0)
1262 			continue;
1263 
1264 		/*
1265 		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1266 		 * ONDEMAND) and the action field is either OFF or the action
1267 		 * field is ONCE or WAIT and the current level is the same as
1268 		 * the last level, then skip this entry.  ONCE and WAIT only
1269 		 * get run when the level changes.
1270 		 */
1271 		if (op_modes == NORMAL_MODES &&
1272 		    (cmd.c_action == M_OFF ||
1273 			(cmd.c_action & (M_ONCE|M_WAIT)) &&
1274 			cur_state == prev_state))
1275 			continue;
1276 
1277 		/*
1278 		 * At this point we are interested in performing the action for
1279 		 * this entry.  Actions fall into two categories, spinning off
1280 		 * a process and not waiting, and spinning off a process and
1281 		 * waiting for it to die.  If the action is ONCE, RESPAWN,
1282 		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1283 		 * to die, for all other actions we do wait.
1284 		 */
1285 		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1286 			spawn(pp, &cmd);
1287 
1288 		} else {
1289 			spawn(pp, &cmd);
1290 			while (waitproc(pp) == FAILURE);
1291 			(void) account(DEAD_PROCESS, pp, NULL);
1292 			pp->p_flags = 0;
1293 		}
1294 	}
1295 	return (status);
1296 }
1297 
1298 /*
1299  * spawn() spawns a shell, inserts the information about the process
1300  * process into the proc_table, and does the startup accounting.
1301  */
1302 static void
1303 spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1304 {
1305 	int		i;
1306 	int		modes, maxfiles;
1307 	time_t		now;
1308 	struct PROC_TABLE tmproc, *oprocess;
1309 
1310 	/*
1311 	 * The modes to be sent to efork() are 0 unless we are
1312 	 * spawning a LVLa, LVLb, or LVLc entry or we will be
1313 	 * waiting for the death of the child before continuing.
1314 	 */
1315 	modes = NAMED;
1316 	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1317 	    cur_state == LVLb || cur_state == LVLc)
1318 		modes |= DEMANDREQUEST;
1319 	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1320 		modes |= NOCLEANUP;
1321 
1322 	/*
1323 	 * If this is a respawnable process, check the threshold
1324 	 * information to avoid excessive respawns.
1325 	 */
1326 	if (cmd->c_action & M_RESPAWN) {
1327 		/*
1328 		 * Add NOCLEANUP to all respawnable commands so that the
1329 		 * information about the frequency of respawns isn't lost.
1330 		 */
1331 		modes |= NOCLEANUP;
1332 		(void) time(&now);
1333 
1334 		/*
1335 		 * If no time is assigned, then this is the first time
1336 		 * this command is being processed in this series.  Assign
1337 		 * the current time.
1338 		 */
1339 		if (process->p_time == 0L)
1340 			process->p_time = now;
1341 
1342 		if (process->p_count++ == SPAWN_LIMIT) {
1343 
1344 			if ((now - process->p_time) < SPAWN_INTERVAL) {
1345 				/*
1346 				 * Process is respawning too rapidly.  Print
1347 				 * message and refuse to respawn it for now.
1348 				 */
1349 				console(B_TRUE, "Command is respawning too "
1350 				    "rapidly. Check for possible errors.\n"
1351 				    "id:%4s \"%s\"\n",
1352 				    &cmd->c_id[0], &cmd->c_command[EXEC]);
1353 				return;
1354 			}
1355 			process->p_time = now;
1356 			process->p_count = 0;
1357 
1358 		} else if (process->p_count > SPAWN_LIMIT) {
1359 			/*
1360 			 * If process has been respawning too rapidly and
1361 			 * the inhibit time limit hasn't expired yet, we
1362 			 * refuse to respawn.
1363 			 */
1364 			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1365 				return;
1366 			process->p_time = now;
1367 			process->p_count = 0;
1368 		}
1369 		rsflag = TRUE;
1370 	}
1371 
1372 	/*
1373 	 * Spawn a child process to execute this command.
1374 	 */
1375 	(void) sigset(SIGCLD, SIG_DFL);
1376 	oprocess = process;
1377 	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1378 		(void) pause();
1379 
1380 	if (process == NULLPROC) {
1381 
1382 		/*
1383 		 * We are the child.  We must make sure we get a different
1384 		 * file pointer for our references to utmpx.  Otherwise our
1385 		 * seeks and reads will compete with those of the parent.
1386 		 */
1387 		endutxent();
1388 
1389 		/*
1390 		 * Perform the accounting for the beginning of a process.
1391 		 * Note that all processes are initially "INIT_PROCESS"es.
1392 		 */
1393 		tmproc.p_id[0] = cmd->c_id[0];
1394 		tmproc.p_id[1] = cmd->c_id[1];
1395 		tmproc.p_id[2] = cmd->c_id[2];
1396 		tmproc.p_id[3] = cmd->c_id[3];
1397 		tmproc.p_pid = getpid();
1398 		tmproc.p_exit = 0;
1399 		(void) account(INIT_PROCESS, &tmproc,
1400 		    prog_name(&cmd->c_command[EXEC]));
1401 		maxfiles = ulimit(UL_GDESLIM, 0);
1402 		for (i = 0; i < maxfiles; i++)
1403 			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
1404 
1405 		/*
1406 		 * Now exec a shell with the -c option and the command
1407 		 * from inittab.
1408 		 */
1409 		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1410 		    glob_envp);
1411 		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1412 		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
1413 
1414 		/*
1415 		 * Don't come back so quickly that "init" doesn't have a
1416 		 * chance to finish putting this child in "proc_table".
1417 		 */
1418 		timer(20);
1419 		exit(1);
1420 
1421 	}
1422 
1423 	/*
1424 	 * We are the parent.  Insert the necessary
1425 	 * information in the proc_table.
1426 	 */
1427 	process->p_id[0] = cmd->c_id[0];
1428 	process->p_id[1] = cmd->c_id[1];
1429 	process->p_id[2] = cmd->c_id[2];
1430 	process->p_id[3] = cmd->c_id[3];
1431 
1432 	st_write();
1433 
1434 	(void) sigset(SIGCLD, childeath);
1435 }
1436 
1437 /*
1438  * findpslot() finds the old slot in the process table for the
1439  * command with the same id, or it finds an empty slot.
1440  */
1441 static struct PROC_TABLE *
1442 findpslot(struct CMD_LINE *cmd)
1443 {
1444 	struct PROC_TABLE	*process;
1445 	struct PROC_TABLE	*empty = NULLPROC;
1446 
1447 	for (process = proc_table;
1448 	    (process < proc_table + num_proc); process++) {
1449 		if (process->p_flags & OCCUPIED &&
1450 		    id_eq(process->p_id, cmd->c_id))
1451 			break;
1452 
1453 		/*
1454 		 * If the entry is totally empty and "empty" is still 0,
1455 		 * remember where this hole is and make sure the slot is
1456 		 * zeroed out.
1457 		 */
1458 		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1459 			empty = process;
1460 			process->p_id[0] = '\0';
1461 			process->p_id[1] = '\0';
1462 			process->p_id[2] = '\0';
1463 			process->p_id[3] = '\0';
1464 			process->p_pid = 0;
1465 			process->p_time = 0L;
1466 			process->p_count = 0;
1467 			process->p_flags = 0;
1468 			process->p_exit = 0;
1469 		}
1470 	}
1471 
1472 	/*
1473 	 * If there is no entry for this slot, then there should be an
1474 	 * empty slot.  If there is no empty slot, then we've run out
1475 	 * of proc_table space.  If the latter is true, empty will be
1476 	 * NULL and the caller will have to complain.
1477 	 */
1478 	if (process == (proc_table + num_proc))
1479 		process = empty;
1480 
1481 	return (process);
1482 }
1483 
1484 /*
1485  * getcmd() parses lines from inittab.  Each time it finds a command line
1486  * it will return TRUE as well as fill the passed CMD_LINE structure and
1487  * the shell command string.  When the end of inittab is reached, FALSE
1488  * is returned inittab is automatically opened if it is not currently open
1489  * and is closed when the end of the file is reached.
1490  */
1491 static FILE *fp_inittab = NULL;
1492 
1493 static int
1494 getcmd(struct CMD_LINE *cmd, char *shcmd)
1495 {
1496 	char	*ptr;
1497 	int	c, lastc, state;
1498 	char 	*ptr1;
1499 	int	answer, i, proceed;
1500 	struct	stat	sbuf;
1501 	static char *actions[] = {
1502 		"off", "respawn", "ondemand", "once", "wait", "boot",
1503 		"bootwait", "powerfail", "powerwait", "initdefault",
1504 		"sysinit",
1505 	};
1506 	static short act_masks[] = {
1507 		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1508 		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1509 	};
1510 	/*
1511 	 * Only these actions will be allowed for entries which
1512 	 * are specified for single-user mode.
1513 	 */
1514 	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1515 
1516 	if (fp_inittab == NULL) {
1517 		/*
1518 		 * Before attempting to open inittab we stat it to make
1519 		 * sure it currently exists and is not empty.  We try
1520 		 * several times because someone may have temporarily
1521 		 * unlinked or truncated the file.
1522 		 */
1523 		for (i = 0; i < 3; i++) {
1524 			if (stat(INITTAB, &sbuf) == -1) {
1525 				if (i == 2) {
1526 					console(B_TRUE,
1527 					    "Cannot stat %s, errno: %d\n",
1528 					    INITTAB, errno);
1529 					return (FAILURE);
1530 				} else {
1531 					timer(3);
1532 				}
1533 			} else if (sbuf.st_size < 10) {
1534 				if (i == 2) {
1535 					console(B_TRUE,
1536 					    "%s truncated or corrupted\n",
1537 					    INITTAB);
1538 					return (FAILURE);
1539 				} else {
1540 					timer(3);
1541 				}
1542 			} else {
1543 				break;
1544 			}
1545 		}
1546 
1547 		/*
1548 		 * If unable to open inittab, print error message and
1549 		 * return FAILURE to caller.
1550 		 */
1551 		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1552 			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1553 			    errno);
1554 			return (FAILURE);
1555 		}
1556 	}
1557 
1558 	/*
1559 	 * Keep getting commands from inittab until you find a
1560 	 * good one or run out of file.
1561 	 */
1562 	for (answer = FALSE; answer == FALSE; ) {
1563 		/*
1564 		 * Zero out the cmd itself before trying next line.
1565 		 */
1566 		bzero(cmd, sizeof (struct CMD_LINE));
1567 
1568 		/*
1569 		 * Read in lines of inittab, parsing at colons, until a line is
1570 		 * read in which doesn't end with a backslash.  Do not start if
1571 		 * the first character read is an EOF.  Note that this means
1572 		 * that lines which don't end in a newline are still processed,
1573 		 * since the "for" will terminate normally once started,
1574 		 * regardless of whether line terminates with a newline or EOF.
1575 		 */
1576 		state = FAILURE;
1577 		if ((c = fgetc(fp_inittab)) == EOF) {
1578 			answer = FALSE;
1579 			(void) fclose(fp_inittab);
1580 			fp_inittab = NULL;
1581 			break;
1582 		}
1583 
1584 		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1585 		    proceed && c != EOF;
1586 		    lastc = c, c = fgetc(fp_inittab)) {
1587 		    /* If we're not in the FAILURE state and haven't	*/
1588 		    /* yet reached the shell command field, process	*/
1589 		    /* the line, otherwise just look for a real end	*/
1590 		    /* of line.						*/
1591 		    if (state != FAILURE && state != COMMAND) {
1592 			/*
1593 			 * Squeeze out spaces and tabs.
1594 			 */
1595 			if (c == ' ' || c == '\t')
1596 				continue;
1597 
1598 			/*
1599 			 * Ignore characters in a comment, except for the \n.
1600 			 */
1601 			if (state == COMMENT) {
1602 				if (c == '\n') {
1603 					lastc = ' ';
1604 					break;
1605 				} else {
1606 					continue;
1607 				}
1608 			}
1609 
1610 			/*
1611 			 * Detect comments (lines whose first non-whitespace
1612 			 * character is '#') by checking that we're at the
1613 			 * beginning of a line, have seen a '#', and haven't
1614 			 * yet accumulated any characters.
1615 			 */
1616 			if (state == ID && c == '#' && ptr == shcmd) {
1617 				state = COMMENT;
1618 				continue;
1619 			}
1620 
1621 			/*
1622 			 * If the character is a ':', then check the
1623 			 * previous field for correctness and advance
1624 			 * to the next field.
1625 			 */
1626 			if (c == ':') {
1627 			    switch (state) {
1628 
1629 			    case ID :
1630 				/*
1631 				 * Check to see that there are only
1632 				 * 1 to 4 characters for the id.
1633 				 */
1634 				if ((i = ptr - shcmd) < 1 || i > 4) {
1635 					state = FAILURE;
1636 				} else {
1637 					bcopy(shcmd, &cmd->c_id[0], i);
1638 					ptr = shcmd;
1639 					state = LEVELS;
1640 				}
1641 				break;
1642 
1643 			    case LEVELS :
1644 				/*
1645 				 * Build a mask for all the levels for
1646 				 * which this command will be legal.
1647 				 */
1648 				for (cmd->c_levels = 0, ptr1 = shcmd;
1649 				    ptr1 < ptr; ptr1++) {
1650 					int mask;
1651 					if (lvlname_to_mask(*ptr1,
1652 					    &mask) == -1) {
1653 						state = FAILURE;
1654 						break;
1655 					}
1656 					cmd->c_levels |= mask;
1657 				}
1658 				if (state != FAILURE) {
1659 					state = ACTION;
1660 					ptr = shcmd;	/* Reset the buffer */
1661 				}
1662 				break;
1663 
1664 			    case ACTION :
1665 				/*
1666 				 * Null terminate the string in shcmd buffer and
1667 				 * then try to match against legal actions.  If
1668 				 * the field is of length 0, then the default of
1669 				 * "RESPAWN" is used if the id is numeric,
1670 				 * otherwise the default is "OFF".
1671 				 */
1672 				if (ptr == shcmd) {
1673 					if (isdigit(cmd->c_id[0]) &&
1674 					    (cmd->c_id[1] == '\0' ||
1675 						isdigit(cmd->c_id[1])) &&
1676 					    (cmd->c_id[2] == '\0' ||
1677 						isdigit(cmd->c_id[2])) &&
1678 					    (cmd->c_id[3] == '\0' ||
1679 						isdigit(cmd->c_id[3])))
1680 						    cmd->c_action = M_RESPAWN;
1681 					else
1682 						    cmd->c_action = M_OFF;
1683 				} else {
1684 				    for (cmd->c_action = 0, i = 0, *ptr = '\0';
1685 				    i < sizeof (actions)/sizeof (char *);
1686 				    i++) {
1687 					if (strcmp(shcmd, actions[i]) == 0) {
1688 					    if ((cmd->c_levels & MASKSU) &&
1689 						!(act_masks[i] & su_acts))
1690 						    cmd->c_action = 0;
1691 					    else
1692 						cmd->c_action = act_masks[i];
1693 					    break;
1694 					}
1695 				    }
1696 				}
1697 
1698 				/*
1699 				 * If the action didn't match any legal action,
1700 				 * set state to FAILURE.
1701 				 */
1702 				if (cmd->c_action == 0) {
1703 					state = FAILURE;
1704 				} else {
1705 					state = COMMAND;
1706 					(void) strcpy(shcmd, "exec ");
1707 				}
1708 				ptr = shcmd + EXEC;
1709 				break;
1710 			    }
1711 			    continue;
1712 			}
1713 		    }
1714 
1715 		    /* If the character is a '\n', then this is the end of a */
1716 		    /* line.  If the '\n' wasn't preceded by a backslash, */
1717 		    /* it is also the end of an inittab command.  If it was */
1718 		    /* preceded by a backslash then the next line is a */
1719 		    /* continuation.  Note that the continuation '\n' falls */
1720 		    /* through and is treated like other characters and is */
1721 		    /* stored in the shell command line. */
1722 		    if (c == '\n' && lastc != '\\') {
1723 				proceed = FALSE;
1724 				*ptr = '\0';
1725 				break;
1726 		    }
1727 
1728 		    /* For all other characters just stuff them into the */
1729 		    /* command as long as there aren't too many of them. */
1730 		    /* Make sure there is room for a terminating '\0' also. */
1731 		    if (ptr >= shcmd + MAXCMDL - 1)
1732 			state = FAILURE;
1733 		    else
1734 			*ptr++ = (char)c;
1735 
1736 		    /* If the character we just stored was a quoted	*/
1737 		    /* backslash, then change "c" to '\0', so that this	*/
1738 		    /* backslash will not cause a subsequent '\n' to appear */
1739 		    /* quoted.  In otherwords '\' '\' '\n' is the real end */
1740 		    /* of a command, while '\' '\n' is a continuation. */
1741 		    if (c == '\\' && lastc == '\\')
1742 			c = '\0';
1743 		}
1744 
1745 		/*
1746 		 * Make sure all the fields are properly specified
1747 		 * for a good command line.
1748 		 */
1749 		if (state == COMMAND) {
1750 			answer = TRUE;
1751 			cmd->c_command = shcmd;
1752 
1753 			/*
1754 			 * If no default level was supplied, insert
1755 			 * all numerical levels.
1756 			 */
1757 			if (cmd->c_levels == 0)
1758 				cmd->c_levels = MASK_NUMERIC;
1759 
1760 			/*
1761 			 * If no action has been supplied, declare this
1762 			 * entry to be OFF.
1763 			 */
1764 			if (cmd->c_action == 0)
1765 				cmd->c_action = M_OFF;
1766 
1767 			/*
1768 			 * If no shell command has been supplied, make sure
1769 			 * there is a null string in the command field.
1770 			 */
1771 			if (ptr == shcmd + EXEC)
1772 				*shcmd = '\0';
1773 		} else
1774 			answer = FALSE;
1775 
1776 		/*
1777 		 * If we have reached the end of inittab, then close it
1778 		 * and quit trying to find a good command line.
1779 		 */
1780 		if (c == EOF) {
1781 			(void) fclose(fp_inittab);
1782 			fp_inittab = NULL;
1783 			break;
1784 		}
1785 	}
1786 	return (answer);
1787 }
1788 
1789 /*
1790  * lvlname_to_state(): convert the character name of a state to its level
1791  * (its corresponding signal number).
1792  */
1793 static int
1794 lvlname_to_state(char name)
1795 {
1796 	int i;
1797 	for (i = 0; i < LVL_NELEMS; i++) {
1798 		if (lvls[i].lvl_name == name)
1799 			return (lvls[i].lvl_state);
1800 	}
1801 	return (-1);
1802 }
1803 
1804 /*
1805  * state_to_name(): convert the level to the character name.
1806  */
1807 static char
1808 state_to_name(int state)
1809 {
1810 	int i;
1811 	for (i = 0; i < LVL_NELEMS; i++) {
1812 		if (lvls[i].lvl_state == state)
1813 			return (lvls[i].lvl_name);
1814 	}
1815 	return (-1);
1816 }
1817 
1818 /*
1819  * state_to_mask(): return the mask corresponding to a signal number
1820  */
1821 static int
1822 state_to_mask(int state)
1823 {
1824 	int i;
1825 	for (i = 0; i < LVL_NELEMS; i++) {
1826 		if (lvls[i].lvl_state == state)
1827 			return (lvls[i].lvl_mask);
1828 	}
1829 	return (0);	/* return 0, since that represents an empty mask */
1830 }
1831 
1832 /*
1833  * lvlname_to_mask(): return the mask corresponding to a levels character name
1834  */
1835 static int
1836 lvlname_to_mask(char name, int *mask)
1837 {
1838 	int i;
1839 	for (i = 0; i < LVL_NELEMS; i++) {
1840 		if (lvls[i].lvl_name == name) {
1841 			*mask = lvls[i].lvl_mask;
1842 			return (0);
1843 		}
1844 	}
1845 	return (-1);
1846 }
1847 
1848 /*
1849  * state_to_flags(): return the flags corresponding to a runlevel.  These
1850  * indicate properties of that runlevel.
1851  */
1852 static int
1853 state_to_flags(int state)
1854 {
1855 	int i;
1856 	for (i = 0; i < LVL_NELEMS; i++) {
1857 		if (lvls[i].lvl_state == state)
1858 			return (lvls[i].lvl_flags);
1859 	}
1860 	return (0);
1861 }
1862 
1863 /*
1864  * killproc() creates a child which kills the process specified by pid.
1865  */
1866 void
1867 killproc(pid_t pid)
1868 {
1869 	struct PROC_TABLE	*process;
1870 
1871 	(void) sigset(SIGCLD, SIG_DFL);
1872 	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1873 		(void) pause();
1874 	(void) sigset(SIGCLD, childeath);
1875 
1876 	if (process == NULLPROC) {
1877 		/*
1878 		 * efork() sets all signal handlers to the default, so reset
1879 		 * the ALRM handler to make timer() work as expected.
1880 		 */
1881 		(void) sigset(SIGALRM, alarmclk);
1882 
1883 		/*
1884 		 * We are the child.  Try to terminate the process nicely
1885 		 * first using SIGTERM and if it refuses to die in TWARN
1886 		 * seconds kill it with SIGKILL.
1887 		 */
1888 		(void) kill(pid, SIGTERM);
1889 		(void) timer(TWARN);
1890 		(void) kill(pid, SIGKILL);
1891 		(void) exit(0);
1892 	}
1893 }
1894 
1895 /*
1896  * Set up the default environment for all procs to be forked from init.
1897  * Read the values from the /etc/default/init file, except for PATH.  If
1898  * there's not enough room in the environment array, the environment
1899  * lines that don't fit are silently discarded.
1900  */
1901 void
1902 init_env()
1903 {
1904 	char	line[MAXCMDL];
1905 	FILE	*fp;
1906 	int	inquotes, length, wslength;
1907 	char	*tokp, *cp1, *cp2;
1908 
1909 	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1910 	(void) strcpy(glob_envp[0], DEF_PATH);
1911 	glob_envn = 1;
1912 
1913 	if (rflg) {
1914 		glob_envp[1] =
1915 			malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1916 		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1917 		++glob_envn;
1918 	} else if (bflg == 1) {
1919 		glob_envp[1] =
1920 			malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1921 		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1922 		++glob_envn;
1923 	}
1924 
1925 	if ((fp = fopen(ENVFILE, "r")) == NULL) {
1926 		console(B_TRUE,
1927 		    "Cannot open %s. Environment not initialized.\n",
1928 		    ENVFILE);
1929 	} else {
1930 		while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1931 		    glob_envn < MAXENVENT - 2) {
1932 			/*
1933 			 * Toss newline
1934 			 */
1935 			length = strlen(line);
1936 			if (line[length - 1] == '\n')
1937 				line[length - 1] = '\0';
1938 
1939 			/*
1940 			 * Ignore blank or comment lines.
1941 			 */
1942 			if (line[0] == '#' || line[0] == '\0' ||
1943 			    (wslength = strspn(line, " \t\n")) ==
1944 			    strlen(line) ||
1945 			    strchr(line, '#') == line + wslength)
1946 				continue;
1947 
1948 			/*
1949 			 * First make a pass through the line and change
1950 			 * any non-quoted semi-colons to blanks so they
1951 			 * will be treated as token separators below.
1952 			 */
1953 			inquotes = 0;
1954 			for (cp1 = line; *cp1 != '\0'; cp1++) {
1955 				if (*cp1 == '"') {
1956 					if (inquotes == 0)
1957 						inquotes = 1;
1958 					else
1959 						inquotes = 0;
1960 				} else if (*cp1 == ';') {
1961 					if (inquotes == 0)
1962 						*cp1 = ' ';
1963 				}
1964 			}
1965 
1966 			/*
1967 			 * Tokens within the line are separated by blanks
1968 			 *  and tabs.  For each token in the line which
1969 			 * contains a '=' we strip out any quotes and then
1970 			 * stick the token in the environment array.
1971 			 */
1972 			if ((tokp = strtok(line, " \t")) == NULL)
1973 				continue;
1974 			do {
1975 				if (strchr(tokp, '=') == NULL)
1976 					continue;
1977 				length = strlen(tokp);
1978 				while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
1979 					for (cp2 = cp1;
1980 					    cp2 < &tokp[length]; cp2++)
1981 						*cp2 = *(cp2 + 1);
1982 					length--;
1983 				}
1984 
1985 				if (strncmp(tokp, "CMASK=",
1986 				    sizeof ("CMASK=") - 1) == 0) {
1987 					long t;
1988 
1989 					/* We know there's an = */
1990 					t = strtol(strchr(tokp, '=') + 1, NULL,
1991 					    8);
1992 
1993 					/* Sanity */
1994 					if (t <= 077 && t >= 0)
1995 						cmask = (int)t;
1996 					(void) umask(cmask);
1997 					continue;
1998 				}
1999 				glob_envp[glob_envn] =
2000 				    malloc((unsigned)(length + 1));
2001 				(void) strcpy(glob_envp[glob_envn], tokp);
2002 				if (++glob_envn >= MAXENVENT - 1)
2003 					break;
2004 			} while ((tokp = strtok(NULL, " \t")) != NULL);
2005 		}
2006 
2007 		/*
2008 		 * Append a null pointer to the environment array
2009 		 * to mark its end.
2010 		 */
2011 		glob_envp[glob_envn] = NULL;
2012 		(void) fclose(fp);
2013 	}
2014 }
2015 
2016 /*
2017  * boot_init(): Do initialization things that should be done at boot.
2018  */
2019 void
2020 boot_init()
2021 {
2022 	int i;
2023 	struct PROC_TABLE *process, *oprocess;
2024 	struct CMD_LINE	cmd;
2025 	char	line[MAXCMDL];
2026 	char *old_path;
2027 	int maxfiles;
2028 
2029 	/* Use INIT_PATH for sysinit cmds */
2030 	old_path = glob_envp[0];
2031 	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2032 	(void) strcpy(glob_envp[0], INIT_PATH);
2033 
2034 	/*
2035 	 * Scan inittab(4) and process the special svc.startd entry, initdefault
2036 	 * and sysinit entries.
2037 	 */
2038 	while (getcmd(&cmd, &line[0]) == TRUE) {
2039 		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf"))
2040 			process_startd_line(&cmd, line);
2041 		else if (cmd.c_action == M_INITDEFAULT) {
2042 			/*
2043 			 * initdefault is no longer meaningful, as the SMF
2044 			 * milestone controls what (legacy) run level we
2045 			 * boot to.
2046 			 */
2047 			console(B_TRUE,
2048 			    "Ignoring legacy \"initdefault\" entry.\n");
2049 		} else if (cmd.c_action == M_SYSINIT) {
2050 			/*
2051 			 * Execute the "sysinit" entry and wait for it to
2052 			 * complete.  No bookkeeping is performed on these
2053 			 * entries because we avoid writing to the file system
2054 			 * until after there has been an chance to check it.
2055 			 */
2056 			if (process = findpslot(&cmd)) {
2057 				(void) sigset(SIGCLD, SIG_DFL);
2058 
2059 				for (oprocess = process;
2060 				    (process = efork(M_OFF, oprocess,
2061 				    (NAMED|NOCLEANUP))) == NO_ROOM;
2062 				    /* CSTYLED */)
2063 					;
2064 				(void) sigset(SIGCLD, childeath);
2065 
2066 				if (process == NULLPROC) {
2067 					maxfiles = ulimit(UL_GDESLIM, 0);
2068 
2069 					for (i = 0; i < maxfiles; i++)
2070 						(void) fcntl(i, F_SETFD,
2071 						    FD_CLOEXEC);
2072 					(void) execle(SH, "INITSH", "-c",
2073 					    cmd.c_command,
2074 					    (char *)0, glob_envp);
2075 					console(B_TRUE,
2076 "Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2077 						cmd.c_command, errno);
2078 					exit(1);
2079 				} else while (waitproc(process) == FAILURE);
2080 				process->p_flags = 0;
2081 				st_write();
2082 			}
2083 		}
2084 	}
2085 
2086 	/* Restore the path. */
2087 	free(glob_envp[0]);
2088 	glob_envp[0] = old_path;
2089 
2090 	/*
2091 	 * This will enable st_write() to complain about init_state_file.
2092 	 */
2093 	booting = 0;
2094 
2095 	/*
2096 	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2097 	 * out a correct version.
2098 	 */
2099 	if (write_ioctl)
2100 		write_ioctl_syscon();
2101 
2102 	/*
2103 	 * Start svc.startd(1M), which does most of the work.
2104 	 */
2105 	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2106 		/* Start svc.startd. */
2107 		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2108 			cur_state = SINGLE_USER;
2109 	} else {
2110 		console(B_TRUE, "Absent svc.startd entry or bad "
2111 		    "contract template.  Not starting svc.startd.\n");
2112 		enter_maintenance();
2113 	}
2114 }
2115 
2116 /*
2117  * init_signals(): Initialize all signals to either be caught or ignored.
2118  */
2119 void
2120 init_signals(void)
2121 {
2122 	struct sigaction act;
2123 	int i;
2124 
2125 	/*
2126 	 * Start by ignoring all signals, then selectively re-enable some.
2127 	 * The SIG_IGN disposition will only affect asynchronous signals:
2128 	 * any signal that we trigger synchronously that doesn't end up
2129 	 * being handled by siglvl() will be forcibly delivered by the kernel.
2130 	 */
2131 	for (i = SIGHUP; i <= SIGRTMAX; i++)
2132 		(void) sigset(i, SIG_IGN);
2133 
2134 	/*
2135 	 * Handle all level-changing signals using siglvl() and set sa_mask so
2136 	 * that all level-changing signals are blocked while in siglvl().
2137 	 */
2138 	act.sa_handler = siglvl;
2139 	act.sa_flags = SA_SIGINFO;
2140 	(void) sigemptyset(&act.sa_mask);
2141 
2142 	(void) sigaddset(&act.sa_mask, LVLQ);
2143 	(void) sigaddset(&act.sa_mask, LVL0);
2144 	(void) sigaddset(&act.sa_mask, LVL1);
2145 	(void) sigaddset(&act.sa_mask, LVL2);
2146 	(void) sigaddset(&act.sa_mask, LVL3);
2147 	(void) sigaddset(&act.sa_mask, LVL4);
2148 	(void) sigaddset(&act.sa_mask, LVL5);
2149 	(void) sigaddset(&act.sa_mask, LVL6);
2150 	(void) sigaddset(&act.sa_mask, SINGLE_USER);
2151 	(void) sigaddset(&act.sa_mask, LVLa);
2152 	(void) sigaddset(&act.sa_mask, LVLb);
2153 	(void) sigaddset(&act.sa_mask, LVLc);
2154 
2155 	(void) sigaction(LVLQ, &act, NULL);
2156 	(void) sigaction(LVL0, &act, NULL);
2157 	(void) sigaction(LVL1, &act, NULL);
2158 	(void) sigaction(LVL2, &act, NULL);
2159 	(void) sigaction(LVL3, &act, NULL);
2160 	(void) sigaction(LVL4, &act, NULL);
2161 	(void) sigaction(LVL5, &act, NULL);
2162 	(void) sigaction(LVL6, &act, NULL);
2163 	(void) sigaction(SINGLE_USER, &act, NULL);
2164 	(void) sigaction(LVLa, &act, NULL);
2165 	(void) sigaction(LVLb, &act, NULL);
2166 	(void) sigaction(LVLc, &act, NULL);
2167 
2168 	(void) sigset(SIGALRM, alarmclk);
2169 	alarmclk();
2170 
2171 	(void) sigset(SIGCLD, childeath);
2172 	(void) sigset(SIGPWR, powerfail);
2173 }
2174 
2175 /*
2176  * Set up pipe for "godchildren". If the file exists and is a pipe just open
2177  * it. Else, if the file system is r/w create it.  Otherwise, defer its
2178  * creation and open until after the sysinit functions have had a chance to
2179  * make the root read/write.
2180  */
2181 void
2182 setup_pipe()
2183 {
2184 	struct stat stat_buf;
2185 	struct statvfs statvfs_buf;
2186 
2187 	if ((stat(INITPIPE, &stat_buf) == 0) &&
2188 	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2189 		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2190 	else
2191 		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2192 		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2193 			(void) unlink(INITPIPE);
2194 			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2195 			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2196 		}
2197 
2198 	if (Pfd >= 0) {
2199 		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
2200 		/*
2201 		 * Read pipe in message discard mode.
2202 		 */
2203 		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
2204 		(void) sigset(SIGPOLL, sigpoll);
2205 	}
2206 }
2207 
2208 /*
2209  * siglvl - handle an asynchronous signal from init(1M) telling us that we
2210  * should change the current run level.  We set new_state accordingly.
2211  */
2212 void
2213 siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2214 {
2215 	struct PROC_TABLE *process;
2216 	struct sigaction act;
2217 
2218 	/*
2219 	 * If the signal was from the kernel (rather than init(1M)) then init
2220 	 * itself tripped the signal.  That is, we might have a bug and tripped
2221 	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2222 	 * such a case we reset the disposition to SIG_DFL, block all signals
2223 	 * in uc_mask but the current one, and return to the interrupted ucp
2224 	 * to effect an appropriate death.  The kernel will then restart us.
2225 	 *
2226 	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2227 	 * the kernel can send us when it wants to effect an orderly reboot.
2228 	 * For this case we must also verify si_code is zero, rather than a
2229 	 * code such as FPE_INTDIV which a bug might have triggered.
2230 	 */
2231 	if (sip != NULL && SI_FROMKERNEL(sip) &&
2232 	    (sig != SIGFPE || sip->si_code == 0)) {
2233 
2234 		(void) sigemptyset(&act.sa_mask);
2235 		act.sa_handler = SIG_DFL;
2236 		act.sa_flags = 0;
2237 		(void) sigaction(sig, &act, NULL);
2238 
2239 		(void) sigfillset(&ucp->uc_sigmask);
2240 		(void) sigdelset(&ucp->uc_sigmask, sig);
2241 		ucp->uc_flags |= UC_SIGMASK;
2242 
2243 		(void) setcontext(ucp);
2244 	}
2245 
2246 	/*
2247 	 * If the signal received is a LVLQ signal, do not really
2248 	 * change levels, just restate the current level.  If the
2249 	 * signal is not a LVLQ, set the new level to the signal
2250 	 * received.
2251 	 */
2252 	if (sig == LVLQ)
2253 		new_state = cur_state;
2254 	else
2255 		new_state = sig;
2256 
2257 	/*
2258 	 * Clear all times and repeat counts in the process table
2259 	 * since either the level is changing or the user has editted
2260 	 * the inittab file and wants us to look at it again.
2261 	 * If the user has fixed a typo, we don't want residual timing
2262 	 * data preventing the fixed command line from executing.
2263 	 */
2264 	for (process = proc_table;
2265 		(process < proc_table + num_proc); process++) {
2266 		process->p_time = 0L;
2267 		process->p_count = 0;
2268 	}
2269 
2270 	/*
2271 	 * Set the flag to indicate that a "user signal" was received.
2272 	 */
2273 	wakeup.w_flags.w_usersignal = 1;
2274 }
2275 
2276 
2277 /*
2278  * alarmclk
2279  */
2280 static void
2281 alarmclk()
2282 {
2283 	time_up = TRUE;
2284 }
2285 
2286 /*
2287  * childeath_single():
2288  *
2289  * This used to be the SIGCLD handler and it was set with signal()
2290  * (as opposed to sigset()).  When a child exited we'd come to the
2291  * handler, wait for the child, and reenable the handler with
2292  * signal() just before returning.  The implementation of signal()
2293  * checks with waitid() for waitable children and sends a SIGCLD
2294  * if there are some.  If children are exiting faster than the
2295  * handler can run we keep sending signals and the handler never
2296  * gets to return and eventually the stack runs out and init dies.
2297  * To prevent that we set the handler with sigset() so the handler
2298  * doesn't need to be reset, and in childeath() (see below) we
2299  * call childeath_single() as long as there are children to be
2300  * waited for.  If a child exits while init is in the handler a
2301  * SIGCLD will be pending and delivered on return from the handler.
2302  * If the child was already waited for the handler will have nothing
2303  * to do and return, otherwise the child will be waited for.
2304  */
2305 static void
2306 childeath_single()
2307 {
2308 	struct PROC_TABLE	*process;
2309 	struct pidlist		*pp;
2310 	pid_t			pid;
2311 	int			status;
2312 
2313 	/*
2314 	 * Perform wait to get the process id of the child that died and
2315 	 * then scan the process table to see if we are interested in
2316 	 * this process. NOTE: if a super-user sends the SIGCLD signal
2317 	 * to init, the following wait will not immediately return and
2318 	 * init will be inoperative until one of its child really does die.
2319 	 */
2320 	pid = wait(&status);
2321 
2322 	for (process = proc_table;
2323 		(process < proc_table + num_proc); process++) {
2324 		if ((process->p_flags & (LIVING|OCCUPIED)) ==
2325 		    (LIVING|OCCUPIED) && process->p_pid == pid) {
2326 
2327 			/*
2328 			 * Mark this process as having died and store the exit
2329 			 * status.  Also set the wakeup flag for a dead child
2330 			 * and break out of the loop.
2331 			 */
2332 			process->p_flags &= ~LIVING;
2333 			process->p_exit = (short)status;
2334 			wakeup.w_flags.w_childdeath = 1;
2335 
2336 			return;
2337 		}
2338 	}
2339 
2340 	/*
2341 	 * No process was found above, look through auxiliary list.
2342 	 */
2343 	(void) sighold(SIGPOLL);
2344 	pp = Plhead;
2345 	while (pp) {
2346 		if (pid > pp->pl_pid) {
2347 			/*
2348 			 * Keep on looking.
2349 			 */
2350 			pp = pp->pl_next;
2351 			continue;
2352 		} else if (pid < pp->pl_pid) {
2353 			/*
2354 			 * Not in the list.
2355 			 */
2356 			break;
2357 		} else {
2358 			/*
2359 			 * This is a dead "godchild".
2360 			 */
2361 			pp->pl_dflag = 1;
2362 			pp->pl_exit = (short)status;
2363 			wakeup.w_flags.w_childdeath = 1;
2364 			Gchild = 1;	/* Notice to call cleanaux(). */
2365 			break;
2366 		}
2367 	}
2368 
2369 	(void) sigrelse(SIGPOLL);
2370 }
2371 
2372 /* ARGSUSED */
2373 static void
2374 childeath(int signo)
2375 {
2376 	siginfo_t info;
2377 
2378 	while ((waitid(P_ALL, (id_t)0, &info, WEXITED|WNOHANG|WNOWAIT) == 0) &&
2379 	    info.si_pid != 0)
2380 		childeath_single();
2381 }
2382 
2383 static void
2384 powerfail()
2385 {
2386 	(void) nice(-19);
2387 	wakeup.w_flags.w_powerhit = 1;
2388 }
2389 
2390 /*
2391  * efork() forks a child and the parent inserts the process in its table
2392  * of processes that are directly a result of forks that it has performed.
2393  * The child just changes the "global" with the process id for this process
2394  * to it's new value.
2395  * If efork() is called with a pointer into the proc_table it uses that slot,
2396  * otherwise it searches for a free slot.  Regardless of how it was called,
2397  * it returns the pointer to the proc_table entry
2398  *
2399  * The SIGCLD handler is set to default (SIG_DFL) before calling efork().
2400  * This relies on the somewhat obscure SVR2 SIGCLD/SIG_DFL semantic
2401  * implied by the use of signal(3c).  While the meaning of SIG_DFL for
2402  * SIGCLD is nominally to ignore the signal, once the signal disposition
2403  * is set to childeath(), the kernel will post a SIGCLD if a child
2404  * exited during the period the disposition was SIG_DFL.  It acts more
2405  * like a signal block.
2406  *
2407  * Ideally, this should be rewritten to use modern signal semantics.
2408  */
2409 static struct PROC_TABLE *
2410 efork(int action, struct PROC_TABLE *process, int modes)
2411 {
2412 	pid_t	childpid;
2413 	struct PROC_TABLE *proc;
2414 	int		i;
2415 	void (*oldroutine)();
2416 	/*
2417 	 * Freshen up the proc_table, removing any entries for dead processes
2418 	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
2419 	 */
2420 	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2421 		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2422 		    (OCCUPIED)) {
2423 			/*
2424 			 * Is this a named process?
2425 			 * If so, do the necessary bookkeeping.
2426 			 */
2427 			if (proc->p_flags & NAMED)
2428 				(void) account(DEAD_PROCESS, proc, NULL);
2429 
2430 			/*
2431 			 * Free this entry for new usage.
2432 			 */
2433 			proc->p_flags = 0;
2434 		}
2435 	}
2436 
2437 	while ((childpid = fork()) == FAILURE) {
2438 		/*
2439 		 * Shorten the alarm timer in case someone else's child dies
2440 		 * and free up a slot in the process table.
2441 		 */
2442 		setimer(5);
2443 
2444 		/*
2445 		 * Wait for some children to die.  Since efork() is normally
2446 		 * called with SIGCLD in the default state, reset it to catch
2447 		 * so that child death signals can come in.
2448 		 */
2449 		oldroutine = sigset(SIGCLD, childeath);
2450 		(void) pause();
2451 		(void) sigset(SIGCLD, oldroutine);
2452 		setimer(0);
2453 	}
2454 
2455 	if (childpid != 0) {
2456 
2457 		if (process == NULLPROC) {
2458 			/*
2459 			 * No proc table pointer specified so search
2460 			 * for a free slot.
2461 			 */
2462 			for (process = proc_table;  process->p_flags != 0 &&
2463 				(process < proc_table + num_proc); process++)
2464 					;
2465 
2466 			if (process == (proc_table + num_proc)) {
2467 				int old_proc_table_size = num_proc;
2468 
2469 				/* Increase the process table size */
2470 				increase_proc_table_size();
2471 				if (old_proc_table_size == num_proc) {
2472 					/* didn't grow: memory failure */
2473 					return (NO_ROOM);
2474 				} else {
2475 					process =
2476 					    proc_table + old_proc_table_size;
2477 				}
2478 			}
2479 
2480 			process->p_time = 0L;
2481 			process->p_count = 0;
2482 		}
2483 		process->p_id[0] = '\0';
2484 		process->p_id[1] = '\0';
2485 		process->p_id[2] = '\0';
2486 		process->p_id[3] = '\0';
2487 		process->p_pid = childpid;
2488 		process->p_flags = (LIVING | OCCUPIED | modes);
2489 		process->p_exit = 0;
2490 
2491 		st_write();
2492 	} else {
2493 		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2494 			(void) setpgrp();
2495 
2496 		process = NULLPROC;
2497 
2498 		/*
2499 		 * Reset all signals to the system defaults.
2500 		 */
2501 		for (i = SIGHUP; i <= SIGRTMAX; i++)
2502 			(void) sigset(i, SIG_DFL);
2503 
2504 		/*
2505 		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
2506 		 * SIGTTIN, and SIGTSTP to SIG_IGN.
2507 		 *
2508 		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2509 		 * for backward compatibility.
2510 		 */
2511 		(void) sigset(SIGTTIN, SIG_IGN);
2512 		(void) sigset(SIGTTOU, SIG_IGN);
2513 		(void) sigset(SIGTSTP, SIG_IGN);
2514 		(void) sigset(SIGXCPU, SIG_IGN);
2515 		(void) sigset(SIGXFSZ, SIG_IGN);
2516 	}
2517 	return (process);
2518 }
2519 
2520 
2521 /*
2522  * waitproc() waits for a specified process to die.  For this function to
2523  * work, the specified process must already in the proc_table.  waitproc()
2524  * returns the exit status of the specified process when it dies.
2525  */
2526 static long
2527 waitproc(struct PROC_TABLE *process)
2528 {
2529 	int		answer;
2530 	sigset_t	oldmask, newmask, zeromask;
2531 
2532 	(void) sigemptyset(&zeromask);
2533 	(void) sigemptyset(&newmask);
2534 
2535 	(void) sigaddset(&newmask, SIGCLD);
2536 
2537 	/* Block SIGCLD and save the current signal mask */
2538 	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2539 		perror("SIG_BLOCK error");
2540 
2541 	/*
2542 	 * Wait around until the process dies.
2543 	 */
2544 	if (process->p_flags & LIVING)
2545 		(void) sigsuspend(&zeromask);
2546 
2547 	/* Reset signal mask to unblock SIGCLD */
2548 	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2549 		perror("SIG_SETMASK error");
2550 
2551 	if (process->p_flags & LIVING)
2552 		return (FAILURE);
2553 
2554 	/*
2555 	 * Make sure to only return 16 bits so that answer will always
2556 	 * be positive whenever the process of interest really died.
2557 	 */
2558 	answer = (process->p_exit & 0xffff);
2559 
2560 	/*
2561 	 * Free the slot in the proc_table.
2562 	 */
2563 	process->p_flags = 0;
2564 	return (answer);
2565 }
2566 
2567 /*
2568  * notify_pam_dead(): calls into the PAM framework to close the given session.
2569  */
2570 static void
2571 notify_pam_dead(struct utmpx *up)
2572 {
2573 	pam_handle_t *pamh;
2574 	char user[sizeof (up->ut_user) + 1];
2575 	char ttyn[sizeof (up->ut_line) + 1];
2576 	char host[sizeof (up->ut_host) + 1];
2577 
2578 	/*
2579 	 * PAM does not take care of updating utmpx/wtmpx.
2580 	 */
2581 	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
2582 	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2583 	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
2584 
2585 	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2586 		(void) pam_set_item(pamh, PAM_TTY, ttyn);
2587 		(void) pam_set_item(pamh, PAM_RHOST, host);
2588 		(void) pam_close_session(pamh, 0);
2589 		(void) pam_end(pamh, PAM_SUCCESS);
2590 	}
2591 }
2592 
2593 /*
2594  * Check you can access utmpx (As / may be read-only and
2595  * /var may not be mounted yet).
2596  */
2597 static int
2598 access_utmpx(void)
2599 {
2600 	do {
2601 		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2602 	} while (!utmpx_ok && errno == EINTR);
2603 
2604 	return (utmpx_ok);
2605 }
2606 
2607 /*
2608  * account() updates entries in utmpx and appends new entries to the end of
2609  * wtmpx (assuming they exist).  The program argument indicates the name of
2610  * program if INIT_PROCESS, otherwise should be NULL.
2611  *
2612  * account() only blocks for INIT_PROCESS requests.
2613  *
2614  * Returns non-zero if write failed.
2615  */
2616 static int
2617 account(short state, struct PROC_TABLE *process, char *program)
2618 {
2619 	struct utmpx utmpbuf, *u, *oldu;
2620 	int tmplen;
2621 	char fail_buf[UT_LINE_SZ];
2622 	sigset_t block, unblock;
2623 
2624 	if (!utmpx_ok && !access_utmpx()) {
2625 		return (-1);
2626 	}
2627 
2628 	/*
2629 	 * Set up the prototype for the utmp structure we want to write.
2630 	 */
2631 	u = &utmpbuf;
2632 	(void) memset(u, 0, sizeof (struct utmpx));
2633 
2634 	/*
2635 	 * Fill in the various fields of the utmp structure.
2636 	 */
2637 	u->ut_id[0] = process->p_id[0];
2638 	u->ut_id[1] = process->p_id[1];
2639 	u->ut_id[2] = process->p_id[2];
2640 	u->ut_id[3] = process->p_id[3];
2641 	u->ut_pid = process->p_pid;
2642 
2643 	/*
2644 	 * Fill the "ut_exit" structure.
2645 	 */
2646 	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2647 	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2648 	u->ut_type = state;
2649 
2650 	(void) time(&u->ut_tv.tv_sec);
2651 
2652 	/*
2653 	 * Block signals for utmp update.
2654 	 */
2655 	(void) sigfillset(&block);
2656 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2657 
2658 	/*
2659 	 * See if there already is such an entry in the "utmpx" file.
2660 	 */
2661 	setutxent();	/* Start at beginning of utmpx file. */
2662 
2663 	if ((oldu = getutxid(u)) != NULL) {
2664 		/*
2665 		 * Copy in the old "user", "line" and "host" fields
2666 		 * to our new structure.
2667 		 */
2668 		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2669 		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2670 		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2671 		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2672 			min(tmplen + 1, sizeof (u->ut_host)) : 0;
2673 
2674 		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2675 			notify_pam_dead(oldu);
2676 		}
2677 	}
2678 
2679 	/*
2680 	 * Perform special accounting. Insert the special string into the
2681 	 * ut_line array. For INIT_PROCESSes put in the name of the
2682 	 * program in the "ut_user" field.
2683 	 */
2684 	switch (state) {
2685 	case INIT_PROCESS:
2686 		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2687 		(void) strcpy(fail_buf, "INIT_PROCESS");
2688 		break;
2689 
2690 	default:
2691 		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2692 		break;
2693 	}
2694 
2695 	/*
2696 	 * Write out the updated entry to utmpx file.
2697 	 */
2698 	if (pututxline(u) == NULL) {
2699 		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2700 		    fail_buf, strerror(errno));
2701 		endutxent();
2702 		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2703 		return (-1);
2704 	}
2705 
2706 	/*
2707 	 * If we're able to write to utmpx, then attempt to add to the
2708 	 * end of the wtmpx file.
2709 	 */
2710 	updwtmpx(WTMPX, u);
2711 
2712 	endutxent();
2713 
2714 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2715 
2716 	return (0);
2717 }
2718 
2719 static void
2720 clearent(pid_t pid, short status)
2721 {
2722 	struct utmpx *up;
2723 	sigset_t block, unblock;
2724 
2725 	/*
2726 	 * Block signals for utmp update.
2727 	 */
2728 	(void) sigfillset(&block);
2729 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2730 
2731 	/*
2732 	 * No error checking for now.
2733 	 */
2734 
2735 	setutxent();
2736 	while (up = getutxent()) {
2737 		if (up->ut_pid == pid) {
2738 			if (up->ut_type == DEAD_PROCESS) {
2739 				/*
2740 				 * Cleaned up elsewhere.
2741 				 */
2742 				continue;
2743 			}
2744 
2745 			notify_pam_dead(up);
2746 
2747 			up->ut_type = DEAD_PROCESS;
2748 			up->ut_exit.e_termination = WTERMSIG(status);
2749 			up->ut_exit.e_exit = WEXITSTATUS(status);
2750 			(void) time(&up->ut_tv.tv_sec);
2751 
2752 			(void) pututxline(up);
2753 			/*
2754 			 * Now attempt to add to the end of the
2755 			 * wtmp and wtmpx files.  Do not create
2756 			 * if they don't already exist.
2757 			 */
2758 			updwtmpx(WTMPX, up);
2759 
2760 			break;
2761 		}
2762 	}
2763 
2764 	endutxent();
2765 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2766 }
2767 
/*
 * prog_name() searches for the word or unix path name and
 * returns a pointer to the last element of the pathname.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call, and is truncated to _POSIX_LOGIN_NAME_MAX - 1 characters.  An empty
 * string is returned when the first non-blank character cannot start a
 * word.
 */
static char *
prog_name(char *string)
{
	/* XXX - utmp - fix name length */
	static char word[_POSIX_LOGIN_NAME_MAX];
	char	*scan, *leaf;
	char	first;
	int	n;

	/*
	 * Step over leading spaces and tabs.
	 */
	for (scan = string; *scan == ' ' || *scan == '\t'; scan++)
		;

	/*
	 * A word must begin with '.', '/', '_', a letter or a digit;
	 * anything else yields a null string.
	 */
	first = *scan;
	if (!(first == '.' || first == '/' || first == '_' ||
	    (first >= 'a' && first <= 'z') ||
	    (first >= 'A' && first <= 'Z') ||
	    (first >= '0' && first <= '9')))
		return ("");

	/*
	 * Walk to the end of the word (' ', '\t', '\n' or '\0').  Each time
	 * a '/' is passed, "leaf" is moved to the character after it, so
	 * that it ends up at the start of the last pathname element.
	 */
	leaf = scan;
	while (*scan != ' ' && *scan != '\t' && *scan != '\n' &&
	    *scan != '\0') {
		if (*scan++ == '/')
			leaf = scan;
	}

	/*
	 * Copy as much of the last element as fits into "word", leaving
	 * room for the terminating null.
	 */
	/* XXX - utmp - fix name length */
	n = 0;
	while (n < _POSIX_LOGIN_NAME_MAX - 1 && leaf < scan)
		word[n++] = *leaf++;
	word[n] = '\0';

	return (word);
}
2822 
2823 
2824 /*
2825  * realcon() returns a nonzero value if there is a character device
2826  * associated with SYSCON that has the same device number as CONSOLE.
2827  */
2828 static int
2829 realcon()
2830 {
2831 	struct stat sconbuf, conbuf;
2832 
2833 	if (stat(SYSCON, &sconbuf) != -1 &&
2834 	    stat(CONSOLE, &conbuf) != -1 &&
2835 	    S_ISCHR(sconbuf.st_mode) &&
2836 	    S_ISCHR(conbuf.st_mode) &&
2837 	    sconbuf.st_rdev == conbuf.st_rdev) {
2838 		return (1);
2839 	} else {
2840 		return (0);
2841 	}
2842 }
2843 
2844 
2845 /*
2846  * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2847  * Returns true if the IOCTLSYSCON file needs to be written (with
2848  * write_ioctl_syscon() below)
2849  */
2850 static int
2851 get_ioctl_syscon()
2852 {
2853 	FILE	*fp;
2854 	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
2855 	int		i, valid_format = 0;
2856 
2857 	/*
2858 	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
2859 	 */
2860 	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2861 		stored_syscon_termios = dflt_termios;
2862 		console(B_TRUE,
2863 		    "warning:%s does not exist, default settings assumed\n",
2864 		    IOCTLSYSCON);
2865 	} else {
2866 
2867 	    i = fscanf(fp,
2868 	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2869 		&iflags, &oflags, &cflags, &lflags,
2870 		&cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2871 		&cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2872 		&cc[14], &cc[15], &cc[16], &cc[17]);
2873 
2874 	    if (i == 22) {
2875 		stored_syscon_termios.c_iflag = iflags;
2876 		stored_syscon_termios.c_oflag = oflags;
2877 		stored_syscon_termios.c_cflag = cflags;
2878 		stored_syscon_termios.c_lflag = lflags;
2879 		for (i = 0; i < 18; i++)
2880 			stored_syscon_termios.c_cc[i] = (char)cc[i];
2881 		valid_format = 1;
2882 	    } else if (i == 13) {
2883 		rewind(fp);
2884 		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2885 		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2886 		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2887 
2888 		/*
2889 		 * If the file is formatted properly, use the values to
2890 		 * initialize the console terminal condition.
2891 		 */
2892 		stored_syscon_termios.c_iflag = (ushort_t)iflags;
2893 		stored_syscon_termios.c_oflag = (ushort_t)oflags;
2894 		stored_syscon_termios.c_cflag = (ushort_t)cflags;
2895 		stored_syscon_termios.c_lflag = (ushort_t)lflags;
2896 		for (i = 0; i < 8; i++)
2897 			stored_syscon_termios.c_cc[i] = (char)cc[i];
2898 		valid_format = 1;
2899 	    }
2900 	    (void) fclose(fp);
2901 
2902 	    /* If the file is badly formatted, use the default settings. */
2903 	    if (!valid_format)
2904 		stored_syscon_termios = dflt_termios;
2905 	}
2906 
2907 	/* If the file had a bad format, rewrite it later. */
2908 	return (!valid_format);
2909 }
2910 
2911 
2912 static void
2913 write_ioctl_syscon()
2914 {
2915 	FILE *fp;
2916 	int i;
2917 
2918 	(void) unlink(SYSCON);
2919 	(void) link(SYSTTY, SYSCON);
2920 	(void) umask(022);
2921 	fp = fopen(IOCTLSYSCON, "w");
2922 
2923 	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2924 	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2925 	    stored_syscon_termios.c_lflag);
2926 	for (i = 0; i < 8; ++i)
2927 		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2928 	(void) putc('\n', fp);
2929 
2930 	(void) fflush(fp);
2931 	(void) fsync(fileno(fp));
2932 	(void) fclose(fp);
2933 	(void) umask(cmask);
2934 }
2935 
2936 
2937 /*
2938  * void console(boolean_t, char *, ...)
2939  *   Outputs the requested message to the system console.  Note that the number
2940  *   of arguments passed to console() should be determined by the print format.
2941  *
2942  *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2943  *   message.
2944  *
2945  *   To make sure we write to the console in a sane fashion, we use the modes
2946  *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2947  *   Afterwards we restore whatever modes were already there.
2948  */
2949 /* PRINTFLIKE2 */
2950 static void
2951 console(boolean_t prefix, char *format, ...)
2952 {
2953 	char	outbuf[BUFSIZ];
2954 	va_list	args;
2955 	int fd, getret;
2956 	struct termios old_syscon_termios;
2957 	FILE *f;
2958 
2959 	/*
2960 	 * We open SYSCON anew each time in case it has changed (see
2961 	 * userinit()).
2962 	 */
2963 	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
2964 	    (f = fdopen(fd, "r+")) == NULL) {
2965 		if (prefix)
2966 			syslog(LOG_WARNING, "INIT: ");
2967 		va_start(args, format);
2968 		vsyslog(LOG_WARNING, format, args);
2969 		va_end(args);
2970 		if (fd >= 0)
2971 			(void) close(fd);
2972 		return;
2973 	}
2974 	setbuf(f, &outbuf[0]);
2975 
2976 	getret = tcgetattr(fd, &old_syscon_termios);
2977 	old_syscon_termios.c_cflag &= ~HUPCL;
2978 	if (realcon())
2979 		/* Don't overwrite cflag of real console. */
2980 		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
2981 
2982 	stored_syscon_termios.c_cflag &= ~HUPCL;
2983 
2984 	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
2985 
2986 	if (prefix)
2987 		(void) fprintf(f, "\nINIT: ");
2988 	va_start(args, format);
2989 	(void) vfprintf(f, format, args);
2990 	va_end(args);
2991 
2992 	if (getret == 0)
2993 		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
2994 
2995 	(void) fclose(f);
2996 }
2997 
2998 /*
2999  * timer() is a substitute for sleep() which uses alarm() and pause().
3000  */
3001 static void
3002 timer(int waitime)
3003 {
3004 	setimer(waitime);
3005 	while (time_up == FALSE)
3006 		(void) pause();
3007 }
3008 
3009 static void
3010 setimer(int timelimit)
3011 {
3012 	alarmclk();
3013 	(void) alarm(timelimit);
3014 	time_up = (timelimit ? FALSE : TRUE);
3015 }
3016 
3017 /*
3018  * Fails with
3019  *   ENOMEM - out of memory
3020  *   ECONNABORTED - repository connection broken
3021  *   EPERM - permission denied
3022  *   EACCES - backend access denied
3023  *   EROFS - backend readonly
3024  */
3025 static int
3026 get_or_add_startd(scf_instance_t *inst)
3027 {
3028 	scf_handle_t *h;
3029 	scf_scope_t *scope = NULL;
3030 	scf_service_t *svc = NULL;
3031 	int ret = 0;
3032 
3033 	h = scf_instance_handle(inst);
3034 
3035 	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
3036 	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
3037 		return (0);
3038 
3039 	switch (scf_error()) {
3040 	case SCF_ERROR_CONNECTION_BROKEN:
3041 		return (ECONNABORTED);
3042 
3043 	case SCF_ERROR_NOT_FOUND:
3044 		break;
3045 
3046 	case SCF_ERROR_HANDLE_MISMATCH:
3047 	case SCF_ERROR_INVALID_ARGUMENT:
3048 	case SCF_ERROR_CONSTRAINT_VIOLATED:
3049 	default:
3050 		bad_error("scf_handle_decode_fmri", scf_error());
3051 	}
3052 
3053 	/* Make sure we're right, since we're adding piece-by-piece. */
3054 	assert(strcmp(SCF_SERVICE_STARTD,
3055 	    "svc:/system/svc/restarter:default") == 0);
3056 
3057 	if ((scope = scf_scope_create(h)) == NULL ||
3058 	    (svc = scf_service_create(h)) == NULL) {
3059 		ret = ENOMEM;
3060 		goto out;
3061 	}
3062 
3063 get_scope:
3064 	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
3065 		switch (scf_error()) {
3066 		case SCF_ERROR_CONNECTION_BROKEN:
3067 			ret = ECONNABORTED;
3068 			goto out;
3069 
3070 		case SCF_ERROR_NOT_FOUND:
3071 			(void) fputs(gettext(
3072 			    "smf(5) repository missing local scope.\n"),
3073 			    stderr);
3074 			exit(1);
3075 			/* NOTREACHED */
3076 
3077 		case SCF_ERROR_HANDLE_MISMATCH:
3078 		case SCF_ERROR_INVALID_ARGUMENT:
3079 		default:
3080 			bad_error("scf_handle_get_scope", scf_error());
3081 		}
3082 	}
3083 
3084 get_svc:
3085 	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
3086 		switch (scf_error()) {
3087 		case SCF_ERROR_CONNECTION_BROKEN:
3088 			ret = ECONNABORTED;
3089 			goto out;
3090 
3091 		case SCF_ERROR_DELETED:
3092 			goto get_scope;
3093 
3094 		case SCF_ERROR_NOT_FOUND:
3095 			break;
3096 
3097 		case SCF_ERROR_HANDLE_MISMATCH:
3098 		case SCF_ERROR_INVALID_ARGUMENT:
3099 		case SCF_ERROR_NOT_SET:
3100 		default:
3101 			bad_error("scf_scope_get_service", scf_error());
3102 		}
3103 
3104 add_svc:
3105 		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
3106 		    0) {
3107 			switch (scf_error()) {
3108 			case SCF_ERROR_CONNECTION_BROKEN:
3109 				ret = ECONNABORTED;
3110 				goto out;
3111 
3112 			case SCF_ERROR_EXISTS:
3113 				goto get_svc;
3114 
3115 			case SCF_ERROR_PERMISSION_DENIED:
3116 				ret = EPERM;
3117 				goto out;
3118 
3119 			case SCF_ERROR_BACKEND_ACCESS:
3120 				ret = EACCES;
3121 				goto out;
3122 
3123 			case SCF_ERROR_BACKEND_READONLY:
3124 				ret = EROFS;
3125 				goto out;
3126 
3127 			case SCF_ERROR_HANDLE_MISMATCH:
3128 			case SCF_ERROR_INVALID_ARGUMENT:
3129 			case SCF_ERROR_NOT_SET:
3130 			default:
3131 				bad_error("scf_scope_add_service", scf_error());
3132 			}
3133 		}
3134 	}
3135 
3136 get_inst:
3137 	if (scf_service_get_instance(svc, "default", inst) != 0) {
3138 		switch (scf_error()) {
3139 		case SCF_ERROR_CONNECTION_BROKEN:
3140 			ret = ECONNABORTED;
3141 			goto out;
3142 
3143 		case SCF_ERROR_DELETED:
3144 			goto add_svc;
3145 
3146 		case SCF_ERROR_NOT_FOUND:
3147 			break;
3148 
3149 		case SCF_ERROR_HANDLE_MISMATCH:
3150 		case SCF_ERROR_INVALID_ARGUMENT:
3151 		case SCF_ERROR_NOT_SET:
3152 		default:
3153 			bad_error("scf_service_get_instance", scf_error());
3154 		}
3155 
3156 		if (scf_service_add_instance(svc, "default", inst) !=
3157 		    0) {
3158 			switch (scf_error()) {
3159 			case SCF_ERROR_CONNECTION_BROKEN:
3160 				ret = ECONNABORTED;
3161 				goto out;
3162 
3163 			case SCF_ERROR_DELETED:
3164 				goto add_svc;
3165 
3166 			case SCF_ERROR_EXISTS:
3167 				goto get_inst;
3168 
3169 			case SCF_ERROR_PERMISSION_DENIED:
3170 				ret = EPERM;
3171 				goto out;
3172 
3173 			case SCF_ERROR_BACKEND_ACCESS:
3174 				ret = EACCES;
3175 				goto out;
3176 
3177 			case SCF_ERROR_BACKEND_READONLY:
3178 				ret = EROFS;
3179 				goto out;
3180 
3181 			case SCF_ERROR_HANDLE_MISMATCH:
3182 			case SCF_ERROR_INVALID_ARGUMENT:
3183 			case SCF_ERROR_NOT_SET:
3184 			default:
3185 				bad_error("scf_service_add_instance",
3186 				    scf_error());
3187 			}
3188 		}
3189 	}
3190 
3191 	ret = 0;
3192 
3193 out:
3194 	scf_service_destroy(svc);
3195 	scf_scope_destroy(scope);
3196 	return (ret);
3197 }
3198 
3199 /*
3200  * Fails with
3201  *   ECONNABORTED - repository connection broken
3202  *   ECANCELED - the transaction's property group was deleted
3203  */
3204 static int
3205 transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3206     const char *pname, scf_type_t type)
3207 {
3208 change_type:
3209 	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3210 		return (0);
3211 
3212 	switch (scf_error()) {
3213 	case SCF_ERROR_CONNECTION_BROKEN:
3214 		return (ECONNABORTED);
3215 
3216 	case SCF_ERROR_DELETED:
3217 		return (ECANCELED);
3218 
3219 	case SCF_ERROR_NOT_FOUND:
3220 		goto new;
3221 
3222 	case SCF_ERROR_HANDLE_MISMATCH:
3223 	case SCF_ERROR_INVALID_ARGUMENT:
3224 	case SCF_ERROR_NOT_BOUND:
3225 	case SCF_ERROR_NOT_SET:
3226 	default:
3227 		bad_error("scf_transaction_property_change_type", scf_error());
3228 	}
3229 
3230 new:
3231 	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3232 		return (0);
3233 
3234 	switch (scf_error()) {
3235 	case SCF_ERROR_CONNECTION_BROKEN:
3236 		return (ECONNABORTED);
3237 
3238 	case SCF_ERROR_DELETED:
3239 		return (ECANCELED);
3240 
3241 	case SCF_ERROR_EXISTS:
3242 		goto change_type;
3243 
3244 	case SCF_ERROR_HANDLE_MISMATCH:
3245 	case SCF_ERROR_INVALID_ARGUMENT:
3246 	case SCF_ERROR_NOT_BOUND:
3247 	case SCF_ERROR_NOT_SET:
3248 	default:
3249 		bad_error("scf_transaction_property_new", scf_error());
3250 		/* NOTREACHED */
3251 	}
3252 }
3253 
3254 static void
3255 scferr(void)
3256 {
3257 	switch (scf_error()) {
3258 	case SCF_ERROR_NO_MEMORY:
3259 		console(B_TRUE, gettext("Out of memory.\n"));
3260 		break;
3261 
3262 	case SCF_ERROR_CONNECTION_BROKEN:
3263 		console(B_TRUE, gettext(
3264 		    "Connection to smf(5) repository server broken.\n"));
3265 		break;
3266 
3267 	case SCF_ERROR_NO_RESOURCES:
3268 		console(B_TRUE, gettext(
3269 		    "smf(5) repository server is out of memory.\n"));
3270 		break;
3271 
3272 	case SCF_ERROR_PERMISSION_DENIED:
3273 		console(B_TRUE, gettext("Insufficient privileges.\n"));
3274 		break;
3275 
3276 	default:
3277 		console(B_TRUE, gettext("libscf error: %s\n"),
3278 		    scf_strerror(scf_error()));
3279 	}
3280 }
3281 
3282 static void
3283 lscf_set_runlevel(char rl)
3284 {
3285 	scf_handle_t *h;
3286 	scf_instance_t *inst = NULL;
3287 	scf_propertygroup_t *pg = NULL;
3288 	scf_transaction_t *tx = NULL;
3289 	scf_transaction_entry_t *ent = NULL;
3290 	scf_value_t *val = NULL;
3291 	char buf[2];
3292 	int r;
3293 
3294 	h = scf_handle_create(SCF_VERSION);
3295 	if (h == NULL) {
3296 		scferr();
3297 		return;
3298 	}
3299 
3300 	if (scf_handle_bind(h) != 0) {
3301 		switch (scf_error()) {
3302 		case SCF_ERROR_NO_SERVER:
3303 			console(B_TRUE,
3304 			    gettext("smf(5) repository server not running.\n"));
3305 			goto bail;
3306 
3307 		default:
3308 			scferr();
3309 			goto bail;
3310 		}
3311 	}
3312 
3313 	if ((inst = scf_instance_create(h)) == NULL ||
3314 	    (pg = scf_pg_create(h)) == NULL ||
3315 	    (val = scf_value_create(h)) == NULL ||
3316 	    (tx = scf_transaction_create(h)) == NULL ||
3317 	    (ent = scf_entry_create(h)) == NULL) {
3318 		scferr();
3319 		goto bail;
3320 	}
3321 
3322 get_inst:
3323 	r = get_or_add_startd(inst);
3324 	switch (r) {
3325 	case 0:
3326 		break;
3327 
3328 	case ENOMEM:
3329 	case ECONNABORTED:
3330 	case EPERM:
3331 	case EACCES:
3332 	case EROFS:
3333 		scferr();
3334 		goto bail;
3335 	default:
3336 		bad_error("get_or_add_startd", r);
3337 	}
3338 
3339 get_pg:
3340 	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
3341 		switch (scf_error()) {
3342 		case SCF_ERROR_CONNECTION_BROKEN:
3343 			scferr();
3344 			goto bail;
3345 
3346 		case SCF_ERROR_DELETED:
3347 			goto get_inst;
3348 
3349 		case SCF_ERROR_NOT_FOUND:
3350 			break;
3351 
3352 		case SCF_ERROR_HANDLE_MISMATCH:
3353 		case SCF_ERROR_INVALID_ARGUMENT:
3354 		case SCF_ERROR_NOT_SET:
3355 		default:
3356 			bad_error("scf_instance_get_pg", scf_error());
3357 		}
3358 
3359 add_pg:
3360 		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
3361 		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
3362 		    0) {
3363 			switch (scf_error()) {
3364 			case SCF_ERROR_CONNECTION_BROKEN:
3365 			case SCF_ERROR_PERMISSION_DENIED:
3366 			case SCF_ERROR_BACKEND_ACCESS:
3367 				scferr();
3368 				goto bail;
3369 
3370 			case SCF_ERROR_DELETED:
3371 				goto get_inst;
3372 
3373 			case SCF_ERROR_EXISTS:
3374 				goto get_pg;
3375 
3376 			case SCF_ERROR_HANDLE_MISMATCH:
3377 			case SCF_ERROR_INVALID_ARGUMENT:
3378 			case SCF_ERROR_NOT_SET:
3379 			default:
3380 				bad_error("scf_instance_add_pg", scf_error());
3381 			}
3382 		}
3383 	}
3384 
3385 	buf[0] = rl;
3386 	buf[1] = '\0';
3387 	r = scf_value_set_astring(val, buf);
3388 	assert(r == 0);
3389 
3390 	for (;;) {
3391 		if (scf_transaction_start(tx, pg) != 0) {
3392 			switch (scf_error()) {
3393 			case SCF_ERROR_CONNECTION_BROKEN:
3394 			case SCF_ERROR_PERMISSION_DENIED:
3395 			case SCF_ERROR_BACKEND_ACCESS:
3396 				scferr();
3397 				goto bail;
3398 
3399 			case SCF_ERROR_DELETED:
3400 				goto add_pg;
3401 
3402 			case SCF_ERROR_HANDLE_MISMATCH:
3403 			case SCF_ERROR_NOT_BOUND:
3404 			case SCF_ERROR_IN_USE:
3405 			case SCF_ERROR_NOT_SET:
3406 			default:
3407 				bad_error("scf_transaction_start", scf_error());
3408 			}
3409 		}
3410 
3411 		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
3412 		switch (r) {
3413 		case 0:
3414 			break;
3415 
3416 		case ECONNABORTED:
3417 			scferr();
3418 			goto bail;
3419 
3420 		case ECANCELED:
3421 			scf_transaction_reset(tx);
3422 			goto add_pg;
3423 
3424 		default:
3425 			bad_error("transaction_add_set", r);
3426 		}
3427 
3428 		r = scf_entry_add_value(ent, val);
3429 		assert(r == 0);
3430 
3431 		r = scf_transaction_commit(tx);
3432 		if (r == 1)
3433 			break;
3434 
3435 		if (r != 0) {
3436 			switch (scf_error()) {
3437 			case SCF_ERROR_CONNECTION_BROKEN:
3438 			case SCF_ERROR_PERMISSION_DENIED:
3439 			case SCF_ERROR_BACKEND_ACCESS:
3440 			case SCF_ERROR_BACKEND_READONLY:
3441 				scferr();
3442 				goto bail;
3443 
3444 			case SCF_ERROR_DELETED:
3445 				scf_transaction_reset(tx);
3446 				goto add_pg;
3447 
3448 			case SCF_ERROR_INVALID_ARGUMENT:
3449 			case SCF_ERROR_NOT_BOUND:
3450 			case SCF_ERROR_NOT_SET:
3451 			default:
3452 				bad_error("scf_transaction_commit",
3453 				    scf_error());
3454 			}
3455 		}
3456 
3457 		scf_transaction_reset(tx);
3458 		(void) scf_pg_update(pg);
3459 	}
3460 
3461 bail:
3462 	scf_transaction_destroy(tx);
3463 	scf_entry_destroy(ent);
3464 	scf_value_destroy(val);
3465 	scf_pg_destroy(pg);
3466 	scf_instance_destroy(inst);
3467 
3468 	(void) scf_handle_unbind(h);
3469 	scf_handle_destroy(h);
3470 }
3471 
3472 /*
3473  * Function to handle requests from users to main init running as process 1.
3474  */
3475 static void
3476 userinit(int argc, char **argv)
3477 {
3478 	FILE	*fp;
3479 	char	*ln;
3480 	int	init_signal;
3481 	struct stat	sconbuf, conbuf;
3482 	int turnoff = 0;
3483 	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3484 
3485 	/*
3486 	 * We are a user invoked init.  Is there an argument and is it
3487 	 * a single character?  If not, print usage message and quit.
3488 	 */
3489 	if (argc != 2 || argv[1][1] != '\0') {
3490 		(void) fprintf(stderr, usage_msg);
3491 		exit(0);
3492 	}
3493 
3494 	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3495 		(void) fprintf(stderr, usage_msg);
3496 		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3497 		    argv[1]);
3498 		exit(1);
3499 	}
3500 
3501 	turnoff = LSEL_NOAUDIT & state_to_flags(init_signal);
3502 
3503 	if (init_signal == SINGLE_USER) {
3504 		/*
3505 		 * Make sure this process is talking to a legal tty line
3506 		 * and that /dev/syscon is linked to this line.
3507 		 */
3508 		ln = ttyname(0);	/* Get the name of tty */
3509 		if (ln == NULL) {
3510 			(void) fprintf(stderr,
3511 			    "Standard input not a tty line\n");
3512 			(void) audit_put_record(ADT_FAILURE,
3513 			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3514 			exit(1);
3515 		}
3516 		if (stat(ln, &sconbuf) != -1 &&
3517 		    stat(SYSCON, &conbuf) != -1 &&
3518 		    sconbuf.st_rdev != conbuf.st_rdev &&
3519 		    sconbuf.st_ino != conbuf.st_ino) {
3520 			/*
3521 			 * Unlink /dev/syscon and relink it to the current line.
3522 			 */
3523 			if (unlink(SYSCON) == FAILURE) {
3524 				perror("Can't unlink /dev/syscon");
3525 				(void) fprintf(stderr,
3526 				    "Run command on the system console.\n");
3527 				(void) audit_put_record(ADT_FAILURE,
3528 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3529 				exit(1);
3530 			}
3531 			if (link(ln, SYSCON) == FAILURE) {
3532 				(void) fprintf(stderr,
3533 				    "Can't link /dev/syscon to %s: %s", ln,
3534 				    strerror(errno));
3535 
3536 				/* Try to leave a syscon */
3537 				(void) link(SYSTTY, SYSCON);
3538 				(void) audit_put_record(ADT_FAILURE,
3539 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3540 				exit(1);
3541 			}
3542 
3543 			/*
3544 			 * Try to leave a message on system console saying where
3545 			 * /dev/syscon is currently connected.
3546 			 */
3547 			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3548 				(void) fprintf(fp,
3549 				    "\n****	SYSCON CHANGED TO %s	****\n",
3550 				    ln);
3551 				(void) fclose(fp);
3552 			}
3553 		}
3554 	}
3555 
3556 	update_boot_archive(init_signal);
3557 
3558 	if (audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]) &&
3559 	    turnoff) {
3560 		/* turn off audit daemon and try to flush audit queue */
3561 
3562 		if (system("/usr/sbin/audit -t")) {
3563 			(void) fprintf(stderr, "%s: can't turn off auditd\n",
3564 				argv[0]);
3565 		} else {
3566 			(void) sleep(5);
3567 		}
3568 	}
3569 
3570 	/*
3571 	 * Signal init; init will take care of telling svc.startd.
3572 	 */
3573 	if (kill(init_pid, init_signal) == FAILURE) {
3574 		(void) fprintf(stderr, "Must be super-user\n");
3575 		(void) audit_put_record(ADT_FAILURE,
3576 		    ADT_FAIL_VALUE_AUTH, argv[1]);
3577 		exit(1);
3578 	}
3579 
3580 	exit(0);
3581 }
3582 
3583 
3584 #define	DELTA	25	/* Number of pidlist elements to allocate at a time */
3585 
3586 /* ARGSUSED */
3587 void
3588 sigpoll(int n)
3589 {
3590 	struct pidrec prec;
3591 	struct pidrec *p = &prec;
3592 	struct pidlist *plp;
3593 	struct pidlist *tp, *savetp;
3594 	int i;
3595 
3596 	if (Pfd < 0) {
3597 		return;
3598 	}
3599 	(void) sigset(SIGCLD, SIG_DFL);
3600 	for (;;) {
3601 		/*
3602 		 * Important Note: Either read will really fail (in which case
3603 		 * return is all we can do) or will get EAGAIN (Pfd was opened
3604 		 * O_NDELAY), in which case we also want to return.
3605 		 * Always return from here!
3606 		 */
3607 		if (read(Pfd, p, sizeof (struct pidrec)) !=
3608 						sizeof (struct pidrec)) {
3609 			(void) sigset(SIGCLD, childeath);
3610 			return;
3611 		}
3612 		switch (p->pd_type) {
3613 
3614 		case ADDPID:
3615 			/*
3616 			 * New "godchild", add to list.
3617 			 */
3618 			if (Plfree == NULL) {
3619 				plp = (struct pidlist *)calloc(DELTA,
3620 				    sizeof (struct pidlist));
3621 				if (plp == NULL) {
3622 					/* Can't save pid */
3623 					break;
3624 				}
3625 				/*
3626 				 * Point at 2nd record allocated, we'll use plp.
3627 				 */
3628 				tp = plp + 1;
3629 				/*
3630 				 * Link them into a chain.
3631 				 */
3632 				Plfree = tp;
3633 				for (i = 0; i < DELTA - 2; i++) {
3634 					tp->pl_next = tp + 1;
3635 					tp++;
3636 				}
3637 			} else {
3638 				plp = Plfree;
3639 				Plfree = plp->pl_next;
3640 			}
3641 			plp->pl_pid = p->pd_pid;
3642 			plp->pl_dflag = 0;
3643 			plp->pl_next = NULL;
3644 			/*
3645 			 * Note - pid list is kept in increasing order of pids.
3646 			 */
3647 			if (Plhead == NULL) {
3648 				Plhead = plp;
3649 				/* Back up to read next record */
3650 				break;
3651 			} else {
3652 				savetp = tp = Plhead;
3653 				while (tp) {
3654 					if (plp->pl_pid > tp->pl_pid) {
3655 						savetp = tp;
3656 						tp = tp->pl_next;
3657 						continue;
3658 					} else if (plp->pl_pid < tp->pl_pid) {
3659 						if (tp == Plhead) {
3660 							plp->pl_next = Plhead;
3661 							Plhead = plp;
3662 						} else {
3663 							plp->pl_next =
3664 							    savetp->pl_next;
3665 							savetp->pl_next = plp;
3666 						}
3667 						break;
3668 					} else {
3669 						/* Already in list! */
3670 						plp->pl_next = Plfree;
3671 						Plfree = plp;
3672 						break;
3673 					}
3674 				}
3675 				if (tp == NULL) {
3676 					/* Add to end of list */
3677 					savetp->pl_next = plp;
3678 				}
3679 			}
3680 			/* Back up to read next record. */
3681 			break;
3682 
3683 		case REMPID:
3684 			/*
3685 			 * This one was handled by someone else,
3686 			 * purge it from the list.
3687 			 */
3688 			if (Plhead == NULL) {
3689 				/* Back up to read next record. */
3690 				break;
3691 			}
3692 			savetp = tp = Plhead;
3693 			while (tp) {
3694 				if (p->pd_pid > tp->pl_pid) {
3695 					/* Keep on looking. */
3696 					savetp = tp;
3697 					tp = tp->pl_next;
3698 					continue;
3699 				} else if (p->pd_pid < tp->pl_pid) {
3700 					/* Not in list. */
3701 					break;
3702 				} else {
3703 					/* Found it. */
3704 					if (tp == Plhead)
3705 						Plhead = tp->pl_next;
3706 					else
3707 						savetp->pl_next = tp->pl_next;
3708 					tp->pl_next = Plfree;
3709 					Plfree = tp;
3710 					break;
3711 				}
3712 			}
3713 			/* Back up to read next record. */
3714 			break;
3715 		default:
3716 			console(B_TRUE, "Bad message on initpipe\n");
3717 			break;
3718 		}
3719 	}
3720 }
3721 
3722 
3723 static void
3724 cleanaux()
3725 {
3726 	struct pidlist *savep, *p;
3727 	pid_t	pid;
3728 	short	status;
3729 
3730 	(void) sigset(SIGCLD, SIG_DFL);
3731 	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
3732 	(void) sighold(SIGPOLL);
3733 	savep = p = Plhead;
3734 	while (p) {
3735 		if (p->pl_dflag) {
3736 			/*
3737 			 * Found an entry to delete,
3738 			 * remove it from list first.
3739 			 */
3740 			pid = p->pl_pid;
3741 			status = p->pl_exit;
3742 			if (p == Plhead) {
3743 				Plhead = p->pl_next;
3744 				p->pl_next = Plfree;
3745 				Plfree = p;
3746 				savep = p = Plhead;
3747 			} else {
3748 				savep->pl_next = p->pl_next;
3749 				p->pl_next = Plfree;
3750 				Plfree = p;
3751 				p = savep->pl_next;
3752 			}
3753 			clearent(pid, status);
3754 			continue;
3755 		}
3756 		savep = p;
3757 		p = p->pl_next;
3758 	}
3759 	(void) sigrelse(SIGPOLL);
3760 	(void) sigset(SIGCLD, childeath);
3761 }
3762 
3763 
3764 /*
3765  * /etc/inittab has more entries and we have run out of room in the proc_table
3766  * array. Double the size of proc_table to accomodate the extra entries.
3767  */
3768 static void
3769 increase_proc_table_size()
3770 {
3771 	sigset_t block, unblock;
3772 	void *ptr;
3773 	size_t delta = num_proc * sizeof (struct PROC_TABLE);
3774 
3775 
3776 	/*
3777 	 * Block signals for realloc.
3778 	 */
3779 	(void) sigfillset(&block);
3780 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
3781 
3782 
3783 	/*
3784 	 * On failure we just return because callers of this function check
3785 	 * for failure.
3786 	 */
3787 	do
3788 		ptr = realloc(g_state, g_state_sz + delta);
3789 	while (ptr == NULL && errno == EAGAIN);
3790 
3791 	if (ptr != NULL) {
3792 		/* ensure that the new part is initialized to zero */
3793 		bzero((caddr_t)ptr + g_state_sz, delta);
3794 
3795 		g_state = ptr;
3796 		g_state_sz += delta;
3797 		num_proc <<= 1;
3798 	}
3799 
3800 
3801 	/* unblock our signals before returning */
3802 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3803 }
3804 
3805 
3806 
3807 /*
3808  * Sanity check g_state.
3809  */
3810 static int
3811 st_sane()
3812 {
3813 	int i;
3814 	struct PROC_TABLE *ptp;
3815 
3816 
3817 	/* Note: cur_state is encoded as a signal number */
3818 	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3819 		return (0);
3820 
3821 	/* Check num_proc */
3822 	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3823 	    sizeof (struct PROC_TABLE))
3824 		return (0);
3825 
3826 	/* Check proc_table */
3827 	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3828 		/* skip unoccupied entries */
3829 		if (!(ptp->p_flags & OCCUPIED))
3830 			continue;
3831 
3832 		/* p_flags has no bits outside of PF_MASK */
3833 		if (ptp->p_flags & ~(PF_MASK))
3834 			return (0);
3835 
3836 		/* 5 <= pid <= MAXPID */
3837 		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3838 			return (0);
3839 
3840 		/* p_count >= 0 */
3841 		if (ptp->p_count < 0)
3842 			return (0);
3843 
3844 		/* p_time >= 0 */
3845 		if (ptp->p_time < 0)
3846 			return (0);
3847 	}
3848 
3849 	return (1);
3850 }
3851 
3852 /*
3853  * Initialize our state.
3854  *
3855  * If the system just booted, then init_state_file, which is located on an
3856  * everpresent tmpfs filesystem, should not exist.
3857  *
3858  * If we were restarted, then init_state_file should exist, in
3859  * which case we'll read it in, sanity check it, and use it.
3860  *
3861  * Note: You can't call console() until proc_table is ready.
3862  */
3863 void
3864 st_init()
3865 {
3866 	struct stat stb;
3867 	int ret, st_fd, insane = 0;
3868 	size_t to_be_read;
3869 	char *ptr;
3870 
3871 
3872 	booting = 1;
3873 
3874 	do {
3875 		/*
3876 		 * If we can exclusively create the file, then we're the
3877 		 * initial invocation of init(1M).
3878 		 */
3879 		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3880 		    S_IRUSR | S_IWUSR);
3881 	} while (st_fd == -1 && errno == EINTR);
3882 	if (st_fd != -1)
3883 		goto new_state;
3884 
3885 	booting = 0;
3886 
3887 	do {
3888 		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3889 	} while (st_fd == -1 && errno == EINTR);
3890 	if (st_fd == -1)
3891 		goto new_state;
3892 
3893 	/* Get the size of the file. */
3894 	do
3895 		ret = fstat(st_fd, &stb);
3896 	while (ret == -1 && errno == EINTR);
3897 	if (ret == -1)
3898 		goto new_state;
3899 
3900 	do
3901 		g_state = malloc(stb.st_size);
3902 	while (g_state == NULL && errno == EAGAIN);
3903 	if (g_state == NULL)
3904 		goto new_state;
3905 
3906 	to_be_read = stb.st_size;
3907 	ptr = (char *)g_state;
3908 	while (to_be_read > 0) {
3909 		ssize_t read_ret;
3910 
3911 		read_ret = read(st_fd, ptr, to_be_read);
3912 		if (read_ret < 0) {
3913 			if (errno == EINTR)
3914 				continue;
3915 
3916 			goto new_state;
3917 		}
3918 
3919 		to_be_read -= read_ret;
3920 		ptr += read_ret;
3921 	}
3922 
3923 	(void) close(st_fd);
3924 
3925 	g_state_sz = stb.st_size;
3926 
3927 	if (st_sane()) {
3928 		console(B_TRUE, "Restarting.\n");
3929 		return;
3930 	}
3931 
3932 	insane = 1;
3933 
3934 new_state:
3935 	if (st_fd >= 0)
3936 		(void) close(st_fd);
3937 	else
3938 		(void) unlink(init_state_file);
3939 
3940 	if (g_state != NULL)
3941 		free(g_state);
3942 
3943 	/* Something went wrong, so allocate new state. */
3944 	g_state_sz = sizeof (struct init_state) +
3945 	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3946 	do
3947 		g_state = calloc(1, g_state_sz);
3948 	while (g_state == NULL && errno == EAGAIN);
3949 	if (g_state == NULL) {
3950 		/* Fatal error! */
3951 		exit(errno);
3952 	}
3953 
3954 	g_state->ist_runlevel = -1;
3955 	num_proc = init_num_proc;
3956 
3957 	if (!booting) {
3958 		console(B_TRUE, "Restarting.\n");
3959 
3960 		/* Overwrite the bad state file. */
3961 		st_write();
3962 
3963 		if (!insane) {
3964 			console(B_TRUE,
3965 			    "Error accessing persistent state file `%s'.  "
3966 			    "Ignored.\n", init_state_file);
3967 		} else {
3968 			console(B_TRUE,
3969 			    "Persistent state file `%s' is invalid and was "
3970 			    "ignored.\n", init_state_file);
3971 		}
3972 	}
3973 }
3974 
3975 /*
3976  * Write g_state out to the state file.
3977  */
3978 void
3979 st_write()
3980 {
3981 	static int complained = 0;
3982 
3983 	int st_fd;
3984 	char *cp;
3985 	size_t sz;
3986 	ssize_t ret;
3987 
3988 
3989 	do {
3990 		st_fd = open(init_next_state_file,
3991 		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
3992 	} while (st_fd < 0 && errno == EINTR);
3993 	if (st_fd < 0)
3994 		goto err;
3995 
3996 	cp = (char *)g_state;
3997 	sz = g_state_sz;
3998 	while (sz > 0) {
3999 		ret = write(st_fd, cp, sz);
4000 		if (ret < 0) {
4001 			if (errno == EINTR)
4002 				continue;
4003 
4004 			goto err;
4005 		}
4006 
4007 		sz -= ret;
4008 		cp += ret;
4009 	}
4010 
4011 	(void) close(st_fd);
4012 	st_fd = -1;
4013 	if (rename(init_next_state_file, init_state_file)) {
4014 		(void) unlink(init_next_state_file);
4015 		goto err;
4016 	}
4017 	complained = 0;
4018 
4019 	return;
4020 
4021 err:
4022 	if (st_fd >= 0)
4023 		(void) close(st_fd);
4024 
4025 	if (!booting && !complained) {
4026 		/*
4027 		 * Only complain after the filesystem should have come up.
4028 		 * And only do it once so we don't loop between console()
4029 		 * & efork().
4030 		 */
4031 		complained = 1;
4032 		if (st_fd)
4033 			console(B_TRUE, "Couldn't write persistent state "
4034 			    "file `%s'.\n", init_state_file);
4035 		else
4036 			console(B_TRUE, "Couldn't move persistent state "
4037 			    "file `%s' to `%s'.\n", init_next_state_file,
4038 			    init_state_file);
4039 	}
4040 }
4041 
4042 /*
4043  * Create a contract with these parameters.
4044  */
4045 static int
4046 contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4047     uint64_t cookie)
4048 {
4049 	int fd, err;
4050 
4051 	char *ioctl_tset_emsg =
4052 	    "Couldn't set \"%s\" contract template parameter: %s.\n";
4053 
4054 	do
4055 		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4056 	while (fd < 0 && errno == EINTR);
4057 	if (fd < 0) {
4058 		console(B_TRUE, "Couldn't create process template: %s.\n",
4059 		    strerror(errno));
4060 		return (-1);
4061 	}
4062 
4063 	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4064 		console(B_TRUE, "Contract set template inherit, regent "
4065 		    "failed.\n");
4066 
4067 	/*
4068 	 * These errors result in a misconfigured template, which is better
4069 	 * than no template at all, so warn but don't abort.
4070 	 */
4071 	if (err = ct_tmpl_set_informative(fd, info))
4072 		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4073 
4074 	if (err = ct_tmpl_set_critical(fd, critical))
4075 		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4076 
4077 	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4078 		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4079 
4080 	if (err = ct_tmpl_set_cookie(fd, cookie))
4081 		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4082 
4083 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4084 
4085 	return (fd);
4086 }
4087 
4088 /*
4089  * Create the templates and open an event file descriptor.  We use dup2(2) to
4090  * get these descriptors away from the stdin/stdout/stderr group.
4091  */
4092 static void
4093 contracts_init()
4094 {
4095 	int err, fd;
4096 
4097 	/*
4098 	 * Create & configure a legacy template.  We only want empty events so
4099 	 * we know when to abandon them.
4100 	 */
4101 	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4102 	    ORDINARY_COOKIE);
4103 	if (legacy_tmpl >= 0) {
4104 		err = ct_tmpl_activate(legacy_tmpl);
4105 		if (err != 0) {
4106 			(void) close(legacy_tmpl);
4107 			legacy_tmpl = -1;
4108 			console(B_TRUE,
4109 			    "Couldn't activate legacy template (%s); "
4110 			    "legacy services will be in init's contract.\n",
4111 			    strerror(err));
4112 		}
4113 	} else
4114 		console(B_TRUE,
4115 		    "Legacy services will be in init's contract.\n");
4116 
4117 	if (dup2(legacy_tmpl, 255) == -1) {
4118 		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4119 		    strerror(errno));
4120 	} else {
4121 		(void) close(legacy_tmpl);
4122 		legacy_tmpl = 255;
4123 	}
4124 
4125 	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4126 
4127 	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4128 	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4129 
4130 	if (dup2(startd_tmpl, 254) == -1) {
4131 		console(B_TRUE, "Could not duplicate startd template: %s.\n",
4132 		    strerror(errno));
4133 	} else {
4134 		(void) close(startd_tmpl);
4135 		startd_tmpl = 254;
4136 	}
4137 
4138 	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4139 
4140 	if (legacy_tmpl < 0 && startd_tmpl < 0) {
4141 		/* The creation errors have already been reported. */
4142 		console(B_TRUE,
4143 		    "Ignoring contract events.  Core smf(5) services will not "
4144 		    "be restarted.\n");
4145 		return;
4146 	}
4147 
4148 	/*
4149 	 * Open an event endpoint.
4150 	 */
4151 	do
4152 		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4153 	while (fd < 0 && errno == EINTR);
4154 	if (fd < 0) {
4155 		console(B_TRUE,
4156 		    "Couldn't open process pbundle: %s.  Core smf(5) services "
4157 		    "will not be restarted.\n", strerror(errno));
4158 		return;
4159 	}
4160 
4161 	if (dup2(fd, 253) == -1) {
4162 		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4163 		    strerror(errno));
4164 	} else {
4165 		(void) close(fd);
4166 		fd = 253;
4167 	}
4168 
4169 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4170 
4171 	/* Reset in case we've been restarted. */
4172 	(void) ct_event_reset(fd);
4173 
4174 	poll_fds[0].fd = fd;
4175 	poll_fds[0].events = POLLIN;
4176 	poll_nfds = 1;
4177 }
4178 
4179 static int
4180 contract_getfile(ctid_t id, const char *name, int oflag)
4181 {
4182 	int fd;
4183 
4184 	do
4185 		fd = contract_open(id, "process", name, oflag);
4186 	while (fd < 0 && errno == EINTR);
4187 
4188 	if (fd < 0)
4189 		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4190 		    name, id, strerror(errno));
4191 
4192 	return (fd);
4193 }
4194 
4195 static int
4196 contract_cookie(ctid_t id, uint64_t *cp)
4197 {
4198 	int fd, err;
4199 	ct_stathdl_t sh;
4200 
4201 	fd = contract_getfile(id, "status", O_RDONLY);
4202 	if (fd < 0)
4203 		return (-1);
4204 
4205 	err = ct_status_read(fd, CTD_COMMON, &sh);
4206 	if (err != 0) {
4207 		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4208 		    id, strerror(err));
4209 		(void) close(fd);
4210 		return (-1);
4211 	}
4212 
4213 	(void) close(fd);
4214 
4215 	*cp = ct_status_get_cookie(sh);
4216 
4217 	ct_status_free(sh);
4218 	return (0);
4219 }
4220 
4221 static void
4222 contract_ack(ct_evthdl_t e)
4223 {
4224 	int fd;
4225 
4226 	if (ct_event_get_flags(e) & CTE_INFO)
4227 		return;
4228 
4229 	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4230 	if (fd < 0)
4231 		return;
4232 
4233 	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
4234 	(void) close(fd);
4235 }
4236 
4237 /*
4238  * Process a contract event.
4239  */
4240 static void
4241 contract_event(struct pollfd *poll)
4242 {
4243 	ct_evthdl_t e;
4244 	int err;
4245 	ctid_t ctid;
4246 
4247 	if (!(poll->revents & POLLIN)) {
4248 		if (poll->revents & POLLERR)
4249 			console(B_TRUE,
4250 			    "Unknown poll error on my process contract "
4251 			    "pbundle.\n");
4252 		return;
4253 	}
4254 
4255 	err = ct_event_read(poll->fd, &e);
4256 	if (err != 0) {
4257 		console(B_TRUE, "Error retrieving contract event: %s.\n",
4258 		    strerror(err));
4259 		return;
4260 	}
4261 
4262 	ctid = ct_event_get_ctid(e);
4263 
4264 	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4265 		uint64_t cookie;
4266 		int ret, abandon = 1;
4267 
4268 		/* If it's svc.startd, restart it.  Else, abandon. */
4269 		ret = contract_cookie(ctid, &cookie);
4270 
4271 		if (ret == 0) {
4272 			if (cookie == STARTD_COOKIE &&
4273 			    do_restart_startd) {
4274 				if (smf_debug)
4275 					console(B_TRUE, "Restarting "
4276 					    "svc.startd.\n");
4277 
4278 				/*
4279 				 * Account for the failure.  If the failure rate
4280 				 * exceeds a threshold, then drop to maintenance
4281 				 * mode.
4282 				 */
4283 				startd_record_failure();
4284 				if (startd_failure_rate_critical())
4285 					enter_maintenance();
4286 
4287 				if (startd_tmpl < 0)
4288 					console(B_TRUE,
4289 					    "Restarting svc.startd in "
4290 					    "improper contract (bad "
4291 					    "template).\n");
4292 
4293 				(void) startd_run(startd_cline, startd_tmpl,
4294 				    ctid);
4295 
4296 				abandon = 0;
4297 			}
4298 		}
4299 
4300 		if (abandon && (err = contract_abandon_id(ctid))) {
4301 			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4302 			    ctid, strerror(err));
4303 		}
4304 
4305 		/*
4306 		 * No need to acknowledge the event since either way the
4307 		 * originating contract should be abandoned.
4308 		 */
4309 	} else {
4310 		console(B_TRUE,
4311 		    "Received contract event of unexpected type %d from "
4312 		    "contract %ld.\n", ct_event_get_type(e), ctid);
4313 
4314 		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4315 			/* Allow unexpected critical events to be released. */
4316 			contract_ack(e);
4317 	}
4318 
4319 	ct_event_free(e);
4320 }
4321 
4322 /*
4323  * svc.startd(1M) Management
4324  */
4325 
4326 /*
4327  * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
4328  * contract, or 0 if we're starting it for the first time.  If wait is true
4329  * we'll wait for and return the exit value of the child.
4330  */
4331 static int
4332 startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4333 {
4334 	int err, i, ret, did_activate;
4335 	pid_t pid;
4336 	struct stat sb;
4337 
4338 	if (cline[0] == '\0')
4339 		return (-1);
4340 
4341 	/*
4342 	 * Don't restart startd if the system is rebooting or shutting down.
4343 	 */
4344 	do {
4345 		ret = stat("/etc/svc/volatile/resetting", &sb);
4346 	} while (ret == -1 && errno == EINTR);
4347 
4348 	if (ret == 0) {
4349 		if (smf_debug)
4350 			console(B_TRUE, "Quiescing for reboot.\n");
4351 		(void) pause();
4352 		return (-1);
4353 	}
4354 
4355 	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4356 	if (err == EINVAL) {
4357 		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4358 		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4359 		    CT_PR_EV_HWERR, STARTD_COOKIE);
4360 
4361 		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4362 	}
4363 	if (err != 0) {
4364 		console(B_TRUE,
4365 		    "Couldn't set transfer parameter of contract template: "
4366 		    "%s.\n", strerror(err));
4367 	}
4368 
4369 	did_activate = !(ct_tmpl_activate(tmpl));
4370 	if (!did_activate)
4371 		console(B_TRUE,
4372 		    "Template activation failed; not starting \"%s\" in "
4373 		    "proper contract.\n", cline);
4374 
4375 	/* Hold SIGCHLD so we can wait if necessary. */
4376 	(void) sighold(SIGCHLD);
4377 
4378 	while ((pid = fork()) < 0) {
4379 		if (errno == EPERM) {
4380 			console(B_TRUE, "Insufficient permission to fork.\n");
4381 
4382 			/* Now that's a doozy. */
4383 			exit(1);
4384 		}
4385 
4386 		console(B_TRUE,
4387 		    "fork() for svc.startd failed: %s.  Will retry in 1 "
4388 		    "second...\n", strerror(errno));
4389 
4390 		(void) sleep(1);
4391 
4392 		/* Eventually give up? */
4393 	}
4394 
4395 	if (pid == 0) {
4396 		/* child */
4397 
4398 		/* See the comment in efork() */
4399 		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4400 			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4401 				(void) sigset(i, SIG_IGN);
4402 			else
4403 				(void) sigset(i, SIG_DFL);
4404 		}
4405 
4406 		if (smf_options != NULL) {
4407 			/* Put smf_options in the environment. */
4408 			glob_envp[glob_envn] =
4409 			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
4410 				strlen(smf_options) + 1);
4411 
4412 			if (glob_envp[glob_envn] != NULL) {
4413 				/* LINTED */
4414 				(void) sprintf(glob_envp[glob_envn],
4415 				    "SMF_OPTIONS=%s", smf_options);
4416 				glob_envp[glob_envn+1] = NULL;
4417 			} else {
4418 				console(B_TRUE,
4419 				    "Could not set SMF_OPTIONS (%s).\n",
4420 				    strerror(errno));
4421 			}
4422 		}
4423 
4424 		if (smf_debug)
4425 			console(B_TRUE, "Executing svc.startd\n");
4426 
4427 		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4428 
4429 		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4430 		    strerror(errno));
4431 
4432 		exit(1);
4433 	}
4434 
4435 	/* parent */
4436 
4437 	if (did_activate) {
4438 		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4439 			(void) ct_tmpl_clear(tmpl);
4440 	}
4441 
4442 	/* Clear the old_ctid reference so the kernel can reclaim it. */
4443 	if (old_ctid != 0)
4444 		(void) ct_pr_tmpl_set_transfer(tmpl, 0);
4445 
4446 	(void) sigrelse(SIGCHLD);
4447 
4448 	return (0);
4449 }
4450 
4451 /*
4452  * void startd_record_failure(void)
4453  *   Place the current time in our circular array of svc.startd failures.
4454  */
4455 void
4456 startd_record_failure()
4457 {
4458 	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4459 
4460 	startd_failure_time[index] = gethrtime();
4461 }
4462 
4463 /*
4464  * int startd_failure_rate_critical(void)
4465  *   Return true if the average failure interval is less than the permitted
4466  *   interval.  Implicit success if insufficient measurements for an average
4467  *   exist.
4468  */
4469 int
4470 startd_failure_rate_critical()
4471 {
4472 	int n = startd_failure_index;
4473 	hrtime_t avg_ns = 0;
4474 
4475 	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4476 		return (0);
4477 
4478 	avg_ns =
4479 	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4480 	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4481 	    NSTARTD_FAILURE_TIMES;
4482 
4483 	return (avg_ns < STARTD_FAILURE_RATE_NS);
4484 }
4485 
4486 /*
4487  * returns string that must be free'd
4488  */
4489 
4490 static char
4491 *audit_boot_msg()
4492 {
4493 	char		*b, *p;
4494 	char		desc[] = "booted";
4495 	zoneid_t	zid = getzoneid();
4496 
4497 	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4498 	if (b == NULL)
4499 		return (b);
4500 
4501 	p = b;
4502 	p += strlcpy(p, desc, sizeof (desc));
4503 	if (zid != GLOBAL_ZONEID) {
4504 		p += strlcpy(p, ": ", 3);
4505 		(void) getzonenamebyid(zid, p, MAXNAMELEN);
4506 	}
4507 	return (b);
4508 }
4509 
4510 /*
4511  * Generate AUE_init_solaris audit record.  Return 1 if
4512  * auditing is enabled in case the caller cares.
4513  *
4514  * In the case of userint() or a local zone invocation of
4515  * one_true_init, the process initially contains the audit
4516  * characteristics of the process that invoked init.  The first pass
4517  * through here uses those characteristics then for the case of
4518  * one_true_init in a local zone, clears them so subsequent system
4519  * state changes won't be attributed to the person who booted the
4520  * zone.
4521  */
4522 static int
4523 audit_put_record(int pass_fail, int status, char *msg)
4524 {
4525 	adt_session_data_t	*ah;
4526 	adt_event_data_t	*event;
4527 
4528 	if (!adt_audit_enabled())
4529 		return (0);
4530 
4531 	/*
4532 	 * the PROC_DATA picks up the context to tell whether this is
4533 	 * an attributed record (auid = -2 is unattributed)
4534 	 */
4535 	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4536 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4537 		return (1);
4538 	}
4539 	event = adt_alloc_event(ah, ADT_init_solaris);
4540 	if (event == NULL) {
4541 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4542 		(void) adt_end_session(ah);
4543 		return (1);
4544 	}
4545 	event->adt_init_solaris.info = msg;	/* NULL is ok here */
4546 
4547 	if (adt_put_event(event, pass_fail, status)) {
4548 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4549 		(void) adt_end_session(ah);
4550 		return (1);
4551 	}
4552 	adt_free_event(event);
4553 
4554 	(void) adt_end_session(ah);
4555 
4556 	return (1);
4557 }
4558