xref: /titanic_50/usr/src/cmd/init/init.c (revision 107c18c129a0fbc18532c43616e5dc1a489d6ddc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2013 Gary Mills
24  *
25  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 /*
32  * University Copyright- Copyright (c) 1982, 1986, 1988
33  * The Regents of the University of California
34  * All Rights Reserved
35  *
36  * University Acknowledgment- Portions of this document are derived from
37  * software developed by the University of California, Berkeley, and its
38  * contributors.
39  */
40 
41 /*
42  * init(1M) is the general process spawning program.  Its primary job is to
43  * start and restart svc.startd for smf(5).  For backwards-compatibility it also
44  * spawns and respawns processes according to /etc/inittab and the current
45  * run-level.  It reads /etc/default/init for general configuration.
46  *
47  * To change run-levels the system administrator runs init from the command
48  * line with a level name.  init signals svc.startd via libscf and directs the
49  * zone's init (pid 1 in the global zone) what to do by sending it a signal;
50  * these signal numbers are commonly referred to in the code as 'states'.  Valid
51  * run-levels are [sS0123456].  Additionally, init can be given directives
52  * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
53  *
54  * When init processes inittab entries, it finds processes that are to be
55  * spawned at various run-levels.  inittab contains the set of the levels for
56  * which each inittab entry is valid.
57  *
58  * State File and Restartability
59  *   Premature exit by init(1M) is handled as a special case by the kernel:
60  *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
61  *   1 in the global zone.)  To track the processes it has previously spawned,
62  *   as well as other mutable state, init(1M) regularly updates a state file
63  *   such that its subsequent invocations have knowledge of its various
64  *   dependent processes and duties.
65  *
66  * Process Contracts
67  *   We start svc.startd(1M) in a contract and transfer inherited contracts when
68  *   restarting it.  Everything else is started using the legacy contract
69  *   template, and the created contracts are abandoned when they become empty.
70  *
71  * utmpx Entry Handling
72  *   Because init(1M) no longer governs the startup process, its knowledge of
73  *   when utmpx becomes writable is indirect.  However, spawned processes
74  *   expect to be constructed with valid utmpx entries.  As a result, attempts
75  *   to write normal entries will be retried until successful.
76  *
77  * Maintenance Mode
78  *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
79  *   which it invokes sulogin(1M) to allow the operator an opportunity to
80  *   repair the system.  Normally, this operation is performed as a
81  *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
82  *   diagnosis to be completed.  In the cases that fork(2) requests themselves
83  *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
84  *   restart init(1M) on exit from the operator session.
85  *
86  *   One scenario where init(1M) enters its maintenance mode is when
87  *   svc.startd(1M) begins to fail rapidly, defined as when the average time
88  *   between recent failures drops below a given threshold.
89  */
90 
91 #include <sys/contract/process.h>
92 #include <sys/ctfs.h>
93 #include <sys/stat.h>
94 #include <sys/statvfs.h>
95 #include <sys/stropts.h>
96 #include <sys/systeminfo.h>
97 #include <sys/time.h>
98 #include <sys/termios.h>
99 #include <sys/tty.h>
100 #include <sys/types.h>
101 #include <sys/utsname.h>
102 
103 #include <bsm/adt_event.h>
104 #include <bsm/libbsm.h>
105 #include <security/pam_appl.h>
106 
107 #include <assert.h>
108 #include <ctype.h>
109 #include <dirent.h>
110 #include <errno.h>
111 #include <fcntl.h>
112 #include <libcontract.h>
113 #include <libcontract_priv.h>
114 #include <libintl.h>
115 #include <libscf.h>
116 #include <libscf_priv.h>
117 #include <poll.h>
118 #include <procfs.h>
119 #include <signal.h>
120 #include <stdarg.h>
121 #include <stdio.h>
122 #include <stdio_ext.h>
123 #include <stdlib.h>
124 #include <string.h>
125 #include <strings.h>
126 #include <syslog.h>
127 #include <time.h>
128 #include <ulimit.h>
129 #include <unistd.h>
130 #include <utmpx.h>
131 #include <wait.h>
132 #include <zone.h>
133 #include <ucontext.h>
134 
135 #undef	sleep
136 
/*
 * fioctl	Issue an ioctl() on the descriptor underlying stdio stream p.
 * min		Smaller of two values.  Both arguments may be evaluated twice,
 *		so do not pass expressions with side effects.
 */
#define	fioctl(p, sptr, cmd)	ioctl(fileno(p), sptr, cmd)
#define	min(a, b)		(((a) < (b)) ? (a) : (b))

#define	TRUE	1
#define	FALSE	0
#define	FAILURE	(-1)	/* parenthesized so it is safe in any expression */

#define	UT_USER_SZ	32	/* Size of a utmpx ut_user field */
#define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */

/*
 * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
 *		nothing else requires this "init" wakeup.
 */
#define	SLEEPTIME	(5 * 60)

/*
 * MAXCMDL	The maximum length of a command string in inittab.
 */
#define	MAXCMDL	512

/*
 * EXEC		The length of the prefix string added to all commands
 *		found in inittab.
 */
#define	EXEC	(sizeof ("exec ") - 1)

/*
 * TWARN	The amount of time between warning signal, SIGTERM,
 *		and the fatal kill signal, SIGKILL.
 */
#define	TWARN	5

/*
 * id_eq	Compare two four-character inittab entry ids; evaluates to
 *		TRUE when all four characters match and FALSE otherwise.
 *		The arguments are parenthesized so that expression arguments
 *		(e.g. "*(p + 1)") are indexed as a whole.  Each argument is
 *		evaluated up to four times.
 */
#define	id_eq(x, y)	(((x)[0] == (y)[0] && (x)[1] == (y)[1] && \
			(x)[2] == (y)[2] && (x)[3] == (y)[3]) ? TRUE : FALSE)
172 
173 /*
174  * The kernel's default umask is 022 these days; since some processes inherit
175  * their umask from init, init will set it from CMASK in /etc/default/init.
176  * init gets the default umask from the kernel; it sets it to 022 whenever
177  * it wants to create a file and reverts to CMASK afterwards.
178  */
179 
180 static int cmask;
181 
182 /*
183  * The following definitions, concluding with the 'lvls' array, provide a
184  * common mapping between level-name (like 'S'), signal number (state),
185  * run-level mask, and specific properties associated with a run-level.
186  * This array should be accessed using the routines lvlname_to_state(),
187  * lvlname_to_mask(), state_to_mask(), and state_to_flags().
188  */
189 
190 /*
191  * Correspondence of signals to init actions.
192  */
193 #define	LVLQ		SIGHUP
194 #define	LVL0		SIGINT
195 #define	LVL1		SIGQUIT
196 #define	LVL2		SIGILL
197 #define	LVL3		SIGTRAP
198 #define	LVL4		SIGIOT
199 #define	LVL5		SIGEMT
200 #define	LVL6		SIGFPE
201 #define	SINGLE_USER	SIGBUS
202 #define	LVLa		SIGSEGV
203 #define	LVLb		SIGSYS
204 #define	LVLc		SIGPIPE
205 
206 /*
207  * Bit Mask for each level.  Used to determine legal levels.
208  */
209 #define	MASK0	0x0001
210 #define	MASK1	0x0002
211 #define	MASK2	0x0004
212 #define	MASK3	0x0008
213 #define	MASK4	0x0010
214 #define	MASK5	0x0020
215 #define	MASK6	0x0040
216 #define	MASKSU	0x0080
217 #define	MASKa	0x0100
218 #define	MASKb	0x0200
219 #define	MASKc	0x0400
220 
221 #define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
222 #define	MASK_abc (MASKa | MASKb | MASKc)
223 
224 /*
225  * Flags to indicate properties of various states.
226  */
227 #define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */
228 
/*
 * One row of the level table: ties a level name to the signal number
 * ("state") used to request it, its run-level mask bit, and its flags.
 */
229 typedef struct lvl {
230 	int	lvl_state;	/* signal number (LVL*, SINGLE_USER) */
231 	int	lvl_mask;	/* MASK* bit for this level; 0 for Q/q */
232 	char	lvl_name;	/* level name character, e.g. 'S' or '3' */
233 	int	lvl_flags;	/* LSEL_* property flags */
234 } lvl_t;
235 
/*
 * The level table.  'Q'/'q' and 'S'/'s' each appear twice so that both
 * spellings of the name map to the same state and mask.  Only the numeric
 * and single-user levels carry LSEL_RUNLEVEL.
 */
236 static lvl_t lvls[] = {
237 	{ LVLQ,		0,	'Q', 0					},
238 	{ LVLQ,		0,	'q', 0					},
239 	{ LVL0,		MASK0,	'0', LSEL_RUNLEVEL			},
240 	{ LVL1, 	MASK1,	'1', LSEL_RUNLEVEL			},
241 	{ LVL2, 	MASK2,	'2', LSEL_RUNLEVEL			},
242 	{ LVL3, 	MASK3,	'3', LSEL_RUNLEVEL			},
243 	{ LVL4, 	MASK4,	'4', LSEL_RUNLEVEL			},
244 	{ LVL5, 	MASK5,	'5', LSEL_RUNLEVEL			},
245 	{ LVL6, 	MASK6, 	'6', LSEL_RUNLEVEL			},
246 	{ SINGLE_USER, 	MASKSU, 'S', LSEL_RUNLEVEL			},
247 	{ SINGLE_USER, 	MASKSU, 's', LSEL_RUNLEVEL			},
248 	{ LVLa,		MASKa,	'a', 0					},
249 	{ LVLb,		MASKb,	'b', 0					},
250 	{ LVLc,		MASKc,	'c', 0					}
251 };
252 
/* Number of rows in lvls[]. */
253 #define	LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
254 
255 /*
256  * Legal action field values.
257  */
258 #define	OFF		0	/* Kill process if on, else ignore */
259 #define	RESPAWN		1	/* Continuously restart process when it dies */
260 #define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
261 #define	ONCE		2	/* Start process, do not respawn when dead */
262 #define	WAIT		3	/* Perform once and wait to complete */
263 #define	BOOT		4	/* Start at boot time only */
264 #define	BOOTWAIT	5	/* Start at boot time and wait to complete */
265 #define	POWERFAIL	6	/* Start on powerfail */
266 #define	POWERWAIT	7	/* Start and wait for complete on powerfail */
267 #define	INITDEFAULT	8	/* Default level "init" should start at */
268 #define	SYSINIT		9	/* Actions performed before init speaks */
269 
270 #define	M_OFF		0001
271 #define	M_RESPAWN	0002
272 #define	M_ONDEMAND	M_RESPAWN
273 #define	M_ONCE		0004
274 #define	M_WAIT		0010
275 #define	M_BOOT		0020
276 #define	M_BOOTWAIT	0040
277 #define	M_PF		0100
278 #define	M_PWAIT		0200
279 #define	M_INITDEFAULT	0400
280 #define	M_SYSINIT	01000
281 
282 /* States for the inittab parser in getcmd(). */
283 #define	ID	1
284 #define	LEVELS	2
285 #define	ACTION	3
286 #define	COMMAND	4
287 #define	COMMENT	5
288 
289 /*
290  * inittab entry id constants
291  */
292 #define	INITTAB_ENTRY_ID_SIZE 4
293 #define	INITTAB_ENTRY_ID_STR_FORMAT "%.4s"	/* if INITTAB_ENTRY_ID_SIZE */
294 						/* changes, this should */
295 						/* change accordingly */
296 
297 /*
298  * Init can be in any of three main states, "normal" mode where it is
299  * processing entries for the lines file in a normal fashion, "boot" mode,
300  * where it is only interested in the boot actions, and "powerfail" mode,
301  * where it is only interested in powerfail related actions. The following
302  * masks declare the legal actions for each mode.
303  */
304 #define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
305 #define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
306 #define	PF_MODES	(M_PF | M_PWAIT)
307 
308 struct PROC_TABLE {
309 	char	p_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
310 						/* process */
311 	pid_t	p_pid;		/* Process id */
312 	short	p_count;	/* How many respawns of this command in */
313 				/*   the current series */
314 	long	p_time;		/* Start time for a series of respawns */
315 	short	p_flags;
316 	short	p_exit;		/* Exit status of a process which died */
317 };
318 
319 /*
320  * Flags for the "p_flags" word of a PROC_TABLE entry:
321  *
322  *	OCCUPIED	This slot in init's proc table is in use.
323  *
324  *	LIVING		Process is alive.
325  *
326  *	NOCLEANUP	efork() is not allowed to cleanup this entry even
327  *			if process is dead.
328  *
329  *	NAMED		This process has a name, i.e. came from inittab.
330  *
331  *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
332  *			formed this way are respawnable and immune to level
333  *			changes as long as their entry exists in inittab.
334  *
335  *	TOUCHED		Flag used by remv() to determine whether it has looked
336  *			at an entry while checking for processes to be killed.
337  *
338  *	WARNED		Flag used by remv() to mark processes that have been
339  *			sent the SIGTERM signal.  If they don't die in 5
340  *			seconds, they are sent the SIGKILL signal.
341  *
342  *	KILLED		Flag used by remv() to mark procs that have been sent
343  *			the SIGTERM and SIGKILL signals.
344  *
345  *	PF_MASK		Bitwise or of legal flags, for sanity checking.
346  */
347 #define	OCCUPIED	01
348 #define	LIVING		02
349 #define	NOCLEANUP	04
350 #define	NAMED		010
351 #define	DEMANDREQUEST	020
352 #define	TOUCHED		040
353 #define	WARNED		0100
354 #define	KILLED		0200
355 #define	PF_MASK		0377
356 
357 /*
358  * Respawn limits for processes that are to be respawned:
359  *
360  *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
361  *			respawn a process SPAWN_LIMIT times before it gets mad.
362  *
363  *	SPAWN_LIMIT	The number of respawns "init" will attempt in
364  *			SPAWN_INTERVAL seconds before it generates an
365  *			error message and inhibits further tries for
366  *			INHIBIT seconds.
367  *
368  *	INHIBIT		The number of seconds "init" ignores an entry it had
369  *			trouble spawning unless a "telinit Q" is received.
370  */
371 
372 #define	SPAWN_INTERVAL	(2*60)
373 #define	SPAWN_LIMIT	10
374 #define	INHIBIT		(5*60)
375 
376 /*
377  * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
378  */
379 #define	ID_MAX_STR_LEN	10
380 
381 #define	NULLPROC	((struct PROC_TABLE *)(0))
382 #define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
383 
384 struct CMD_LINE {
385 	char c_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
386 						/* process to be affected by */
387 						/* action */
388 	short c_levels;	/* Mask of legal levels for process */
389 	short c_action;	/* Mask for type of action required */
390 	char *c_command; /* Pointer to init command */
391 };
392 
393 struct	pidrec {
394 	int	pd_type;	/* Command type */
395 	pid_t	pd_pid;		/* pid to add or remove */
396 };
397 
398 /*
399  * pd_type's
400  */
401 #define	ADDPID	1
402 #define	REMPID	2
403 
404 static struct	pidlist {
405 	pid_t	pl_pid;		/* pid to watch for */
406 	int	pl_dflag;	/* Flag indicating SIGCLD from this pid */
407 	short	pl_exit;	/* Exit status of proc */
408 	struct	pidlist	*pl_next; /* Next in list */
409 } *Plhead, *Plfree;
410 
411 /*
412  * The following structure contains a set of modes for /dev/syscon
413  * and should match the default contents of /etc/ioctl.syscon.  It should also
414  * be kept in-sync with base_termios in uts/common/io/ttcompat.c.
415  */
416 static struct termios	dflt_termios = {
417 	BRKINT|ICRNL|IXON|IMAXBEL,			/* iflag */
418 	OPOST|ONLCR|TAB3,				/* oflag */
419 	CS8|CREAD|B9600,				/* cflag */
420 	ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN, /* lflag */
421 	CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
422 	0, 0, 0, 0, 0, 0, 0, 0,
423 	0, 0, 0
424 };
425 
426 static struct termios	stored_syscon_termios;
427 static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */
428 
/*
 * Reasons for init to wake up.  Each reason is a one-bit flag in w_flags;
 * w_mask overlays the same storage so that main() can test whether any
 * reason is pending (w_mask == 0) and clear them all with one assignment.
 */
429 static union WAKEUP {
430 	struct WAKEFLAGS {
431 		unsigned w_usersignal : 1;	/* User sent signal to "init" */
432 		unsigned w_childdeath : 1;	/* An "init" child died */
433 		unsigned w_powerhit : 1;	/* OS experienced powerfail */
434 	}	w_flags;
435 	int w_mask;	/* all of the flags above viewed as a single word */
436 } wakeup;
437 
438 
/*
 * init's mutable state, reached through the global pointer g_state.  The
 * rest of the file does not name these members directly; it uses the
 * cur_state, num_proc, proc_table and utmpx_ok macros defined below.
 */
439 struct init_state {
440 	int			ist_runlevel;	/* current state (cur_state) */
441 	int			ist_num_proc;	/* entries in ist_proc_table */
442 	int			ist_utmpx_ok;	/* see utmpx_ok */
	/*
	 * Declared with a single element, but code such as remv() walks
	 * ist_num_proc entries starting here.
	 * NOTE(review): presumably the structure is over-allocated (see
	 * g_state_sz and increase_proc_table_size()) -- confirm.
	 */
443 	struct PROC_TABLE	ist_proc_table[1];
444 };
445 
/* Shorthand accessors for the members of *g_state. */
446 #define	cur_state	(g_state->ist_runlevel)
447 #define	num_proc	(g_state->ist_num_proc)
448 #define	proc_table	(g_state->ist_proc_table)
449 #define	utmpx_ok	(g_state->ist_utmpx_ok)
450 
451 /* Contract cookies. */
452 #define	ORDINARY_COOKIE		0
453 #define	STARTD_COOKIE		1
454 
455 
/*
 * bad_error(func, err)
 *   Report that the function named by the string func failed with the
 *   unexpected error number err, then abort().  When NDEBUG is defined the
 *   message is omitted and the macro only aborts.
 *
 *   The debug variant is wrapped in do { } while (0) so that, like the
 *   NDEBUG variant, it is a single statement that takes a trailing
 *   semicolon and can be used as the body of an if/else.
 */
#ifndef NDEBUG
#define	bad_error(func, err)	do {					\
	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
	abort();							\
} while (0)
#else
#define	bad_error(func, err)	abort()
#endif
465 
466 
467 /*
468  * Useful file and device names.
469  */
470 static char *CONSOLE	  = "/dev/console";	/* Real system console */
471 static char *INITPIPE_DIR = "/var/run";
472 static char *INITPIPE	  = "/var/run/initpipe";
473 
474 #define	INIT_STATE_DIR "/etc/svc/volatile"
475 static const char * const init_state_file = INIT_STATE_DIR "/init.state";
476 static const char * const init_next_state_file =
477 	INIT_STATE_DIR "/init-next.state";
478 
479 static const int init_num_proc = 20;	/* Initial size of process table. */
480 
481 static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
482 static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
483 static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
484 static char *SYSTTY	 = "/dev/systty";	/* System Console */
485 static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
486 static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
487 static char *ENVFILE	 = "/etc/default/init";	/* Default env. */
488 static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
489 static char *SH	= "/sbin/sh";		/* Standard shell */
490 
491 /*
492  * Default Path.  /sbin is included in path only during sysinit phase
493  */
494 #define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
495 #define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"
496 
497 static int	prior_state;
498 static int	prev_state;	/* State "init" was in last time it woke */
499 static int	new_state;	/* State user wants "init" to go to. */
500 static int	lvlq_received;	/* Explicit request to examine state */
501 static int	op_modes = BOOT_MODES; /* Current state of "init" */
502 static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
503 				/*   childeath() and cleared in cleanaux() */
504 static int	Pfd = -1;	/* fd to receive pids thru */
505 static unsigned int	spawncnt, pausecnt;
506 static int	rsflag;		/* Set if a respawn has taken place */
507 static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
508 				/* routine each time an alarm interrupt */
509 				/* takes place. */
510 static int	sflg = 0;	/* Set if we were booted -s to single user */
511 static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
512 static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
513 static pid_t	init_pid;	/* PID of "one true" init for current zone */
514 
515 static struct init_state *g_state = NULL;
516 static size_t	g_state_sz;
517 static int	booting = 1;	/* Set while we're booting. */
518 
519 /*
520  * Array for default global environment.
521  */
522 #define	MAXENVENT	24	/* Max number of default env variables + 1 */
523 				/* init can use three itself, so this leaves */
524 				/* 20 for the administrator in ENVFILE. */
525 static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
526 static int	glob_envn;		/* Number of environment strings */
527 
528 
529 static struct pollfd	poll_fds[1];
530 static int		poll_nfds = 0;	/* poll_fds is uninitialized */
531 
532 /*
533  * Contracts constants
534  */
535 #define	SVC_INIT_PREFIX "init:/"
536 #define	SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
537 #define	SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)
538 
539 static int	legacy_tmpl = -1;	/* fd for legacy contract template */
540 static int	startd_tmpl = -1;	/* fd for svc.startd's template */
541 static char	startd_svc_aux[SVC_AUX_SIZE];
542 
543 static char	startd_cline[256] = "";	/* svc.startd's command line */
544 static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
545 static char	*smf_options = NULL;	/* Options to give to startd. */
546 static int	smf_debug = 0;		/* Messages for debugging smf(5) */
547 static time_t	init_boot_time;		/* Substitute for kernel boot time. */
548 
549 #define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
550 #define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */
551 
552 static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
553 static uint_t	startd_failure_index;
554 
555 
556 static char	*prog_name(char *);
557 static int	state_to_mask(int);
558 static int	lvlname_to_mask(char, int *);
559 static void	lscf_set_runlevel(char);
560 static int	state_to_flags(int);
561 static char	state_to_name(int);
562 static int	lvlname_to_state(char);
563 static int	getcmd(struct CMD_LINE *, char *);
564 static int	realcon();
565 static int	spawn_processes();
566 static int	get_ioctl_syscon();
567 static int	account(short, struct PROC_TABLE *, char *);
568 static void	alarmclk();
569 static void	childeath(int);
570 static void	cleanaux();
571 static void	clearent(pid_t, short);
572 static void	console(boolean_t, char *, ...);
573 static void	init_signals(void);
574 static void	setup_pipe();
575 static void	killproc(pid_t);
576 static void	init_env();
577 static void	boot_init();
578 static void	powerfail();
579 static void	remv();
580 static void	write_ioctl_syscon();
581 static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
582 static void	setimer(int);
583 static void	siglvl(int, siginfo_t *, ucontext_t *);
584 static void	sigpoll(int);
585 static void	enter_maintenance(void);
586 static void	timer(int);
587 static void	userinit(int, char **);
588 static void	notify_pam_dead(struct utmpx *);
589 static long	waitproc(struct PROC_TABLE *);
590 static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
591 static struct PROC_TABLE *findpslot(struct CMD_LINE *);
592 static void	increase_proc_table_size();
593 static void	st_init();
594 static void	st_write();
595 static void	contracts_init();
596 static void	contract_event(struct pollfd *);
597 static int	startd_run(const char *, int, ctid_t);
598 static void	startd_record_failure();
599 static int	startd_failure_rate_critical();
600 static char	*audit_boot_msg();
601 static int	audit_put_record(int, int, char *);
602 static void	update_boot_archive(int new_state);
603 
604 int
605 main(int argc, char *argv[])
606 {
607 	int	chg_lvl_flag = FALSE, print_banner = FALSE;
608 	int	may_need_audit = 1;
609 	int	c;
610 	char	*msg;
611 
612 	/* Get a timestamp for use as boot time, if needed. */
613 	(void) time(&init_boot_time);
614 
615 	/* Get the default umask */
616 	cmask = umask(022);
617 	(void) umask(cmask);
618 
619 	/* Parse the arguments to init. Check for single user */
620 	opterr = 0;
621 	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
622 		switch (c) {
623 		case 'b':
624 			rflg = 0;
625 			bflg = 1;
626 			if (!sflg)
627 				sflg++;
628 			break;
629 		case 'r':
630 			bflg = 0;
631 			rflg++;
632 			break;
633 		case 's':
634 			if (!bflg)
635 				sflg++;
636 			break;
637 		case 'm':
638 			smf_options = optarg;
639 			smf_debug = (strstr(smf_options, "debug") != NULL);
640 			break;
641 		}
642 	}
643 
644 	/*
645 	 * Determine if we are the main init, or a user invoked init, whose job
646 	 * it is to inform init to change levels or perform some other action.
647 	 */
648 	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
649 	    sizeof (init_pid)) != sizeof (init_pid)) {
650 		(void) fprintf(stderr, "could not get pid for init\n");
651 		return (1);
652 	}
653 
654 	/*
655 	 * If this PID is not the same as the "true" init for the zone, then we
656 	 * must be in 'user' mode.
657 	 */
658 	if (getpid() != init_pid) {
659 		userinit(argc, argv);
660 	}
661 
662 	if (getzoneid() != GLOBAL_ZONEID) {
663 		print_banner = TRUE;
664 	}
665 
666 	/*
667 	 * Initialize state (and set "booting").
668 	 */
669 	st_init();
670 
671 	if (booting && print_banner) {
672 		struct utsname un;
673 		char buf[BUFSIZ], *isa;
674 		long ret;
675 		int bits = 32;
676 
677 		/*
678 		 * We want to print the boot banner as soon as
679 		 * possible.  In the global zone, the kernel does it,
680 		 * but we do not have that luxury in non-global zones,
681 		 * so we will print it here.
682 		 */
683 		(void) uname(&un);
684 		ret = sysinfo(SI_ISALIST, buf, sizeof (buf));
685 		if (ret != -1L && ret <= sizeof (buf)) {
686 			for (isa = strtok(buf, " "); isa;
687 			    isa = strtok(NULL, " ")) {
688 				if (strcmp(isa, "sparcv9") == 0 ||
689 				    strcmp(isa, "amd64") == 0) {
690 					bits = 64;
691 					break;
692 				}
693 			}
694 		}
695 
696 		console(B_FALSE,
697 		    "\n\n%s Release %s Version %s %d-bit\r\n",
698 		    un.sysname, un.release, un.version, bits);
699 		console(B_FALSE,
700 		    "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
701 		    " All rights reserved.\r\n");
702 	}
703 
704 	/*
705 	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
706 	 * so that it can be brought up in the state it was in when the
707 	 * system went down; or set to defaults if ioctl.syscon isn't
708 	 * valid.
709 	 *
710 	 * This needs to be done even if we're restarting so reset_modes()
711 	 * will work in case we need to go down to single user mode.
712 	 */
713 	write_ioctl = get_ioctl_syscon();
714 
715 	/*
716 	 * Set up all signals to be caught or ignored as appropriate.
717 	 */
718 	init_signals();
719 
720 	/* Load glob_envp from ENVFILE. */
721 	init_env();
722 
723 	contracts_init();
724 
725 	if (!booting) {
726 		/* cur_state should have been read in. */
727 
728 		op_modes = NORMAL_MODES;
729 
730 		/* Rewrite the ioctl file if it was bad. */
731 		if (write_ioctl)
732 			write_ioctl_syscon();
733 	} else {
734 		/*
735 		 * It's fine to boot up with state as zero, because
736 		 * startd will later tell us the real state.
737 		 */
738 		cur_state = 0;
739 		op_modes = BOOT_MODES;
740 
741 		boot_init();
742 	}
743 
744 	prev_state = prior_state = cur_state;
745 
746 	setup_pipe();
747 
748 	/*
749 	 * Here is the beginning of the main process loop.
750 	 */
751 	for (;;) {
752 		if (lvlq_received) {
753 			setup_pipe();
754 			lvlq_received = B_FALSE;
755 		}
756 
757 		/*
758 		 * Clean up any accounting records for dead "godchildren".
759 		 */
760 		if (Gchild)
761 			cleanaux();
762 
763 		/*
764 		 * If in "normal" mode, check all living processes and initiate
765 		 * kill sequence on those that should not be there anymore.
766 		 */
767 		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
768 		    cur_state != LVLb && cur_state != LVLc)
769 			remv();
770 
771 		/*
772 		 * If a change in run levels is the reason we awoke, now do
773 		 * the accounting to report the change in the utmp file.
774 		 * Also report the change on the system console.
775 		 */
776 		if (chg_lvl_flag) {
777 			chg_lvl_flag = FALSE;
778 
779 			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
780 				char rl = state_to_name(cur_state);
781 
782 				if (rl != -1)
783 					lscf_set_runlevel(rl);
784 			}
785 
786 			may_need_audit = 1;
787 		}
788 
789 		/*
790 		 * Scan the inittab file and spawn and respawn processes that
791 		 * should be alive in the current state. If inittab does not
792 		 * exist default to  single user mode.
793 		 */
794 		if (spawn_processes() == FAILURE) {
795 			prior_state = prev_state;
796 			cur_state = SINGLE_USER;
797 		}
798 
799 		/* If any respawns occurred, take note. */
800 		if (rsflag) {
801 			rsflag = 0;
802 			spawncnt++;
803 		}
804 
805 		/*
806 		 * If a powerfail signal was received during the last
807 		 * sequence, set mode to powerfail.  When spawn_processes() is
808 		 * entered the first thing it does is to check "powerhit".  If
809 		 * it is in PF_MODES then it clears "powerhit" and does
810 		 * a powerfail sequence.  If it is not in PF_MODES, then it
811 		 * puts itself in PF_MODES and then clears "powerhit".  Should
812 		 * "powerhit" get set again while spawn_processes() is working
813 		 * on a powerfail sequence, the following code  will see that
814 		 * spawn_processes() tries to execute the powerfail sequence
815 		 * again.  This guarantees that the powerfail sequence will be
816 		 * successfully completed before further processing takes
817 		 * place.
818 		 */
819 		if (wakeup.w_flags.w_powerhit) {
820 			op_modes = PF_MODES;
821 			/*
822 			 * Make sure that cur_state != prev_state so that
823 			 * ONCE and WAIT types work.
824 			 */
825 			prev_state = 0;
826 		} else if (op_modes != NORMAL_MODES) {
827 			/*
828 			 * If spawn_processes() was not just called while in
829 			 * normal mode, we set the mode to normal and it will
830 			 * be called again to check normal modes.  If we have
831 			 * just finished a powerfail sequence with prev_state
832 			 * equal to zero, we set prev_state equal to cur_state
833 			 * before the next pass through.
834 			 */
835 			if (op_modes == PF_MODES)
836 				prev_state = cur_state;
837 			op_modes = NORMAL_MODES;
838 		} else if (cur_state == LVLa || cur_state == LVLb ||
839 		    cur_state == LVLc) {
840 			/*
841 			 * If it was a change of levels that awakened us and the
842 			 * new level is one of the demand levels then reset
843 			 * cur_state to the previous state and do another scan
844 			 * to take care of the usual respawn actions.
845 			 */
846 			cur_state = prior_state;
847 			prior_state = prev_state;
848 			prev_state = cur_state;
849 		} else {
850 			prev_state = cur_state;
851 
852 			if (wakeup.w_mask == 0) {
853 				int ret;
854 
855 				if (may_need_audit && (cur_state == LVL3)) {
856 					msg = audit_boot_msg();
857 
858 					may_need_audit = 0;
859 					(void) audit_put_record(ADT_SUCCESS,
860 					    ADT_SUCCESS, msg);
861 					free(msg);
862 				}
863 
864 				/*
865 				 * "init" is finished with all actions for
866 				 * the current wakeup.
867 				 */
868 				ret = poll(poll_fds, poll_nfds,
869 				    SLEEPTIME * MILLISEC);
870 				pausecnt++;
871 				if (ret > 0)
872 					contract_event(&poll_fds[0]);
873 				else if (ret < 0 && errno != EINTR)
874 					console(B_TRUE, "poll() error: %s\n",
875 					    strerror(errno));
876 			}
877 
878 			if (wakeup.w_flags.w_usersignal) {
879 				/*
880 				 * Install the new level.  This could be a real
881 				 * change in levels  or a telinit [Q|a|b|c] or
882 				 * just a telinit to the same level at which
883 				 * we are running.
884 				 */
885 				if (new_state != cur_state) {
886 					if (new_state == LVLa ||
887 					    new_state == LVLb ||
888 					    new_state == LVLc) {
889 						prev_state = prior_state;
890 						prior_state = cur_state;
891 						cur_state = new_state;
892 					} else {
893 						prev_state = cur_state;
894 						if (cur_state >= 0)
895 							prior_state = cur_state;
896 						cur_state = new_state;
897 						chg_lvl_flag = TRUE;
898 					}
899 				}
900 
901 				new_state = 0;
902 			}
903 
904 			if (wakeup.w_flags.w_powerhit)
905 				op_modes = PF_MODES;
906 
907 			/*
908 			 * Clear all wakeup reasons.
909 			 */
910 			wakeup.w_mask = 0;
911 		}
912 	}
913 
914 	/*NOTREACHED*/
915 }
916 
917 static void
918 update_boot_archive(int new_state)
919 {
920 	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
921 		return;
922 
923 	if (getzoneid() != GLOBAL_ZONEID)
924 		return;
925 
926 	(void) system("/sbin/bootadm -ea update_all");
927 }
928 
929 /*
930  * void enter_maintenance()
931  *   A simple invocation of sulogin(1M), with no baggage, in the case that we
932  *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
933  *   we wait for it to exit.
934  */
935 static void
936 enter_maintenance()
937 {
938 	struct PROC_TABLE	*su_process;
939 
940 	console(B_FALSE, "Requesting maintenance mode\n"
941 	    "(See /lib/svc/share/README for additional information.)\n");
942 	(void) sighold(SIGCLD);
943 	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
944 		(void) pause();
945 	(void) sigrelse(SIGCLD);
946 	if (su_process == NULLPROC) {
947 		int fd;
948 
949 		(void) fclose(stdin);
950 		(void) fclose(stdout);
951 		(void) fclose(stderr);
952 		closefrom(0);
953 
954 		fd = open(SYSCON, O_RDWR | O_NOCTTY);
955 		if (fd >= 0) {
956 			(void) dup2(fd, 1);
957 			(void) dup2(fd, 2);
958 		} else {
959 			/*
960 			 * Need to issue an error message somewhere.
961 			 */
962 			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
963 			    getpid(), SYSCON, strerror(errno));
964 		}
965 
966 		/*
967 		 * Execute the "su" program.
968 		 */
969 		(void) execle(SU, SU, "-", (char *)0, glob_envp);
970 		console(B_TRUE, "execle of %s failed: %s\n", SU,
971 		    strerror(errno));
972 		timer(5);
973 		exit(1);
974 	}
975 
976 	/*
977 	 * If we are the parent, wait around for the child to die
978 	 * or for "init" to be signaled to change levels.
979 	 */
980 	while (waitproc(su_process) == FAILURE) {
981 		/*
982 		 * All other reasons for waking are ignored when in
983 		 * single-user mode.  The only child we are interested
984 		 * in is being waited for explicitly by waitproc().
985 		 */
986 		wakeup.w_mask = 0;
987 	}
988 }
989 
990 /*
991  * remv() scans through "proc_table" and performs cleanup.  If
992  * there is a process in the table, which shouldn't be here at
993  * the current run level, then remv() kills the process.
994  */
995 static void
996 remv()
997 {
998 	struct PROC_TABLE	*process;
999 	struct CMD_LINE		cmd;
1000 	char			cmd_string[MAXCMDL];
1001 	int			change_level;
1002 
1003 	change_level = (cur_state != prev_state ? TRUE : FALSE);
1004 
1005 	/*
1006 	 * Clear the TOUCHED flag on all entries so that when we have
1007 	 * finished scanning inittab, we will be able to tell if we
1008 	 * have any processes for which there is no entry in inittab.
1009 	 */
1010 	for (process = proc_table;
1011 	    (process < proc_table + num_proc); process++) {
1012 		process->p_flags &= ~TOUCHED;
1013 	}
1014 
1015 	/*
1016 	 * Scan all inittab entries.
1017 	 */
1018 	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
1019 		/* Scan for process which goes with this entry in inittab. */
1020 		for (process = proc_table;
1021 		    (process < proc_table + num_proc); process++) {
1022 			if ((process->p_flags & OCCUPIED) == 0 ||
1023 			    !id_eq(process->p_id, cmd.c_id))
1024 				continue;
1025 
1026 			/*
1027 			 * This slot contains the process we are looking for.
1028 			 */
1029 
1030 			/*
1031 			 * Is the cur_state SINGLE_USER or is this process
1032 			 * marked as "off" or was this proc started by some
1033 			 * mechanism other than LVL{a|b|c} and the current level
1034 			 * does not support this process?
1035 			 */
1036 			if (cur_state == SINGLE_USER ||
1037 			    cmd.c_action == M_OFF ||
1038 			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1039 			    (process->p_flags & DEMANDREQUEST) == 0)) {
1040 				if (process->p_flags & LIVING) {
1041 					/*
1042 					 * Touch this entry so we know we have
1043 					 * treated it.  Note that procs which
1044 					 * are already dead at this point and
1045 					 * should not be restarted are left
1046 					 * untouched.  This causes their slot to
1047 					 * be freed later after dead accounting
1048 					 * is done.
1049 					 */
1050 					process->p_flags |= TOUCHED;
1051 
1052 					if ((process->p_flags & KILLED) == 0) {
1053 						if (change_level) {
1054 							process->p_flags
1055 							    |= WARNED;
1056 							(void) kill(
1057 							    process->p_pid,
1058 							    SIGTERM);
1059 						} else {
1060 							/*
1061 							 * Fork a killing proc
1062 							 * so "init" can
1063 							 * continue without
1064 							 * having to pause for
1065 							 * TWARN seconds.
1066 							 */
1067 							killproc(
1068 							    process->p_pid);
1069 						}
1070 						process->p_flags |= KILLED;
1071 					}
1072 				}
1073 			} else {
1074 				/*
1075 				 * Process can exist at current level.  If it is
1076 				 * still alive or a DEMANDREQUEST we touch it so
1077 				 * it will be left alone.  Otherwise we leave it
1078 				 * untouched so it will be accounted for and
1079 				 * cleaned up later in remv().  Dead
1080 				 * DEMANDREQUESTs will be accounted but not
1081 				 * freed.
1082 				 */
1083 				if (process->p_flags &
1084 				    (LIVING|NOCLEANUP|DEMANDREQUEST))
1085 					process->p_flags |= TOUCHED;
1086 			}
1087 
1088 			break;
1089 		}
1090 	}
1091 
1092 	st_write();
1093 
1094 	/*
1095 	 * If this was a change of levels call, scan through the
1096 	 * process table for processes that were warned to die.  If any
1097 	 * are found that haven't left yet, sleep for TWARN seconds and
1098 	 * then send final terminations to any that haven't died yet.
1099 	 */
1100 	if (change_level) {
1101 
1102 		/*
1103 		 * Set the alarm for TWARN seconds on the assumption
1104 		 * that there will be some that need to be waited for.
1105 		 * This won't harm anything except we are guaranteed to
1106 		 * wakeup in TWARN seconds whether we need to or not.
1107 		 */
1108 		setimer(TWARN);
1109 
1110 		/*
1111 		 * Scan for processes which should be dying.  We hope they
1112 		 * will die without having to be sent a SIGKILL signal.
1113 		 */
1114 		for (process = proc_table;
1115 		    (process < proc_table + num_proc); process++) {
1116 			/*
1117 			 * If this process should die, hasn't yet, and the
1118 			 * TWARN time hasn't expired yet, wait for process
1119 			 * to die or for timer to expire.
1120 			 */
1121 			while (time_up == FALSE &&
1122 			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1123 			    (WARNED|LIVING|OCCUPIED))
1124 				(void) pause();
1125 
1126 			if (time_up == TRUE)
1127 				break;
1128 		}
1129 
1130 		/*
1131 		 * If we reached the end of the table without the timer
1132 		 * expiring, then there are no procs which will have to be
1133 		 * sent the SIGKILL signal.  If the timer has expired, then
1134 		 * it is necessary to scan the table again and send signals
1135 		 * to all processes which aren't going away nicely.
1136 		 */
1137 		if (time_up == TRUE) {
1138 			for (process = proc_table;
1139 			    (process < proc_table + num_proc); process++) {
1140 				if ((process->p_flags &
1141 				    (WARNED|LIVING|OCCUPIED)) ==
1142 				    (WARNED|LIVING|OCCUPIED))
1143 					(void) kill(process->p_pid, SIGKILL);
1144 			}
1145 		}
1146 		setimer(0);
1147 	}
1148 
1149 	/*
1150 	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
1151 	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1152 	 * by the above scanning), and haven't been sent kill signals, and
1153 	 * those entries marked not LIVING, NAMED.  The former procs are killed.
1154 	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
1155 	 */
1156 	for (process = proc_table;
1157 	    (process < proc_table + num_proc); process++) {
1158 		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1159 		    == (LIVING|NAMED|OCCUPIED)) {
1160 			killproc(process->p_pid);
1161 			process->p_flags |= KILLED;
1162 		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1163 		    (NAMED|OCCUPIED)) {
1164 			(void) account(DEAD_PROCESS, process, NULL);
1165 			/*
1166 			 * If this named proc hasn't been TOUCHED, then free the
1167 			 * space. It has either died of it's own accord, but
1168 			 * isn't respawnable or it was killed because it
1169 			 * shouldn't exist at this level.
1170 			 */
1171 			if ((process->p_flags & TOUCHED) == 0)
1172 				process->p_flags = 0;
1173 		}
1174 	}
1175 
1176 	st_write();
1177 }
1178 
1179 /*
1180  * Extract the svc.startd command line and whether to restart it from its
1181  * inittab entry.
1182  */
1183 /*ARGSUSED*/
1184 static void
1185 process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1186 {
1187 	size_t sz;
1188 
1189 	/* Save the command line. */
1190 	if (sflg || rflg) {
1191 		/* Also append -r or -s. */
1192 		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1193 		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
1194 		if (sflg)
1195 			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1196 		if (rflg)
1197 			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1198 	} else {
1199 		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1200 	}
1201 
1202 	if (sz >= sizeof (startd_cline)) {
1203 		console(B_TRUE,
1204 		    "svc.startd command line too long.  Ignoring.\n");
1205 		startd_cline[0] = '\0';
1206 		return;
1207 	}
1208 }
1209 
1210 /*
1211  * spawn_processes() scans inittab for entries which should be run at this
1212  * mode.  Processes which should be running but are not, are started.
1213  */
1214 static int
1215 spawn_processes()
1216 {
1217 	struct PROC_TABLE		*pp;
1218 	struct CMD_LINE			cmd;
1219 	char				cmd_string[MAXCMDL];
1220 	short				lvl_mask;
1221 	int				status;
1222 
1223 	/*
1224 	 * First check the "powerhit" flag.  If it is set, make sure the modes
1225 	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1226 	 * on the "powerhit" flag by disallowing a new powerfail interrupt
1227 	 * between the test of the powerhit flag and the clearing of it.
1228 	 */
1229 	if (wakeup.w_flags.w_powerhit) {
1230 		wakeup.w_flags.w_powerhit = 0;
1231 		op_modes = PF_MODES;
1232 	}
1233 	lvl_mask = state_to_mask(cur_state);
1234 
1235 	/*
1236 	 * Scan through all the entries in inittab.
1237 	 */
1238 	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1239 		if (id_eq(cmd.c_id, "smf")) {
1240 			process_startd_line(&cmd, cmd_string);
1241 			continue;
1242 		}
1243 
1244 retry_for_proc_slot:
1245 
1246 		/*
1247 		 * Find out if there is a process slot for this entry already.
1248 		 */
1249 		if ((pp = findpslot(&cmd)) == NULLPROC) {
1250 			/*
1251 			 * we've run out of proc table entries
1252 			 * increase proc_table.
1253 			 */
1254 			increase_proc_table_size();
1255 
1256 			/*
1257 			 * Retry now as we have an empty proc slot.
1258 			 * In case increase_proc_table_size() fails,
1259 			 * we will keep retrying.
1260 			 */
1261 			goto retry_for_proc_slot;
1262 		}
1263 
1264 		/*
1265 		 * If there is an entry, and it is marked as DEMANDREQUEST,
1266 		 * one of the levels a, b, or c is in its levels mask, and
1267 		 * the action field is ONDEMAND and ONDEMAND is a permissable
1268 		 * mode, and the process is dead, then respawn it.
1269 		 */
1270 		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1271 		    (cmd.c_levels & MASK_abc) &&
1272 		    (cmd.c_action & op_modes) == M_ONDEMAND) {
1273 			spawn(pp, &cmd);
1274 			continue;
1275 		}
1276 
1277 		/*
1278 		 * If the action is not an action we are interested in,
1279 		 * skip the entry.
1280 		 */
1281 		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1282 		    (cmd.c_levels & lvl_mask) == 0)
1283 			continue;
1284 
1285 		/*
1286 		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1287 		 * ONDEMAND) and the action field is either OFF or the action
1288 		 * field is ONCE or WAIT and the current level is the same as
1289 		 * the last level, then skip this entry.  ONCE and WAIT only
1290 		 * get run when the level changes.
1291 		 */
1292 		if (op_modes == NORMAL_MODES &&
1293 		    (cmd.c_action == M_OFF ||
1294 		    (cmd.c_action & (M_ONCE|M_WAIT)) &&
1295 		    cur_state == prev_state))
1296 			continue;
1297 
1298 		/*
1299 		 * At this point we are interested in performing the action for
1300 		 * this entry.  Actions fall into two categories, spinning off
1301 		 * a process and not waiting, and spinning off a process and
1302 		 * waiting for it to die.  If the action is ONCE, RESPAWN,
1303 		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1304 		 * to die, for all other actions we do wait.
1305 		 */
1306 		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1307 			spawn(pp, &cmd);
1308 
1309 		} else {
1310 			spawn(pp, &cmd);
1311 			while (waitproc(pp) == FAILURE)
1312 				;
1313 			(void) account(DEAD_PROCESS, pp, NULL);
1314 			pp->p_flags = 0;
1315 		}
1316 	}
1317 	return (status);
1318 }
1319 
1320 /*
1321  * spawn() spawns a shell, inserts the information about the process
1322  * process into the proc_table, and does the startup accounting.
1323  */
1324 static void
1325 spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1326 {
1327 	int		i;
1328 	int		modes, maxfiles;
1329 	time_t		now;
1330 	struct PROC_TABLE tmproc, *oprocess;
1331 
1332 	/*
1333 	 * The modes to be sent to efork() are 0 unless we are
1334 	 * spawning a LVLa, LVLb, or LVLc entry or we will be
1335 	 * waiting for the death of the child before continuing.
1336 	 */
1337 	modes = NAMED;
1338 	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1339 	    cur_state == LVLb || cur_state == LVLc)
1340 		modes |= DEMANDREQUEST;
1341 	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1342 		modes |= NOCLEANUP;
1343 
1344 	/*
1345 	 * If this is a respawnable process, check the threshold
1346 	 * information to avoid excessive respawns.
1347 	 */
1348 	if (cmd->c_action & M_RESPAWN) {
1349 		/*
1350 		 * Add NOCLEANUP to all respawnable commands so that the
1351 		 * information about the frequency of respawns isn't lost.
1352 		 */
1353 		modes |= NOCLEANUP;
1354 		(void) time(&now);
1355 
1356 		/*
1357 		 * If no time is assigned, then this is the first time
1358 		 * this command is being processed in this series.  Assign
1359 		 * the current time.
1360 		 */
1361 		if (process->p_time == 0L)
1362 			process->p_time = now;
1363 
1364 		if (process->p_count++ == SPAWN_LIMIT) {
1365 
1366 			if ((now - process->p_time) < SPAWN_INTERVAL) {
1367 				/*
1368 				 * Process is respawning too rapidly.  Print
1369 				 * message and refuse to respawn it for now.
1370 				 */
1371 				console(B_TRUE, "Command is respawning too "
1372 				    "rapidly. Check for possible errors.\n"
1373 				    "id:%4s \"%s\"\n",
1374 				    &cmd->c_id[0], &cmd->c_command[EXEC]);
1375 				return;
1376 			}
1377 			process->p_time = now;
1378 			process->p_count = 0;
1379 
1380 		} else if (process->p_count > SPAWN_LIMIT) {
1381 			/*
1382 			 * If process has been respawning too rapidly and
1383 			 * the inhibit time limit hasn't expired yet, we
1384 			 * refuse to respawn.
1385 			 */
1386 			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1387 				return;
1388 			process->p_time = now;
1389 			process->p_count = 0;
1390 		}
1391 		rsflag = TRUE;
1392 	}
1393 
1394 	/*
1395 	 * Spawn a child process to execute this command.
1396 	 */
1397 	(void) sighold(SIGCLD);
1398 	oprocess = process;
1399 	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1400 		(void) pause();
1401 
1402 	if (process == NULLPROC) {
1403 
1404 		/*
1405 		 * We are the child.  We must make sure we get a different
1406 		 * file pointer for our references to utmpx.  Otherwise our
1407 		 * seeks and reads will compete with those of the parent.
1408 		 */
1409 		endutxent();
1410 
1411 		/*
1412 		 * Perform the accounting for the beginning of a process.
1413 		 * Note that all processes are initially "INIT_PROCESS"es.
1414 		 */
1415 		tmproc.p_id[0] = cmd->c_id[0];
1416 		tmproc.p_id[1] = cmd->c_id[1];
1417 		tmproc.p_id[2] = cmd->c_id[2];
1418 		tmproc.p_id[3] = cmd->c_id[3];
1419 		tmproc.p_pid = getpid();
1420 		tmproc.p_exit = 0;
1421 		(void) account(INIT_PROCESS, &tmproc,
1422 		    prog_name(&cmd->c_command[EXEC]));
1423 		maxfiles = ulimit(UL_GDESLIM, 0);
1424 		for (i = 0; i < maxfiles; i++)
1425 			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
1426 
1427 		/*
1428 		 * Now exec a shell with the -c option and the command
1429 		 * from inittab.
1430 		 */
1431 		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1432 		    glob_envp);
1433 		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1434 		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
1435 
1436 		/*
1437 		 * Don't come back so quickly that "init" doesn't have a
1438 		 * chance to finish putting this child in "proc_table".
1439 		 */
1440 		timer(20);
1441 		exit(1);
1442 
1443 	}
1444 
1445 	/*
1446 	 * We are the parent.  Insert the necessary
1447 	 * information in the proc_table.
1448 	 */
1449 	process->p_id[0] = cmd->c_id[0];
1450 	process->p_id[1] = cmd->c_id[1];
1451 	process->p_id[2] = cmd->c_id[2];
1452 	process->p_id[3] = cmd->c_id[3];
1453 
1454 	st_write();
1455 
1456 	(void) sigrelse(SIGCLD);
1457 }
1458 
1459 /*
1460  * findpslot() finds the old slot in the process table for the
1461  * command with the same id, or it finds an empty slot.
1462  */
1463 static struct PROC_TABLE *
1464 findpslot(struct CMD_LINE *cmd)
1465 {
1466 	struct PROC_TABLE	*process;
1467 	struct PROC_TABLE	*empty = NULLPROC;
1468 
1469 	for (process = proc_table;
1470 	    (process < proc_table + num_proc); process++) {
1471 		if (process->p_flags & OCCUPIED &&
1472 		    id_eq(process->p_id, cmd->c_id))
1473 			break;
1474 
1475 		/*
1476 		 * If the entry is totally empty and "empty" is still 0,
1477 		 * remember where this hole is and make sure the slot is
1478 		 * zeroed out.
1479 		 */
1480 		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1481 			empty = process;
1482 			process->p_id[0] = '\0';
1483 			process->p_id[1] = '\0';
1484 			process->p_id[2] = '\0';
1485 			process->p_id[3] = '\0';
1486 			process->p_pid = 0;
1487 			process->p_time = 0L;
1488 			process->p_count = 0;
1489 			process->p_flags = 0;
1490 			process->p_exit = 0;
1491 		}
1492 	}
1493 
1494 	/*
1495 	 * If there is no entry for this slot, then there should be an
1496 	 * empty slot.  If there is no empty slot, then we've run out
1497 	 * of proc_table space.  If the latter is true, empty will be
1498 	 * NULL and the caller will have to complain.
1499 	 */
1500 	if (process == (proc_table + num_proc))
1501 		process = empty;
1502 
1503 	return (process);
1504 }
1505 
1506 /*
1507  * getcmd() parses lines from inittab.  Each time it finds a command line
1508  * it will return TRUE as well as fill the passed CMD_LINE structure and
1509  * the shell command string.  When the end of inittab is reached, FALSE
1510  * is returned inittab is automatically opened if it is not currently open
1511  * and is closed when the end of the file is reached.
1512  */
1513 static FILE *fp_inittab = NULL;
1514 
1515 static int
1516 getcmd(struct CMD_LINE *cmd, char *shcmd)
1517 {
1518 	char	*ptr;
1519 	int	c, lastc, state;
1520 	char 	*ptr1;
1521 	int	answer, i, proceed;
1522 	struct	stat	sbuf;
1523 	static char *actions[] = {
1524 		"off", "respawn", "ondemand", "once", "wait", "boot",
1525 		"bootwait", "powerfail", "powerwait", "initdefault",
1526 		"sysinit",
1527 	};
1528 	static short act_masks[] = {
1529 		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1530 		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1531 	};
1532 	/*
1533 	 * Only these actions will be allowed for entries which
1534 	 * are specified for single-user mode.
1535 	 */
1536 	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1537 
1538 	if (fp_inittab == NULL) {
1539 		/*
1540 		 * Before attempting to open inittab we stat it to make
1541 		 * sure it currently exists and is not empty.  We try
1542 		 * several times because someone may have temporarily
1543 		 * unlinked or truncated the file.
1544 		 */
1545 		for (i = 0; i < 3; i++) {
1546 			if (stat(INITTAB, &sbuf) == -1) {
1547 				if (i == 2) {
1548 					console(B_TRUE,
1549 					    "Cannot stat %s, errno: %d\n",
1550 					    INITTAB, errno);
1551 					return (FAILURE);
1552 				} else {
1553 					timer(3);
1554 				}
1555 			} else if (sbuf.st_size < 10) {
1556 				if (i == 2) {
1557 					console(B_TRUE,
1558 					    "%s truncated or corrupted\n",
1559 					    INITTAB);
1560 					return (FAILURE);
1561 				} else {
1562 					timer(3);
1563 				}
1564 			} else {
1565 				break;
1566 			}
1567 		}
1568 
1569 		/*
1570 		 * If unable to open inittab, print error message and
1571 		 * return FAILURE to caller.
1572 		 */
1573 		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1574 			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1575 			    errno);
1576 			return (FAILURE);
1577 		}
1578 	}
1579 
1580 	/*
1581 	 * Keep getting commands from inittab until you find a
1582 	 * good one or run out of file.
1583 	 */
1584 	for (answer = FALSE; answer == FALSE; ) {
1585 		/*
1586 		 * Zero out the cmd itself before trying next line.
1587 		 */
1588 		bzero(cmd, sizeof (struct CMD_LINE));
1589 
1590 		/*
1591 		 * Read in lines of inittab, parsing at colons, until a line is
1592 		 * read in which doesn't end with a backslash.  Do not start if
1593 		 * the first character read is an EOF.  Note that this means
1594 		 * that lines which don't end in a newline are still processed,
1595 		 * since the "for" will terminate normally once started,
1596 		 * regardless of whether line terminates with a newline or EOF.
1597 		 */
1598 		state = FAILURE;
1599 		if ((c = fgetc(fp_inittab)) == EOF) {
1600 			answer = FALSE;
1601 			(void) fclose(fp_inittab);
1602 			fp_inittab = NULL;
1603 			break;
1604 		}
1605 
1606 		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1607 		    proceed && c != EOF;
1608 		    lastc = c, c = fgetc(fp_inittab)) {
1609 			/* If we're not in the FAILURE state and haven't */
1610 			/* yet reached the shell command field, process	 */
1611 			/* the line, otherwise just look for a real end	 */
1612 			/* of line.					 */
1613 			if (state != FAILURE && state != COMMAND) {
1614 			/*
1615 			 * Squeeze out spaces and tabs.
1616 			 */
1617 			if (c == ' ' || c == '\t')
1618 				continue;
1619 
1620 			/*
1621 			 * Ignore characters in a comment, except for the \n.
1622 			 */
1623 			if (state == COMMENT) {
1624 				if (c == '\n') {
1625 					lastc = ' ';
1626 					break;
1627 				} else {
1628 					continue;
1629 				}
1630 			}
1631 
1632 			/*
1633 			 * Detect comments (lines whose first non-whitespace
1634 			 * character is '#') by checking that we're at the
1635 			 * beginning of a line, have seen a '#', and haven't
1636 			 * yet accumulated any characters.
1637 			 */
1638 			if (state == ID && c == '#' && ptr == shcmd) {
1639 				state = COMMENT;
1640 				continue;
1641 			}
1642 
1643 			/*
1644 			 * If the character is a ':', then check the
1645 			 * previous field for correctness and advance
1646 			 * to the next field.
1647 			 */
1648 			if (c == ':') {
1649 				switch (state) {
1650 
1651 				case ID :
1652 				/*
1653 				 * Check to see that there are only
1654 				 * 1 to 4 characters for the id.
1655 				 */
1656 				if ((i = ptr - shcmd) < 1 || i > 4) {
1657 					state = FAILURE;
1658 				} else {
1659 					bcopy(shcmd, &cmd->c_id[0], i);
1660 					ptr = shcmd;
1661 					state = LEVELS;
1662 				}
1663 				break;
1664 
1665 				case LEVELS :
1666 				/*
1667 				 * Build a mask for all the levels for
1668 				 * which this command will be legal.
1669 				 */
1670 				for (cmd->c_levels = 0, ptr1 = shcmd;
1671 				    ptr1 < ptr; ptr1++) {
1672 					int mask;
1673 					if (lvlname_to_mask(*ptr1,
1674 					    &mask) == -1) {
1675 						state = FAILURE;
1676 						break;
1677 					}
1678 					cmd->c_levels |= mask;
1679 				}
1680 				if (state != FAILURE) {
1681 					state = ACTION;
1682 					ptr = shcmd;	/* Reset the buffer */
1683 				}
1684 				break;
1685 
1686 				case ACTION :
1687 				/*
1688 				 * Null terminate the string in shcmd buffer and
1689 				 * then try to match against legal actions.  If
1690 				 * the field is of length 0, then the default of
1691 				 * "RESPAWN" is used if the id is numeric,
1692 				 * otherwise the default is "OFF".
1693 				 */
1694 				if (ptr == shcmd) {
1695 					if (isdigit(cmd->c_id[0]) &&
1696 					    (cmd->c_id[1] == '\0' ||
1697 					    isdigit(cmd->c_id[1])) &&
1698 					    (cmd->c_id[2] == '\0' ||
1699 					    isdigit(cmd->c_id[2])) &&
1700 					    (cmd->c_id[3] == '\0' ||
1701 					    isdigit(cmd->c_id[3])))
1702 						cmd->c_action = M_RESPAWN;
1703 					else
1704 						cmd->c_action = M_OFF;
1705 				} else {
1706 					for (cmd->c_action = 0, i = 0,
1707 					    *ptr = '\0';
1708 					    i <
1709 					    sizeof (actions)/sizeof (char *);
1710 					    i++) {
1711 					if (strcmp(shcmd, actions[i]) == 0) {
1712 						if ((cmd->c_levels & MASKSU) &&
1713 						    !(act_masks[i] & su_acts))
1714 							cmd->c_action = 0;
1715 						else
1716 							cmd->c_action =
1717 							    act_masks[i];
1718 						break;
1719 					}
1720 					}
1721 				}
1722 
1723 				/*
1724 				 * If the action didn't match any legal action,
1725 				 * set state to FAILURE.
1726 				 */
1727 				if (cmd->c_action == 0) {
1728 					state = FAILURE;
1729 				} else {
1730 					state = COMMAND;
1731 					(void) strcpy(shcmd, "exec ");
1732 				}
1733 				ptr = shcmd + EXEC;
1734 				break;
1735 				}
1736 				continue;
1737 			}
1738 		}
1739 
1740 		/* If the character is a '\n', then this is the end of a */
1741 		/* line.  If the '\n' wasn't preceded by a backslash, */
1742 		/* it is also the end of an inittab command.  If it was */
1743 		/* preceded by a backslash then the next line is a */
1744 		/* continuation.  Note that the continuation '\n' falls */
1745 		/* through and is treated like other characters and is */
1746 		/* stored in the shell command line. */
1747 		if (c == '\n' && lastc != '\\') {
1748 			proceed = FALSE;
1749 			*ptr = '\0';
1750 			break;
1751 		}
1752 
1753 		/* For all other characters just stuff them into the */
1754 		/* command as long as there aren't too many of them. */
1755 		/* Make sure there is room for a terminating '\0' also. */
1756 		if (ptr >= shcmd + MAXCMDL - 1)
1757 			state = FAILURE;
1758 		else
1759 			*ptr++ = (char)c;
1760 
1761 		/* If the character we just stored was a quoted	*/
1762 		/* backslash, then change "c" to '\0', so that this	*/
1763 		/* backslash will not cause a subsequent '\n' to appear */
1764 		/* quoted.  In otherwords '\' '\' '\n' is the real end */
1765 		/* of a command, while '\' '\n' is a continuation. */
1766 		if (c == '\\' && lastc == '\\')
1767 			c = '\0';
1768 		}
1769 
1770 		/*
1771 		 * Make sure all the fields are properly specified
1772 		 * for a good command line.
1773 		 */
1774 		if (state == COMMAND) {
1775 			answer = TRUE;
1776 			cmd->c_command = shcmd;
1777 
1778 			/*
1779 			 * If no default level was supplied, insert
1780 			 * all numerical levels.
1781 			 */
1782 			if (cmd->c_levels == 0)
1783 				cmd->c_levels = MASK_NUMERIC;
1784 
1785 			/*
1786 			 * If no action has been supplied, declare this
1787 			 * entry to be OFF.
1788 			 */
1789 			if (cmd->c_action == 0)
1790 				cmd->c_action = M_OFF;
1791 
1792 			/*
1793 			 * If no shell command has been supplied, make sure
1794 			 * there is a null string in the command field.
1795 			 */
1796 			if (ptr == shcmd + EXEC)
1797 				*shcmd = '\0';
1798 		} else
1799 			answer = FALSE;
1800 
1801 		/*
1802 		 * If we have reached the end of inittab, then close it
1803 		 * and quit trying to find a good command line.
1804 		 */
1805 		if (c == EOF) {
1806 			(void) fclose(fp_inittab);
1807 			fp_inittab = NULL;
1808 			break;
1809 		}
1810 	}
1811 	return (answer);
1812 }
1813 
1814 /*
1815  * lvlname_to_state(): convert the character name of a state to its level
1816  * (its corresponding signal number).
1817  */
1818 static int
1819 lvlname_to_state(char name)
1820 {
1821 	int i;
1822 	for (i = 0; i < LVL_NELEMS; i++) {
1823 		if (lvls[i].lvl_name == name)
1824 			return (lvls[i].lvl_state);
1825 	}
1826 	return (-1);
1827 }
1828 
1829 /*
1830  * state_to_name(): convert the level to the character name.
1831  */
1832 static char
1833 state_to_name(int state)
1834 {
1835 	int i;
1836 	for (i = 0; i < LVL_NELEMS; i++) {
1837 		if (lvls[i].lvl_state == state)
1838 			return (lvls[i].lvl_name);
1839 	}
1840 	return (-1);
1841 }
1842 
1843 /*
1844  * state_to_mask(): return the mask corresponding to a signal number
1845  */
1846 static int
1847 state_to_mask(int state)
1848 {
1849 	int i;
1850 	for (i = 0; i < LVL_NELEMS; i++) {
1851 		if (lvls[i].lvl_state == state)
1852 			return (lvls[i].lvl_mask);
1853 	}
1854 	return (0);	/* return 0, since that represents an empty mask */
1855 }
1856 
1857 /*
1858  * lvlname_to_mask(): return the mask corresponding to a levels character name
1859  */
1860 static int
1861 lvlname_to_mask(char name, int *mask)
1862 {
1863 	int i;
1864 	for (i = 0; i < LVL_NELEMS; i++) {
1865 		if (lvls[i].lvl_name == name) {
1866 			*mask = lvls[i].lvl_mask;
1867 			return (0);
1868 		}
1869 	}
1870 	return (-1);
1871 }
1872 
1873 /*
1874  * state_to_flags(): return the flags corresponding to a runlevel.  These
1875  * indicate properties of that runlevel.
1876  */
1877 static int
1878 state_to_flags(int state)
1879 {
1880 	int i;
1881 	for (i = 0; i < LVL_NELEMS; i++) {
1882 		if (lvls[i].lvl_state == state)
1883 			return (lvls[i].lvl_flags);
1884 	}
1885 	return (0);
1886 }
1887 
1888 /*
1889  * killproc() creates a child which kills the process specified by pid.
1890  */
1891 void
1892 killproc(pid_t pid)
1893 {
1894 	struct PROC_TABLE	*process;
1895 
1896 	(void) sighold(SIGCLD);
1897 	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1898 		(void) pause();
1899 	(void) sigrelse(SIGCLD);
1900 
1901 	if (process == NULLPROC) {
1902 		/*
1903 		 * efork() sets all signal handlers to the default, so reset
1904 		 * the ALRM handler to make timer() work as expected.
1905 		 */
1906 		(void) sigset(SIGALRM, alarmclk);
1907 
1908 		/*
1909 		 * We are the child.  Try to terminate the process nicely
1910 		 * first using SIGTERM and if it refuses to die in TWARN
1911 		 * seconds kill it with SIGKILL.
1912 		 */
1913 		(void) kill(pid, SIGTERM);
1914 		(void) timer(TWARN);
1915 		(void) kill(pid, SIGKILL);
1916 		(void) exit(0);
1917 	}
1918 }
1919 
1920 /*
1921  * Set up the default environment for all procs to be forked from init.
1922  * Read the values from the /etc/default/init file, except for PATH.  If
1923  * there's not enough room in the environment array, the environment
1924  * lines that don't fit are silently discarded.
1925  */
1926 void
1927 init_env()
1928 {
1929 	char	line[MAXCMDL];
1930 	FILE	*fp;
1931 	int	inquotes, length, wslength;
1932 	char	*tokp, *cp1, *cp2;
1933 
1934 	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1935 	(void) strcpy(glob_envp[0], DEF_PATH);
1936 	glob_envn = 1;
1937 
1938 	if (rflg) {
1939 		glob_envp[1] =
1940 		    malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1941 		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1942 		++glob_envn;
1943 	} else if (bflg == 1) {
1944 		glob_envp[1] =
1945 		    malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1946 		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1947 		++glob_envn;
1948 	}
1949 
1950 	if ((fp = fopen(ENVFILE, "r")) == NULL) {
1951 		console(B_TRUE,
1952 		    "Cannot open %s. Environment not initialized.\n",
1953 		    ENVFILE);
1954 	} else {
1955 		while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1956 		    glob_envn < MAXENVENT - 2) {
1957 			/*
1958 			 * Toss newline
1959 			 */
1960 			length = strlen(line);
1961 			if (line[length - 1] == '\n')
1962 				line[length - 1] = '\0';
1963 
1964 			/*
1965 			 * Ignore blank or comment lines.
1966 			 */
1967 			if (line[0] == '#' || line[0] == '\0' ||
1968 			    (wslength = strspn(line, " \t\n")) ==
1969 			    strlen(line) ||
1970 			    strchr(line, '#') == line + wslength)
1971 				continue;
1972 
1973 			/*
1974 			 * First make a pass through the line and change
1975 			 * any non-quoted semi-colons to blanks so they
1976 			 * will be treated as token separators below.
1977 			 */
1978 			inquotes = 0;
1979 			for (cp1 = line; *cp1 != '\0'; cp1++) {
1980 				if (*cp1 == '"') {
1981 					if (inquotes == 0)
1982 						inquotes = 1;
1983 					else
1984 						inquotes = 0;
1985 				} else if (*cp1 == ';') {
1986 					if (inquotes == 0)
1987 						*cp1 = ' ';
1988 				}
1989 			}
1990 
1991 			/*
1992 			 * Tokens within the line are separated by blanks
1993 			 *  and tabs.  For each token in the line which
1994 			 * contains a '=' we strip out any quotes and then
1995 			 * stick the token in the environment array.
1996 			 */
1997 			if ((tokp = strtok(line, " \t")) == NULL)
1998 				continue;
1999 			do {
2000 				if (strchr(tokp, '=') == NULL)
2001 					continue;
2002 				length = strlen(tokp);
2003 				while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
2004 					for (cp2 = cp1;
2005 					    cp2 < &tokp[length]; cp2++)
2006 						*cp2 = *(cp2 + 1);
2007 					length--;
2008 				}
2009 
2010 				if (strncmp(tokp, "CMASK=",
2011 				    sizeof ("CMASK=") - 1) == 0) {
2012 					long t;
2013 
2014 					/* We know there's an = */
2015 					t = strtol(strchr(tokp, '=') + 1, NULL,
2016 					    8);
2017 
2018 					/* Sanity */
2019 					if (t <= 077 && t >= 0)
2020 						cmask = (int)t;
2021 					(void) umask(cmask);
2022 					continue;
2023 				}
2024 				glob_envp[glob_envn] =
2025 				    malloc((unsigned)(length + 1));
2026 				(void) strcpy(glob_envp[glob_envn], tokp);
2027 				if (++glob_envn >= MAXENVENT - 1)
2028 					break;
2029 			} while ((tokp = strtok(NULL, " \t")) != NULL);
2030 		}
2031 
2032 		/*
2033 		 * Append a null pointer to the environment array
2034 		 * to mark its end.
2035 		 */
2036 		glob_envp[glob_envn] = NULL;
2037 		(void) fclose(fp);
2038 	}
2039 }
2040 
2041 /*
2042  * boot_init(): Do initialization things that should be done at boot.
2043  */
2044 void
2045 boot_init()
2046 {
2047 	int i;
2048 	struct PROC_TABLE *process, *oprocess;
2049 	struct CMD_LINE	cmd;
2050 	char	line[MAXCMDL];
2051 	char	svc_aux[SVC_AUX_SIZE];
2052 	char	init_svc_fmri[SVC_FMRI_SIZE];
2053 	char *old_path;
2054 	int maxfiles;
2055 
2056 	/* Use INIT_PATH for sysinit cmds */
2057 	old_path = glob_envp[0];
2058 	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2059 	(void) strcpy(glob_envp[0], INIT_PATH);
2060 
2061 	/*
2062 	 * Scan inittab(4) and process the special svc.startd entry, initdefault
2063 	 * and sysinit entries.
2064 	 */
2065 	while (getcmd(&cmd, &line[0]) == TRUE) {
2066 		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
2067 			process_startd_line(&cmd, line);
2068 			(void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
2069 			    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2070 		} else if (cmd.c_action == M_INITDEFAULT) {
2071 			/*
2072 			 * initdefault is no longer meaningful, as the SMF
2073 			 * milestone controls what (legacy) run level we
2074 			 * boot to.
2075 			 */
2076 			console(B_TRUE,
2077 			    "Ignoring legacy \"initdefault\" entry.\n");
2078 		} else if (cmd.c_action == M_SYSINIT) {
2079 			/*
2080 			 * Execute the "sysinit" entry and wait for it to
2081 			 * complete.  No bookkeeping is performed on these
2082 			 * entries because we avoid writing to the file system
2083 			 * until after there has been an chance to check it.
2084 			 */
2085 			if (process = findpslot(&cmd)) {
2086 				(void) sighold(SIGCLD);
2087 				(void) snprintf(svc_aux, SVC_AUX_SIZE,
2088 				    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2089 				(void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
2090 				    SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
2091 				    cmd.c_id);
2092 				if (legacy_tmpl >= 0) {
2093 					(void) ct_pr_tmpl_set_svc_fmri(
2094 					    legacy_tmpl, init_svc_fmri);
2095 					(void) ct_pr_tmpl_set_svc_aux(
2096 					    legacy_tmpl, svc_aux);
2097 				}
2098 
2099 				for (oprocess = process;
2100 				    (process = efork(M_OFF, oprocess,
2101 				    (NAMED|NOCLEANUP))) == NO_ROOM;
2102 				    /* CSTYLED */)
2103 					;
2104 				(void) sigrelse(SIGCLD);
2105 
2106 				if (process == NULLPROC) {
2107 					maxfiles = ulimit(UL_GDESLIM, 0);
2108 
2109 					for (i = 0; i < maxfiles; i++)
2110 						(void) fcntl(i, F_SETFD,
2111 						    FD_CLOEXEC);
2112 					(void) execle(SH, "INITSH", "-c",
2113 					    cmd.c_command,
2114 					    (char *)0, glob_envp);
2115 					console(B_TRUE,
2116 "Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2117 					    cmd.c_command, errno);
2118 					exit(1);
2119 				} else
2120 					while (waitproc(process) == FAILURE)
2121 						;
2122 				process->p_flags = 0;
2123 				st_write();
2124 			}
2125 		}
2126 	}
2127 
2128 	/* Restore the path. */
2129 	free(glob_envp[0]);
2130 	glob_envp[0] = old_path;
2131 
2132 	/*
2133 	 * This will enable st_write() to complain about init_state_file.
2134 	 */
2135 	booting = 0;
2136 
2137 	/*
2138 	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2139 	 * out a correct version.
2140 	 */
2141 	if (write_ioctl)
2142 		write_ioctl_syscon();
2143 
2144 	/*
2145 	 * Start svc.startd(1M), which does most of the work.
2146 	 */
2147 	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2148 		/* Start svc.startd. */
2149 		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2150 			cur_state = SINGLE_USER;
2151 	} else {
2152 		console(B_TRUE, "Absent svc.startd entry or bad "
2153 		    "contract template.  Not starting svc.startd.\n");
2154 		enter_maintenance();
2155 	}
2156 }
2157 
2158 /*
2159  * init_signals(): Initialize all signals to either be caught or ignored.
2160  */
2161 void
2162 init_signals(void)
2163 {
2164 	struct sigaction act;
2165 	int i;
2166 
2167 	/*
2168 	 * Start by ignoring all signals, then selectively re-enable some.
2169 	 * The SIG_IGN disposition will only affect asynchronous signals:
2170 	 * any signal that we trigger synchronously that doesn't end up
2171 	 * being handled by siglvl() will be forcibly delivered by the kernel.
2172 	 */
2173 	for (i = SIGHUP; i <= SIGRTMAX; i++)
2174 		(void) sigset(i, SIG_IGN);
2175 
2176 	/*
2177 	 * Handle all level-changing signals using siglvl() and set sa_mask so
2178 	 * that all level-changing signals are blocked while in siglvl().
2179 	 */
2180 	act.sa_handler = siglvl;
2181 	act.sa_flags = SA_SIGINFO;
2182 	(void) sigemptyset(&act.sa_mask);
2183 
2184 	(void) sigaddset(&act.sa_mask, LVLQ);
2185 	(void) sigaddset(&act.sa_mask, LVL0);
2186 	(void) sigaddset(&act.sa_mask, LVL1);
2187 	(void) sigaddset(&act.sa_mask, LVL2);
2188 	(void) sigaddset(&act.sa_mask, LVL3);
2189 	(void) sigaddset(&act.sa_mask, LVL4);
2190 	(void) sigaddset(&act.sa_mask, LVL5);
2191 	(void) sigaddset(&act.sa_mask, LVL6);
2192 	(void) sigaddset(&act.sa_mask, SINGLE_USER);
2193 	(void) sigaddset(&act.sa_mask, LVLa);
2194 	(void) sigaddset(&act.sa_mask, LVLb);
2195 	(void) sigaddset(&act.sa_mask, LVLc);
2196 
2197 	(void) sigaction(LVLQ, &act, NULL);
2198 	(void) sigaction(LVL0, &act, NULL);
2199 	(void) sigaction(LVL1, &act, NULL);
2200 	(void) sigaction(LVL2, &act, NULL);
2201 	(void) sigaction(LVL3, &act, NULL);
2202 	(void) sigaction(LVL4, &act, NULL);
2203 	(void) sigaction(LVL5, &act, NULL);
2204 	(void) sigaction(LVL6, &act, NULL);
2205 	(void) sigaction(SINGLE_USER, &act, NULL);
2206 	(void) sigaction(LVLa, &act, NULL);
2207 	(void) sigaction(LVLb, &act, NULL);
2208 	(void) sigaction(LVLc, &act, NULL);
2209 
2210 	(void) sigset(SIGALRM, alarmclk);
2211 	alarmclk();
2212 
2213 	(void) sigset(SIGCLD, childeath);
2214 	(void) sigset(SIGPWR, powerfail);
2215 }
2216 
2217 /*
2218  * Set up pipe for "godchildren". If the file exists and is a pipe just open
2219  * it. Else, if the file system is r/w create it.  Otherwise, defer its
2220  * creation and open until after /var/run has been mounted.  This function is
2221  * only called on startup and when explicitly requested via LVLQ.
2222  */
2223 void
2224 setup_pipe()
2225 {
2226 	struct stat stat_buf;
2227 	struct statvfs statvfs_buf;
2228 	struct sigaction act;
2229 
2230 	/*
2231 	 * Always close the previous pipe descriptor as the mounted filesystems
2232 	 * may have changed.
2233 	 */
2234 	if (Pfd >= 0)
2235 		(void) close(Pfd);
2236 
2237 	if ((stat(INITPIPE, &stat_buf) == 0) &&
2238 	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2239 		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2240 	else
2241 		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2242 		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2243 			(void) unlink(INITPIPE);
2244 			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2245 			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2246 		}
2247 
2248 	if (Pfd >= 0) {
2249 		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
2250 		/*
2251 		 * Read pipe in message discard mode.
2252 		 */
2253 		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
2254 
2255 		act.sa_handler = sigpoll;
2256 		act.sa_flags = 0;
2257 		(void) sigemptyset(&act.sa_mask);
2258 		(void) sigaddset(&act.sa_mask, SIGCLD);
2259 		(void) sigaction(SIGPOLL, &act, NULL);
2260 	}
2261 }
2262 
2263 /*
2264  * siglvl - handle an asynchronous signal from init(1M) telling us that we
2265  * should change the current run level.  We set new_state accordingly.
2266  */
2267 void
2268 siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2269 {
2270 	struct PROC_TABLE *process;
2271 	struct sigaction act;
2272 
2273 	/*
2274 	 * If the signal was from the kernel (rather than init(1M)) then init
2275 	 * itself tripped the signal.  That is, we might have a bug and tripped
2276 	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2277 	 * such a case we reset the disposition to SIG_DFL, block all signals
2278 	 * in uc_mask but the current one, and return to the interrupted ucp
2279 	 * to effect an appropriate death.  The kernel will then restart us.
2280 	 *
2281 	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2282 	 * the kernel can send us when it wants to effect an orderly reboot.
2283 	 * For this case we must also verify si_code is zero, rather than a
2284 	 * code such as FPE_INTDIV which a bug might have triggered.
2285 	 */
2286 	if (sip != NULL && SI_FROMKERNEL(sip) &&
2287 	    (sig != SIGFPE || sip->si_code == 0)) {
2288 
2289 		(void) sigemptyset(&act.sa_mask);
2290 		act.sa_handler = SIG_DFL;
2291 		act.sa_flags = 0;
2292 		(void) sigaction(sig, &act, NULL);
2293 
2294 		(void) sigfillset(&ucp->uc_sigmask);
2295 		(void) sigdelset(&ucp->uc_sigmask, sig);
2296 		ucp->uc_flags |= UC_SIGMASK;
2297 
2298 		(void) setcontext(ucp);
2299 	}
2300 
2301 	/*
2302 	 * If the signal received is a LVLQ signal, do not really
2303 	 * change levels, just restate the current level.  If the
2304 	 * signal is not a LVLQ, set the new level to the signal
2305 	 * received.
2306 	 */
2307 	if (sig == LVLQ) {
2308 		new_state = cur_state;
2309 		lvlq_received = B_TRUE;
2310 	} else {
2311 		new_state = sig;
2312 	}
2313 
2314 	/*
2315 	 * Clear all times and repeat counts in the process table
2316 	 * since either the level is changing or the user has editted
2317 	 * the inittab file and wants us to look at it again.
2318 	 * If the user has fixed a typo, we don't want residual timing
2319 	 * data preventing the fixed command line from executing.
2320 	 */
2321 	for (process = proc_table;
2322 	    (process < proc_table + num_proc); process++) {
2323 		process->p_time = 0L;
2324 		process->p_count = 0;
2325 	}
2326 
2327 	/*
2328 	 * Set the flag to indicate that a "user signal" was received.
2329 	 */
2330 	wakeup.w_flags.w_usersignal = 1;
2331 }
2332 
2333 
2334 /*
2335  * alarmclk
2336  */
2337 static void
2338 alarmclk()
2339 {
2340 	time_up = TRUE;
2341 }
2342 
2343 /*
2344  * childeath_single():
2345  *
2346  * This used to be the SIGCLD handler and it was set with signal()
2347  * (as opposed to sigset()).  When a child exited we'd come to the
2348  * handler, wait for the child, and reenable the handler with
2349  * signal() just before returning.  The implementation of signal()
2350  * checks with waitid() for waitable children and sends a SIGCLD
2351  * if there are some.  If children are exiting faster than the
2352  * handler can run we keep sending signals and the handler never
2353  * gets to return and eventually the stack runs out and init dies.
2354  * To prevent that we set the handler with sigset() so the handler
2355  * doesn't need to be reset, and in childeath() (see below) we
2356  * call childeath_single() as long as there are children to be
2357  * waited for.  If a child exits while init is in the handler a
2358  * SIGCLD will be pending and delivered on return from the handler.
2359  * If the child was already waited for the handler will have nothing
2360  * to do and return, otherwise the child will be waited for.
2361  */
2362 static void
2363 childeath_single(pid_t pid, int status)
2364 {
2365 	struct PROC_TABLE	*process;
2366 	struct pidlist		*pp;
2367 
2368 	/*
2369 	 * Scan the process table to see if we are interested in this process.
2370 	 */
2371 	for (process = proc_table;
2372 	    (process < proc_table + num_proc); process++) {
2373 		if ((process->p_flags & (LIVING|OCCUPIED)) ==
2374 		    (LIVING|OCCUPIED) && process->p_pid == pid) {
2375 
2376 			/*
2377 			 * Mark this process as having died and store the exit
2378 			 * status.  Also set the wakeup flag for a dead child
2379 			 * and break out of the loop.
2380 			 */
2381 			process->p_flags &= ~LIVING;
2382 			process->p_exit = (short)status;
2383 			wakeup.w_flags.w_childdeath = 1;
2384 
2385 			return;
2386 		}
2387 	}
2388 
2389 	/*
2390 	 * No process was found above, look through auxiliary list.
2391 	 */
2392 	(void) sighold(SIGPOLL);
2393 	pp = Plhead;
2394 	while (pp) {
2395 		if (pid > pp->pl_pid) {
2396 			/*
2397 			 * Keep on looking.
2398 			 */
2399 			pp = pp->pl_next;
2400 			continue;
2401 		} else if (pid < pp->pl_pid) {
2402 			/*
2403 			 * Not in the list.
2404 			 */
2405 			break;
2406 		} else {
2407 			/*
2408 			 * This is a dead "godchild".
2409 			 */
2410 			pp->pl_dflag = 1;
2411 			pp->pl_exit = (short)status;
2412 			wakeup.w_flags.w_childdeath = 1;
2413 			Gchild = 1;	/* Notice to call cleanaux(). */
2414 			break;
2415 		}
2416 	}
2417 
2418 	(void) sigrelse(SIGPOLL);
2419 }
2420 
/*
 * childeath(): SIGCLD handler.  Reaps every child that has already exited,
 * without blocking, and passes each one to childeath_single().
 *
 * waitpid() sets errno once nothing is left to reap, so errno is saved on
 * entry and restored on exit; otherwise interrupted code that inspects
 * errno afterwards (e.g. to test for EINTR) would see the handler's value.
 */
/* ARGSUSED */
static void
childeath(int signo)
{
	pid_t pid;
	int status;
	int saved_errno = errno;

	while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
		childeath_single(pid, status);

	errno = saved_errno;
}
2431 
2432 static void
2433 powerfail()
2434 {
2435 	(void) nice(-19);
2436 	wakeup.w_flags.w_powerhit = 1;
2437 }
2438 
2439 /*
2440  * efork() forks a child and the parent inserts the process in its table
2441  * of processes that are directly a result of forks that it has performed.
2442  * The child just changes the "global" with the process id for this process
2443  * to it's new value.
2444  * If efork() is called with a pointer into the proc_table it uses that slot,
2445  * otherwise it searches for a free slot.  Regardless of how it was called,
2446  * it returns the pointer to the proc_table entry
2447  *
2448  * The SIGCLD signal is blocked (held) before calling efork()
2449  * and is unblocked (released) after efork() returns.
2450  *
2451  * Ideally, this should be rewritten to use modern signal semantics.
2452  */
2453 static struct PROC_TABLE *
2454 efork(int action, struct PROC_TABLE *process, int modes)
2455 {
2456 	pid_t	childpid;
2457 	struct PROC_TABLE *proc;
2458 	int		i;
2459 	/*
2460 	 * Freshen up the proc_table, removing any entries for dead processes
2461 	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
2462 	 */
2463 	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2464 		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2465 		    (OCCUPIED)) {
2466 			/*
2467 			 * Is this a named process?
2468 			 * If so, do the necessary bookkeeping.
2469 			 */
2470 			if (proc->p_flags & NAMED)
2471 				(void) account(DEAD_PROCESS, proc, NULL);
2472 
2473 			/*
2474 			 * Free this entry for new usage.
2475 			 */
2476 			proc->p_flags = 0;
2477 		}
2478 	}
2479 
2480 	while ((childpid = fork()) == FAILURE) {
2481 		/*
2482 		 * Shorten the alarm timer in case someone else's child dies
2483 		 * and free up a slot in the process table.
2484 		 */
2485 		setimer(5);
2486 
2487 		/*
2488 		 * Wait for some children to die.  Since efork()
2489 		 * is always called with SIGCLD blocked, unblock
2490 		 * it here so that child death signals can come in.
2491 		 */
2492 		(void) sigrelse(SIGCLD);
2493 		(void) pause();
2494 		(void) sighold(SIGCLD);
2495 		setimer(0);
2496 	}
2497 
2498 	if (childpid != 0) {
2499 
2500 		if (process == NULLPROC) {
2501 			/*
2502 			 * No proc table pointer specified so search
2503 			 * for a free slot.
2504 			 */
2505 			for (process = proc_table;  process->p_flags != 0 &&
2506 			    (process < proc_table + num_proc); process++)
2507 					;
2508 
2509 			if (process == (proc_table + num_proc)) {
2510 				int old_proc_table_size = num_proc;
2511 
2512 				/* Increase the process table size */
2513 				increase_proc_table_size();
2514 				if (old_proc_table_size == num_proc) {
2515 					/* didn't grow: memory failure */
2516 					return (NO_ROOM);
2517 				} else {
2518 					process =
2519 					    proc_table + old_proc_table_size;
2520 				}
2521 			}
2522 
2523 			process->p_time = 0L;
2524 			process->p_count = 0;
2525 		}
2526 		process->p_id[0] = '\0';
2527 		process->p_id[1] = '\0';
2528 		process->p_id[2] = '\0';
2529 		process->p_id[3] = '\0';
2530 		process->p_pid = childpid;
2531 		process->p_flags = (LIVING | OCCUPIED | modes);
2532 		process->p_exit = 0;
2533 
2534 		st_write();
2535 	} else {
2536 		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2537 			(void) setpgrp();
2538 
2539 		process = NULLPROC;
2540 
2541 		/*
2542 		 * Reset all signals to the system defaults.
2543 		 */
2544 		for (i = SIGHUP; i <= SIGRTMAX; i++)
2545 			(void) sigset(i, SIG_DFL);
2546 
2547 		/*
2548 		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
2549 		 * SIGTTIN, and SIGTSTP to SIG_IGN.
2550 		 *
2551 		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2552 		 * for backward compatibility.
2553 		 */
2554 		(void) sigset(SIGTTIN, SIG_IGN);
2555 		(void) sigset(SIGTTOU, SIG_IGN);
2556 		(void) sigset(SIGTSTP, SIG_IGN);
2557 		(void) sigset(SIGXCPU, SIG_IGN);
2558 		(void) sigset(SIGXFSZ, SIG_IGN);
2559 	}
2560 	return (process);
2561 }
2562 
2563 
2564 /*
2565  * waitproc() waits for a specified process to die.  For this function to
2566  * work, the specified process must already in the proc_table.  waitproc()
2567  * returns the exit status of the specified process when it dies.
2568  */
2569 static long
2570 waitproc(struct PROC_TABLE *process)
2571 {
2572 	int		answer;
2573 	sigset_t	oldmask, newmask, zeromask;
2574 
2575 	(void) sigemptyset(&zeromask);
2576 	(void) sigemptyset(&newmask);
2577 
2578 	(void) sigaddset(&newmask, SIGCLD);
2579 
2580 	/* Block SIGCLD and save the current signal mask */
2581 	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2582 		perror("SIG_BLOCK error");
2583 
2584 	/*
2585 	 * Wait around until the process dies.
2586 	 */
2587 	if (process->p_flags & LIVING)
2588 		(void) sigsuspend(&zeromask);
2589 
2590 	/* Reset signal mask to unblock SIGCLD */
2591 	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2592 		perror("SIG_SETMASK error");
2593 
2594 	if (process->p_flags & LIVING)
2595 		return (FAILURE);
2596 
2597 	/*
2598 	 * Make sure to only return 16 bits so that answer will always
2599 	 * be positive whenever the process of interest really died.
2600 	 */
2601 	answer = (process->p_exit & 0xffff);
2602 
2603 	/*
2604 	 * Free the slot in the proc_table.
2605 	 */
2606 	process->p_flags = 0;
2607 	return (answer);
2608 }
2609 
2610 /*
2611  * notify_pam_dead(): calls into the PAM framework to close the given session.
2612  */
2613 static void
2614 notify_pam_dead(struct utmpx *up)
2615 {
2616 	pam_handle_t *pamh;
2617 	char user[sizeof (up->ut_user) + 1];
2618 	char ttyn[sizeof (up->ut_line) + 1];
2619 	char host[sizeof (up->ut_host) + 1];
2620 
2621 	/*
2622 	 * PAM does not take care of updating utmpx/wtmpx.
2623 	 */
2624 	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
2625 	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2626 	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
2627 
2628 	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2629 		(void) pam_set_item(pamh, PAM_TTY, ttyn);
2630 		(void) pam_set_item(pamh, PAM_RHOST, host);
2631 		(void) pam_close_session(pamh, 0);
2632 		(void) pam_end(pamh, PAM_SUCCESS);
2633 	}
2634 }
2635 
2636 /*
2637  * Check you can access utmpx (As / may be read-only and
2638  * /var may not be mounted yet).
2639  */
2640 static int
2641 access_utmpx(void)
2642 {
2643 	do {
2644 		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2645 	} while (!utmpx_ok && errno == EINTR);
2646 
2647 	return (utmpx_ok);
2648 }
2649 
2650 /*
2651  * account() updates entries in utmpx and appends new entries to the end of
2652  * wtmpx (assuming they exist).  The program argument indicates the name of
2653  * program if INIT_PROCESS, otherwise should be NULL.
2654  *
2655  * account() only blocks for INIT_PROCESS requests.
2656  *
2657  * Returns non-zero if write failed.
2658  */
2659 static int
2660 account(short state, struct PROC_TABLE *process, char *program)
2661 {
2662 	struct utmpx utmpbuf, *u, *oldu;
2663 	int tmplen;
2664 	char fail_buf[UT_LINE_SZ];
2665 	sigset_t block, unblock;
2666 
2667 	if (!utmpx_ok && !access_utmpx()) {
2668 		return (-1);
2669 	}
2670 
2671 	/*
2672 	 * Set up the prototype for the utmp structure we want to write.
2673 	 */
2674 	u = &utmpbuf;
2675 	(void) memset(u, 0, sizeof (struct utmpx));
2676 
2677 	/*
2678 	 * Fill in the various fields of the utmp structure.
2679 	 */
2680 	u->ut_id[0] = process->p_id[0];
2681 	u->ut_id[1] = process->p_id[1];
2682 	u->ut_id[2] = process->p_id[2];
2683 	u->ut_id[3] = process->p_id[3];
2684 	u->ut_pid = process->p_pid;
2685 
2686 	/*
2687 	 * Fill the "ut_exit" structure.
2688 	 */
2689 	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2690 	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2691 	u->ut_type = state;
2692 
2693 	(void) time(&u->ut_tv.tv_sec);
2694 
2695 	/*
2696 	 * Block signals for utmp update.
2697 	 */
2698 	(void) sigfillset(&block);
2699 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2700 
2701 	/*
2702 	 * See if there already is such an entry in the "utmpx" file.
2703 	 */
2704 	setutxent();	/* Start at beginning of utmpx file. */
2705 
2706 	if ((oldu = getutxid(u)) != NULL) {
2707 		/*
2708 		 * Copy in the old "user", "line" and "host" fields
2709 		 * to our new structure.
2710 		 */
2711 		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2712 		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2713 		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2714 		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2715 		    min(tmplen + 1, sizeof (u->ut_host)) : 0;
2716 
2717 		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2718 			notify_pam_dead(oldu);
2719 		}
2720 	}
2721 
2722 	/*
2723 	 * Perform special accounting. Insert the special string into the
2724 	 * ut_line array. For INIT_PROCESSes put in the name of the
2725 	 * program in the "ut_user" field.
2726 	 */
2727 	switch (state) {
2728 	case INIT_PROCESS:
2729 		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2730 		(void) strcpy(fail_buf, "INIT_PROCESS");
2731 		break;
2732 
2733 	default:
2734 		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2735 		break;
2736 	}
2737 
2738 	/*
2739 	 * Write out the updated entry to utmpx file.
2740 	 */
2741 	if (pututxline(u) == NULL) {
2742 		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2743 		    fail_buf, strerror(errno));
2744 		endutxent();
2745 		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2746 		return (-1);
2747 	}
2748 
2749 	/*
2750 	 * If we're able to write to utmpx, then attempt to add to the
2751 	 * end of the wtmpx file.
2752 	 */
2753 	updwtmpx(WTMPX, u);
2754 
2755 	endutxent();
2756 
2757 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2758 
2759 	return (0);
2760 }
2761 
2762 static void
2763 clearent(pid_t pid, short status)
2764 {
2765 	struct utmpx *up;
2766 	sigset_t block, unblock;
2767 
2768 	/*
2769 	 * Block signals for utmp update.
2770 	 */
2771 	(void) sigfillset(&block);
2772 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2773 
2774 	/*
2775 	 * No error checking for now.
2776 	 */
2777 
2778 	setutxent();
2779 	while (up = getutxent()) {
2780 		if (up->ut_pid == pid) {
2781 			if (up->ut_type == DEAD_PROCESS) {
2782 				/*
2783 				 * Cleaned up elsewhere.
2784 				 */
2785 				continue;
2786 			}
2787 
2788 			notify_pam_dead(up);
2789 
2790 			up->ut_type = DEAD_PROCESS;
2791 			up->ut_exit.e_termination = WTERMSIG(status);
2792 			up->ut_exit.e_exit = WEXITSTATUS(status);
2793 			(void) time(&up->ut_tv.tv_sec);
2794 
2795 			(void) pututxline(up);
2796 			/*
2797 			 * Now attempt to add to the end of the
2798 			 * wtmp and wtmpx files.  Do not create
2799 			 * if they don't already exist.
2800 			 */
2801 			updwtmpx(WTMPX, up);
2802 
2803 			break;
2804 		}
2805 	}
2806 
2807 	endutxent();
2808 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2809 }
2810 
2811 /*
2812  * prog_name() searches for the word or unix path name and
2813  * returns a pointer to the last element of the pathname.
2814  */
2815 static char *
2816 prog_name(char *string)
2817 {
2818 	char	*ptr, *ptr2;
2819 	static char word[UT_USER_SZ + 1];
2820 
2821 	/*
2822 	 * Search for the first word skipping leading spaces and tabs.
2823 	 */
2824 	while (*string == ' ' || *string == '\t')
2825 		string++;
2826 
2827 	/*
2828 	 * If the first non-space non-tab character is not one allowed in
2829 	 * a word, return a pointer to a null string, otherwise parse the
2830 	 * pathname.
2831 	 */
2832 	if (*string != '.' && *string != '/' && *string != '_' &&
2833 	    (*string < 'a' || *string > 'z') &&
2834 	    (*string < 'A' || * string > 'Z') &&
2835 	    (*string < '0' || *string > '9'))
2836 		return ("");
2837 
2838 	/*
2839 	 * Parse the pathname looking forward for '/', ' ', '\t', '\n' or
2840 	 * '\0'.  Each time a '/' is found, move "ptr" to one past the
2841 	 * '/', thus when a ' ', '\t', '\n', or '\0' is found, "ptr" will
2842 	 * point to the last element of the pathname.
2843 	 */
2844 	for (ptr = string; *string != ' ' && *string != '\t' &&
2845 	    *string != '\n' && *string != '\0'; string++) {
2846 		if (*string == '/')
2847 			ptr = string+1;
2848 	}
2849 
2850 	/*
2851 	 * Copy out up to the size of the "ut_user" array into "word",
2852 	 * null terminate it and return a pointer to it.
2853 	 */
2854 	for (ptr2 = &word[0]; ptr2 < &word[UT_USER_SZ] &&
2855 	    ptr < string; /* CSTYLED */)
2856 		*ptr2++ = *ptr++;
2857 
2858 	*ptr2 = '\0';
2859 	return (&word[0]);
2860 }
2861 
2862 
2863 /*
2864  * realcon() returns a nonzero value if there is a character device
2865  * associated with SYSCON that has the same device number as CONSOLE.
2866  */
2867 static int
2868 realcon()
2869 {
2870 	struct stat sconbuf, conbuf;
2871 
2872 	if (stat(SYSCON, &sconbuf) != -1 &&
2873 	    stat(CONSOLE, &conbuf) != -1 &&
2874 	    S_ISCHR(sconbuf.st_mode) &&
2875 	    S_ISCHR(conbuf.st_mode) &&
2876 	    sconbuf.st_rdev == conbuf.st_rdev) {
2877 		return (1);
2878 	} else {
2879 		return (0);
2880 	}
2881 }
2882 
2883 
2884 /*
2885  * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2886  * Returns true if the IOCTLSYSCON file needs to be written (with
2887  * write_ioctl_syscon() below)
2888  */
2889 static int
2890 get_ioctl_syscon()
2891 {
2892 	FILE	*fp;
2893 	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
2894 	int		i, valid_format = 0;
2895 
2896 	/*
2897 	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
2898 	 */
2899 	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2900 		stored_syscon_termios = dflt_termios;
2901 		console(B_TRUE,
2902 		    "warning:%s does not exist, default settings assumed\n",
2903 		    IOCTLSYSCON);
2904 	} else {
2905 
2906 		i = fscanf(fp,
2907 	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2908 		    &iflags, &oflags, &cflags, &lflags,
2909 		    &cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2910 		    &cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2911 		    &cc[14], &cc[15], &cc[16], &cc[17]);
2912 
2913 		if (i == 22) {
2914 			stored_syscon_termios.c_iflag = iflags;
2915 			stored_syscon_termios.c_oflag = oflags;
2916 			stored_syscon_termios.c_cflag = cflags;
2917 			stored_syscon_termios.c_lflag = lflags;
2918 			for (i = 0; i < 18; i++)
2919 				stored_syscon_termios.c_cc[i] = (char)cc[i];
2920 			valid_format = 1;
2921 		} else if (i == 13) {
2922 		rewind(fp);
2923 		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2924 		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2925 		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2926 
2927 		/*
2928 		 * If the file is formatted properly, use the values to
2929 		 * initialize the console terminal condition.
2930 		 */
2931 		stored_syscon_termios.c_iflag = (ushort_t)iflags;
2932 		stored_syscon_termios.c_oflag = (ushort_t)oflags;
2933 		stored_syscon_termios.c_cflag = (ushort_t)cflags;
2934 		stored_syscon_termios.c_lflag = (ushort_t)lflags;
2935 		for (i = 0; i < 8; i++)
2936 			stored_syscon_termios.c_cc[i] = (char)cc[i];
2937 		valid_format = 1;
2938 		}
2939 		(void) fclose(fp);
2940 
2941 		/* If the file is badly formatted, use the default settings. */
2942 		if (!valid_format)
2943 			stored_syscon_termios = dflt_termios;
2944 	}
2945 
2946 	/* If the file had a bad format, rewrite it later. */
2947 	return (!valid_format);
2948 }
2949 
2950 
2951 static void
2952 write_ioctl_syscon()
2953 {
2954 	FILE *fp;
2955 	int i;
2956 
2957 	(void) unlink(SYSCON);
2958 	(void) link(SYSTTY, SYSCON);
2959 	(void) umask(022);
2960 	fp = fopen(IOCTLSYSCON, "w");
2961 
2962 	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2963 	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2964 	    stored_syscon_termios.c_lflag);
2965 	for (i = 0; i < 8; ++i)
2966 		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2967 	(void) putc('\n', fp);
2968 
2969 	(void) fflush(fp);
2970 	(void) fsync(fileno(fp));
2971 	(void) fclose(fp);
2972 	(void) umask(cmask);
2973 }
2974 
2975 
2976 /*
2977  * void console(boolean_t, char *, ...)
2978  *   Outputs the requested message to the system console.  Note that the number
2979  *   of arguments passed to console() should be determined by the print format.
2980  *
2981  *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2982  *   message.
2983  *
2984  *   To make sure we write to the console in a sane fashion, we use the modes
2985  *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2986  *   Afterwards we restore whatever modes were already there.
2987  */
2988 /* PRINTFLIKE2 */
2989 static void
2990 console(boolean_t prefix, char *format, ...)
2991 {
2992 	char	outbuf[BUFSIZ];
2993 	va_list	args;
2994 	int fd, getret;
2995 	struct termios old_syscon_termios;
2996 	FILE *f;
2997 
2998 	/*
2999 	 * We open SYSCON anew each time in case it has changed (see
3000 	 * userinit()).
3001 	 */
3002 	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
3003 	    (f = fdopen(fd, "r+")) == NULL) {
3004 		if (prefix)
3005 			syslog(LOG_WARNING, "INIT: ");
3006 		va_start(args, format);
3007 		vsyslog(LOG_WARNING, format, args);
3008 		va_end(args);
3009 		if (fd >= 0)
3010 			(void) close(fd);
3011 		return;
3012 	}
3013 	setbuf(f, &outbuf[0]);
3014 
3015 	getret = tcgetattr(fd, &old_syscon_termios);
3016 	old_syscon_termios.c_cflag &= ~HUPCL;
3017 	if (realcon())
3018 		/* Don't overwrite cflag of real console. */
3019 		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
3020 
3021 	stored_syscon_termios.c_cflag &= ~HUPCL;
3022 
3023 	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
3024 
3025 	if (prefix)
3026 		(void) fprintf(f, "\nINIT: ");
3027 	va_start(args, format);
3028 	(void) vfprintf(f, format, args);
3029 	va_end(args);
3030 
3031 	if (getret == 0)
3032 		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
3033 
3034 	(void) fclose(f);
3035 }
3036 
3037 /*
3038  * timer() is a substitute for sleep() which uses alarm() and pause().
3039  */
3040 static void
3041 timer(int waitime)
3042 {
3043 	setimer(waitime);
3044 	while (time_up == FALSE)
3045 		(void) pause();
3046 }
3047 
3048 static void
3049 setimer(int timelimit)
3050 {
3051 	alarmclk();
3052 	(void) alarm(timelimit);
3053 	time_up = (timelimit ? FALSE : TRUE);
3054 }
3055 
3056 /*
3057  * Fails with
3058  *   ENOMEM - out of memory
3059  *   ECONNABORTED - repository connection broken
3060  *   EPERM - permission denied
3061  *   EACCES - backend access denied
3062  *   EROFS - backend readonly
3063  */
3064 static int
3065 get_or_add_startd(scf_instance_t *inst)
3066 {
3067 	scf_handle_t *h;
3068 	scf_scope_t *scope = NULL;
3069 	scf_service_t *svc = NULL;
3070 	int ret = 0;
3071 
3072 	h = scf_instance_handle(inst);
3073 
3074 	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
3075 	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
3076 		return (0);
3077 
3078 	switch (scf_error()) {
3079 	case SCF_ERROR_CONNECTION_BROKEN:
3080 		return (ECONNABORTED);
3081 
3082 	case SCF_ERROR_NOT_FOUND:
3083 		break;
3084 
3085 	case SCF_ERROR_HANDLE_MISMATCH:
3086 	case SCF_ERROR_INVALID_ARGUMENT:
3087 	case SCF_ERROR_CONSTRAINT_VIOLATED:
3088 	default:
3089 		bad_error("scf_handle_decode_fmri", scf_error());
3090 	}
3091 
3092 	/* Make sure we're right, since we're adding piece-by-piece. */
3093 	assert(strcmp(SCF_SERVICE_STARTD,
3094 	    "svc:/system/svc/restarter:default") == 0);
3095 
3096 	if ((scope = scf_scope_create(h)) == NULL ||
3097 	    (svc = scf_service_create(h)) == NULL) {
3098 		ret = ENOMEM;
3099 		goto out;
3100 	}
3101 
3102 get_scope:
3103 	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
3104 		switch (scf_error()) {
3105 		case SCF_ERROR_CONNECTION_BROKEN:
3106 			ret = ECONNABORTED;
3107 			goto out;
3108 
3109 		case SCF_ERROR_NOT_FOUND:
3110 			(void) fputs(gettext(
3111 			    "smf(5) repository missing local scope.\n"),
3112 			    stderr);
3113 			exit(1);
3114 			/* NOTREACHED */
3115 
3116 		case SCF_ERROR_HANDLE_MISMATCH:
3117 		case SCF_ERROR_INVALID_ARGUMENT:
3118 		default:
3119 			bad_error("scf_handle_get_scope", scf_error());
3120 		}
3121 	}
3122 
3123 get_svc:
3124 	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
3125 		switch (scf_error()) {
3126 		case SCF_ERROR_CONNECTION_BROKEN:
3127 			ret = ECONNABORTED;
3128 			goto out;
3129 
3130 		case SCF_ERROR_DELETED:
3131 			goto get_scope;
3132 
3133 		case SCF_ERROR_NOT_FOUND:
3134 			break;
3135 
3136 		case SCF_ERROR_HANDLE_MISMATCH:
3137 		case SCF_ERROR_INVALID_ARGUMENT:
3138 		case SCF_ERROR_NOT_SET:
3139 		default:
3140 			bad_error("scf_scope_get_service", scf_error());
3141 		}
3142 
3143 add_svc:
3144 		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
3145 		    0) {
3146 			switch (scf_error()) {
3147 			case SCF_ERROR_CONNECTION_BROKEN:
3148 				ret = ECONNABORTED;
3149 				goto out;
3150 
3151 			case SCF_ERROR_EXISTS:
3152 				goto get_svc;
3153 
3154 			case SCF_ERROR_PERMISSION_DENIED:
3155 				ret = EPERM;
3156 				goto out;
3157 
3158 			case SCF_ERROR_BACKEND_ACCESS:
3159 				ret = EACCES;
3160 				goto out;
3161 
3162 			case SCF_ERROR_BACKEND_READONLY:
3163 				ret = EROFS;
3164 				goto out;
3165 
3166 			case SCF_ERROR_HANDLE_MISMATCH:
3167 			case SCF_ERROR_INVALID_ARGUMENT:
3168 			case SCF_ERROR_NOT_SET:
3169 			default:
3170 				bad_error("scf_scope_add_service", scf_error());
3171 			}
3172 		}
3173 	}
3174 
3175 get_inst:
3176 	if (scf_service_get_instance(svc, "default", inst) != 0) {
3177 		switch (scf_error()) {
3178 		case SCF_ERROR_CONNECTION_BROKEN:
3179 			ret = ECONNABORTED;
3180 			goto out;
3181 
3182 		case SCF_ERROR_DELETED:
3183 			goto add_svc;
3184 
3185 		case SCF_ERROR_NOT_FOUND:
3186 			break;
3187 
3188 		case SCF_ERROR_HANDLE_MISMATCH:
3189 		case SCF_ERROR_INVALID_ARGUMENT:
3190 		case SCF_ERROR_NOT_SET:
3191 		default:
3192 			bad_error("scf_service_get_instance", scf_error());
3193 		}
3194 
3195 		if (scf_service_add_instance(svc, "default", inst) !=
3196 		    0) {
3197 			switch (scf_error()) {
3198 			case SCF_ERROR_CONNECTION_BROKEN:
3199 				ret = ECONNABORTED;
3200 				goto out;
3201 
3202 			case SCF_ERROR_DELETED:
3203 				goto add_svc;
3204 
3205 			case SCF_ERROR_EXISTS:
3206 				goto get_inst;
3207 
3208 			case SCF_ERROR_PERMISSION_DENIED:
3209 				ret = EPERM;
3210 				goto out;
3211 
3212 			case SCF_ERROR_BACKEND_ACCESS:
3213 				ret = EACCES;
3214 				goto out;
3215 
3216 			case SCF_ERROR_BACKEND_READONLY:
3217 				ret = EROFS;
3218 				goto out;
3219 
3220 			case SCF_ERROR_HANDLE_MISMATCH:
3221 			case SCF_ERROR_INVALID_ARGUMENT:
3222 			case SCF_ERROR_NOT_SET:
3223 			default:
3224 				bad_error("scf_service_add_instance",
3225 				    scf_error());
3226 			}
3227 		}
3228 	}
3229 
3230 	ret = 0;
3231 
3232 out:
3233 	scf_service_destroy(svc);
3234 	scf_scope_destroy(scope);
3235 	return (ret);
3236 }
3237 
3238 /*
3239  * Fails with
3240  *   ECONNABORTED - repository connection broken
3241  *   ECANCELED - the transaction's property group was deleted
3242  */
3243 static int
3244 transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3245     const char *pname, scf_type_t type)
3246 {
3247 change_type:
3248 	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3249 		return (0);
3250 
3251 	switch (scf_error()) {
3252 	case SCF_ERROR_CONNECTION_BROKEN:
3253 		return (ECONNABORTED);
3254 
3255 	case SCF_ERROR_DELETED:
3256 		return (ECANCELED);
3257 
3258 	case SCF_ERROR_NOT_FOUND:
3259 		goto new;
3260 
3261 	case SCF_ERROR_HANDLE_MISMATCH:
3262 	case SCF_ERROR_INVALID_ARGUMENT:
3263 	case SCF_ERROR_NOT_BOUND:
3264 	case SCF_ERROR_NOT_SET:
3265 	default:
3266 		bad_error("scf_transaction_property_change_type", scf_error());
3267 	}
3268 
3269 new:
3270 	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3271 		return (0);
3272 
3273 	switch (scf_error()) {
3274 	case SCF_ERROR_CONNECTION_BROKEN:
3275 		return (ECONNABORTED);
3276 
3277 	case SCF_ERROR_DELETED:
3278 		return (ECANCELED);
3279 
3280 	case SCF_ERROR_EXISTS:
3281 		goto change_type;
3282 
3283 	case SCF_ERROR_HANDLE_MISMATCH:
3284 	case SCF_ERROR_INVALID_ARGUMENT:
3285 	case SCF_ERROR_NOT_BOUND:
3286 	case SCF_ERROR_NOT_SET:
3287 	default:
3288 		bad_error("scf_transaction_property_new", scf_error());
3289 		/* NOTREACHED */
3290 	}
3291 }
3292 
3293 static void
3294 scferr(void)
3295 {
3296 	switch (scf_error()) {
3297 	case SCF_ERROR_NO_MEMORY:
3298 		console(B_TRUE, gettext("Out of memory.\n"));
3299 		break;
3300 
3301 	case SCF_ERROR_CONNECTION_BROKEN:
3302 		console(B_TRUE, gettext(
3303 		    "Connection to smf(5) repository server broken.\n"));
3304 		break;
3305 
3306 	case SCF_ERROR_NO_RESOURCES:
3307 		console(B_TRUE, gettext(
3308 		    "smf(5) repository server is out of memory.\n"));
3309 		break;
3310 
3311 	case SCF_ERROR_PERMISSION_DENIED:
3312 		console(B_TRUE, gettext("Insufficient privileges.\n"));
3313 		break;
3314 
3315 	default:
3316 		console(B_TRUE, gettext("libscf error: %s\n"),
3317 		    scf_strerror(scf_error()));
3318 	}
3319 }
3320 
3321 static void
3322 lscf_set_runlevel(char rl)
3323 {
3324 	scf_handle_t *h;
3325 	scf_instance_t *inst = NULL;
3326 	scf_propertygroup_t *pg = NULL;
3327 	scf_transaction_t *tx = NULL;
3328 	scf_transaction_entry_t *ent = NULL;
3329 	scf_value_t *val = NULL;
3330 	char buf[2];
3331 	int r;
3332 
3333 	h = scf_handle_create(SCF_VERSION);
3334 	if (h == NULL) {
3335 		scferr();
3336 		return;
3337 	}
3338 
3339 	if (scf_handle_bind(h) != 0) {
3340 		switch (scf_error()) {
3341 		case SCF_ERROR_NO_SERVER:
3342 			console(B_TRUE,
3343 			    gettext("smf(5) repository server not running.\n"));
3344 			goto bail;
3345 
3346 		default:
3347 			scferr();
3348 			goto bail;
3349 		}
3350 	}
3351 
3352 	if ((inst = scf_instance_create(h)) == NULL ||
3353 	    (pg = scf_pg_create(h)) == NULL ||
3354 	    (val = scf_value_create(h)) == NULL ||
3355 	    (tx = scf_transaction_create(h)) == NULL ||
3356 	    (ent = scf_entry_create(h)) == NULL) {
3357 		scferr();
3358 		goto bail;
3359 	}
3360 
3361 get_inst:
3362 	r = get_or_add_startd(inst);
3363 	switch (r) {
3364 	case 0:
3365 		break;
3366 
3367 	case ENOMEM:
3368 	case ECONNABORTED:
3369 	case EPERM:
3370 	case EACCES:
3371 	case EROFS:
3372 		scferr();
3373 		goto bail;
3374 	default:
3375 		bad_error("get_or_add_startd", r);
3376 	}
3377 
3378 get_pg:
3379 	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
3380 		switch (scf_error()) {
3381 		case SCF_ERROR_CONNECTION_BROKEN:
3382 			scferr();
3383 			goto bail;
3384 
3385 		case SCF_ERROR_DELETED:
3386 			goto get_inst;
3387 
3388 		case SCF_ERROR_NOT_FOUND:
3389 			break;
3390 
3391 		case SCF_ERROR_HANDLE_MISMATCH:
3392 		case SCF_ERROR_INVALID_ARGUMENT:
3393 		case SCF_ERROR_NOT_SET:
3394 		default:
3395 			bad_error("scf_instance_get_pg", scf_error());
3396 		}
3397 
3398 add_pg:
3399 		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
3400 		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
3401 		    0) {
3402 			switch (scf_error()) {
3403 			case SCF_ERROR_CONNECTION_BROKEN:
3404 			case SCF_ERROR_PERMISSION_DENIED:
3405 			case SCF_ERROR_BACKEND_ACCESS:
3406 				scferr();
3407 				goto bail;
3408 
3409 			case SCF_ERROR_DELETED:
3410 				goto get_inst;
3411 
3412 			case SCF_ERROR_EXISTS:
3413 				goto get_pg;
3414 
3415 			case SCF_ERROR_HANDLE_MISMATCH:
3416 			case SCF_ERROR_INVALID_ARGUMENT:
3417 			case SCF_ERROR_NOT_SET:
3418 			default:
3419 				bad_error("scf_instance_add_pg", scf_error());
3420 			}
3421 		}
3422 	}
3423 
3424 	buf[0] = rl;
3425 	buf[1] = '\0';
3426 	r = scf_value_set_astring(val, buf);
3427 	assert(r == 0);
3428 
3429 	for (;;) {
3430 		if (scf_transaction_start(tx, pg) != 0) {
3431 			switch (scf_error()) {
3432 			case SCF_ERROR_CONNECTION_BROKEN:
3433 			case SCF_ERROR_PERMISSION_DENIED:
3434 			case SCF_ERROR_BACKEND_ACCESS:
3435 				scferr();
3436 				goto bail;
3437 
3438 			case SCF_ERROR_DELETED:
3439 				goto add_pg;
3440 
3441 			case SCF_ERROR_HANDLE_MISMATCH:
3442 			case SCF_ERROR_NOT_BOUND:
3443 			case SCF_ERROR_IN_USE:
3444 			case SCF_ERROR_NOT_SET:
3445 			default:
3446 				bad_error("scf_transaction_start", scf_error());
3447 			}
3448 		}
3449 
3450 		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
3451 		switch (r) {
3452 		case 0:
3453 			break;
3454 
3455 		case ECONNABORTED:
3456 			scferr();
3457 			goto bail;
3458 
3459 		case ECANCELED:
3460 			scf_transaction_reset(tx);
3461 			goto add_pg;
3462 
3463 		default:
3464 			bad_error("transaction_add_set", r);
3465 		}
3466 
3467 		r = scf_entry_add_value(ent, val);
3468 		assert(r == 0);
3469 
3470 		r = scf_transaction_commit(tx);
3471 		if (r == 1)
3472 			break;
3473 
3474 		if (r != 0) {
3475 			switch (scf_error()) {
3476 			case SCF_ERROR_CONNECTION_BROKEN:
3477 			case SCF_ERROR_PERMISSION_DENIED:
3478 			case SCF_ERROR_BACKEND_ACCESS:
3479 			case SCF_ERROR_BACKEND_READONLY:
3480 				scferr();
3481 				goto bail;
3482 
3483 			case SCF_ERROR_DELETED:
3484 				scf_transaction_reset(tx);
3485 				goto add_pg;
3486 
3487 			case SCF_ERROR_INVALID_ARGUMENT:
3488 			case SCF_ERROR_NOT_BOUND:
3489 			case SCF_ERROR_NOT_SET:
3490 			default:
3491 				bad_error("scf_transaction_commit",
3492 				    scf_error());
3493 			}
3494 		}
3495 
3496 		scf_transaction_reset(tx);
3497 		(void) scf_pg_update(pg);
3498 	}
3499 
3500 bail:
3501 	scf_transaction_destroy(tx);
3502 	scf_entry_destroy(ent);
3503 	scf_value_destroy(val);
3504 	scf_pg_destroy(pg);
3505 	scf_instance_destroy(inst);
3506 
3507 	(void) scf_handle_unbind(h);
3508 	scf_handle_destroy(h);
3509 }
3510 
3511 /*
3512  * Function to handle requests from users to main init running as process 1.
3513  */
3514 static void
3515 userinit(int argc, char **argv)
3516 {
3517 	FILE	*fp;
3518 	char	*ln;
3519 	int	init_signal;
3520 	struct stat	sconbuf, conbuf;
3521 	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3522 
3523 	/*
3524 	 * We are a user invoked init.  Is there an argument and is it
3525 	 * a single character?  If not, print usage message and quit.
3526 	 */
3527 	if (argc != 2 || argv[1][1] != '\0') {
3528 		(void) fprintf(stderr, usage_msg);
3529 		exit(0);
3530 	}
3531 
3532 	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3533 		(void) fprintf(stderr, usage_msg);
3534 		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3535 		    argv[1]);
3536 		exit(1);
3537 	}
3538 
3539 	if (init_signal == SINGLE_USER) {
3540 		/*
3541 		 * Make sure this process is talking to a legal tty line
3542 		 * and that /dev/syscon is linked to this line.
3543 		 */
3544 		ln = ttyname(0);	/* Get the name of tty */
3545 		if (ln == NULL) {
3546 			(void) fprintf(stderr,
3547 			    "Standard input not a tty line\n");
3548 			(void) audit_put_record(ADT_FAILURE,
3549 			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3550 			exit(1);
3551 		}
3552 
3553 		if ((stat(ln, &sconbuf) != -1) &&
3554 		    (stat(SYSCON, &conbuf) == -1 ||
3555 		    sconbuf.st_rdev != conbuf.st_rdev)) {
3556 			/*
3557 			 * /dev/syscon needs to change.
3558 			 * Unlink /dev/syscon and relink it to the current line.
3559 			 */
3560 			if (lstat(SYSCON, &conbuf) != -1 &&
3561 			    unlink(SYSCON) == FAILURE) {
3562 				perror("Can't unlink /dev/syscon");
3563 				(void) fprintf(stderr,
3564 				    "Run command on the system console.\n");
3565 				(void) audit_put_record(ADT_FAILURE,
3566 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3567 				exit(1);
3568 			}
3569 			if (symlink(ln, SYSCON) == FAILURE) {
3570 				(void) fprintf(stderr,
3571 				    "Can't symlink /dev/syscon to %s: %s", ln,
3572 				    strerror(errno));
3573 
3574 				/* Try to leave a syscon */
3575 				(void) link(SYSTTY, SYSCON);
3576 				(void) audit_put_record(ADT_FAILURE,
3577 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3578 				exit(1);
3579 			}
3580 
3581 			/*
3582 			 * Try to leave a message on system console saying where
3583 			 * /dev/syscon is currently connected.
3584 			 */
3585 			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3586 				(void) fprintf(fp,
3587 				    "\n****	SYSCON CHANGED TO %s	****\n",
3588 				    ln);
3589 				(void) fclose(fp);
3590 			}
3591 		}
3592 	}
3593 
3594 	update_boot_archive(init_signal);
3595 
3596 	(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3597 
3598 	/*
3599 	 * Signal init; init will take care of telling svc.startd.
3600 	 */
3601 	if (kill(init_pid, init_signal) == FAILURE) {
3602 		(void) fprintf(stderr, "Must be super-user\n");
3603 		(void) audit_put_record(ADT_FAILURE,
3604 		    ADT_FAIL_VALUE_AUTH, argv[1]);
3605 		exit(1);
3606 	}
3607 
3608 	exit(0);
3609 }
3610 
3611 
3612 #define	DELTA	25	/* Number of pidlist elements to allocate at a time */
3613 
3614 /* ARGSUSED */
3615 void
3616 sigpoll(int n)
3617 {
3618 	struct pidrec prec;
3619 	struct pidrec *p = &prec;
3620 	struct pidlist *plp;
3621 	struct pidlist *tp, *savetp;
3622 	int i;
3623 
3624 	if (Pfd < 0) {
3625 		return;
3626 	}
3627 
3628 	for (;;) {
3629 		/*
3630 		 * Important Note: Either read will really fail (in which case
3631 		 * return is all we can do) or will get EAGAIN (Pfd was opened
3632 		 * O_NDELAY), in which case we also want to return.
3633 		 * Always return from here!
3634 		 */
3635 		if (read(Pfd, p, sizeof (struct pidrec)) !=
3636 						sizeof (struct pidrec)) {
3637 			return;
3638 		}
3639 		switch (p->pd_type) {
3640 
3641 		case ADDPID:
3642 			/*
3643 			 * New "godchild", add to list.
3644 			 */
3645 			if (Plfree == NULL) {
3646 				plp = (struct pidlist *)calloc(DELTA,
3647 				    sizeof (struct pidlist));
3648 				if (plp == NULL) {
3649 					/* Can't save pid */
3650 					break;
3651 				}
3652 				/*
3653 				 * Point at 2nd record allocated, we'll use plp.
3654 				 */
3655 				tp = plp + 1;
3656 				/*
3657 				 * Link them into a chain.
3658 				 */
3659 				Plfree = tp;
3660 				for (i = 0; i < DELTA - 2; i++) {
3661 					tp->pl_next = tp + 1;
3662 					tp++;
3663 				}
3664 			} else {
3665 				plp = Plfree;
3666 				Plfree = plp->pl_next;
3667 			}
3668 			plp->pl_pid = p->pd_pid;
3669 			plp->pl_dflag = 0;
3670 			plp->pl_next = NULL;
3671 			/*
3672 			 * Note - pid list is kept in increasing order of pids.
3673 			 */
3674 			if (Plhead == NULL) {
3675 				Plhead = plp;
3676 				/* Back up to read next record */
3677 				break;
3678 			} else {
3679 				savetp = tp = Plhead;
3680 				while (tp) {
3681 					if (plp->pl_pid > tp->pl_pid) {
3682 						savetp = tp;
3683 						tp = tp->pl_next;
3684 						continue;
3685 					} else if (plp->pl_pid < tp->pl_pid) {
3686 						if (tp == Plhead) {
3687 							plp->pl_next = Plhead;
3688 							Plhead = plp;
3689 						} else {
3690 							plp->pl_next =
3691 							    savetp->pl_next;
3692 							savetp->pl_next = plp;
3693 						}
3694 						break;
3695 					} else {
3696 						/* Already in list! */
3697 						plp->pl_next = Plfree;
3698 						Plfree = plp;
3699 						break;
3700 					}
3701 				}
3702 				if (tp == NULL) {
3703 					/* Add to end of list */
3704 					savetp->pl_next = plp;
3705 				}
3706 			}
3707 			/* Back up to read next record. */
3708 			break;
3709 
3710 		case REMPID:
3711 			/*
3712 			 * This one was handled by someone else,
3713 			 * purge it from the list.
3714 			 */
3715 			if (Plhead == NULL) {
3716 				/* Back up to read next record. */
3717 				break;
3718 			}
3719 			savetp = tp = Plhead;
3720 			while (tp) {
3721 				if (p->pd_pid > tp->pl_pid) {
3722 					/* Keep on looking. */
3723 					savetp = tp;
3724 					tp = tp->pl_next;
3725 					continue;
3726 				} else if (p->pd_pid < tp->pl_pid) {
3727 					/* Not in list. */
3728 					break;
3729 				} else {
3730 					/* Found it. */
3731 					if (tp == Plhead)
3732 						Plhead = tp->pl_next;
3733 					else
3734 						savetp->pl_next = tp->pl_next;
3735 					tp->pl_next = Plfree;
3736 					Plfree = tp;
3737 					break;
3738 				}
3739 			}
3740 			/* Back up to read next record. */
3741 			break;
3742 		default:
3743 			console(B_TRUE, "Bad message on initpipe\n");
3744 			break;
3745 		}
3746 	}
3747 }
3748 
3749 
3750 static void
3751 cleanaux()
3752 {
3753 	struct pidlist *savep, *p;
3754 	pid_t	pid;
3755 	short	status;
3756 
3757 	(void) sighold(SIGCLD);
3758 	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
3759 	(void) sighold(SIGPOLL);
3760 	savep = p = Plhead;
3761 	while (p) {
3762 		if (p->pl_dflag) {
3763 			/*
3764 			 * Found an entry to delete,
3765 			 * remove it from list first.
3766 			 */
3767 			pid = p->pl_pid;
3768 			status = p->pl_exit;
3769 			if (p == Plhead) {
3770 				Plhead = p->pl_next;
3771 				p->pl_next = Plfree;
3772 				Plfree = p;
3773 				savep = p = Plhead;
3774 			} else {
3775 				savep->pl_next = p->pl_next;
3776 				p->pl_next = Plfree;
3777 				Plfree = p;
3778 				p = savep->pl_next;
3779 			}
3780 			clearent(pid, status);
3781 			continue;
3782 		}
3783 		savep = p;
3784 		p = p->pl_next;
3785 	}
3786 	(void) sigrelse(SIGPOLL);
3787 	(void) sigrelse(SIGCLD);
3788 }
3789 
3790 
3791 /*
3792  * /etc/inittab has more entries and we have run out of room in the proc_table
3793  * array. Double the size of proc_table to accomodate the extra entries.
3794  */
3795 static void
3796 increase_proc_table_size()
3797 {
3798 	sigset_t block, unblock;
3799 	void *ptr;
3800 	size_t delta = num_proc * sizeof (struct PROC_TABLE);
3801 
3802 
3803 	/*
3804 	 * Block signals for realloc.
3805 	 */
3806 	(void) sigfillset(&block);
3807 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
3808 
3809 
3810 	/*
3811 	 * On failure we just return because callers of this function check
3812 	 * for failure.
3813 	 */
3814 	do
3815 		ptr = realloc(g_state, g_state_sz + delta);
3816 	while (ptr == NULL && errno == EAGAIN)
3817 		;
3818 
3819 	if (ptr != NULL) {
3820 		/* ensure that the new part is initialized to zero */
3821 		bzero((caddr_t)ptr + g_state_sz, delta);
3822 
3823 		g_state = ptr;
3824 		g_state_sz += delta;
3825 		num_proc <<= 1;
3826 	}
3827 
3828 
3829 	/* unblock our signals before returning */
3830 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3831 }
3832 
3833 
3834 
3835 /*
3836  * Sanity check g_state.
3837  */
3838 static int
3839 st_sane()
3840 {
3841 	int i;
3842 	struct PROC_TABLE *ptp;
3843 
3844 
3845 	/* Note: cur_state is encoded as a signal number */
3846 	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3847 		return (0);
3848 
3849 	/* Check num_proc */
3850 	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3851 	    sizeof (struct PROC_TABLE))
3852 		return (0);
3853 
3854 	/* Check proc_table */
3855 	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3856 		/* skip unoccupied entries */
3857 		if (!(ptp->p_flags & OCCUPIED))
3858 			continue;
3859 
3860 		/* p_flags has no bits outside of PF_MASK */
3861 		if (ptp->p_flags & ~(PF_MASK))
3862 			return (0);
3863 
3864 		/* 5 <= pid <= MAXPID */
3865 		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3866 			return (0);
3867 
3868 		/* p_count >= 0 */
3869 		if (ptp->p_count < 0)
3870 			return (0);
3871 
3872 		/* p_time >= 0 */
3873 		if (ptp->p_time < 0)
3874 			return (0);
3875 	}
3876 
3877 	return (1);
3878 }
3879 
3880 /*
3881  * Initialize our state.
3882  *
3883  * If the system just booted, then init_state_file, which is located on an
3884  * everpresent tmpfs filesystem, should not exist.
3885  *
3886  * If we were restarted, then init_state_file should exist, in
3887  * which case we'll read it in, sanity check it, and use it.
3888  *
3889  * Note: You can't call console() until proc_table is ready.
3890  */
3891 void
3892 st_init()
3893 {
3894 	struct stat stb;
3895 	int ret, st_fd, insane = 0;
3896 	size_t to_be_read;
3897 	char *ptr;
3898 
3899 
3900 	booting = 1;
3901 
3902 	do {
3903 		/*
3904 		 * If we can exclusively create the file, then we're the
3905 		 * initial invocation of init(1M).
3906 		 */
3907 		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3908 		    S_IRUSR | S_IWUSR);
3909 	} while (st_fd == -1 && errno == EINTR);
3910 	if (st_fd != -1)
3911 		goto new_state;
3912 
3913 	booting = 0;
3914 
3915 	do {
3916 		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3917 	} while (st_fd == -1 && errno == EINTR);
3918 	if (st_fd == -1)
3919 		goto new_state;
3920 
3921 	/* Get the size of the file. */
3922 	do
3923 		ret = fstat(st_fd, &stb);
3924 	while (ret == -1 && errno == EINTR)
3925 		;
3926 	if (ret == -1)
3927 		goto new_state;
3928 
3929 	do
3930 		g_state = malloc(stb.st_size);
3931 	while (g_state == NULL && errno == EAGAIN)
3932 		;
3933 	if (g_state == NULL)
3934 		goto new_state;
3935 
3936 	to_be_read = stb.st_size;
3937 	ptr = (char *)g_state;
3938 	while (to_be_read > 0) {
3939 		ssize_t read_ret;
3940 
3941 		read_ret = read(st_fd, ptr, to_be_read);
3942 		if (read_ret < 0) {
3943 			if (errno == EINTR)
3944 				continue;
3945 
3946 			goto new_state;
3947 		}
3948 
3949 		to_be_read -= read_ret;
3950 		ptr += read_ret;
3951 	}
3952 
3953 	(void) close(st_fd);
3954 
3955 	g_state_sz = stb.st_size;
3956 
3957 	if (st_sane()) {
3958 		console(B_TRUE, "Restarting.\n");
3959 		return;
3960 	}
3961 
3962 	insane = 1;
3963 
3964 new_state:
3965 	if (st_fd >= 0)
3966 		(void) close(st_fd);
3967 	else
3968 		(void) unlink(init_state_file);
3969 
3970 	if (g_state != NULL)
3971 		free(g_state);
3972 
3973 	/* Something went wrong, so allocate new state. */
3974 	g_state_sz = sizeof (struct init_state) +
3975 	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3976 	do
3977 		g_state = calloc(1, g_state_sz);
3978 	while (g_state == NULL && errno == EAGAIN)
3979 		;
3980 	if (g_state == NULL) {
3981 		/* Fatal error! */
3982 		exit(errno);
3983 	}
3984 
3985 	g_state->ist_runlevel = -1;
3986 	num_proc = init_num_proc;
3987 
3988 	if (!booting) {
3989 		console(B_TRUE, "Restarting.\n");
3990 
3991 		/* Overwrite the bad state file. */
3992 		st_write();
3993 
3994 		if (!insane) {
3995 			console(B_TRUE,
3996 			    "Error accessing persistent state file `%s'.  "
3997 			    "Ignored.\n", init_state_file);
3998 		} else {
3999 			console(B_TRUE,
4000 			    "Persistent state file `%s' is invalid and was "
4001 			    "ignored.\n", init_state_file);
4002 		}
4003 	}
4004 }
4005 
4006 /*
4007  * Write g_state out to the state file.
4008  */
4009 void
4010 st_write()
4011 {
4012 	static int complained = 0;
4013 
4014 	int st_fd;
4015 	char *cp;
4016 	size_t sz;
4017 	ssize_t ret;
4018 
4019 
4020 	do {
4021 		st_fd = open(init_next_state_file,
4022 		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
4023 	} while (st_fd < 0 && errno == EINTR);
4024 	if (st_fd < 0)
4025 		goto err;
4026 
4027 	cp = (char *)g_state;
4028 	sz = g_state_sz;
4029 	while (sz > 0) {
4030 		ret = write(st_fd, cp, sz);
4031 		if (ret < 0) {
4032 			if (errno == EINTR)
4033 				continue;
4034 
4035 			goto err;
4036 		}
4037 
4038 		sz -= ret;
4039 		cp += ret;
4040 	}
4041 
4042 	(void) close(st_fd);
4043 	st_fd = -1;
4044 	if (rename(init_next_state_file, init_state_file)) {
4045 		(void) unlink(init_next_state_file);
4046 		goto err;
4047 	}
4048 	complained = 0;
4049 
4050 	return;
4051 
4052 err:
4053 	if (st_fd >= 0)
4054 		(void) close(st_fd);
4055 
4056 	if (!booting && !complained) {
4057 		/*
4058 		 * Only complain after the filesystem should have come up.
4059 		 * And only do it once so we don't loop between console()
4060 		 * & efork().
4061 		 */
4062 		complained = 1;
4063 		if (st_fd)
4064 			console(B_TRUE, "Couldn't write persistent state "
4065 			    "file `%s'.\n", init_state_file);
4066 		else
4067 			console(B_TRUE, "Couldn't move persistent state "
4068 			    "file `%s' to `%s'.\n", init_next_state_file,
4069 			    init_state_file);
4070 	}
4071 }
4072 
4073 /*
4074  * Create a contract with these parameters.
4075  */
4076 static int
4077 contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4078     uint64_t cookie)
4079 {
4080 	int fd, err;
4081 
4082 	char *ioctl_tset_emsg =
4083 	    "Couldn't set \"%s\" contract template parameter: %s.\n";
4084 
4085 	do
4086 		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4087 	while (fd < 0 && errno == EINTR)
4088 		;
4089 	if (fd < 0) {
4090 		console(B_TRUE, "Couldn't create process template: %s.\n",
4091 		    strerror(errno));
4092 		return (-1);
4093 	}
4094 
4095 	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4096 		console(B_TRUE, "Contract set template inherit, regent "
4097 		    "failed: %s.\n", strerror(err));
4098 
4099 	/*
4100 	 * These errors result in a misconfigured template, which is better
4101 	 * than no template at all, so warn but don't abort.
4102 	 */
4103 	if (err = ct_tmpl_set_informative(fd, info))
4104 		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4105 
4106 	if (err = ct_tmpl_set_critical(fd, critical))
4107 		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4108 
4109 	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4110 		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4111 
4112 	if (err = ct_tmpl_set_cookie(fd, cookie))
4113 		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4114 
4115 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4116 
4117 	return (fd);
4118 }
4119 
4120 /*
4121  * Create the templates and open an event file descriptor.  We use dup2(2) to
4122  * get these descriptors away from the stdin/stdout/stderr group.
4123  */
4124 static void
4125 contracts_init()
4126 {
4127 	int err, fd;
4128 
4129 	/*
4130 	 * Create & configure a legacy template.  We only want empty events so
4131 	 * we know when to abandon them.
4132 	 */
4133 	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4134 	    ORDINARY_COOKIE);
4135 	if (legacy_tmpl >= 0) {
4136 		err = ct_tmpl_activate(legacy_tmpl);
4137 		if (err != 0) {
4138 			(void) close(legacy_tmpl);
4139 			legacy_tmpl = -1;
4140 			console(B_TRUE,
4141 			    "Couldn't activate legacy template (%s); "
4142 			    "legacy services will be in init's contract.\n",
4143 			    strerror(err));
4144 		}
4145 	} else
4146 		console(B_TRUE,
4147 		    "Legacy services will be in init's contract.\n");
4148 
4149 	if (dup2(legacy_tmpl, 255) == -1) {
4150 		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4151 		    strerror(errno));
4152 	} else {
4153 		(void) close(legacy_tmpl);
4154 		legacy_tmpl = 255;
4155 	}
4156 
4157 	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4158 
4159 	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4160 	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4161 
4162 	if (dup2(startd_tmpl, 254) == -1) {
4163 		console(B_TRUE, "Could not duplicate startd template: %s.\n",
4164 		    strerror(errno));
4165 	} else {
4166 		(void) close(startd_tmpl);
4167 		startd_tmpl = 254;
4168 	}
4169 
4170 	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4171 
4172 	if (legacy_tmpl < 0 && startd_tmpl < 0) {
4173 		/* The creation errors have already been reported. */
4174 		console(B_TRUE,
4175 		    "Ignoring contract events.  Core smf(5) services will not "
4176 		    "be restarted.\n");
4177 		return;
4178 	}
4179 
4180 	/*
4181 	 * Open an event endpoint.
4182 	 */
4183 	do
4184 		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4185 	while (fd < 0 && errno == EINTR)
4186 		;
4187 	if (fd < 0) {
4188 		console(B_TRUE,
4189 		    "Couldn't open process pbundle: %s.  Core smf(5) services "
4190 		    "will not be restarted.\n", strerror(errno));
4191 		return;
4192 	}
4193 
4194 	if (dup2(fd, 253) == -1) {
4195 		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4196 		    strerror(errno));
4197 	} else {
4198 		(void) close(fd);
4199 		fd = 253;
4200 	}
4201 
4202 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4203 
4204 	/* Reset in case we've been restarted. */
4205 	(void) ct_event_reset(fd);
4206 
4207 	poll_fds[0].fd = fd;
4208 	poll_fds[0].events = POLLIN;
4209 	poll_nfds = 1;
4210 }
4211 
4212 static int
4213 contract_getfile(ctid_t id, const char *name, int oflag)
4214 {
4215 	int fd;
4216 
4217 	do
4218 		fd = contract_open(id, "process", name, oflag);
4219 	while (fd < 0 && errno == EINTR)
4220 		;
4221 
4222 	if (fd < 0)
4223 		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4224 		    name, id, strerror(errno));
4225 
4226 	return (fd);
4227 }
4228 
4229 static int
4230 contract_cookie(ctid_t id, uint64_t *cp)
4231 {
4232 	int fd, err;
4233 	ct_stathdl_t sh;
4234 
4235 	fd = contract_getfile(id, "status", O_RDONLY);
4236 	if (fd < 0)
4237 		return (-1);
4238 
4239 	err = ct_status_read(fd, CTD_COMMON, &sh);
4240 	if (err != 0) {
4241 		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4242 		    id, strerror(err));
4243 		(void) close(fd);
4244 		return (-1);
4245 	}
4246 
4247 	(void) close(fd);
4248 
4249 	*cp = ct_status_get_cookie(sh);
4250 
4251 	ct_status_free(sh);
4252 	return (0);
4253 }
4254 
4255 static void
4256 contract_ack(ct_evthdl_t e)
4257 {
4258 	int fd;
4259 
4260 	if (ct_event_get_flags(e) & CTE_INFO)
4261 		return;
4262 
4263 	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4264 	if (fd < 0)
4265 		return;
4266 
4267 	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
4268 	(void) close(fd);
4269 }
4270 
4271 /*
4272  * Process a contract event.
4273  */
4274 static void
4275 contract_event(struct pollfd *poll)
4276 {
4277 	ct_evthdl_t e;
4278 	int err;
4279 	ctid_t ctid;
4280 
4281 	if (!(poll->revents & POLLIN)) {
4282 		if (poll->revents & POLLERR)
4283 			console(B_TRUE,
4284 			    "Unknown poll error on my process contract "
4285 			    "pbundle.\n");
4286 		return;
4287 	}
4288 
4289 	err = ct_event_read(poll->fd, &e);
4290 	if (err != 0) {
4291 		console(B_TRUE, "Error retrieving contract event: %s.\n",
4292 		    strerror(err));
4293 		return;
4294 	}
4295 
4296 	ctid = ct_event_get_ctid(e);
4297 
4298 	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4299 		uint64_t cookie;
4300 		int ret, abandon = 1;
4301 
4302 		/* If it's svc.startd, restart it.  Else, abandon. */
4303 		ret = contract_cookie(ctid, &cookie);
4304 
4305 		if (ret == 0) {
4306 			if (cookie == STARTD_COOKIE &&
4307 			    do_restart_startd) {
4308 				if (smf_debug)
4309 					console(B_TRUE, "Restarting "
4310 					    "svc.startd.\n");
4311 
4312 				/*
4313 				 * Account for the failure.  If the failure rate
4314 				 * exceeds a threshold, then drop to maintenance
4315 				 * mode.
4316 				 */
4317 				startd_record_failure();
4318 				if (startd_failure_rate_critical())
4319 					enter_maintenance();
4320 
4321 				if (startd_tmpl < 0)
4322 					console(B_TRUE,
4323 					    "Restarting svc.startd in "
4324 					    "improper contract (bad "
4325 					    "template).\n");
4326 
4327 				(void) startd_run(startd_cline, startd_tmpl,
4328 				    ctid);
4329 
4330 				abandon = 0;
4331 			}
4332 		}
4333 
4334 		if (abandon && (err = contract_abandon_id(ctid))) {
4335 			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4336 			    ctid, strerror(err));
4337 		}
4338 
4339 		/*
4340 		 * No need to acknowledge the event since either way the
4341 		 * originating contract should be abandoned.
4342 		 */
4343 	} else {
4344 		console(B_TRUE,
4345 		    "Received contract event of unexpected type %d from "
4346 		    "contract %ld.\n", ct_event_get_type(e), ctid);
4347 
4348 		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4349 			/* Allow unexpected critical events to be released. */
4350 			contract_ack(e);
4351 	}
4352 
4353 	ct_event_free(e);
4354 }
4355 
4356 /*
4357  * svc.startd(1M) Management
4358  */
4359 
4360 /*
4361  * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
4362  * contract, or 0 if we're starting it for the first time.  If wait is true
4363  * we'll wait for and return the exit value of the child.
4364  */
4365 static int
4366 startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4367 {
4368 	int err, i, ret, did_activate;
4369 	pid_t pid;
4370 	struct stat sb;
4371 
4372 	if (cline[0] == '\0')
4373 		return (-1);
4374 
4375 	/*
4376 	 * Don't restart startd if the system is rebooting or shutting down.
4377 	 */
4378 	do {
4379 		ret = stat("/etc/svc/volatile/resetting", &sb);
4380 	} while (ret == -1 && errno == EINTR);
4381 
4382 	if (ret == 0) {
4383 		if (smf_debug)
4384 			console(B_TRUE, "Quiescing for reboot.\n");
4385 		(void) pause();
4386 		return (-1);
4387 	}
4388 
4389 	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4390 	if (err == EINVAL) {
4391 		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4392 		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4393 		    CT_PR_EV_HWERR, STARTD_COOKIE);
4394 
4395 		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4396 	}
4397 	if (err != 0) {
4398 		console(B_TRUE,
4399 		    "Couldn't set transfer parameter of contract template: "
4400 		    "%s.\n", strerror(err));
4401 	}
4402 
4403 	if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
4404 	    SCF_SERVICE_STARTD)) != 0)
4405 		console(B_TRUE,
4406 		    "Can not set svc_fmri in contract template: %s\n",
4407 		    strerror(err));
4408 	if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
4409 	    startd_svc_aux)) != 0)
4410 		console(B_TRUE,
4411 		    "Can not set svc_aux in contract template: %s\n",
4412 		    strerror(err));
4413 	did_activate = !(ct_tmpl_activate(tmpl));
4414 	if (!did_activate)
4415 		console(B_TRUE,
4416 		    "Template activation failed; not starting \"%s\" in "
4417 		    "proper contract.\n", cline);
4418 
4419 	/* Hold SIGCLD so we can wait if necessary. */
4420 	(void) sighold(SIGCLD);
4421 
4422 	while ((pid = fork()) < 0) {
4423 		if (errno == EPERM) {
4424 			console(B_TRUE, "Insufficient permission to fork.\n");
4425 
4426 			/* Now that's a doozy. */
4427 			exit(1);
4428 		}
4429 
4430 		console(B_TRUE,
4431 		    "fork() for svc.startd failed: %s.  Will retry in 1 "
4432 		    "second...\n", strerror(errno));
4433 
4434 		(void) sleep(1);
4435 
4436 		/* Eventually give up? */
4437 	}
4438 
4439 	if (pid == 0) {
4440 		/* child */
4441 
4442 		/* See the comment in efork() */
4443 		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4444 			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4445 				(void) sigset(i, SIG_IGN);
4446 			else
4447 				(void) sigset(i, SIG_DFL);
4448 		}
4449 
4450 		if (smf_options != NULL) {
4451 			/* Put smf_options in the environment. */
4452 			glob_envp[glob_envn] =
4453 			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
4454 			    strlen(smf_options) + 1);
4455 
4456 			if (glob_envp[glob_envn] != NULL) {
4457 				/* LINTED */
4458 				(void) sprintf(glob_envp[glob_envn],
4459 				    "SMF_OPTIONS=%s", smf_options);
4460 				glob_envp[glob_envn+1] = NULL;
4461 			} else {
4462 				console(B_TRUE,
4463 				    "Could not set SMF_OPTIONS (%s).\n",
4464 				    strerror(errno));
4465 			}
4466 		}
4467 
4468 		if (smf_debug)
4469 			console(B_TRUE, "Executing svc.startd\n");
4470 
4471 		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4472 
4473 		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4474 		    strerror(errno));
4475 
4476 		exit(1);
4477 	}
4478 
4479 	/* parent */
4480 
4481 	if (did_activate) {
4482 		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4483 			(void) ct_tmpl_clear(tmpl);
4484 	}
4485 
4486 	/* Clear the old_ctid reference so the kernel can reclaim it. */
4487 	if (old_ctid != 0)
4488 		(void) ct_pr_tmpl_set_transfer(tmpl, 0);
4489 
4490 	(void) sigrelse(SIGCLD);
4491 
4492 	return (0);
4493 }
4494 
4495 /*
4496  * void startd_record_failure(void)
4497  *   Place the current time in our circular array of svc.startd failures.
4498  */
4499 void
4500 startd_record_failure()
4501 {
4502 	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4503 
4504 	startd_failure_time[index] = gethrtime();
4505 }
4506 
4507 /*
4508  * int startd_failure_rate_critical(void)
4509  *   Return true if the average failure interval is less than the permitted
4510  *   interval.  Implicit success if insufficient measurements for an average
4511  *   exist.
4512  */
4513 int
4514 startd_failure_rate_critical()
4515 {
4516 	int n = startd_failure_index;
4517 	hrtime_t avg_ns = 0;
4518 
4519 	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4520 		return (0);
4521 
4522 	avg_ns =
4523 	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4524 	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4525 	    NSTARTD_FAILURE_TIMES;
4526 
4527 	return (avg_ns < STARTD_FAILURE_RATE_NS);
4528 }
4529 
4530 /*
4531  * returns string that must be free'd
4532  */
4533 
4534 static char
4535 *audit_boot_msg()
4536 {
4537 	char		*b, *p;
4538 	char		desc[] = "booted";
4539 	zoneid_t	zid = getzoneid();
4540 
4541 	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4542 	if (b == NULL)
4543 		return (b);
4544 
4545 	p = b;
4546 	p += strlcpy(p, desc, sizeof (desc));
4547 	if (zid != GLOBAL_ZONEID) {
4548 		p += strlcpy(p, ": ", 3);
4549 		(void) getzonenamebyid(zid, p, MAXNAMELEN);
4550 	}
4551 	return (b);
4552 }
4553 
4554 /*
4555  * Generate AUE_init_solaris audit record.  Return 1 if
4556  * auditing is enabled in case the caller cares.
4557  *
4558  * In the case of userint() or a local zone invocation of
4559  * one_true_init, the process initially contains the audit
4560  * characteristics of the process that invoked init.  The first pass
4561  * through here uses those characteristics then for the case of
4562  * one_true_init in a local zone, clears them so subsequent system
4563  * state changes won't be attributed to the person who booted the
4564  * zone.
4565  */
4566 static int
4567 audit_put_record(int pass_fail, int status, char *msg)
4568 {
4569 	adt_session_data_t	*ah;
4570 	adt_event_data_t	*event;
4571 
4572 	if (!adt_audit_enabled())
4573 		return (0);
4574 
4575 	/*
4576 	 * the PROC_DATA picks up the context to tell whether this is
4577 	 * an attributed record (auid = -2 is unattributed)
4578 	 */
4579 	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4580 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4581 		return (1);
4582 	}
4583 	event = adt_alloc_event(ah, ADT_init_solaris);
4584 	if (event == NULL) {
4585 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4586 		(void) adt_end_session(ah);
4587 		return (1);
4588 	}
4589 	event->adt_init_solaris.info = msg;	/* NULL is ok here */
4590 
4591 	if (adt_put_event(event, pass_fail, status)) {
4592 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4593 		(void) adt_end_session(ah);
4594 		return (1);
4595 	}
4596 	adt_free_event(event);
4597 
4598 	(void) adt_end_session(ah);
4599 
4600 	return (1);
4601 }
4602