xref: /illumos-gate/usr/src/cmd/init/init.c (revision 02ac56e010f18fc0c5aafe47377586d8ba8c897c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
24  * Copyright 2020 Oxide Computer Company
25  * Copyright (c) 2013 Gary Mills
26  *
27  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
28  */
29 
30 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
31 /*	  All Rights Reserved	*/
32 
33 /*
34  * University Copyright- Copyright (c) 1982, 1986, 1988
35  * The Regents of the University of California
36  * All Rights Reserved
37  *
38  * University Acknowledgment- Portions of this document are derived from
39  * software developed by the University of California, Berkeley, and its
40  * contributors.
41  */
42 
43 /*
44  * init(8) is the general process spawning program.  Its primary job is to
45  * start and restart svc.startd for smf(7).  For backwards-compatibility it also
46  * spawns and respawns processes according to /etc/inittab and the current
47  * run-level.  It reads /etc/default/inittab for general configuration.
48  *
49  * To change run-levels the system administrator runs init from the command
50  * line with a level name.  init signals svc.startd via libscf and directs the
51  * zone's init (pid 1 in the global zone) what to do by sending it a signal;
52  * these signal numbers are commonly referred to in the code as 'states'.  Valid
53  * run-levels are [sS0123456].  Additionally, init can be given directives
54  * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
55  *
56  * When init processes inittab entries, it finds processes that are to be
57  * spawned at various run-levels.  inittab contains the set of the levels for
58  * which each inittab entry is valid.
59  *
60  * State File and Restartability
61  *   Premature exit by init(8) is handled as a special case by the kernel:
62  *   init(8) will be immediately re-executed, retaining its original PID.  (PID
63  *   1 in the global zone.)  To track the processes it has previously spawned,
64  *   as well as other mutable state, init(8) regularly updates a state file
65  *   such that its subsequent invocations have knowledge of its various
66  *   dependent processes and duties.
67  *
68  * Process Contracts
69  *   We start svc.startd(8) in a contract and transfer inherited contracts when
70  *   restarting it.  Everything else is started using the legacy contract
71  *   template, and the created contracts are abandoned when they become empty.
72  *
73  * utmpx Entry Handling
74  *   Because init(8) no longer governs the startup process, its knowledge of
75  *   when utmpx becomes writable is indirect.  However, spawned processes
76  *   expect to be constructed with valid utmpx entries.  As a result, attempts
77  *   to write normal entries will be retried until successful.
78  *
79  * Maintenance Mode
80  *   In certain failure scenarios, init(8) will enter a maintenance mode, in
81  *   which it invokes sulogin(8) to allow the operator an opportunity to
82  *   repair the system.  Normally, this operation is performed as a
83  *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
84  *   diagnosis to be completed.  In the cases that fork(2) requests themselves
85  *   fail, init(8) will directly execute sulogin(8), and allow the kernel to
86  *   restart init(8) on exit from the operator session.
87  *
88  *   One scenario where init(8) enters its maintenance mode is when
89  *   svc.startd(8) begins to fail rapidly, defined as when the average time
90  *   between recent failures drops below a given threshold.
91  */
92 
93 #include <sys/contract/process.h>
94 #include <sys/ctfs.h>
95 #include <sys/stat.h>
96 #include <sys/statvfs.h>
97 #include <sys/stropts.h>
98 #include <sys/systeminfo.h>
99 #include <sys/time.h>
100 #include <sys/termios.h>
101 #include <sys/tty.h>
102 #include <sys/types.h>
103 #include <sys/utsname.h>
104 #include <sys/bootbanner.h>
105 
106 #include <bsm/adt_event.h>
107 #include <bsm/libbsm.h>
108 #include <security/pam_appl.h>
109 
110 #include <assert.h>
111 #include <ctype.h>
112 #include <definit.h>
113 #include <dirent.h>
114 #include <errno.h>
115 #include <fcntl.h>
116 #include <libcontract.h>
117 #include <libcontract_priv.h>
118 #include <libintl.h>
119 #include <libscf.h>
120 #include <libscf_priv.h>
121 #include <poll.h>
122 #include <procfs.h>
123 #include <signal.h>
124 #include <stdarg.h>
125 #include <stdio.h>
126 #include <stdio_ext.h>
127 #include <stdlib.h>
128 #include <string.h>
129 #include <strings.h>
130 #include <syslog.h>
131 #include <time.h>
132 #include <ulimit.h>
133 #include <unistd.h>
134 #include <utmpx.h>
135 #include <wait.h>
136 #include <zone.h>
137 #include <ucontext.h>
138 
139 #undef	sleep
140 
141 #define	fioctl(p, sptr, cmd)	ioctl(fileno(p), sptr, cmd)
142 #define	min(a, b)		(((a) < (b)) ? (a) : (b))
143 
144 #define	TRUE	1
145 #define	FALSE	0
146 #define	FAILURE	-1
147 
148 #define	UT_USER_SZ	32	/* Size of a utmpx ut_user field */
149 #define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */
150 
151 /*
152  * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
153  *		nothing else requires this "init" wakeup.
154  */
155 #define	SLEEPTIME	(5 * 60)
156 
157 /*
158  * MAXCMDL	The maximum length of a command string in inittab.
159  */
160 #define	MAXCMDL	512
161 
162 /*
163  * EXEC		The length of the prefix string added to all commands
164  *		found in inittab.
165  */
166 #define	EXEC	(sizeof ("exec ") - 1)
167 
168 /*
169  * TWARN	The amount of time between warning signal, SIGTERM,
170  *		and the fatal kill signal, SIGKILL.
171  */
172 #define	TWARN	5
173 
/*
 * id_eq(x, y)	Compare two inittab entry ids.  Only the first four
 *		characters of each id are examined; the result is TRUE when
 *		all four match and FALSE otherwise.  The arguments are fully
 *		parenthesized so that arbitrary pointer expressions may be
 *		passed, but each is evaluated several times and must therefore
 *		be free of side effects.
 */
#define	id_eq(x, y)	(((x)[0] == (y)[0] && (x)[1] == (y)[1] && \
			(x)[2] == (y)[2] && (x)[3] == (y)[3]) ? TRUE : FALSE)
176 
177 /*
178  * The kernel's default umask is 022 these days; since some processes inherit
179  * their umask from init, init will set it from CMASK in /etc/default/init.
180  * init gets the default umask from the kernel; it sets it to 022 whenever
181  * it wants to create a file and reverts to CMASK afterwards.
182  */
183 
184 static int cmask;
185 
186 /*
187  * The following definitions, concluding with the 'lvls' array, provide a
188  * common mapping between level-name (like 'S'), signal number (state),
189  * run-level mask, and specific properties associated with a run-level.
190  * This array should be accessed using the routines lvlname_to_state(),
191  * lvlname_to_mask(), state_to_mask(), and state_to_flags().
192  */
193 
194 /*
195  * Correspondence of signals to init actions.
196  */
197 #define	LVLQ		SIGHUP
198 #define	LVL0		SIGINT
199 #define	LVL1		SIGQUIT
200 #define	LVL2		SIGILL
201 #define	LVL3		SIGTRAP
202 #define	LVL4		SIGIOT
203 #define	LVL5		SIGEMT
204 #define	LVL6		SIGFPE
205 #define	SINGLE_USER	SIGBUS
206 #define	LVLa		SIGSEGV
207 #define	LVLb		SIGSYS
208 #define	LVLc		SIGPIPE
209 
210 /*
211  * Bit Mask for each level.  Used to determine legal levels.
212  */
213 #define	MASK0	0x0001
214 #define	MASK1	0x0002
215 #define	MASK2	0x0004
216 #define	MASK3	0x0008
217 #define	MASK4	0x0010
218 #define	MASK5	0x0020
219 #define	MASK6	0x0040
220 #define	MASKSU	0x0080
221 #define	MASKa	0x0100
222 #define	MASKb	0x0200
223 #define	MASKc	0x0400
224 
225 #define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
226 #define	MASK_abc (MASKa | MASKb | MASKc)
227 
228 /*
229  * Flags to indicate properties of various states.
230  */
231 #define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */
232 
233 typedef struct lvl {		/* One row of the level mapping table */
234 	int	lvl_state;	/* Signal number ("state"), e.g. LVL2 */
235 	int	lvl_mask;	/* Run-level mask bit (MASK*), or 0 */
236 	char	lvl_name;	/* Level-name character, e.g. 'S' */
237 	int	lvl_flags;	/* Property flags for the state (LSEL_*) */
238 } lvl_t;
239 
240 static lvl_t lvls[] = {
241 	{ LVLQ,		0,	'Q', 0					},
242 	{ LVLQ,		0,	'q', 0					},
243 	{ LVL0,		MASK0,	'0', LSEL_RUNLEVEL			},
244 	{ LVL1,		MASK1,	'1', LSEL_RUNLEVEL			},
245 	{ LVL2,		MASK2,	'2', LSEL_RUNLEVEL			},
246 	{ LVL3,		MASK3,	'3', LSEL_RUNLEVEL			},
247 	{ LVL4,		MASK4,	'4', LSEL_RUNLEVEL			},
248 	{ LVL5,		MASK5,	'5', LSEL_RUNLEVEL			},
249 	{ LVL6,		MASK6,	'6', LSEL_RUNLEVEL			},
250 	{ SINGLE_USER,	MASKSU, 'S', LSEL_RUNLEVEL			},
251 	{ SINGLE_USER,	MASKSU, 's', LSEL_RUNLEVEL			},
252 	{ LVLa,		MASKa,	'a', 0					},
253 	{ LVLb,		MASKb,	'b', 0					},
254 	{ LVLc,		MASKc,	'c', 0					}
255 };
256 
257 #define	LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
258 
259 /*
260  * Legal action field values.
261  */
262 #define	OFF		0	/* Kill process if on, else ignore */
263 #define	RESPAWN		1	/* Continuously restart process when it dies */
264 #define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
265 #define	ONCE		2	/* Start process, do not respawn when dead */
266 #define	WAIT		3	/* Perform once and wait to complete */
267 #define	BOOT		4	/* Start at boot time only */
268 #define	BOOTWAIT	5	/* Start at boot time and wait to complete */
269 #define	POWERFAIL	6	/* Start on powerfail */
270 #define	POWERWAIT	7	/* Start and wait for complete on powerfail */
271 #define	INITDEFAULT	8	/* Default level "init" should start at */
272 #define	SYSINIT		9	/* Actions performed before init speaks */
273 
274 #define	M_OFF		0001
275 #define	M_RESPAWN	0002
276 #define	M_ONDEMAND	M_RESPAWN
277 #define	M_ONCE		0004
278 #define	M_WAIT		0010
279 #define	M_BOOT		0020
280 #define	M_BOOTWAIT	0040
281 #define	M_PF		0100
282 #define	M_PWAIT		0200
283 #define	M_INITDEFAULT	0400
284 #define	M_SYSINIT	01000
285 
286 /* States for the inittab parser in getcmd(). */
287 #define	ID	1
288 #define	LEVELS	2
289 #define	ACTION	3
290 #define	COMMAND	4
291 #define	COMMENT	5
292 
293 /*
294  * inittab entry id constants
295  */
296 #define	INITTAB_ENTRY_ID_SIZE 4
297 #define	INITTAB_ENTRY_ID_STR_FORMAT "%.4s"	/* if INITTAB_ENTRY_ID_SIZE */
298 						/* changes, this should */
299 						/* change accordingly */
300 
301 /*
302  * Init can be in any of three main states, "normal" mode where it is
303  * processing entries for the lines file in a normal fashion, "boot" mode,
304  * where it is only interested in the boot actions, and "powerfail" mode,
305  * where it is only interested in powerfail related actions. The following
306  * masks declare the legal actions for each mode.
307  */
308 #define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
309 #define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
310 #define	PF_MODES	(M_PF | M_PWAIT)
311 
312 struct PROC_TABLE {
313 	char	p_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
314 						/* process */
315 	pid_t	p_pid;		/* Process id */
316 	short	p_count;	/* How many respawns of this command in */
317 				/*   the current series */
318 	long	p_time;		/* Start time for a series of respawns */
319 	short	p_flags;
320 	short	p_exit;		/* Exit status of a process which died */
321 };
322 
323 /*
324  * Flags for the "p_flags" word of a PROC_TABLE entry:
325  *
326  *	OCCUPIED	This slot in init's proc table is in use.
327  *
328  *	LIVING		Process is alive.
329  *
330  *	NOCLEANUP	efork() is not allowed to cleanup this entry even
331  *			if process is dead.
332  *
333  *	NAMED		This process has a name, i.e. came from inittab.
334  *
335  *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
336  *			formed this way are respawnable and immune to level
337  *			changes as long as their entry exists in inittab.
338  *
339  *	TOUCHED		Flag used by remv() to determine whether it has looked
340  *			at an entry while checking for processes to be killed.
341  *
342  *	WARNED		Flag used by remv() to mark processes that have been
343  *			sent the SIGTERM signal.  If they don't die in 5
344  *			seconds, they are sent the SIGKILL signal.
345  *
346  *	KILLED		Flag used by remv() to mark procs that have been sent
347  *			the SIGTERM and SIGKILL signals.
348  *
349  *	PF_MASK		Bitwise or of legal flags, for sanity checking.
350  */
351 #define	OCCUPIED	01
352 #define	LIVING		02
353 #define	NOCLEANUP	04
354 #define	NAMED		010
355 #define	DEMANDREQUEST	020
356 #define	TOUCHED		040
357 #define	WARNED		0100
358 #define	KILLED		0200
359 #define	PF_MASK		0377
360 
361 /*
362  * Respawn limits for processes that are to be respawned:
363  *
364  *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
365  *			respawn a process SPAWN_LIMIT times before it gets mad.
366  *
367  *	SPAWN_LIMIT	The number of respawns "init" will attempt in
368  *			SPAWN_INTERVAL seconds before it generates an
369  *			error message and inhibits further tries for
370  *			INHIBIT seconds.
371  *
372  *	INHIBIT		The number of seconds "init" ignores an entry it had
373  *			trouble spawning unless a "telinit Q" is received.
374  */
375 
376 #define	SPAWN_INTERVAL	(2*60)
377 #define	SPAWN_LIMIT	10
378 #define	INHIBIT		(5*60)
379 
380 /*
381  * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
382  */
383 #define	ID_MAX_STR_LEN	10
384 
385 #define	NULLPROC	((struct PROC_TABLE *)(0))
386 #define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
387 
388 struct CMD_LINE {
389 	char c_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
390 						/* process to be affected by */
391 						/* action */
392 	short c_levels;	/* Mask of legal levels for process */
393 	short c_action;	/* Mask for type of action required */
394 	char *c_command; /* Pointer to init command */
395 };
396 
397 struct	pidrec {
398 	int	pd_type;	/* Command type */
399 	pid_t	pd_pid;		/* pid to add or remove */
400 };
401 
402 /*
403  * pd_type's
404  */
405 #define	ADDPID	1
406 #define	REMPID	2
407 
408 static struct	pidlist {
409 	pid_t	pl_pid;		/* pid to watch for */
410 	int	pl_dflag;	/* Flag indicating SIGCLD from this pid */
411 	short	pl_exit;	/* Exit status of proc */
412 	struct	pidlist	*pl_next; /* Next in list */
413 } *Plhead, *Plfree;
414 
415 /*
416  * The following structure contains a set of modes for /dev/syscon
417  * and should match the default contents of /etc/ioctl.syscon.
418  */
419 static struct termios	dflt_termios = {
420 	.c_iflag = BRKINT|ICRNL|IXON|IMAXBEL,
421 	.c_oflag = OPOST|ONLCR|TAB3,
422 	.c_cflag = CS8|CREAD|B9600,
423 	.c_lflag = ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN,
424 	.c_cc = { CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
425 	    CSTART, CSTOP, CSWTCH, CDSUSP, CRPRNT, CFLUSH, CWERASE, CLNEXT,
426 	    CSTATUS, CERASE2, 0
427 	}
428 };
429 
430 static struct termios	stored_syscon_termios;
431 static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */
432 
433 static union WAKEUP {
434 	struct WAKEFLAGS {
435 		unsigned w_usersignal : 1;	/* User sent signal to "init" */
436 		unsigned w_childdeath : 1;	/* An "init" child died */
437 		unsigned w_powerhit : 1;	/* OS experienced powerfail */
438 	}	w_flags;
439 	int w_mask;
440 } wakeup;
441 
442 
443 struct init_state {		/* Mutable state reached through g_state */
444 	int			ist_runlevel;	/* accessed as cur_state */
445 	int			ist_num_proc;	/* accessed as num_proc */
446 	int			ist_utmpx_ok;	/* accessed as utmpx_ok */
447 	struct PROC_TABLE	ist_proc_table[1]; /* accessed as proc_table; presumably num_proc slots (verify) */
448 };
449 
450 #define	cur_state	(g_state->ist_runlevel)
451 #define	num_proc	(g_state->ist_num_proc)
452 #define	proc_table	(g_state->ist_proc_table)
453 #define	utmpx_ok	(g_state->ist_utmpx_ok)
454 
455 /* Contract cookies. */
456 #define	ORDINARY_COOKIE		0
457 #define	STARTD_COOKIE		1
458 
459 
/*
 * bad_error(func, err) is used when the function named by the string "func"
 * has failed with an error code "err" that the caller does not know how to
 * handle.  In debug builds it prints the source location, the function name
 * and the error code to stderr and then aborts; when NDEBUG is defined it
 * aborts without a message.
 *
 * The debug variant is wrapped in do { } while (0) so that the macro expands
 * to exactly one statement and can safely be used as the body of an unbraced
 * if/else.  Callers must terminate the invocation with a semicolon.
 */
#ifndef NDEBUG
#define	bad_error(func, err)	do {					\
	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
	abort();							\
} while (0)
#else
#define	bad_error(func, err)	abort()
#endif
469 
470 
471 /*
472  * Useful file and device names.
473  */
474 static char *CONSOLE	  = "/dev/console";	/* Real system console */
475 static char *INITPIPE_DIR = "/var/run";
476 static char *INITPIPE	  = "/var/run/initpipe";
477 
478 #define	INIT_STATE_DIR "/etc/svc/volatile"
479 static const char * const init_state_file = INIT_STATE_DIR "/init.state";
480 static const char * const init_next_state_file =
481 	INIT_STATE_DIR "/init-next.state";
482 
483 static const int init_num_proc = 20;	/* Initial size of process table. */
484 
485 static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
486 static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
487 static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
488 static char *SYSTTY	 = "/dev/systty";	/* System Console */
489 static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
490 static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
491 static char *ENVFILE	 = DEFINIT_DEFAULT_FILE; /* Default env. */
492 static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
493 static char *SH	= "/sbin/sh";		/* Standard shell */
494 
495 /*
496  * Default Path.  /sbin is included in path only during sysinit phase
497  */
498 #define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
499 #define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"
500 
501 static int	prior_state;
502 static int	prev_state;	/* State "init" was in last time it woke */
503 static int	new_state;	/* State user wants "init" to go to. */
504 static int	lvlq_received;	/* Explicit request to examine state */
505 static int	op_modes = BOOT_MODES; /* Current state of "init" */
506 static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
507 				/*   childeath() and cleared in cleanaux() */
508 static int	Pfd = -1;	/* fd to receive pids thru */
509 static unsigned int	spawncnt, pausecnt;
510 static int	rsflag;		/* Set if a respawn has taken place */
511 static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
512 				/* routine each time an alarm interrupt */
513 				/* takes place. */
514 static int	sflg = 0;	/* Set if we were booted -s to single user */
515 static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
516 static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
517 static pid_t	init_pid;	/* PID of "one true" init for current zone */
518 
519 static struct init_state *g_state = NULL;
520 static size_t	g_state_sz;
521 static int	booting = 1;	/* Set while we're booting. */
522 
523 /*
524  * Array for default global environment.
525  */
526 #define	MAXENVENT	24	/* Max number of default env variables + 1 */
527 				/* init can use three itself, so this leaves */
528 				/* 20 for the administrator in ENVFILE. */
529 static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
530 static int	glob_envn;		/* Number of environment strings */
531 
532 
533 static struct pollfd	poll_fds[1];
534 static int		poll_nfds = 0;	/* poll_fds is uninitialized */
535 
536 /*
537  * Contracts constants
538  */
539 #define	SVC_INIT_PREFIX "init:/"
540 #define	SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
541 #define	SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)
542 
543 static int	legacy_tmpl = -1;	/* fd for legacy contract template */
544 static int	startd_tmpl = -1;	/* fd for svc.startd's template */
545 static char	startd_svc_aux[SVC_AUX_SIZE];
546 
547 static char	startd_cline[256] = "";	/* svc.startd's command line */
548 static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
549 static char	*smf_options = NULL;	/* Options to give to startd. */
550 static int	smf_debug = 0;		/* Messages for debugging smf(7) */
551 static time_t	init_boot_time;		/* Substitute for kernel boot time. */
552 
553 #define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
554 #define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */
555 
556 static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
557 static uint_t	startd_failure_index;
558 
559 
560 static char	*prog_name(char *);
561 static int	state_to_mask(int);
562 static int	lvlname_to_mask(char, int *);
563 static void	lscf_set_runlevel(char);
564 static int	state_to_flags(int);
565 static char	state_to_name(int);
566 static int	lvlname_to_state(char);
567 static int	getcmd(struct CMD_LINE *, char *);
568 static int	realcon();
569 static int	spawn_processes();
570 static int	get_ioctl_syscon();
571 static int	account(short, struct PROC_TABLE *, char *);
572 static void	alarmclk();
573 static void	childeath(int);
574 static void	cleanaux();
575 static void	clearent(pid_t, short);
576 static void	console(boolean_t, char *, ...);
577 static void	init_signals(void);
578 static void	setup_pipe();
579 static void	killproc(pid_t);
580 static void	init_env();
581 static void	boot_init();
582 static void	powerfail();
583 static void	remv();
584 static void	write_ioctl_syscon();
585 static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
586 static void	setimer(int);
587 static void	siglvl(int, siginfo_t *, void *);
588 static void	sigpoll(int);
589 static void	enter_maintenance(void);
590 static void	timer(int);
591 static void	userinit(int, char **);
592 static void	notify_pam_dead(struct utmpx *);
593 static long	waitproc(struct PROC_TABLE *);
594 static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
595 static struct PROC_TABLE *findpslot(struct CMD_LINE *);
596 static void	increase_proc_table_size();
597 static void	st_init();
598 static void	st_write();
599 static void	contracts_init();
600 static void	contract_event(struct pollfd *);
601 static int	startd_run(const char *, int, ctid_t);
602 static void	startd_record_failure();
603 static int	startd_failure_rate_critical();
604 static char	*audit_boot_msg();
605 static int	audit_put_record(int, int, char *);
606 static void	update_boot_archive(int new_state);
607 static void	init_bootbanner_print(const char *, uint_t);
608 
609 int
610 main(int argc, char *argv[])
611 {
612 	int	chg_lvl_flag = FALSE, print_banner = FALSE;
613 	int	may_need_audit = 1;
614 	int	c;
615 	char	*msg;
616 
617 	/* Get a timestamp for use as boot time, if needed. */
618 	(void) time(&init_boot_time);
619 
620 	/* Get the default umask */
621 	cmask = umask(022);
622 	(void) umask(cmask);
623 
624 	/* Parse the arguments to init. Check for single user */
625 	opterr = 0;
626 	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
627 		switch (c) {
628 		case 'b':
629 			rflg = 0;
630 			bflg = 1;
631 			if (!sflg)
632 				sflg++;
633 			break;
634 		case 'r':
635 			bflg = 0;
636 			rflg++;
637 			break;
638 		case 's':
639 			if (!bflg)
640 				sflg++;
641 			break;
642 		case 'm':
643 			smf_options = optarg;
644 			smf_debug = (strstr(smf_options, "debug") != NULL);
645 			break;
646 		}
647 	}
648 
649 	/*
650 	 * Determine if we are the main init, or a user invoked init, whose job
651 	 * it is to inform init to change levels or perform some other action.
652 	 */
653 	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
654 	    sizeof (init_pid)) != sizeof (init_pid)) {
655 		(void) fprintf(stderr, "could not get pid for init\n");
656 		return (1);
657 	}
658 
659 	/*
660 	 * If this PID is not the same as the "true" init for the zone, then we
661 	 * must be in 'user' mode.
662 	 */
663 	if (getpid() != init_pid) {
664 		userinit(argc, argv);
665 	}
666 
667 	if (getzoneid() != GLOBAL_ZONEID) {
668 		print_banner = TRUE;
669 	}
670 
671 	/*
672 	 * Initialize state (and set "booting").
673 	 */
674 	st_init();
675 
676 	if (booting && print_banner) {
677 		/*
678 		 * We want to print the boot banner as soon as
679 		 * possible.  In the global zone, the kernel does it,
680 		 * but we do not have that luxury in non-global zones,
681 		 * so we will print it here.
682 		 */
683 #ifdef	LEGACY_BANNER
684 		struct utsname un;
685 		char buf[BUFSIZ];
686 		const char *bits;
687 		int r;
688 
689 		(void) uname(&un);
690 		if ((r = sysinfo(SI_ADDRESS_WIDTH, buf, sizeof (buf))) > 0 &&
691 		    r < sizeof (buf)) {
692 			bits = buf;
693 		} else {
694 			bits = "64";
695 		}
696 
697 		console(B_FALSE,
698 		    "\n\n%s Release %s Version %s %s-bit\r\n",
699 		    un.sysname, un.release, un.version, bits);
700 		console(B_FALSE,
701 		    "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
702 		    " All rights reserved.\r\n");
703 #else
704 		bootbanner_print(init_bootbanner_print);
705 #endif
706 	}
707 
708 	/*
709 	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
710 	 * so that it can be brought up in the state it was in when the
711 	 * system went down; or set to defaults if ioctl.syscon isn't
712 	 * valid.
713 	 *
714 	 * This needs to be done even if we're restarting so reset_modes()
715 	 * will work in case we need to go down to single user mode.
716 	 */
717 	write_ioctl = get_ioctl_syscon();
718 
719 	/*
720 	 * Set up all signals to be caught or ignored as appropriate.
721 	 */
722 	init_signals();
723 
724 	/* Load glob_envp from ENVFILE. */
725 	init_env();
726 
727 	contracts_init();
728 
729 	if (!booting) {
730 		/* cur_state should have been read in. */
731 
732 		op_modes = NORMAL_MODES;
733 
734 		/* Rewrite the ioctl file if it was bad. */
735 		if (write_ioctl)
736 			write_ioctl_syscon();
737 	} else {
738 		/*
739 		 * It's fine to boot up with state as zero, because
740 		 * startd will later tell us the real state.
741 		 */
742 		cur_state = 0;
743 		op_modes = BOOT_MODES;
744 
745 		boot_init();
746 	}
747 
748 	prev_state = prior_state = cur_state;
749 
750 	setup_pipe();
751 
752 	/*
753 	 * Here is the beginning of the main process loop.
754 	 */
755 	for (;;) {
756 		if (lvlq_received) {
757 			setup_pipe();
758 			lvlq_received = B_FALSE;
759 		}
760 
761 		/*
762 		 * Clean up any accounting records for dead "godchildren".
763 		 */
764 		if (Gchild)
765 			cleanaux();
766 
767 		/*
768 		 * If in "normal" mode, check all living processes and initiate
769 		 * kill sequence on those that should not be there anymore.
770 		 */
771 		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
772 		    cur_state != LVLb && cur_state != LVLc)
773 			remv();
774 
775 		/*
776 		 * If a change in run levels is the reason we awoke, now do
777 		 * the accounting to report the change in the utmp file.
778 		 * Also report the change on the system console.
779 		 */
780 		if (chg_lvl_flag) {
781 			chg_lvl_flag = FALSE;
782 
783 			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
784 				char rl = state_to_name(cur_state);
785 
786 				if (rl != -1)
787 					lscf_set_runlevel(rl);
788 			}
789 
790 			may_need_audit = 1;
791 		}
792 
793 		/*
794 		 * Scan the inittab file and spawn and respawn processes that
795 		 * should be alive in the current state. If inittab does not
796 		 * exist default to  single user mode.
797 		 */
798 		if (spawn_processes() == FAILURE) {
799 			prior_state = prev_state;
800 			cur_state = SINGLE_USER;
801 		}
802 
803 		/* If any respawns occurred, take note. */
804 		if (rsflag) {
805 			rsflag = 0;
806 			spawncnt++;
807 		}
808 
809 		/*
810 		 * If a powerfail signal was received during the last
811 		 * sequence, set mode to powerfail.  When spawn_processes() is
812 		 * entered the first thing it does is to check "powerhit".  If
813 		 * it is in PF_MODES then it clears "powerhit" and does
814 		 * a powerfail sequence.  If it is not in PF_MODES, then it
815 		 * puts itself in PF_MODES and then clears "powerhit".  Should
816 		 * "powerhit" get set again while spawn_processes() is working
817 		 * on a powerfail sequence, the following code  will see that
818 		 * spawn_processes() tries to execute the powerfail sequence
819 		 * again.  This guarantees that the powerfail sequence will be
820 		 * successfully completed before further processing takes
821 		 * place.
822 		 */
823 		if (wakeup.w_flags.w_powerhit) {
824 			op_modes = PF_MODES;
825 			/*
826 			 * Make sure that cur_state != prev_state so that
827 			 * ONCE and WAIT types work.
828 			 */
829 			prev_state = 0;
830 		} else if (op_modes != NORMAL_MODES) {
831 			/*
832 			 * If spawn_processes() was not just called while in
833 			 * normal mode, we set the mode to normal and it will
834 			 * be called again to check normal modes.  If we have
835 			 * just finished a powerfail sequence with prev_state
836 			 * equal to zero, we set prev_state equal to cur_state
837 			 * before the next pass through.
838 			 */
839 			if (op_modes == PF_MODES)
840 				prev_state = cur_state;
841 			op_modes = NORMAL_MODES;
842 		} else if (cur_state == LVLa || cur_state == LVLb ||
843 		    cur_state == LVLc) {
844 			/*
845 			 * If it was a change of levels that awakened us and the
846 			 * new level is one of the demand levels then reset
847 			 * cur_state to the previous state and do another scan
848 			 * to take care of the usual respawn actions.
849 			 */
850 			cur_state = prior_state;
851 			prior_state = prev_state;
852 			prev_state = cur_state;
853 		} else {
854 			prev_state = cur_state;
855 
856 			if (wakeup.w_mask == 0) {
857 				int ret;
858 
859 				if (may_need_audit && (cur_state == LVL3)) {
860 					msg = audit_boot_msg();
861 
862 					may_need_audit = 0;
863 					(void) audit_put_record(ADT_SUCCESS,
864 					    ADT_SUCCESS, msg);
865 					free(msg);
866 				}
867 
868 				/*
869 				 * "init" is finished with all actions for
870 				 * the current wakeup.
871 				 */
872 				ret = poll(poll_fds, poll_nfds,
873 				    SLEEPTIME * MILLISEC);
874 				pausecnt++;
875 				if (ret > 0)
876 					contract_event(&poll_fds[0]);
877 				else if (ret < 0 && errno != EINTR)
878 					console(B_TRUE, "poll() error: %s\n",
879 					    strerror(errno));
880 			}
881 
882 			if (wakeup.w_flags.w_usersignal) {
883 				/*
884 				 * Install the new level.  This could be a real
885 				 * change in levels  or a telinit [Q|a|b|c] or
886 				 * just a telinit to the same level at which
887 				 * we are running.
888 				 */
889 				if (new_state != cur_state) {
890 					if (new_state == LVLa ||
891 					    new_state == LVLb ||
892 					    new_state == LVLc) {
893 						prev_state = prior_state;
894 						prior_state = cur_state;
895 						cur_state = new_state;
896 					} else {
897 						prev_state = cur_state;
898 						if (cur_state >= 0)
899 							prior_state = cur_state;
900 						cur_state = new_state;
901 						chg_lvl_flag = TRUE;
902 					}
903 				}
904 
905 				new_state = 0;
906 			}
907 
908 			if (wakeup.w_flags.w_powerhit)
909 				op_modes = PF_MODES;
910 
911 			/*
912 			 * Clear all wakeup reasons.
913 			 */
914 			wakeup.w_mask = 0;
915 		}
916 	}
917 
918 	/*NOTREACHED*/
919 }
920 
921 static void
922 init_bootbanner_print(const char *line, uint_t num)
923 {
924 	const char *pfx = (num == 0) ? "\n\n" : "";
925 
926 	console(B_FALSE, "%s%s\r\n", pfx, line);
927 }
928 
929 static void
930 update_boot_archive(int new_state)
931 {
932 	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
933 		return;
934 
935 	if (getzoneid() != GLOBAL_ZONEID)
936 		return;
937 
938 	(void) system("/sbin/bootadm -ea update_all");
939 }
940 
941 /*
942  * void enter_maintenance()
943  *   A simple invocation of sulogin(8), with no baggage, in the case that we
944  *   are unable to activate svc.startd(8).  We fork; the child runs sulogin;
945  *   we wait for it to exit.
946  */
947 static void
948 enter_maintenance()
949 {
950 	struct PROC_TABLE	*su_process;
951 
952 	console(B_FALSE, "Requesting maintenance mode\n"
953 	    "(See /lib/svc/share/README for additional information.)\n");
954 	(void) sighold(SIGCLD);
955 	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
956 		(void) pause();
957 	(void) sigrelse(SIGCLD);
958 	if (su_process == NULLPROC) {
959 		int fd;
960 
961 		(void) fclose(stdin);
962 		(void) fclose(stdout);
963 		(void) fclose(stderr);
964 		closefrom(0);
965 
966 		fd = open(SYSCON, O_RDWR | O_NOCTTY);
967 		if (fd >= 0) {
968 			(void) dup2(fd, 1);
969 			(void) dup2(fd, 2);
970 		} else {
971 			/*
972 			 * Need to issue an error message somewhere.
973 			 */
974 			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
975 			    getpid(), SYSCON, strerror(errno));
976 		}
977 
978 		/*
979 		 * Execute the "su" program.
980 		 */
981 		(void) execle(SU, SU, "-", (char *)0, glob_envp);
982 		console(B_TRUE, "execle of %s failed: %s\n", SU,
983 		    strerror(errno));
984 		timer(5);
985 		exit(1);
986 	}
987 
988 	/*
989 	 * If we are the parent, wait around for the child to die
990 	 * or for "init" to be signaled to change levels.
991 	 */
992 	while (waitproc(su_process) == FAILURE) {
993 		/*
994 		 * All other reasons for waking are ignored when in
995 		 * single-user mode.  The only child we are interested
996 		 * in is being waited for explicitly by waitproc().
997 		 */
998 		wakeup.w_mask = 0;
999 	}
1000 }
1001 
1002 /*
1003  * remv() scans through "proc_table" and performs cleanup.  If
1004  * there is a process in the table, which shouldn't be here at
1005  * the current run level, then remv() kills the process.
1006  */
1007 static void
1008 remv()
1009 {
1010 	struct PROC_TABLE	*process;
1011 	struct CMD_LINE		cmd;
1012 	char			cmd_string[MAXCMDL];
1013 	int			change_level;
1014 
1015 	change_level = (cur_state != prev_state ? TRUE : FALSE);
1016 
1017 	/*
1018 	 * Clear the TOUCHED flag on all entries so that when we have
1019 	 * finished scanning inittab, we will be able to tell if we
1020 	 * have any processes for which there is no entry in inittab.
1021 	 */
1022 	for (process = proc_table;
1023 	    (process < proc_table + num_proc); process++) {
1024 		process->p_flags &= ~TOUCHED;
1025 	}
1026 
1027 	/*
1028 	 * Scan all inittab entries.
1029 	 */
1030 	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
1031 		/* Scan for process which goes with this entry in inittab. */
1032 		for (process = proc_table;
1033 		    (process < proc_table + num_proc); process++) {
1034 			if ((process->p_flags & OCCUPIED) == 0 ||
1035 			    !id_eq(process->p_id, cmd.c_id))
1036 				continue;
1037 
1038 			/*
1039 			 * This slot contains the process we are looking for.
1040 			 */
1041 
1042 			/*
1043 			 * Is the cur_state SINGLE_USER or is this process
1044 			 * marked as "off" or was this proc started by some
1045 			 * mechanism other than LVL{a|b|c} and the current level
1046 			 * does not support this process?
1047 			 */
1048 			if (cur_state == SINGLE_USER ||
1049 			    cmd.c_action == M_OFF ||
1050 			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1051 			    (process->p_flags & DEMANDREQUEST) == 0)) {
1052 				if (process->p_flags & LIVING) {
1053 					/*
1054 					 * Touch this entry so we know we have
1055 					 * treated it.  Note that procs which
1056 					 * are already dead at this point and
1057 					 * should not be restarted are left
1058 					 * untouched.  This causes their slot to
1059 					 * be freed later after dead accounting
1060 					 * is done.
1061 					 */
1062 					process->p_flags |= TOUCHED;
1063 
1064 					if ((process->p_flags & KILLED) == 0) {
1065 						if (change_level) {
1066 							process->p_flags
1067 							    |= WARNED;
1068 							(void) kill(
1069 							    process->p_pid,
1070 							    SIGTERM);
1071 						} else {
1072 							/*
1073 							 * Fork a killing proc
1074 							 * so "init" can
1075 							 * continue without
1076 							 * having to pause for
1077 							 * TWARN seconds.
1078 							 */
1079 							killproc(
1080 							    process->p_pid);
1081 						}
1082 						process->p_flags |= KILLED;
1083 					}
1084 				}
1085 			} else {
1086 				/*
1087 				 * Process can exist at current level.  If it is
1088 				 * still alive or a DEMANDREQUEST we touch it so
1089 				 * it will be left alone.  Otherwise we leave it
1090 				 * untouched so it will be accounted for and
1091 				 * cleaned up later in remv().  Dead
1092 				 * DEMANDREQUESTs will be accounted but not
1093 				 * freed.
1094 				 */
1095 				if (process->p_flags &
1096 				    (LIVING|NOCLEANUP|DEMANDREQUEST))
1097 					process->p_flags |= TOUCHED;
1098 			}
1099 
1100 			break;
1101 		}
1102 	}
1103 
1104 	st_write();
1105 
1106 	/*
1107 	 * If this was a change of levels call, scan through the
1108 	 * process table for processes that were warned to die.  If any
1109 	 * are found that haven't left yet, sleep for TWARN seconds and
1110 	 * then send final terminations to any that haven't died yet.
1111 	 */
1112 	if (change_level) {
1113 
1114 		/*
1115 		 * Set the alarm for TWARN seconds on the assumption
1116 		 * that there will be some that need to be waited for.
1117 		 * This won't harm anything except we are guaranteed to
1118 		 * wakeup in TWARN seconds whether we need to or not.
1119 		 */
1120 		setimer(TWARN);
1121 
1122 		/*
1123 		 * Scan for processes which should be dying.  We hope they
1124 		 * will die without having to be sent a SIGKILL signal.
1125 		 */
1126 		for (process = proc_table;
1127 		    (process < proc_table + num_proc); process++) {
1128 			/*
1129 			 * If this process should die, hasn't yet, and the
1130 			 * TWARN time hasn't expired yet, wait for process
1131 			 * to die or for timer to expire.
1132 			 */
1133 			while (time_up == FALSE &&
1134 			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1135 			    (WARNED|LIVING|OCCUPIED))
1136 				(void) pause();
1137 
1138 			if (time_up == TRUE)
1139 				break;
1140 		}
1141 
1142 		/*
1143 		 * If we reached the end of the table without the timer
1144 		 * expiring, then there are no procs which will have to be
1145 		 * sent the SIGKILL signal.  If the timer has expired, then
1146 		 * it is necessary to scan the table again and send signals
1147 		 * to all processes which aren't going away nicely.
1148 		 */
1149 		if (time_up == TRUE) {
1150 			for (process = proc_table;
1151 			    (process < proc_table + num_proc); process++) {
1152 				if ((process->p_flags &
1153 				    (WARNED|LIVING|OCCUPIED)) ==
1154 				    (WARNED|LIVING|OCCUPIED))
1155 					(void) kill(process->p_pid, SIGKILL);
1156 			}
1157 		}
1158 		setimer(0);
1159 	}
1160 
1161 	/*
1162 	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
1163 	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1164 	 * by the above scanning), and haven't been sent kill signals, and
1165 	 * those entries marked not LIVING, NAMED.  The former procs are killed.
1166 	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
1167 	 */
1168 	for (process = proc_table;
1169 	    (process < proc_table + num_proc); process++) {
1170 		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1171 		    == (LIVING|NAMED|OCCUPIED)) {
1172 			killproc(process->p_pid);
1173 			process->p_flags |= KILLED;
1174 		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1175 		    (NAMED|OCCUPIED)) {
1176 			(void) account(DEAD_PROCESS, process, NULL);
1177 			/*
1178 			 * If this named proc hasn't been TOUCHED, then free the
1179 			 * space. It has either died of it's own accord, but
1180 			 * isn't respawnable or it was killed because it
1181 			 * shouldn't exist at this level.
1182 			 */
1183 			if ((process->p_flags & TOUCHED) == 0)
1184 				process->p_flags = 0;
1185 		}
1186 	}
1187 
1188 	st_write();
1189 }
1190 
1191 /*
1192  * Extract the svc.startd command line and whether to restart it from its
1193  * inittab entry.
1194  */
1195 /*ARGSUSED*/
1196 static void
1197 process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1198 {
1199 	size_t sz;
1200 
1201 	/* Save the command line. */
1202 	if (sflg || rflg) {
1203 		/* Also append -r or -s. */
1204 		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1205 		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
1206 		if (sflg)
1207 			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1208 		if (rflg)
1209 			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1210 	} else {
1211 		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1212 	}
1213 
1214 	if (sz >= sizeof (startd_cline)) {
1215 		console(B_TRUE,
1216 		    "svc.startd command line too long.  Ignoring.\n");
1217 		startd_cline[0] = '\0';
1218 		return;
1219 	}
1220 }
1221 
1222 /*
1223  * spawn_processes() scans inittab for entries which should be run at this
1224  * mode.  Processes which should be running but are not, are started.
1225  */
1226 static int
1227 spawn_processes()
1228 {
1229 	struct PROC_TABLE		*pp;
1230 	struct CMD_LINE			cmd;
1231 	char				cmd_string[MAXCMDL];
1232 	short				lvl_mask;
1233 	int				status;
1234 
1235 	/*
1236 	 * First check the "powerhit" flag.  If it is set, make sure the modes
1237 	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1238 	 * on the "powerhit" flag by disallowing a new powerfail interrupt
1239 	 * between the test of the powerhit flag and the clearing of it.
1240 	 */
1241 	if (wakeup.w_flags.w_powerhit) {
1242 		wakeup.w_flags.w_powerhit = 0;
1243 		op_modes = PF_MODES;
1244 	}
1245 	lvl_mask = state_to_mask(cur_state);
1246 
1247 	/*
1248 	 * Scan through all the entries in inittab.
1249 	 */
1250 	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1251 		if (id_eq(cmd.c_id, "smf")) {
1252 			process_startd_line(&cmd, cmd_string);
1253 			continue;
1254 		}
1255 
1256 retry_for_proc_slot:
1257 
1258 		/*
1259 		 * Find out if there is a process slot for this entry already.
1260 		 */
1261 		if ((pp = findpslot(&cmd)) == NULLPROC) {
1262 			/*
1263 			 * we've run out of proc table entries
1264 			 * increase proc_table.
1265 			 */
1266 			increase_proc_table_size();
1267 
1268 			/*
1269 			 * Retry now as we have an empty proc slot.
1270 			 * In case increase_proc_table_size() fails,
1271 			 * we will keep retrying.
1272 			 */
1273 			goto retry_for_proc_slot;
1274 		}
1275 
1276 		/*
1277 		 * If there is an entry, and it is marked as DEMANDREQUEST,
1278 		 * one of the levels a, b, or c is in its levels mask, and
1279 		 * the action field is ONDEMAND and ONDEMAND is a permissable
1280 		 * mode, and the process is dead, then respawn it.
1281 		 */
1282 		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1283 		    (cmd.c_levels & MASK_abc) &&
1284 		    (cmd.c_action & op_modes) == M_ONDEMAND) {
1285 			spawn(pp, &cmd);
1286 			continue;
1287 		}
1288 
1289 		/*
1290 		 * If the action is not an action we are interested in,
1291 		 * skip the entry.
1292 		 */
1293 		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1294 		    (cmd.c_levels & lvl_mask) == 0)
1295 			continue;
1296 
1297 		/*
1298 		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1299 		 * ONDEMAND) and the action field is either OFF or the action
1300 		 * field is ONCE or WAIT and the current level is the same as
1301 		 * the last level, then skip this entry.  ONCE and WAIT only
1302 		 * get run when the level changes.
1303 		 */
1304 		if (op_modes == NORMAL_MODES &&
1305 		    (cmd.c_action == M_OFF ||
1306 		    (cmd.c_action & (M_ONCE|M_WAIT)) &&
1307 		    cur_state == prev_state))
1308 			continue;
1309 
1310 		/*
1311 		 * At this point we are interested in performing the action for
1312 		 * this entry.  Actions fall into two categories, spinning off
1313 		 * a process and not waiting, and spinning off a process and
1314 		 * waiting for it to die.  If the action is ONCE, RESPAWN,
1315 		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1316 		 * to die, for all other actions we do wait.
1317 		 */
1318 		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1319 			spawn(pp, &cmd);
1320 
1321 		} else {
1322 			spawn(pp, &cmd);
1323 			while (waitproc(pp) == FAILURE)
1324 				;
1325 			(void) account(DEAD_PROCESS, pp, NULL);
1326 			pp->p_flags = 0;
1327 		}
1328 	}
1329 	return (status);
1330 }
1331 
1332 /*
1333  * spawn() spawns a shell, inserts the information about the process
1334  * process into the proc_table, and does the startup accounting.
1335  */
1336 static void
1337 spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1338 {
1339 	int		i;
1340 	int		modes, maxfiles;
1341 	time_t		now;
1342 	struct PROC_TABLE tmproc, *oprocess;
1343 
1344 	/*
1345 	 * The modes to be sent to efork() are 0 unless we are
1346 	 * spawning a LVLa, LVLb, or LVLc entry or we will be
1347 	 * waiting for the death of the child before continuing.
1348 	 */
1349 	modes = NAMED;
1350 	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1351 	    cur_state == LVLb || cur_state == LVLc)
1352 		modes |= DEMANDREQUEST;
1353 	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1354 		modes |= NOCLEANUP;
1355 
1356 	/*
1357 	 * If this is a respawnable process, check the threshold
1358 	 * information to avoid excessive respawns.
1359 	 */
1360 	if (cmd->c_action & M_RESPAWN) {
1361 		/*
1362 		 * Add NOCLEANUP to all respawnable commands so that the
1363 		 * information about the frequency of respawns isn't lost.
1364 		 */
1365 		modes |= NOCLEANUP;
1366 		(void) time(&now);
1367 
1368 		/*
1369 		 * If no time is assigned, then this is the first time
1370 		 * this command is being processed in this series.  Assign
1371 		 * the current time.
1372 		 */
1373 		if (process->p_time == 0L)
1374 			process->p_time = now;
1375 
1376 		if (process->p_count++ == SPAWN_LIMIT) {
1377 
1378 			if ((now - process->p_time) < SPAWN_INTERVAL) {
1379 				/*
1380 				 * Process is respawning too rapidly.  Print
1381 				 * message and refuse to respawn it for now.
1382 				 */
1383 				console(B_TRUE, "Command is respawning too "
1384 				    "rapidly. Check for possible errors.\n"
1385 				    "id:%4s \"%s\"\n",
1386 				    &cmd->c_id[0], &cmd->c_command[EXEC]);
1387 				return;
1388 			}
1389 			process->p_time = now;
1390 			process->p_count = 0;
1391 
1392 		} else if (process->p_count > SPAWN_LIMIT) {
1393 			/*
1394 			 * If process has been respawning too rapidly and
1395 			 * the inhibit time limit hasn't expired yet, we
1396 			 * refuse to respawn.
1397 			 */
1398 			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1399 				return;
1400 			process->p_time = now;
1401 			process->p_count = 0;
1402 		}
1403 		rsflag = TRUE;
1404 	}
1405 
1406 	/*
1407 	 * Spawn a child process to execute this command.
1408 	 */
1409 	(void) sighold(SIGCLD);
1410 	oprocess = process;
1411 	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1412 		(void) pause();
1413 
1414 	if (process == NULLPROC) {
1415 
1416 		/*
1417 		 * We are the child.  We must make sure we get a different
1418 		 * file pointer for our references to utmpx.  Otherwise our
1419 		 * seeks and reads will compete with those of the parent.
1420 		 */
1421 		endutxent();
1422 
1423 		/*
1424 		 * Perform the accounting for the beginning of a process.
1425 		 * Note that all processes are initially "INIT_PROCESS"es.
1426 		 */
1427 		tmproc.p_id[0] = cmd->c_id[0];
1428 		tmproc.p_id[1] = cmd->c_id[1];
1429 		tmproc.p_id[2] = cmd->c_id[2];
1430 		tmproc.p_id[3] = cmd->c_id[3];
1431 		tmproc.p_pid = getpid();
1432 		tmproc.p_exit = 0;
1433 		(void) account(INIT_PROCESS, &tmproc,
1434 		    prog_name(&cmd->c_command[EXEC]));
1435 		maxfiles = ulimit(UL_GDESLIM, 0);
1436 		for (i = 0; i < maxfiles; i++)
1437 			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
1438 
1439 		/*
1440 		 * Now exec a shell with the -c option and the command
1441 		 * from inittab.
1442 		 */
1443 		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1444 		    glob_envp);
1445 		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1446 		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
1447 
1448 		/*
1449 		 * Don't come back so quickly that "init" doesn't have a
1450 		 * chance to finish putting this child in "proc_table".
1451 		 */
1452 		timer(20);
1453 		exit(1);
1454 
1455 	}
1456 
1457 	/*
1458 	 * We are the parent.  Insert the necessary
1459 	 * information in the proc_table.
1460 	 */
1461 	process->p_id[0] = cmd->c_id[0];
1462 	process->p_id[1] = cmd->c_id[1];
1463 	process->p_id[2] = cmd->c_id[2];
1464 	process->p_id[3] = cmd->c_id[3];
1465 
1466 	st_write();
1467 
1468 	(void) sigrelse(SIGCLD);
1469 }
1470 
1471 /*
1472  * findpslot() finds the old slot in the process table for the
1473  * command with the same id, or it finds an empty slot.
1474  */
1475 static struct PROC_TABLE *
1476 findpslot(struct CMD_LINE *cmd)
1477 {
1478 	struct PROC_TABLE	*process;
1479 	struct PROC_TABLE	*empty = NULLPROC;
1480 
1481 	for (process = proc_table;
1482 	    (process < proc_table + num_proc); process++) {
1483 		if (process->p_flags & OCCUPIED &&
1484 		    id_eq(process->p_id, cmd->c_id))
1485 			break;
1486 
1487 		/*
1488 		 * If the entry is totally empty and "empty" is still 0,
1489 		 * remember where this hole is and make sure the slot is
1490 		 * zeroed out.
1491 		 */
1492 		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1493 			empty = process;
1494 			process->p_id[0] = '\0';
1495 			process->p_id[1] = '\0';
1496 			process->p_id[2] = '\0';
1497 			process->p_id[3] = '\0';
1498 			process->p_pid = 0;
1499 			process->p_time = 0L;
1500 			process->p_count = 0;
1501 			process->p_flags = 0;
1502 			process->p_exit = 0;
1503 		}
1504 	}
1505 
1506 	/*
1507 	 * If there is no entry for this slot, then there should be an
1508 	 * empty slot.  If there is no empty slot, then we've run out
1509 	 * of proc_table space.  If the latter is true, empty will be
1510 	 * NULL and the caller will have to complain.
1511 	 */
1512 	if (process == (proc_table + num_proc))
1513 		process = empty;
1514 
1515 	return (process);
1516 }
1517 
1518 /*
1519  * getcmd() parses lines from inittab.  Each time it finds a command line
1520  * it will return TRUE as well as fill the passed CMD_LINE structure and
1521  * the shell command string.  When the end of inittab is reached, FALSE
1522  * is returned inittab is automatically opened if it is not currently open
1523  * and is closed when the end of the file is reached.
1524  */
1525 static FILE *fp_inittab = NULL;
1526 
1527 static int
1528 getcmd(struct CMD_LINE *cmd, char *shcmd)
1529 {
1530 	char	*ptr;
1531 	int	c, lastc, state;
1532 	char	*ptr1;
1533 	int	answer, i, proceed;
1534 	struct	stat	sbuf;
1535 	static char *actions[] = {
1536 		"off", "respawn", "ondemand", "once", "wait", "boot",
1537 		"bootwait", "powerfail", "powerwait", "initdefault",
1538 		"sysinit",
1539 	};
1540 	static short act_masks[] = {
1541 		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1542 		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1543 	};
1544 	/*
1545 	 * Only these actions will be allowed for entries which
1546 	 * are specified for single-user mode.
1547 	 */
1548 	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1549 
1550 	if (fp_inittab == NULL) {
1551 		/*
1552 		 * Before attempting to open inittab we stat it to make
1553 		 * sure it currently exists and is not empty.  We try
1554 		 * several times because someone may have temporarily
1555 		 * unlinked or truncated the file.
1556 		 */
1557 		for (i = 0; i < 3; i++) {
1558 			if (stat(INITTAB, &sbuf) == -1) {
1559 				if (i == 2) {
1560 					console(B_TRUE,
1561 					    "Cannot stat %s, errno: %d\n",
1562 					    INITTAB, errno);
1563 					return (FAILURE);
1564 				} else {
1565 					timer(3);
1566 				}
1567 			} else if (sbuf.st_size < 10) {
1568 				if (i == 2) {
1569 					console(B_TRUE,
1570 					    "%s truncated or corrupted\n",
1571 					    INITTAB);
1572 					return (FAILURE);
1573 				} else {
1574 					timer(3);
1575 				}
1576 			} else {
1577 				break;
1578 			}
1579 		}
1580 
1581 		/*
1582 		 * If unable to open inittab, print error message and
1583 		 * return FAILURE to caller.
1584 		 */
1585 		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1586 			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1587 			    errno);
1588 			return (FAILURE);
1589 		}
1590 	}
1591 
1592 	/*
1593 	 * Keep getting commands from inittab until you find a
1594 	 * good one or run out of file.
1595 	 */
1596 	for (answer = FALSE; answer == FALSE; ) {
1597 		/*
1598 		 * Zero out the cmd itself before trying next line.
1599 		 */
1600 		bzero(cmd, sizeof (struct CMD_LINE));
1601 
1602 		/*
1603 		 * Read in lines of inittab, parsing at colons, until a line is
1604 		 * read in which doesn't end with a backslash.  Do not start if
1605 		 * the first character read is an EOF.  Note that this means
1606 		 * that lines which don't end in a newline are still processed,
1607 		 * since the "for" will terminate normally once started,
1608 		 * regardless of whether line terminates with a newline or EOF.
1609 		 */
1610 		state = FAILURE;
1611 		if ((c = fgetc(fp_inittab)) == EOF) {
1612 			answer = FALSE;
1613 			(void) fclose(fp_inittab);
1614 			fp_inittab = NULL;
1615 			break;
1616 		}
1617 
1618 		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1619 		    proceed && c != EOF;
1620 		    lastc = c, c = fgetc(fp_inittab)) {
1621 			/* If we're not in the FAILURE state and haven't */
1622 			/* yet reached the shell command field, process	 */
1623 			/* the line, otherwise just look for a real end	 */
1624 			/* of line.					 */
1625 			if (state != FAILURE && state != COMMAND) {
1626 			/*
1627 			 * Squeeze out spaces and tabs.
1628 			 */
1629 			if (c == ' ' || c == '\t')
1630 				continue;
1631 
1632 			/*
1633 			 * Ignore characters in a comment, except for the \n.
1634 			 */
1635 			if (state == COMMENT) {
1636 				if (c == '\n') {
1637 					lastc = ' ';
1638 					break;
1639 				} else {
1640 					continue;
1641 				}
1642 			}
1643 
1644 			/*
1645 			 * Detect comments (lines whose first non-whitespace
1646 			 * character is '#') by checking that we're at the
1647 			 * beginning of a line, have seen a '#', and haven't
1648 			 * yet accumulated any characters.
1649 			 */
1650 			if (state == ID && c == '#' && ptr == shcmd) {
1651 				state = COMMENT;
1652 				continue;
1653 			}
1654 
1655 			/*
1656 			 * If the character is a ':', then check the
1657 			 * previous field for correctness and advance
1658 			 * to the next field.
1659 			 */
1660 			if (c == ':') {
1661 				switch (state) {
1662 
1663 				case ID :
1664 				/*
1665 				 * Check to see that there are only
1666 				 * 1 to 4 characters for the id.
1667 				 */
1668 				if ((i = ptr - shcmd) < 1 || i > 4) {
1669 					state = FAILURE;
1670 				} else {
1671 					bcopy(shcmd, &cmd->c_id[0], i);
1672 					ptr = shcmd;
1673 					state = LEVELS;
1674 				}
1675 				break;
1676 
1677 				case LEVELS :
1678 				/*
1679 				 * Build a mask for all the levels for
1680 				 * which this command will be legal.
1681 				 */
1682 				for (cmd->c_levels = 0, ptr1 = shcmd;
1683 				    ptr1 < ptr; ptr1++) {
1684 					int mask;
1685 					if (lvlname_to_mask(*ptr1,
1686 					    &mask) == -1) {
1687 						state = FAILURE;
1688 						break;
1689 					}
1690 					cmd->c_levels |= mask;
1691 				}
1692 				if (state != FAILURE) {
1693 					state = ACTION;
1694 					ptr = shcmd;	/* Reset the buffer */
1695 				}
1696 				break;
1697 
1698 				case ACTION :
1699 				/*
1700 				 * Null terminate the string in shcmd buffer and
1701 				 * then try to match against legal actions.  If
1702 				 * the field is of length 0, then the default of
1703 				 * "RESPAWN" is used if the id is numeric,
1704 				 * otherwise the default is "OFF".
1705 				 */
1706 				if (ptr == shcmd) {
1707 					if (isdigit(cmd->c_id[0]) &&
1708 					    (cmd->c_id[1] == '\0' ||
1709 					    isdigit(cmd->c_id[1])) &&
1710 					    (cmd->c_id[2] == '\0' ||
1711 					    isdigit(cmd->c_id[2])) &&
1712 					    (cmd->c_id[3] == '\0' ||
1713 					    isdigit(cmd->c_id[3])))
1714 						cmd->c_action = M_RESPAWN;
1715 					else
1716 						cmd->c_action = M_OFF;
1717 				} else {
1718 					for (cmd->c_action = 0, i = 0,
1719 					    *ptr = '\0';
1720 					    i <
1721 					    sizeof (actions)/sizeof (char *);
1722 					    i++) {
1723 					if (strcmp(shcmd, actions[i]) == 0) {
1724 						if ((cmd->c_levels & MASKSU) &&
1725 						    !(act_masks[i] & su_acts))
1726 							cmd->c_action = 0;
1727 						else
1728 							cmd->c_action =
1729 							    act_masks[i];
1730 						break;
1731 					}
1732 					}
1733 				}
1734 
1735 				/*
1736 				 * If the action didn't match any legal action,
1737 				 * set state to FAILURE.
1738 				 */
1739 				if (cmd->c_action == 0) {
1740 					state = FAILURE;
1741 				} else {
1742 					state = COMMAND;
1743 					(void) strcpy(shcmd, "exec ");
1744 				}
1745 				ptr = shcmd + EXEC;
1746 				break;
1747 				}
1748 				continue;
1749 			}
1750 		}
1751 
1752 		/* If the character is a '\n', then this is the end of a */
1753 		/* line.  If the '\n' wasn't preceded by a backslash, */
1754 		/* it is also the end of an inittab command.  If it was */
1755 		/* preceded by a backslash then the next line is a */
1756 		/* continuation.  Note that the continuation '\n' falls */
1757 		/* through and is treated like other characters and is */
1758 		/* stored in the shell command line. */
1759 		if (c == '\n' && lastc != '\\') {
1760 			proceed = FALSE;
1761 			*ptr = '\0';
1762 			break;
1763 		}
1764 
1765 		/* For all other characters just stuff them into the */
1766 		/* command as long as there aren't too many of them. */
1767 		/* Make sure there is room for a terminating '\0' also. */
1768 		if (ptr >= shcmd + MAXCMDL - 1)
1769 			state = FAILURE;
1770 		else
1771 			*ptr++ = (char)c;
1772 
1773 		/* If the character we just stored was a quoted	*/
1774 		/* backslash, then change "c" to '\0', so that this	*/
1775 		/* backslash will not cause a subsequent '\n' to appear */
1776 		/* quoted.  In otherwords '\' '\' '\n' is the real end */
1777 		/* of a command, while '\' '\n' is a continuation. */
1778 		if (c == '\\' && lastc == '\\')
1779 			c = '\0';
1780 		}
1781 
1782 		/*
1783 		 * Make sure all the fields are properly specified
1784 		 * for a good command line.
1785 		 */
1786 		if (state == COMMAND) {
1787 			answer = TRUE;
1788 			cmd->c_command = shcmd;
1789 
1790 			/*
1791 			 * If no default level was supplied, insert
1792 			 * all numerical levels.
1793 			 */
1794 			if (cmd->c_levels == 0)
1795 				cmd->c_levels = MASK_NUMERIC;
1796 
1797 			/*
1798 			 * If no action has been supplied, declare this
1799 			 * entry to be OFF.
1800 			 */
1801 			if (cmd->c_action == 0)
1802 				cmd->c_action = M_OFF;
1803 
1804 			/*
1805 			 * If no shell command has been supplied, make sure
1806 			 * there is a null string in the command field.
1807 			 */
1808 			if (ptr == shcmd + EXEC)
1809 				*shcmd = '\0';
1810 		} else
1811 			answer = FALSE;
1812 
1813 		/*
1814 		 * If we have reached the end of inittab, then close it
1815 		 * and quit trying to find a good command line.
1816 		 */
1817 		if (c == EOF) {
1818 			(void) fclose(fp_inittab);
1819 			fp_inittab = NULL;
1820 			break;
1821 		}
1822 	}
1823 	return (answer);
1824 }
1825 
1826 /*
1827  * lvlname_to_state(): convert the character name of a state to its level
1828  * (its corresponding signal number).
1829  */
1830 static int
1831 lvlname_to_state(char name)
1832 {
1833 	int i;
1834 	for (i = 0; i < LVL_NELEMS; i++) {
1835 		if (lvls[i].lvl_name == name)
1836 			return (lvls[i].lvl_state);
1837 	}
1838 	return (-1);
1839 }
1840 
1841 /*
1842  * state_to_name(): convert the level to the character name.
1843  */
1844 static char
1845 state_to_name(int state)
1846 {
1847 	int i;
1848 	for (i = 0; i < LVL_NELEMS; i++) {
1849 		if (lvls[i].lvl_state == state)
1850 			return (lvls[i].lvl_name);
1851 	}
1852 	return (-1);
1853 }
1854 
1855 /*
1856  * state_to_mask(): return the mask corresponding to a signal number
1857  */
1858 static int
1859 state_to_mask(int state)
1860 {
1861 	int i;
1862 	for (i = 0; i < LVL_NELEMS; i++) {
1863 		if (lvls[i].lvl_state == state)
1864 			return (lvls[i].lvl_mask);
1865 	}
1866 	return (0);	/* return 0, since that represents an empty mask */
1867 }
1868 
1869 /*
1870  * lvlname_to_mask(): return the mask corresponding to a levels character name
1871  */
1872 static int
1873 lvlname_to_mask(char name, int *mask)
1874 {
1875 	int i;
1876 	for (i = 0; i < LVL_NELEMS; i++) {
1877 		if (lvls[i].lvl_name == name) {
1878 			*mask = lvls[i].lvl_mask;
1879 			return (0);
1880 		}
1881 	}
1882 	return (-1);
1883 }
1884 
1885 /*
1886  * state_to_flags(): return the flags corresponding to a runlevel.  These
1887  * indicate properties of that runlevel.
1888  */
1889 static int
1890 state_to_flags(int state)
1891 {
1892 	int i;
1893 	for (i = 0; i < LVL_NELEMS; i++) {
1894 		if (lvls[i].lvl_state == state)
1895 			return (lvls[i].lvl_flags);
1896 	}
1897 	return (0);
1898 }
1899 
1900 /*
1901  * killproc() creates a child which kills the process specified by pid.
1902  */
1903 void
1904 killproc(pid_t pid)
1905 {
1906 	struct PROC_TABLE	*process;
1907 
1908 	(void) sighold(SIGCLD);
1909 	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1910 		(void) pause();
1911 	(void) sigrelse(SIGCLD);
1912 
1913 	if (process == NULLPROC) {
1914 		/*
1915 		 * efork() sets all signal handlers to the default, so reset
1916 		 * the ALRM handler to make timer() work as expected.
1917 		 */
1918 		(void) sigset(SIGALRM, alarmclk);
1919 
1920 		/*
1921 		 * We are the child.  Try to terminate the process nicely
1922 		 * first using SIGTERM and if it refuses to die in TWARN
1923 		 * seconds kill it with SIGKILL.
1924 		 */
1925 		(void) kill(pid, SIGTERM);
1926 		(void) timer(TWARN);
1927 		(void) kill(pid, SIGKILL);
1928 		(void) exit(0);
1929 	}
1930 }
1931 
1932 /*
1933  * Set up the default environment for all procs to be forked from init.
1934  * Read the values from the /etc/default/init file, except for PATH.  If
1935  * there is not enough room in the environment array, the environment
1936  * lines that don't fit are discarded and a message is written to the console.
1937  */
1938 void
1939 init_env()
1940 {
1941 	void		*dstate;
1942 	const char	*tokp;
1943 
1944 	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1945 	(void) strcpy(glob_envp[0], DEF_PATH);
1946 	glob_envn = 1;
1947 
1948 	if (rflg) {
1949 		glob_envp[1] =
1950 		    malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1951 		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1952 		++glob_envn;
1953 	} else if (bflg == 1) {
1954 		glob_envp[1] =
1955 		    malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1956 		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1957 		++glob_envn;
1958 	}
1959 
1960 	if (definit_open(ENVFILE, &dstate) != 0) {
1961 		console(B_TRUE,
1962 		    "Cannot open %s. Environment not initialized.\n",
1963 		    ENVFILE);
1964 		return;
1965 	}
1966 
1967 	while ((tokp = definit_token(dstate)) != NULL &&
1968 	    glob_envn < MAXENVENT - 2) {
1969 
1970 		if (strncmp(tokp, "CMASK=", sizeof ("CMASK=") - 1) == 0) {
1971 			long t;
1972 
1973 			/* We know there's an = */
1974 			t = strtol(strchr(tokp, '=') + 1, NULL, 8);
1975 
1976 			/* Sanity */
1977 			if (t >= DEFINIT_MIN_UMASK && t <= DEFINIT_MAX_UMASK)
1978 				cmask = (int)t;
1979 			(void) umask(cmask);
1980 			continue;
1981 		}
1982 		glob_envp[glob_envn] = strdup(tokp);
1983 		if (glob_envp[glob_envn] == NULL) {
1984 			console(B_TRUE, "Out of memory building environment, "
1985 			    "truncated.\n");
1986 			break;
1987 		}
1988 		if (++glob_envn >= MAXENVENT - 1) {
1989 			console(B_TRUE, "Too many variables in %s; "
1990 			    "environment not fully initialized.\n", ENVFILE);
1991 			break;
1992 		}
1993 	}
1994 
1995 	/*
1996 	 * Append a null pointer to the environment array to mark its end.
1997 	 */
1998 	glob_envp[glob_envn] = NULL;
1999 
2000 	definit_close(dstate);
2001 }
2002 
2003 /*
2004  * boot_init(): Do initialization things that should be done at boot.
2005  */
2006 void
2007 boot_init()
2008 {
2009 	int i;
2010 	struct PROC_TABLE *process, *oprocess;
2011 	struct CMD_LINE	cmd;
2012 	char	line[MAXCMDL];
2013 	char	svc_aux[SVC_AUX_SIZE];
2014 	char	init_svc_fmri[SVC_FMRI_SIZE];
2015 	char *old_path;
2016 	int maxfiles;
2017 
2018 	/* Use INIT_PATH for sysinit cmds */
2019 	old_path = glob_envp[0];
2020 	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2021 	(void) strcpy(glob_envp[0], INIT_PATH);
2022 
2023 	/*
2024 	 * Scan inittab(5) and process the special svc.startd entry, initdefault
2025 	 * and sysinit entries.
2026 	 */
2027 	while (getcmd(&cmd, &line[0]) == TRUE) {
2028 		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
2029 			process_startd_line(&cmd, line);
2030 			(void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
2031 			    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2032 		} else if (cmd.c_action == M_INITDEFAULT) {
2033 			/*
2034 			 * initdefault is no longer meaningful, as the SMF
2035 			 * milestone controls what (legacy) run level we
2036 			 * boot to.
2037 			 */
2038 			console(B_TRUE,
2039 			    "Ignoring legacy \"initdefault\" entry.\n");
2040 		} else if (cmd.c_action == M_SYSINIT) {
2041 			/*
2042 			 * Execute the "sysinit" entry and wait for it to
2043 			 * complete.  No bookkeeping is performed on these
2044 			 * entries because we avoid writing to the file system
2045 			 * until after there has been an chance to check it.
2046 			 */
2047 			if (process = findpslot(&cmd)) {
2048 				(void) sighold(SIGCLD);
2049 				(void) snprintf(svc_aux, SVC_AUX_SIZE,
2050 				    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2051 				(void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
2052 				    SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
2053 				    cmd.c_id);
2054 				if (legacy_tmpl >= 0) {
2055 					(void) ct_pr_tmpl_set_svc_fmri(
2056 					    legacy_tmpl, init_svc_fmri);
2057 					(void) ct_pr_tmpl_set_svc_aux(
2058 					    legacy_tmpl, svc_aux);
2059 				}
2060 
2061 				for (oprocess = process;
2062 				    (process = efork(M_OFF, oprocess,
2063 				    (NAMED|NOCLEANUP))) == NO_ROOM;
2064 				    /* CSTYLED */)
2065 					;
2066 				(void) sigrelse(SIGCLD);
2067 
2068 				if (process == NULLPROC) {
2069 					maxfiles = ulimit(UL_GDESLIM, 0);
2070 
2071 					for (i = 0; i < maxfiles; i++)
2072 						(void) fcntl(i, F_SETFD,
2073 						    FD_CLOEXEC);
2074 					(void) execle(SH, "INITSH", "-c",
2075 					    cmd.c_command,
2076 					    (char *)0, glob_envp);
2077 					console(B_TRUE,
2078 "Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2079 					    cmd.c_command, errno);
2080 					exit(1);
2081 				} else
2082 					while (waitproc(process) == FAILURE)
2083 						;
2084 				process->p_flags = 0;
2085 				st_write();
2086 			}
2087 		}
2088 	}
2089 
2090 	/* Restore the path. */
2091 	free(glob_envp[0]);
2092 	glob_envp[0] = old_path;
2093 
2094 	/*
2095 	 * This will enable st_write() to complain about init_state_file.
2096 	 */
2097 	booting = 0;
2098 
2099 	/*
2100 	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2101 	 * out a correct version.
2102 	 */
2103 	if (write_ioctl)
2104 		write_ioctl_syscon();
2105 
2106 	/*
2107 	 * Start svc.startd(8), which does most of the work.
2108 	 */
2109 	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2110 		/* Start svc.startd. */
2111 		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2112 			cur_state = SINGLE_USER;
2113 	} else {
2114 		console(B_TRUE, "Absent svc.startd entry or bad "
2115 		    "contract template.  Not starting svc.startd.\n");
2116 		enter_maintenance();
2117 	}
2118 }
2119 
2120 /*
2121  * init_signals(): Initialize all signals to either be caught or ignored.
2122  */
2123 void
2124 init_signals(void)
2125 {
2126 	struct sigaction act;
2127 	int i;
2128 
2129 	/*
2130 	 * Start by ignoring all signals, then selectively re-enable some.
2131 	 * The SIG_IGN disposition will only affect asynchronous signals:
2132 	 * any signal that we trigger synchronously that doesn't end up
2133 	 * being handled by siglvl() will be forcibly delivered by the kernel.
2134 	 */
2135 	for (i = SIGHUP; i <= SIGRTMAX; i++)
2136 		(void) sigset(i, SIG_IGN);
2137 
2138 	/*
2139 	 * Handle all level-changing signals using siglvl() and set sa_mask so
2140 	 * that all level-changing signals are blocked while in siglvl().
2141 	 */
2142 	act.sa_sigaction = siglvl;
2143 	act.sa_flags = SA_SIGINFO;
2144 	(void) sigemptyset(&act.sa_mask);
2145 
2146 	(void) sigaddset(&act.sa_mask, LVLQ);
2147 	(void) sigaddset(&act.sa_mask, LVL0);
2148 	(void) sigaddset(&act.sa_mask, LVL1);
2149 	(void) sigaddset(&act.sa_mask, LVL2);
2150 	(void) sigaddset(&act.sa_mask, LVL3);
2151 	(void) sigaddset(&act.sa_mask, LVL4);
2152 	(void) sigaddset(&act.sa_mask, LVL5);
2153 	(void) sigaddset(&act.sa_mask, LVL6);
2154 	(void) sigaddset(&act.sa_mask, SINGLE_USER);
2155 	(void) sigaddset(&act.sa_mask, LVLa);
2156 	(void) sigaddset(&act.sa_mask, LVLb);
2157 	(void) sigaddset(&act.sa_mask, LVLc);
2158 
2159 	(void) sigaction(LVLQ, &act, NULL);
2160 	(void) sigaction(LVL0, &act, NULL);
2161 	(void) sigaction(LVL1, &act, NULL);
2162 	(void) sigaction(LVL2, &act, NULL);
2163 	(void) sigaction(LVL3, &act, NULL);
2164 	(void) sigaction(LVL4, &act, NULL);
2165 	(void) sigaction(LVL5, &act, NULL);
2166 	(void) sigaction(LVL6, &act, NULL);
2167 	(void) sigaction(SINGLE_USER, &act, NULL);
2168 	(void) sigaction(LVLa, &act, NULL);
2169 	(void) sigaction(LVLb, &act, NULL);
2170 	(void) sigaction(LVLc, &act, NULL);
2171 
2172 	(void) sigset(SIGALRM, alarmclk);
2173 	alarmclk();
2174 
2175 	(void) sigset(SIGCLD, childeath);
2176 	(void) sigset(SIGPWR, powerfail);
2177 }
2178 
2179 /*
2180  * Set up pipe for "godchildren". If the file exists and is a pipe just open
2181  * it. Else, if the file system is r/w create it.  Otherwise, defer its
2182  * creation and open until after /var/run has been mounted.  This function is
2183  * only called on startup and when explicitly requested via LVLQ.
2184  */
2185 void
2186 setup_pipe()
2187 {
2188 	struct stat stat_buf;
2189 	struct statvfs statvfs_buf;
2190 	struct sigaction act;
2191 
2192 	/*
2193 	 * Always close the previous pipe descriptor as the mounted filesystems
2194 	 * may have changed.
2195 	 */
2196 	if (Pfd >= 0)
2197 		(void) close(Pfd);
2198 
2199 	if ((stat(INITPIPE, &stat_buf) == 0) &&
2200 	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2201 		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2202 	else
2203 		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2204 		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2205 			(void) unlink(INITPIPE);
2206 			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2207 			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2208 		}
2209 
2210 	if (Pfd >= 0) {
2211 		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
2212 		/*
2213 		 * Read pipe in message discard mode.
2214 		 */
2215 		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
2216 
2217 		act.sa_handler = sigpoll;
2218 		act.sa_flags = 0;
2219 		(void) sigemptyset(&act.sa_mask);
2220 		(void) sigaddset(&act.sa_mask, SIGCLD);
2221 		(void) sigaction(SIGPOLL, &act, NULL);
2222 	}
2223 }
2224 
/*
 * siglvl - handle an asynchronous signal from init(8) telling us that we
 * should change the current run level.  We set new_state accordingly.
 *
 * sig is the signal number, sip the associated siginfo (may be NULL) and
 * arg the interrupted context.  Besides new_state, the handler resets the
 * p_time and p_count fields of every proc_table entry and sets the
 * w_usersignal wakeup flag.
 */
void
siglvl(int sig, siginfo_t *sip, void *arg)
{
	ucontext_t *ucp = arg;
	struct PROC_TABLE *process;
	struct sigaction act;

	/*
	 * If the signal was from the kernel (rather than init(8)) then init
	 * itself tripped the signal.  That is, we might have a bug and tripped
	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
	 * such a case we reset the disposition to SIG_DFL, block all signals
	 * in uc_mask but the current one, and return to the interrupted ucp
	 * to effect an appropriate death.  The kernel will then restart us.
	 *
	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
	 * the kernel can send us when it wants to effect an orderly reboot.
	 * For this case we must also verify si_code is zero, rather than a
	 * code such as FPE_INTDIV which a bug might have triggered.
	 *
	 * NOTE(review): as written, a SIGFPE takes the death path below only
	 * when si_code == 0, which reads as the opposite of the paragraph
	 * above.  Confirm against the definition of SI_FROMKERNEL() before
	 * changing or relying on this test.
	 */
	if (sip != NULL && SI_FROMKERNEL(sip) &&
	    (sig != SIGFPE || sip->si_code == 0)) {

		/* Restore the default action for the offending signal. */
		(void) sigemptyset(&act.sa_mask);
		act.sa_handler = SIG_DFL;
		act.sa_flags = 0;
		(void) sigaction(sig, &act, NULL);

		/* Resume the interrupted context with only sig unblocked. */
		(void) sigfillset(&ucp->uc_sigmask);
		(void) sigdelset(&ucp->uc_sigmask, sig);
		ucp->uc_flags |= UC_SIGMASK;

		(void) setcontext(ucp);
	}

	/*
	 * If the signal received is a LVLQ signal, do not really
	 * change levels, just restate the current level.  If the
	 * signal is not a LVLQ, set the new level to the signal
	 * received.
	 */
	if (sig == LVLQ) {
		new_state = cur_state;
		lvlq_received = B_TRUE;
	} else {
		new_state = sig;
	}

	/*
	 * Clear all times and repeat counts in the process table
	 * since either the level is changing or the user has edited
	 * the inittab file and wants us to look at it again.
	 * If the user has fixed a typo, we don't want residual timing
	 * data preventing the fixed command line from executing.
	 */
	for (process = proc_table;
	    (process < proc_table + num_proc); process++) {
		process->p_time = 0L;
		process->p_count = 0;
	}

	/*
	 * Set the flag to indicate that a "user signal" was received.
	 */
	wakeup.w_flags.w_usersignal = 1;
}
2295 
2296 
/*
 * alarmclk - mark the current timer interval as expired by setting time_up
 * to TRUE.  init_signals() installs this as the SIGALRM handler; it is also
 * called directly by init_signals() and setimer().
 */
static void
alarmclk()
{
	time_up = TRUE;
}
2305 
2306 /*
2307  * childeath_single():
2308  *
2309  * This used to be the SIGCLD handler and it was set with signal()
2310  * (as opposed to sigset()).  When a child exited we'd come to the
2311  * handler, wait for the child, and reenable the handler with
2312  * signal() just before returning.  The implementation of signal()
2313  * checks with waitid() for waitable children and sends a SIGCLD
2314  * if there are some.  If children are exiting faster than the
2315  * handler can run we keep sending signals and the handler never
2316  * gets to return and eventually the stack runs out and init dies.
2317  * To prevent that we set the handler with sigset() so the handler
2318  * doesn't need to be reset, and in childeath() (see below) we
2319  * call childeath_single() as long as there are children to be
2320  * waited for.  If a child exits while init is in the handler a
2321  * SIGCLD will be pending and delivered on return from the handler.
2322  * If the child was already waited for the handler will have nothing
2323  * to do and return, otherwise the child will be waited for.
2324  */
2325 static void
2326 childeath_single(pid_t pid, int status)
2327 {
2328 	struct PROC_TABLE	*process;
2329 	struct pidlist		*pp;
2330 
2331 	/*
2332 	 * Scan the process table to see if we are interested in this process.
2333 	 */
2334 	for (process = proc_table;
2335 	    (process < proc_table + num_proc); process++) {
2336 		if ((process->p_flags & (LIVING|OCCUPIED)) ==
2337 		    (LIVING|OCCUPIED) && process->p_pid == pid) {
2338 
2339 			/*
2340 			 * Mark this process as having died and store the exit
2341 			 * status.  Also set the wakeup flag for a dead child
2342 			 * and break out of the loop.
2343 			 */
2344 			process->p_flags &= ~LIVING;
2345 			process->p_exit = (short)status;
2346 			wakeup.w_flags.w_childdeath = 1;
2347 
2348 			return;
2349 		}
2350 	}
2351 
2352 	/*
2353 	 * No process was found above, look through auxiliary list.
2354 	 */
2355 	(void) sighold(SIGPOLL);
2356 	pp = Plhead;
2357 	while (pp) {
2358 		if (pid > pp->pl_pid) {
2359 			/*
2360 			 * Keep on looking.
2361 			 */
2362 			pp = pp->pl_next;
2363 			continue;
2364 		} else if (pid < pp->pl_pid) {
2365 			/*
2366 			 * Not in the list.
2367 			 */
2368 			break;
2369 		} else {
2370 			/*
2371 			 * This is a dead "godchild".
2372 			 */
2373 			pp->pl_dflag = 1;
2374 			pp->pl_exit = (short)status;
2375 			wakeup.w_flags.w_childdeath = 1;
2376 			Gchild = 1;	/* Notice to call cleanaux(). */
2377 			break;
2378 		}
2379 	}
2380 
2381 	(void) sigrelse(SIGPOLL);
2382 }
2383 
/*
 * childeath(): SIGCLD handler.  Reaps every child that can be waited for
 * without blocking and hands each one to childeath_single().
 *
 * errno is saved on entry and restored on exit: waitpid() and the calls
 * made by childeath_single() may set it, and the code this handler
 * interrupts (for example the EINTR retry loop in access_utmpx()) must not
 * see a value that changed underneath it.
 */
/* ARGSUSED */
static void
childeath(int signo)
{
	int saved_errno = errno;
	pid_t pid;
	int status;

	while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
		childeath_single(pid, status);

	errno = saved_errno;
}
2394 
/*
 * powerfail(): handler installed for SIGPWR by init_signals().  Adjusts
 * init's nice value by -19 and sets the w_powerhit wakeup flag.
 */
static void
powerfail()
{
	(void) nice(-19);
	wakeup.w_flags.w_powerhit = 1;
}
2401 
2402 /*
2403  * efork() forks a child and the parent inserts the process in its table
2404  * of processes that are directly a result of forks that it has performed.
2405  * The child just changes the "global" with the process id for this process
2406  * to it's new value.
2407  * If efork() is called with a pointer into the proc_table it uses that slot,
2408  * otherwise it searches for a free slot.  Regardless of how it was called,
2409  * it returns the pointer to the proc_table entry
2410  *
2411  * The SIGCLD signal is blocked (held) before calling efork()
2412  * and is unblocked (released) after efork() returns.
2413  *
2414  * Ideally, this should be rewritten to use modern signal semantics.
2415  */
2416 static struct PROC_TABLE *
2417 efork(int action, struct PROC_TABLE *process, int modes)
2418 {
2419 	pid_t	childpid;
2420 	struct PROC_TABLE *proc;
2421 	int		i;
2422 	/*
2423 	 * Freshen up the proc_table, removing any entries for dead processes
2424 	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
2425 	 */
2426 	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2427 		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2428 		    (OCCUPIED)) {
2429 			/*
2430 			 * Is this a named process?
2431 			 * If so, do the necessary bookkeeping.
2432 			 */
2433 			if (proc->p_flags & NAMED)
2434 				(void) account(DEAD_PROCESS, proc, NULL);
2435 
2436 			/*
2437 			 * Free this entry for new usage.
2438 			 */
2439 			proc->p_flags = 0;
2440 		}
2441 	}
2442 
2443 	while ((childpid = fork()) == FAILURE) {
2444 		/*
2445 		 * Shorten the alarm timer in case someone else's child dies
2446 		 * and free up a slot in the process table.
2447 		 */
2448 		setimer(5);
2449 
2450 		/*
2451 		 * Wait for some children to die.  Since efork()
2452 		 * is always called with SIGCLD blocked, unblock
2453 		 * it here so that child death signals can come in.
2454 		 */
2455 		(void) sigrelse(SIGCLD);
2456 		(void) pause();
2457 		(void) sighold(SIGCLD);
2458 		setimer(0);
2459 	}
2460 
2461 	if (childpid != 0) {
2462 
2463 		if (process == NULLPROC) {
2464 			/*
2465 			 * No proc table pointer specified so search
2466 			 * for a free slot.
2467 			 */
2468 			for (process = proc_table;  process->p_flags != 0 &&
2469 			    (process < proc_table + num_proc); process++)
2470 					;
2471 
2472 			if (process == (proc_table + num_proc)) {
2473 				int old_proc_table_size = num_proc;
2474 
2475 				/* Increase the process table size */
2476 				increase_proc_table_size();
2477 				if (old_proc_table_size == num_proc) {
2478 					/* didn't grow: memory failure */
2479 					return (NO_ROOM);
2480 				} else {
2481 					process =
2482 					    proc_table + old_proc_table_size;
2483 				}
2484 			}
2485 
2486 			process->p_time = 0L;
2487 			process->p_count = 0;
2488 		}
2489 		process->p_id[0] = '\0';
2490 		process->p_id[1] = '\0';
2491 		process->p_id[2] = '\0';
2492 		process->p_id[3] = '\0';
2493 		process->p_pid = childpid;
2494 		process->p_flags = (LIVING | OCCUPIED | modes);
2495 		process->p_exit = 0;
2496 
2497 		st_write();
2498 	} else {
2499 		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2500 			(void) setpgrp();
2501 
2502 		process = NULLPROC;
2503 
2504 		/*
2505 		 * Reset all signals to the system defaults.
2506 		 */
2507 		for (i = SIGHUP; i <= SIGRTMAX; i++)
2508 			(void) sigset(i, SIG_DFL);
2509 
2510 		/*
2511 		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
2512 		 * SIGTTIN, and SIGTSTP to SIG_IGN.
2513 		 *
2514 		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2515 		 * for backward compatibility.
2516 		 */
2517 		(void) sigset(SIGTTIN, SIG_IGN);
2518 		(void) sigset(SIGTTOU, SIG_IGN);
2519 		(void) sigset(SIGTSTP, SIG_IGN);
2520 		(void) sigset(SIGXCPU, SIG_IGN);
2521 		(void) sigset(SIGXFSZ, SIG_IGN);
2522 	}
2523 	return (process);
2524 }
2525 
2526 
2527 /*
2528  * waitproc() waits for a specified process to die.  For this function to
2529  * work, the specified process must already in the proc_table.  waitproc()
2530  * returns the exit status of the specified process when it dies.
2531  */
2532 static long
2533 waitproc(struct PROC_TABLE *process)
2534 {
2535 	int		answer;
2536 	sigset_t	oldmask, newmask, zeromask;
2537 
2538 	(void) sigemptyset(&zeromask);
2539 	(void) sigemptyset(&newmask);
2540 
2541 	(void) sigaddset(&newmask, SIGCLD);
2542 
2543 	/* Block SIGCLD and save the current signal mask */
2544 	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2545 		perror("SIG_BLOCK error");
2546 
2547 	/*
2548 	 * Wait around until the process dies.
2549 	 */
2550 	if (process->p_flags & LIVING)
2551 		(void) sigsuspend(&zeromask);
2552 
2553 	/* Reset signal mask to unblock SIGCLD */
2554 	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2555 		perror("SIG_SETMASK error");
2556 
2557 	if (process->p_flags & LIVING)
2558 		return (FAILURE);
2559 
2560 	/*
2561 	 * Make sure to only return 16 bits so that answer will always
2562 	 * be positive whenever the process of interest really died.
2563 	 */
2564 	answer = (process->p_exit & 0xffff);
2565 
2566 	/*
2567 	 * Free the slot in the proc_table.
2568 	 */
2569 	process->p_flags = 0;
2570 	return (answer);
2571 }
2572 
2573 /*
2574  * notify_pam_dead(): calls into the PAM framework to close the given session.
2575  */
2576 static void
2577 notify_pam_dead(struct utmpx *up)
2578 {
2579 	pam_handle_t *pamh;
2580 	char user[sizeof (up->ut_user) + 1];
2581 	char ttyn[sizeof (up->ut_line) + 1];
2582 	char host[sizeof (up->ut_host) + 1];
2583 
2584 	/*
2585 	 * PAM does not take care of updating utmpx/wtmpx.
2586 	 */
2587 	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
2588 	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2589 	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
2590 
2591 	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2592 		(void) pam_set_item(pamh, PAM_TTY, ttyn);
2593 		(void) pam_set_item(pamh, PAM_RHOST, host);
2594 		(void) pam_close_session(pamh, 0);
2595 		(void) pam_end(pamh, PAM_SUCCESS);
2596 	}
2597 }
2598 
2599 /*
2600  * Check you can access utmpx (As / may be read-only and
2601  * /var may not be mounted yet).
2602  */
2603 static int
2604 access_utmpx(void)
2605 {
2606 	do {
2607 		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2608 	} while (!utmpx_ok && errno == EINTR);
2609 
2610 	return (utmpx_ok);
2611 }
2612 
2613 /*
2614  * account() updates entries in utmpx and appends new entries to the end of
2615  * wtmpx (assuming they exist).  The program argument indicates the name of
2616  * program if INIT_PROCESS, otherwise should be NULL.
2617  *
2618  * account() only blocks for INIT_PROCESS requests.
2619  *
2620  * Returns non-zero if write failed.
2621  */
2622 static int
2623 account(short state, struct PROC_TABLE *process, char *program)
2624 {
2625 	struct utmpx utmpbuf, *u, *oldu;
2626 	int tmplen;
2627 	char fail_buf[UT_LINE_SZ];
2628 	sigset_t block, unblock;
2629 
2630 	if (!utmpx_ok && !access_utmpx()) {
2631 		return (-1);
2632 	}
2633 
2634 	/*
2635 	 * Set up the prototype for the utmp structure we want to write.
2636 	 */
2637 	u = &utmpbuf;
2638 	(void) memset(u, 0, sizeof (struct utmpx));
2639 
2640 	/*
2641 	 * Fill in the various fields of the utmp structure.
2642 	 */
2643 	u->ut_id[0] = process->p_id[0];
2644 	u->ut_id[1] = process->p_id[1];
2645 	u->ut_id[2] = process->p_id[2];
2646 	u->ut_id[3] = process->p_id[3];
2647 	u->ut_pid = process->p_pid;
2648 
2649 	/*
2650 	 * Fill the "ut_exit" structure.
2651 	 */
2652 	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2653 	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2654 	u->ut_type = state;
2655 
2656 	(void) time(&u->ut_tv.tv_sec);
2657 
2658 	/*
2659 	 * Block signals for utmp update.
2660 	 */
2661 	(void) sigfillset(&block);
2662 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2663 
2664 	/*
2665 	 * See if there already is such an entry in the "utmpx" file.
2666 	 */
2667 	setutxent();	/* Start at beginning of utmpx file. */
2668 
2669 	if ((oldu = getutxid(u)) != NULL) {
2670 		/*
2671 		 * Copy in the old "user", "line" and "host" fields
2672 		 * to our new structure.
2673 		 */
2674 		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2675 		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2676 		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2677 		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2678 		    min(tmplen + 1, sizeof (u->ut_host)) : 0;
2679 
2680 		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2681 			notify_pam_dead(oldu);
2682 		}
2683 	}
2684 
2685 	/*
2686 	 * Perform special accounting. Insert the special string into the
2687 	 * ut_line array. For INIT_PROCESSes put in the name of the
2688 	 * program in the "ut_user" field.
2689 	 */
2690 	switch (state) {
2691 	case INIT_PROCESS:
2692 		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2693 		(void) strcpy(fail_buf, "INIT_PROCESS");
2694 		break;
2695 
2696 	default:
2697 		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2698 		break;
2699 	}
2700 
2701 	/*
2702 	 * Write out the updated entry to utmpx file.
2703 	 */
2704 	if (pututxline(u) == NULL) {
2705 		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2706 		    fail_buf, strerror(errno));
2707 		endutxent();
2708 		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2709 		return (-1);
2710 	}
2711 
2712 	/*
2713 	 * If we're able to write to utmpx, then attempt to add to the
2714 	 * end of the wtmpx file.
2715 	 */
2716 	updwtmpx(WTMPX, u);
2717 
2718 	endutxent();
2719 
2720 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2721 
2722 	return (0);
2723 }
2724 
2725 static void
2726 clearent(pid_t pid, short status)
2727 {
2728 	struct utmpx *up;
2729 	sigset_t block, unblock;
2730 
2731 	/*
2732 	 * Block signals for utmp update.
2733 	 */
2734 	(void) sigfillset(&block);
2735 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2736 
2737 	/*
2738 	 * No error checking for now.
2739 	 */
2740 
2741 	setutxent();
2742 	while (up = getutxent()) {
2743 		if (up->ut_pid == pid) {
2744 			if (up->ut_type == DEAD_PROCESS) {
2745 				/*
2746 				 * Cleaned up elsewhere.
2747 				 */
2748 				continue;
2749 			}
2750 
2751 			notify_pam_dead(up);
2752 
2753 			up->ut_type = DEAD_PROCESS;
2754 			up->ut_exit.e_termination = WTERMSIG(status);
2755 			up->ut_exit.e_exit = WEXITSTATUS(status);
2756 			(void) time(&up->ut_tv.tv_sec);
2757 
2758 			(void) pututxline(up);
2759 			/*
2760 			 * Now attempt to add to the end of the
2761 			 * wtmp and wtmpx files.  Do not create
2762 			 * if they don't already exist.
2763 			 */
2764 			updwtmpx(WTMPX, up);
2765 
2766 			break;
2767 		}
2768 	}
2769 
2770 	endutxent();
2771 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2772 }
2773 
2774 /*
2775  * prog_name() searches for the word or unix path name and
2776  * returns a pointer to the last element of the pathname.
2777  */
2778 static char *
2779 prog_name(char *string)
2780 {
2781 	char	*ptr, *ptr2;
2782 	static char word[UT_USER_SZ + 1];
2783 
2784 	/*
2785 	 * Search for the first word skipping leading spaces and tabs.
2786 	 */
2787 	while (*string == ' ' || *string == '\t')
2788 		string++;
2789 
2790 	/*
2791 	 * If the first non-space non-tab character is not one allowed in
2792 	 * a word, return a pointer to a null string, otherwise parse the
2793 	 * pathname.
2794 	 */
2795 	if (*string != '.' && *string != '/' && *string != '_' &&
2796 	    (*string < 'a' || *string > 'z') &&
2797 	    (*string < 'A' || * string > 'Z') &&
2798 	    (*string < '0' || *string > '9'))
2799 		return ("");
2800 
2801 	/*
2802 	 * Parse the pathname looking forward for '/', ' ', '\t', '\n' or
2803 	 * '\0'.  Each time a '/' is found, move "ptr" to one past the
2804 	 * '/', thus when a ' ', '\t', '\n', or '\0' is found, "ptr" will
2805 	 * point to the last element of the pathname.
2806 	 */
2807 	for (ptr = string; *string != ' ' && *string != '\t' &&
2808 	    *string != '\n' && *string != '\0'; string++) {
2809 		if (*string == '/')
2810 			ptr = string+1;
2811 	}
2812 
2813 	/*
2814 	 * Copy out up to the size of the "ut_user" array into "word",
2815 	 * null terminate it and return a pointer to it.
2816 	 */
2817 	for (ptr2 = &word[0]; ptr2 < &word[UT_USER_SZ] &&
2818 	    ptr < string; /* CSTYLED */)
2819 		*ptr2++ = *ptr++;
2820 
2821 	*ptr2 = '\0';
2822 	return (&word[0]);
2823 }
2824 
2825 
2826 /*
2827  * realcon() returns a nonzero value if there is a character device
2828  * associated with SYSCON that has the same device number as CONSOLE.
2829  */
2830 static int
2831 realcon()
2832 {
2833 	struct stat sconbuf, conbuf;
2834 
2835 	if (stat(SYSCON, &sconbuf) != -1 &&
2836 	    stat(CONSOLE, &conbuf) != -1 &&
2837 	    S_ISCHR(sconbuf.st_mode) &&
2838 	    S_ISCHR(conbuf.st_mode) &&
2839 	    sconbuf.st_rdev == conbuf.st_rdev) {
2840 		return (1);
2841 	} else {
2842 		return (0);
2843 	}
2844 }
2845 
2846 
2847 /*
2848  * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2849  * Returns true if the IOCTLSYSCON file needs to be written (with
2850  * write_ioctl_syscon() below)
2851  */
2852 static int
2853 get_ioctl_syscon()
2854 {
2855 	FILE	*fp;
2856 	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
2857 	int		i, valid_format = 0;
2858 
2859 	/*
2860 	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
2861 	 */
2862 	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2863 		stored_syscon_termios = dflt_termios;
2864 		console(B_TRUE,
2865 		    "warning:%s does not exist, default settings assumed\n",
2866 		    IOCTLSYSCON);
2867 	} else {
2868 
2869 		i = fscanf(fp,
2870 	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2871 		    &iflags, &oflags, &cflags, &lflags,
2872 		    &cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2873 		    &cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2874 		    &cc[14], &cc[15], &cc[16], &cc[17]);
2875 
2876 		if (i == 22) {
2877 			stored_syscon_termios.c_iflag = iflags;
2878 			stored_syscon_termios.c_oflag = oflags;
2879 			stored_syscon_termios.c_cflag = cflags;
2880 			stored_syscon_termios.c_lflag = lflags;
2881 			for (i = 0; i < 18; i++)
2882 				stored_syscon_termios.c_cc[i] = (char)cc[i];
2883 			valid_format = 1;
2884 		} else if (i == 13) {
2885 		rewind(fp);
2886 		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2887 		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2888 		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2889 
2890 		/*
2891 		 * If the file is formatted properly, use the values to
2892 		 * initialize the console terminal condition.
2893 		 */
2894 		stored_syscon_termios.c_iflag = (ushort_t)iflags;
2895 		stored_syscon_termios.c_oflag = (ushort_t)oflags;
2896 		stored_syscon_termios.c_cflag = (ushort_t)cflags;
2897 		stored_syscon_termios.c_lflag = (ushort_t)lflags;
2898 		for (i = 0; i < 8; i++)
2899 			stored_syscon_termios.c_cc[i] = (char)cc[i];
2900 		valid_format = 1;
2901 		}
2902 		(void) fclose(fp);
2903 
2904 		/* If the file is badly formatted, use the default settings. */
2905 		if (!valid_format)
2906 			stored_syscon_termios = dflt_termios;
2907 	}
2908 
2909 	/* If the file had a bad format, rewrite it later. */
2910 	return (!valid_format);
2911 }
2912 
2913 
2914 static void
2915 write_ioctl_syscon()
2916 {
2917 	FILE *fp;
2918 	int i;
2919 
2920 	(void) unlink(SYSCON);
2921 	(void) link(SYSTTY, SYSCON);
2922 	(void) umask(022);
2923 	fp = fopen(IOCTLSYSCON, "w");
2924 
2925 	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2926 	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2927 	    stored_syscon_termios.c_lflag);
2928 	for (i = 0; i < 8; ++i)
2929 		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2930 	(void) putc('\n', fp);
2931 
2932 	(void) fflush(fp);
2933 	(void) fsync(fileno(fp));
2934 	(void) fclose(fp);
2935 	(void) umask(cmask);
2936 }
2937 
2938 
2939 /*
2940  * void console(boolean_t, char *, ...)
2941  *   Outputs the requested message to the system console.  Note that the number
2942  *   of arguments passed to console() should be determined by the print format.
2943  *
2944  *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2945  *   message.
2946  *
2947  *   To make sure we write to the console in a sane fashion, we use the modes
2948  *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2949  *   Afterwards we restore whatever modes were already there.
2950  */
2951 /* PRINTFLIKE2 */
2952 static void
2953 console(boolean_t prefix, char *format, ...)
2954 {
2955 	char	outbuf[BUFSIZ];
2956 	va_list	args;
2957 	int fd, getret;
2958 	struct termios old_syscon_termios;
2959 	FILE *f;
2960 
2961 	/*
2962 	 * We open SYSCON anew each time in case it has changed (see
2963 	 * userinit()).
2964 	 */
2965 	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
2966 	    (f = fdopen(fd, "r+")) == NULL) {
2967 		if (prefix)
2968 			syslog(LOG_WARNING, "INIT: ");
2969 		va_start(args, format);
2970 		vsyslog(LOG_WARNING, format, args);
2971 		va_end(args);
2972 		if (fd >= 0)
2973 			(void) close(fd);
2974 		return;
2975 	}
2976 	setbuf(f, &outbuf[0]);
2977 
2978 	getret = tcgetattr(fd, &old_syscon_termios);
2979 	old_syscon_termios.c_cflag &= ~HUPCL;
2980 	if (realcon())
2981 		/* Don't overwrite cflag of real console. */
2982 		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
2983 
2984 	stored_syscon_termios.c_cflag &= ~HUPCL;
2985 
2986 	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
2987 
2988 	if (prefix)
2989 		(void) fprintf(f, "\nINIT: ");
2990 	va_start(args, format);
2991 	(void) vfprintf(f, format, args);
2992 	va_end(args);
2993 
2994 	if (getret == 0)
2995 		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
2996 
2997 	(void) fclose(f);
2998 }
2999 
3000 /*
3001  * timer() is a substitute for sleep() which uses alarm() and pause().
3002  */
3003 static void
3004 timer(int waitime)
3005 {
3006 	setimer(waitime);
3007 	while (time_up == FALSE)
3008 		(void) pause();
3009 }
3010 
3011 static void
3012 setimer(int timelimit)
3013 {
3014 	alarmclk();
3015 	(void) alarm(timelimit);
3016 	time_up = (timelimit ? FALSE : TRUE);
3017 }
3018 
3019 /*
3020  * Fails with
3021  *   ENOMEM - out of memory
3022  *   ECONNABORTED - repository connection broken
3023  *   EPERM - permission denied
3024  *   EACCES - backend access denied
3025  *   EROFS - backend readonly
3026  */
3027 static int
3028 get_or_add_startd(scf_instance_t *inst)
3029 {
3030 	scf_handle_t *h;
3031 	scf_scope_t *scope = NULL;
3032 	scf_service_t *svc = NULL;
3033 	int ret = 0;
3034 
3035 	h = scf_instance_handle(inst);
3036 
3037 	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
3038 	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
3039 		return (0);
3040 
3041 	switch (scf_error()) {
3042 	case SCF_ERROR_CONNECTION_BROKEN:
3043 		return (ECONNABORTED);
3044 
3045 	case SCF_ERROR_NOT_FOUND:
3046 		break;
3047 
3048 	case SCF_ERROR_HANDLE_MISMATCH:
3049 	case SCF_ERROR_INVALID_ARGUMENT:
3050 	case SCF_ERROR_CONSTRAINT_VIOLATED:
3051 	default:
3052 		bad_error("scf_handle_decode_fmri", scf_error());
3053 	}
3054 
3055 	/* Make sure we're right, since we're adding piece-by-piece. */
3056 	assert(strcmp(SCF_SERVICE_STARTD,
3057 	    "svc:/system/svc/restarter:default") == 0);
3058 
3059 	if ((scope = scf_scope_create(h)) == NULL ||
3060 	    (svc = scf_service_create(h)) == NULL) {
3061 		ret = ENOMEM;
3062 		goto out;
3063 	}
3064 
3065 get_scope:
3066 	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
3067 		switch (scf_error()) {
3068 		case SCF_ERROR_CONNECTION_BROKEN:
3069 			ret = ECONNABORTED;
3070 			goto out;
3071 
3072 		case SCF_ERROR_NOT_FOUND:
3073 			(void) fputs(gettext(
3074 			    "smf(7) repository missing local scope.\n"),
3075 			    stderr);
3076 			exit(1);
3077 			/* NOTREACHED */
3078 
3079 		case SCF_ERROR_HANDLE_MISMATCH:
3080 		case SCF_ERROR_INVALID_ARGUMENT:
3081 		default:
3082 			bad_error("scf_handle_get_scope", scf_error());
3083 		}
3084 	}
3085 
3086 get_svc:
3087 	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
3088 		switch (scf_error()) {
3089 		case SCF_ERROR_CONNECTION_BROKEN:
3090 			ret = ECONNABORTED;
3091 			goto out;
3092 
3093 		case SCF_ERROR_DELETED:
3094 			goto get_scope;
3095 
3096 		case SCF_ERROR_NOT_FOUND:
3097 			break;
3098 
3099 		case SCF_ERROR_HANDLE_MISMATCH:
3100 		case SCF_ERROR_INVALID_ARGUMENT:
3101 		case SCF_ERROR_NOT_SET:
3102 		default:
3103 			bad_error("scf_scope_get_service", scf_error());
3104 		}
3105 
3106 add_svc:
3107 		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
3108 		    0) {
3109 			switch (scf_error()) {
3110 			case SCF_ERROR_CONNECTION_BROKEN:
3111 				ret = ECONNABORTED;
3112 				goto out;
3113 
3114 			case SCF_ERROR_EXISTS:
3115 				goto get_svc;
3116 
3117 			case SCF_ERROR_PERMISSION_DENIED:
3118 				ret = EPERM;
3119 				goto out;
3120 
3121 			case SCF_ERROR_BACKEND_ACCESS:
3122 				ret = EACCES;
3123 				goto out;
3124 
3125 			case SCF_ERROR_BACKEND_READONLY:
3126 				ret = EROFS;
3127 				goto out;
3128 
3129 			case SCF_ERROR_HANDLE_MISMATCH:
3130 			case SCF_ERROR_INVALID_ARGUMENT:
3131 			case SCF_ERROR_NOT_SET:
3132 			default:
3133 				bad_error("scf_scope_add_service", scf_error());
3134 			}
3135 		}
3136 	}
3137 
3138 get_inst:
3139 	if (scf_service_get_instance(svc, "default", inst) != 0) {
3140 		switch (scf_error()) {
3141 		case SCF_ERROR_CONNECTION_BROKEN:
3142 			ret = ECONNABORTED;
3143 			goto out;
3144 
3145 		case SCF_ERROR_DELETED:
3146 			goto add_svc;
3147 
3148 		case SCF_ERROR_NOT_FOUND:
3149 			break;
3150 
3151 		case SCF_ERROR_HANDLE_MISMATCH:
3152 		case SCF_ERROR_INVALID_ARGUMENT:
3153 		case SCF_ERROR_NOT_SET:
3154 		default:
3155 			bad_error("scf_service_get_instance", scf_error());
3156 		}
3157 
3158 		if (scf_service_add_instance(svc, "default", inst) !=
3159 		    0) {
3160 			switch (scf_error()) {
3161 			case SCF_ERROR_CONNECTION_BROKEN:
3162 				ret = ECONNABORTED;
3163 				goto out;
3164 
3165 			case SCF_ERROR_DELETED:
3166 				goto add_svc;
3167 
3168 			case SCF_ERROR_EXISTS:
3169 				goto get_inst;
3170 
3171 			case SCF_ERROR_PERMISSION_DENIED:
3172 				ret = EPERM;
3173 				goto out;
3174 
3175 			case SCF_ERROR_BACKEND_ACCESS:
3176 				ret = EACCES;
3177 				goto out;
3178 
3179 			case SCF_ERROR_BACKEND_READONLY:
3180 				ret = EROFS;
3181 				goto out;
3182 
3183 			case SCF_ERROR_HANDLE_MISMATCH:
3184 			case SCF_ERROR_INVALID_ARGUMENT:
3185 			case SCF_ERROR_NOT_SET:
3186 			default:
3187 				bad_error("scf_service_add_instance",
3188 				    scf_error());
3189 			}
3190 		}
3191 	}
3192 
3193 	ret = 0;
3194 
3195 out:
3196 	scf_service_destroy(svc);
3197 	scf_scope_destroy(scope);
3198 	return (ret);
3199 }
3200 
3201 /*
3202  * Fails with
3203  *   ECONNABORTED - repository connection broken
3204  *   ECANCELED - the transaction's property group was deleted
3205  */
3206 static int
3207 transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3208     const char *pname, scf_type_t type)
3209 {
3210 change_type:
3211 	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3212 		return (0);
3213 
3214 	switch (scf_error()) {
3215 	case SCF_ERROR_CONNECTION_BROKEN:
3216 		return (ECONNABORTED);
3217 
3218 	case SCF_ERROR_DELETED:
3219 		return (ECANCELED);
3220 
3221 	case SCF_ERROR_NOT_FOUND:
3222 		goto new;
3223 
3224 	case SCF_ERROR_HANDLE_MISMATCH:
3225 	case SCF_ERROR_INVALID_ARGUMENT:
3226 	case SCF_ERROR_NOT_BOUND:
3227 	case SCF_ERROR_NOT_SET:
3228 	default:
3229 		bad_error("scf_transaction_property_change_type", scf_error());
3230 	}
3231 
3232 new:
3233 	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3234 		return (0);
3235 
3236 	switch (scf_error()) {
3237 	case SCF_ERROR_CONNECTION_BROKEN:
3238 		return (ECONNABORTED);
3239 
3240 	case SCF_ERROR_DELETED:
3241 		return (ECANCELED);
3242 
3243 	case SCF_ERROR_EXISTS:
3244 		goto change_type;
3245 
3246 	case SCF_ERROR_HANDLE_MISMATCH:
3247 	case SCF_ERROR_INVALID_ARGUMENT:
3248 	case SCF_ERROR_NOT_BOUND:
3249 	case SCF_ERROR_NOT_SET:
3250 	default:
3251 		bad_error("scf_transaction_property_new", scf_error());
3252 		/* NOTREACHED */
3253 	}
3254 }
3255 
3256 static void
3257 scferr(void)
3258 {
3259 	switch (scf_error()) {
3260 	case SCF_ERROR_NO_MEMORY:
3261 		console(B_TRUE, gettext("Out of memory.\n"));
3262 		break;
3263 
3264 	case SCF_ERROR_CONNECTION_BROKEN:
3265 		console(B_TRUE, gettext(
3266 		    "Connection to smf(7) repository server broken.\n"));
3267 		break;
3268 
3269 	case SCF_ERROR_NO_RESOURCES:
3270 		console(B_TRUE, gettext(
3271 		    "smf(7) repository server is out of memory.\n"));
3272 		break;
3273 
3274 	case SCF_ERROR_PERMISSION_DENIED:
3275 		console(B_TRUE, gettext("Insufficient privileges.\n"));
3276 		break;
3277 
3278 	default:
3279 		console(B_TRUE, gettext("libscf error: %s\n"),
3280 		    scf_strerror(scf_error()));
3281 	}
3282 }
3283 
3284 static void
3285 lscf_set_runlevel(char rl)
3286 {
3287 	scf_handle_t *h;
3288 	scf_instance_t *inst = NULL;
3289 	scf_propertygroup_t *pg = NULL;
3290 	scf_transaction_t *tx = NULL;
3291 	scf_transaction_entry_t *ent = NULL;
3292 	scf_value_t *val = NULL;
3293 	char buf[2];
3294 	int r;
3295 
3296 	h = scf_handle_create(SCF_VERSION);
3297 	if (h == NULL) {
3298 		scferr();
3299 		return;
3300 	}
3301 
3302 	if (scf_handle_bind(h) != 0) {
3303 		switch (scf_error()) {
3304 		case SCF_ERROR_NO_SERVER:
3305 			console(B_TRUE,
3306 			    gettext("smf(7) repository server not running.\n"));
3307 			goto bail;
3308 
3309 		default:
3310 			scferr();
3311 			goto bail;
3312 		}
3313 	}
3314 
3315 	if ((inst = scf_instance_create(h)) == NULL ||
3316 	    (pg = scf_pg_create(h)) == NULL ||
3317 	    (val = scf_value_create(h)) == NULL ||
3318 	    (tx = scf_transaction_create(h)) == NULL ||
3319 	    (ent = scf_entry_create(h)) == NULL) {
3320 		scferr();
3321 		goto bail;
3322 	}
3323 
3324 get_inst:
3325 	r = get_or_add_startd(inst);
3326 	switch (r) {
3327 	case 0:
3328 		break;
3329 
3330 	case ENOMEM:
3331 	case ECONNABORTED:
3332 	case EPERM:
3333 	case EACCES:
3334 	case EROFS:
3335 		scferr();
3336 		goto bail;
3337 	default:
3338 		bad_error("get_or_add_startd", r);
3339 	}
3340 
3341 get_pg:
3342 	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
3343 		switch (scf_error()) {
3344 		case SCF_ERROR_CONNECTION_BROKEN:
3345 			scferr();
3346 			goto bail;
3347 
3348 		case SCF_ERROR_DELETED:
3349 			goto get_inst;
3350 
3351 		case SCF_ERROR_NOT_FOUND:
3352 			break;
3353 
3354 		case SCF_ERROR_HANDLE_MISMATCH:
3355 		case SCF_ERROR_INVALID_ARGUMENT:
3356 		case SCF_ERROR_NOT_SET:
3357 		default:
3358 			bad_error("scf_instance_get_pg", scf_error());
3359 		}
3360 
3361 add_pg:
3362 		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
3363 		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
3364 		    0) {
3365 			switch (scf_error()) {
3366 			case SCF_ERROR_CONNECTION_BROKEN:
3367 			case SCF_ERROR_PERMISSION_DENIED:
3368 			case SCF_ERROR_BACKEND_ACCESS:
3369 				scferr();
3370 				goto bail;
3371 
3372 			case SCF_ERROR_DELETED:
3373 				goto get_inst;
3374 
3375 			case SCF_ERROR_EXISTS:
3376 				goto get_pg;
3377 
3378 			case SCF_ERROR_HANDLE_MISMATCH:
3379 			case SCF_ERROR_INVALID_ARGUMENT:
3380 			case SCF_ERROR_NOT_SET:
3381 			default:
3382 				bad_error("scf_instance_add_pg", scf_error());
3383 			}
3384 		}
3385 	}
3386 
3387 	buf[0] = rl;
3388 	buf[1] = '\0';
3389 	r = scf_value_set_astring(val, buf);
3390 	assert(r == 0);
3391 
3392 	for (;;) {
3393 		if (scf_transaction_start(tx, pg) != 0) {
3394 			switch (scf_error()) {
3395 			case SCF_ERROR_CONNECTION_BROKEN:
3396 			case SCF_ERROR_PERMISSION_DENIED:
3397 			case SCF_ERROR_BACKEND_ACCESS:
3398 				scferr();
3399 				goto bail;
3400 
3401 			case SCF_ERROR_DELETED:
3402 				goto add_pg;
3403 
3404 			case SCF_ERROR_HANDLE_MISMATCH:
3405 			case SCF_ERROR_NOT_BOUND:
3406 			case SCF_ERROR_IN_USE:
3407 			case SCF_ERROR_NOT_SET:
3408 			default:
3409 				bad_error("scf_transaction_start", scf_error());
3410 			}
3411 		}
3412 
3413 		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
3414 		switch (r) {
3415 		case 0:
3416 			break;
3417 
3418 		case ECONNABORTED:
3419 			scferr();
3420 			goto bail;
3421 
3422 		case ECANCELED:
3423 			scf_transaction_reset(tx);
3424 			goto add_pg;
3425 
3426 		default:
3427 			bad_error("transaction_add_set", r);
3428 		}
3429 
3430 		r = scf_entry_add_value(ent, val);
3431 		assert(r == 0);
3432 
3433 		r = scf_transaction_commit(tx);
3434 		if (r == 1)
3435 			break;
3436 
3437 		if (r != 0) {
3438 			switch (scf_error()) {
3439 			case SCF_ERROR_CONNECTION_BROKEN:
3440 			case SCF_ERROR_PERMISSION_DENIED:
3441 			case SCF_ERROR_BACKEND_ACCESS:
3442 			case SCF_ERROR_BACKEND_READONLY:
3443 				scferr();
3444 				goto bail;
3445 
3446 			case SCF_ERROR_DELETED:
3447 				scf_transaction_reset(tx);
3448 				goto add_pg;
3449 
3450 			case SCF_ERROR_INVALID_ARGUMENT:
3451 			case SCF_ERROR_NOT_BOUND:
3452 			case SCF_ERROR_NOT_SET:
3453 			default:
3454 				bad_error("scf_transaction_commit",
3455 				    scf_error());
3456 			}
3457 		}
3458 
3459 		scf_transaction_reset(tx);
3460 		(void) scf_pg_update(pg);
3461 	}
3462 
3463 bail:
3464 	scf_transaction_destroy(tx);
3465 	scf_entry_destroy(ent);
3466 	scf_value_destroy(val);
3467 	scf_pg_destroy(pg);
3468 	scf_instance_destroy(inst);
3469 
3470 	(void) scf_handle_unbind(h);
3471 	scf_handle_destroy(h);
3472 }
3473 
3474 /*
3475  * Function to handle requests from users to main init running as process 1.
3476  */
3477 static void
3478 userinit(int argc, char **argv)
3479 {
3480 	FILE	*fp;
3481 	char	*ln;
3482 	int	init_signal;
3483 	struct stat	sconbuf, conbuf;
3484 	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3485 
3486 	/*
3487 	 * We are a user invoked init.  Is there an argument and is it
3488 	 * a single character?  If not, print usage message and quit.
3489 	 */
3490 	if (argc != 2 || argv[1][1] != '\0') {
3491 		(void) fprintf(stderr, usage_msg);
3492 		exit(0);
3493 	}
3494 
3495 	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3496 		(void) fprintf(stderr, usage_msg);
3497 		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3498 		    argv[1]);
3499 		exit(1);
3500 	}
3501 
3502 	if (init_signal == SINGLE_USER) {
3503 		/*
3504 		 * Make sure this process is talking to a legal tty line
3505 		 * and that /dev/syscon is linked to this line.
3506 		 */
3507 		ln = ttyname(0);	/* Get the name of tty */
3508 		if (ln == NULL) {
3509 			(void) fprintf(stderr,
3510 			    "Standard input not a tty line\n");
3511 			(void) audit_put_record(ADT_FAILURE,
3512 			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3513 			exit(1);
3514 		}
3515 
3516 		if ((stat(ln, &sconbuf) != -1) &&
3517 		    (stat(SYSCON, &conbuf) == -1 ||
3518 		    sconbuf.st_rdev != conbuf.st_rdev)) {
3519 			/*
3520 			 * /dev/syscon needs to change.
3521 			 * Unlink /dev/syscon and relink it to the current line.
3522 			 */
3523 			if (lstat(SYSCON, &conbuf) != -1 &&
3524 			    unlink(SYSCON) == FAILURE) {
3525 				perror("Can't unlink /dev/syscon");
3526 				(void) fprintf(stderr,
3527 				    "Run command on the system console.\n");
3528 				(void) audit_put_record(ADT_FAILURE,
3529 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3530 				exit(1);
3531 			}
3532 			if (symlink(ln, SYSCON) == FAILURE) {
3533 				(void) fprintf(stderr,
3534 				    "Can't symlink /dev/syscon to %s: %s", ln,
3535 				    strerror(errno));
3536 
3537 				/* Try to leave a syscon */
3538 				(void) link(SYSTTY, SYSCON);
3539 				(void) audit_put_record(ADT_FAILURE,
3540 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3541 				exit(1);
3542 			}
3543 
3544 			/*
3545 			 * Try to leave a message on system console saying where
3546 			 * /dev/syscon is currently connected.
3547 			 */
3548 			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3549 				(void) fprintf(fp,
3550 				    "\n****	SYSCON CHANGED TO %s	****\n",
3551 				    ln);
3552 				(void) fclose(fp);
3553 			}
3554 		}
3555 	}
3556 
3557 	update_boot_archive(init_signal);
3558 
3559 	(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3560 
3561 	/*
3562 	 * Signal init; init will take care of telling svc.startd.
3563 	 */
3564 	if (kill(init_pid, init_signal) == FAILURE) {
3565 		(void) fprintf(stderr, "Must be super-user\n");
3566 		(void) audit_put_record(ADT_FAILURE,
3567 		    ADT_FAIL_VALUE_AUTH, argv[1]);
3568 		exit(1);
3569 	}
3570 
3571 	exit(0);
3572 }
3573 
3574 
3575 #define	DELTA	25	/* Number of pidlist elements to allocate at a time */
3576 
3577 /* ARGSUSED */
3578 void
3579 sigpoll(int n)
3580 {
3581 	struct pidrec prec;
3582 	struct pidrec *p = &prec;
3583 	struct pidlist *plp;
3584 	struct pidlist *tp, *savetp;
3585 	int i;
3586 
3587 	if (Pfd < 0) {
3588 		return;
3589 	}
3590 
3591 	for (;;) {
3592 		/*
3593 		 * Important Note: Either read will really fail (in which case
3594 		 * return is all we can do) or will get EAGAIN (Pfd was opened
3595 		 * O_NDELAY), in which case we also want to return.
3596 		 * Always return from here!
3597 		 */
3598 		if (read(Pfd, p, sizeof (struct pidrec)) !=
3599 						sizeof (struct pidrec)) {
3600 			return;
3601 		}
3602 		switch (p->pd_type) {
3603 
3604 		case ADDPID:
3605 			/*
3606 			 * New "godchild", add to list.
3607 			 */
3608 			if (Plfree == NULL) {
3609 				plp = (struct pidlist *)calloc(DELTA,
3610 				    sizeof (struct pidlist));
3611 				if (plp == NULL) {
3612 					/* Can't save pid */
3613 					break;
3614 				}
3615 				/*
3616 				 * Point at 2nd record allocated, we'll use plp.
3617 				 */
3618 				tp = plp + 1;
3619 				/*
3620 				 * Link them into a chain.
3621 				 */
3622 				Plfree = tp;
3623 				for (i = 0; i < DELTA - 2; i++) {
3624 					tp->pl_next = tp + 1;
3625 					tp++;
3626 				}
3627 			} else {
3628 				plp = Plfree;
3629 				Plfree = plp->pl_next;
3630 			}
3631 			plp->pl_pid = p->pd_pid;
3632 			plp->pl_dflag = 0;
3633 			plp->pl_next = NULL;
3634 			/*
3635 			 * Note - pid list is kept in increasing order of pids.
3636 			 */
3637 			if (Plhead == NULL) {
3638 				Plhead = plp;
3639 				/* Back up to read next record */
3640 				break;
3641 			} else {
3642 				savetp = tp = Plhead;
3643 				while (tp) {
3644 					if (plp->pl_pid > tp->pl_pid) {
3645 						savetp = tp;
3646 						tp = tp->pl_next;
3647 						continue;
3648 					} else if (plp->pl_pid < tp->pl_pid) {
3649 						if (tp == Plhead) {
3650 							plp->pl_next = Plhead;
3651 							Plhead = plp;
3652 						} else {
3653 							plp->pl_next =
3654 							    savetp->pl_next;
3655 							savetp->pl_next = plp;
3656 						}
3657 						break;
3658 					} else {
3659 						/* Already in list! */
3660 						plp->pl_next = Plfree;
3661 						Plfree = plp;
3662 						break;
3663 					}
3664 				}
3665 				if (tp == NULL) {
3666 					/* Add to end of list */
3667 					savetp->pl_next = plp;
3668 				}
3669 			}
3670 			/* Back up to read next record. */
3671 			break;
3672 
3673 		case REMPID:
3674 			/*
3675 			 * This one was handled by someone else,
3676 			 * purge it from the list.
3677 			 */
3678 			if (Plhead == NULL) {
3679 				/* Back up to read next record. */
3680 				break;
3681 			}
3682 			savetp = tp = Plhead;
3683 			while (tp) {
3684 				if (p->pd_pid > tp->pl_pid) {
3685 					/* Keep on looking. */
3686 					savetp = tp;
3687 					tp = tp->pl_next;
3688 					continue;
3689 				} else if (p->pd_pid < tp->pl_pid) {
3690 					/* Not in list. */
3691 					break;
3692 				} else {
3693 					/* Found it. */
3694 					if (tp == Plhead)
3695 						Plhead = tp->pl_next;
3696 					else
3697 						savetp->pl_next = tp->pl_next;
3698 					tp->pl_next = Plfree;
3699 					Plfree = tp;
3700 					break;
3701 				}
3702 			}
3703 			/* Back up to read next record. */
3704 			break;
3705 		default:
3706 			console(B_TRUE, "Bad message on initpipe\n");
3707 			break;
3708 		}
3709 	}
3710 }
3711 
3712 
3713 static void
3714 cleanaux()
3715 {
3716 	struct pidlist *savep, *p;
3717 	pid_t	pid;
3718 	short	status;
3719 
3720 	(void) sighold(SIGCLD);
3721 	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
3722 	(void) sighold(SIGPOLL);
3723 	savep = p = Plhead;
3724 	while (p) {
3725 		if (p->pl_dflag) {
3726 			/*
3727 			 * Found an entry to delete,
3728 			 * remove it from list first.
3729 			 */
3730 			pid = p->pl_pid;
3731 			status = p->pl_exit;
3732 			if (p == Plhead) {
3733 				Plhead = p->pl_next;
3734 				p->pl_next = Plfree;
3735 				Plfree = p;
3736 				savep = p = Plhead;
3737 			} else {
3738 				savep->pl_next = p->pl_next;
3739 				p->pl_next = Plfree;
3740 				Plfree = p;
3741 				p = savep->pl_next;
3742 			}
3743 			clearent(pid, status);
3744 			continue;
3745 		}
3746 		savep = p;
3747 		p = p->pl_next;
3748 	}
3749 	(void) sigrelse(SIGPOLL);
3750 	(void) sigrelse(SIGCLD);
3751 }
3752 
3753 
3754 /*
3755  * /etc/inittab has more entries and we have run out of room in the proc_table
3756  * array. Double the size of proc_table to accomodate the extra entries.
3757  */
3758 static void
3759 increase_proc_table_size()
3760 {
3761 	sigset_t block, unblock;
3762 	void *ptr;
3763 	size_t delta = num_proc * sizeof (struct PROC_TABLE);
3764 
3765 
3766 	/*
3767 	 * Block signals for realloc.
3768 	 */
3769 	(void) sigfillset(&block);
3770 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
3771 
3772 
3773 	/*
3774 	 * On failure we just return because callers of this function check
3775 	 * for failure.
3776 	 */
3777 	do
3778 		ptr = realloc(g_state, g_state_sz + delta);
3779 	while (ptr == NULL && errno == EAGAIN)
3780 		;
3781 
3782 	if (ptr != NULL) {
3783 		/* ensure that the new part is initialized to zero */
3784 		bzero((caddr_t)ptr + g_state_sz, delta);
3785 
3786 		g_state = ptr;
3787 		g_state_sz += delta;
3788 		num_proc <<= 1;
3789 	}
3790 
3791 
3792 	/* unblock our signals before returning */
3793 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3794 }
3795 
3796 
3797 
3798 /*
3799  * Sanity check g_state.
3800  */
3801 static int
3802 st_sane()
3803 {
3804 	int i;
3805 	struct PROC_TABLE *ptp;
3806 
3807 
3808 	/* Note: cur_state is encoded as a signal number */
3809 	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3810 		return (0);
3811 
3812 	/* Check num_proc */
3813 	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3814 	    sizeof (struct PROC_TABLE))
3815 		return (0);
3816 
3817 	/* Check proc_table */
3818 	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3819 		/* skip unoccupied entries */
3820 		if (!(ptp->p_flags & OCCUPIED))
3821 			continue;
3822 
3823 		/* p_flags has no bits outside of PF_MASK */
3824 		if (ptp->p_flags & ~(PF_MASK))
3825 			return (0);
3826 
3827 		/* 5 <= pid <= MAXPID */
3828 		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3829 			return (0);
3830 
3831 		/* p_count >= 0 */
3832 		if (ptp->p_count < 0)
3833 			return (0);
3834 
3835 		/* p_time >= 0 */
3836 		if (ptp->p_time < 0)
3837 			return (0);
3838 	}
3839 
3840 	return (1);
3841 }
3842 
3843 /*
3844  * Initialize our state.
3845  *
3846  * If the system just booted, then init_state_file, which is located on an
3847  * everpresent tmpfs filesystem, should not exist.
3848  *
3849  * If we were restarted, then init_state_file should exist, in
3850  * which case we'll read it in, sanity check it, and use it.
3851  *
3852  * Note: You can't call console() until proc_table is ready.
3853  */
3854 void
3855 st_init()
3856 {
3857 	struct stat stb;
3858 	int ret, st_fd, insane = 0;
3859 	size_t to_be_read;
3860 	char *ptr;
3861 
3862 
3863 	booting = 1;
3864 
3865 	do {
3866 		/*
3867 		 * If we can exclusively create the file, then we're the
3868 		 * initial invocation of init(8).
3869 		 */
3870 		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3871 		    S_IRUSR | S_IWUSR);
3872 	} while (st_fd == -1 && errno == EINTR);
3873 	if (st_fd != -1)
3874 		goto new_state;
3875 
3876 	booting = 0;
3877 
3878 	do {
3879 		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3880 	} while (st_fd == -1 && errno == EINTR);
3881 	if (st_fd == -1)
3882 		goto new_state;
3883 
3884 	/* Get the size of the file. */
3885 	do
3886 		ret = fstat(st_fd, &stb);
3887 	while (ret == -1 && errno == EINTR)
3888 		;
3889 	if (ret == -1)
3890 		goto new_state;
3891 
3892 	do
3893 		g_state = malloc(stb.st_size);
3894 	while (g_state == NULL && errno == EAGAIN)
3895 		;
3896 	if (g_state == NULL)
3897 		goto new_state;
3898 
3899 	to_be_read = stb.st_size;
3900 	ptr = (char *)g_state;
3901 	while (to_be_read > 0) {
3902 		ssize_t read_ret;
3903 
3904 		read_ret = read(st_fd, ptr, to_be_read);
3905 		if (read_ret < 0) {
3906 			if (errno == EINTR)
3907 				continue;
3908 
3909 			goto new_state;
3910 		}
3911 
3912 		to_be_read -= read_ret;
3913 		ptr += read_ret;
3914 	}
3915 
3916 	(void) close(st_fd);
3917 
3918 	g_state_sz = stb.st_size;
3919 
3920 	if (st_sane()) {
3921 		console(B_TRUE, "Restarting.\n");
3922 		return;
3923 	}
3924 
3925 	insane = 1;
3926 
3927 new_state:
3928 	if (st_fd >= 0)
3929 		(void) close(st_fd);
3930 	else
3931 		(void) unlink(init_state_file);
3932 
3933 	if (g_state != NULL)
3934 		free(g_state);
3935 
3936 	/* Something went wrong, so allocate new state. */
3937 	g_state_sz = sizeof (struct init_state) +
3938 	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3939 	do
3940 		g_state = calloc(1, g_state_sz);
3941 	while (g_state == NULL && errno == EAGAIN)
3942 		;
3943 	if (g_state == NULL) {
3944 		/* Fatal error! */
3945 		exit(errno);
3946 	}
3947 
3948 	g_state->ist_runlevel = -1;
3949 	num_proc = init_num_proc;
3950 
3951 	if (!booting) {
3952 		console(B_TRUE, "Restarting.\n");
3953 
3954 		/* Overwrite the bad state file. */
3955 		st_write();
3956 
3957 		if (!insane) {
3958 			console(B_TRUE,
3959 			    "Error accessing persistent state file `%s'.  "
3960 			    "Ignored.\n", init_state_file);
3961 		} else {
3962 			console(B_TRUE,
3963 			    "Persistent state file `%s' is invalid and was "
3964 			    "ignored.\n", init_state_file);
3965 		}
3966 	}
3967 }
3968 
3969 /*
3970  * Write g_state out to the state file.
3971  */
3972 void
3973 st_write()
3974 {
3975 	static int complained = 0;
3976 
3977 	int st_fd;
3978 	char *cp;
3979 	size_t sz;
3980 	ssize_t ret;
3981 
3982 
3983 	do {
3984 		st_fd = open(init_next_state_file,
3985 		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
3986 	} while (st_fd < 0 && errno == EINTR);
3987 	if (st_fd < 0)
3988 		goto err;
3989 
3990 	cp = (char *)g_state;
3991 	sz = g_state_sz;
3992 	while (sz > 0) {
3993 		ret = write(st_fd, cp, sz);
3994 		if (ret < 0) {
3995 			if (errno == EINTR)
3996 				continue;
3997 
3998 			goto err;
3999 		}
4000 
4001 		sz -= ret;
4002 		cp += ret;
4003 	}
4004 
4005 	(void) close(st_fd);
4006 	st_fd = -1;
4007 	if (rename(init_next_state_file, init_state_file)) {
4008 		(void) unlink(init_next_state_file);
4009 		goto err;
4010 	}
4011 	complained = 0;
4012 
4013 	return;
4014 
4015 err:
4016 	if (st_fd >= 0)
4017 		(void) close(st_fd);
4018 
4019 	if (!booting && !complained) {
4020 		/*
4021 		 * Only complain after the filesystem should have come up.
4022 		 * And only do it once so we don't loop between console()
4023 		 * & efork().
4024 		 */
4025 		complained = 1;
4026 		if (st_fd)
4027 			console(B_TRUE, "Couldn't write persistent state "
4028 			    "file `%s'.\n", init_state_file);
4029 		else
4030 			console(B_TRUE, "Couldn't move persistent state "
4031 			    "file `%s' to `%s'.\n", init_next_state_file,
4032 			    init_state_file);
4033 	}
4034 }
4035 
4036 /*
4037  * Create a contract with these parameters.
4038  */
4039 static int
4040 contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4041     uint64_t cookie)
4042 {
4043 	int fd, err;
4044 
4045 	char *ioctl_tset_emsg =
4046 	    "Couldn't set \"%s\" contract template parameter: %s.\n";
4047 
4048 	do
4049 		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4050 	while (fd < 0 && errno == EINTR)
4051 		;
4052 	if (fd < 0) {
4053 		console(B_TRUE, "Couldn't create process template: %s.\n",
4054 		    strerror(errno));
4055 		return (-1);
4056 	}
4057 
4058 	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4059 		console(B_TRUE, "Contract set template inherit, regent "
4060 		    "failed: %s.\n", strerror(err));
4061 
4062 	/*
4063 	 * These errors result in a misconfigured template, which is better
4064 	 * than no template at all, so warn but don't abort.
4065 	 */
4066 	if (err = ct_tmpl_set_informative(fd, info))
4067 		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4068 
4069 	if (err = ct_tmpl_set_critical(fd, critical))
4070 		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4071 
4072 	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4073 		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4074 
4075 	if (err = ct_tmpl_set_cookie(fd, cookie))
4076 		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4077 
4078 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4079 
4080 	return (fd);
4081 }
4082 
4083 /*
4084  * Create the templates and open an event file descriptor.  We use dup2(2) to
4085  * get these descriptors away from the stdin/stdout/stderr group.
4086  */
4087 static void
4088 contracts_init()
4089 {
4090 	int err, fd;
4091 
4092 	/*
4093 	 * Create & configure a legacy template.  We only want empty events so
4094 	 * we know when to abandon them.
4095 	 */
4096 	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4097 	    ORDINARY_COOKIE);
4098 	if (legacy_tmpl >= 0) {
4099 		err = ct_tmpl_activate(legacy_tmpl);
4100 		if (err != 0) {
4101 			(void) close(legacy_tmpl);
4102 			legacy_tmpl = -1;
4103 			console(B_TRUE,
4104 			    "Couldn't activate legacy template (%s); "
4105 			    "legacy services will be in init's contract.\n",
4106 			    strerror(err));
4107 		}
4108 	} else
4109 		console(B_TRUE,
4110 		    "Legacy services will be in init's contract.\n");
4111 
4112 	if (dup2(legacy_tmpl, 255) == -1) {
4113 		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4114 		    strerror(errno));
4115 	} else {
4116 		(void) close(legacy_tmpl);
4117 		legacy_tmpl = 255;
4118 	}
4119 
4120 	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4121 
4122 	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4123 	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4124 
4125 	if (dup2(startd_tmpl, 254) == -1) {
4126 		console(B_TRUE, "Could not duplicate startd template: %s.\n",
4127 		    strerror(errno));
4128 	} else {
4129 		(void) close(startd_tmpl);
4130 		startd_tmpl = 254;
4131 	}
4132 
4133 	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4134 
4135 	if (legacy_tmpl < 0 && startd_tmpl < 0) {
4136 		/* The creation errors have already been reported. */
4137 		console(B_TRUE,
4138 		    "Ignoring contract events.  Core smf(7) services will not "
4139 		    "be restarted.\n");
4140 		return;
4141 	}
4142 
4143 	/*
4144 	 * Open an event endpoint.
4145 	 */
4146 	do
4147 		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4148 	while (fd < 0 && errno == EINTR)
4149 		;
4150 	if (fd < 0) {
4151 		console(B_TRUE,
4152 		    "Couldn't open process pbundle: %s.  Core smf(7) services "
4153 		    "will not be restarted.\n", strerror(errno));
4154 		return;
4155 	}
4156 
4157 	if (dup2(fd, 253) == -1) {
4158 		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4159 		    strerror(errno));
4160 	} else {
4161 		(void) close(fd);
4162 		fd = 253;
4163 	}
4164 
4165 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4166 
4167 	/* Reset in case we've been restarted. */
4168 	(void) ct_event_reset(fd);
4169 
4170 	poll_fds[0].fd = fd;
4171 	poll_fds[0].events = POLLIN;
4172 	poll_nfds = 1;
4173 }
4174 
4175 static int
4176 contract_getfile(ctid_t id, const char *name, int oflag)
4177 {
4178 	int fd;
4179 
4180 	do
4181 		fd = contract_open(id, "process", name, oflag);
4182 	while (fd < 0 && errno == EINTR)
4183 		;
4184 
4185 	if (fd < 0)
4186 		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4187 		    name, id, strerror(errno));
4188 
4189 	return (fd);
4190 }
4191 
4192 static int
4193 contract_cookie(ctid_t id, uint64_t *cp)
4194 {
4195 	int fd, err;
4196 	ct_stathdl_t sh;
4197 
4198 	fd = contract_getfile(id, "status", O_RDONLY);
4199 	if (fd < 0)
4200 		return (-1);
4201 
4202 	err = ct_status_read(fd, CTD_COMMON, &sh);
4203 	if (err != 0) {
4204 		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4205 		    id, strerror(err));
4206 		(void) close(fd);
4207 		return (-1);
4208 	}
4209 
4210 	(void) close(fd);
4211 
4212 	*cp = ct_status_get_cookie(sh);
4213 
4214 	ct_status_free(sh);
4215 	return (0);
4216 }
4217 
4218 static void
4219 contract_ack(ct_evthdl_t e)
4220 {
4221 	int fd;
4222 
4223 	if (ct_event_get_flags(e) & CTE_INFO)
4224 		return;
4225 
4226 	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4227 	if (fd < 0)
4228 		return;
4229 
4230 	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
4231 	(void) close(fd);
4232 }
4233 
4234 /*
4235  * Process a contract event.
4236  */
4237 static void
4238 contract_event(struct pollfd *poll)
4239 {
4240 	ct_evthdl_t e;
4241 	int err;
4242 	ctid_t ctid;
4243 
4244 	if (!(poll->revents & POLLIN)) {
4245 		if (poll->revents & POLLERR)
4246 			console(B_TRUE,
4247 			    "Unknown poll error on my process contract "
4248 			    "pbundle.\n");
4249 		return;
4250 	}
4251 
4252 	err = ct_event_read(poll->fd, &e);
4253 	if (err != 0) {
4254 		console(B_TRUE, "Error retrieving contract event: %s.\n",
4255 		    strerror(err));
4256 		return;
4257 	}
4258 
4259 	ctid = ct_event_get_ctid(e);
4260 
4261 	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4262 		uint64_t cookie;
4263 		int ret, abandon = 1;
4264 
4265 		/* If it's svc.startd, restart it.  Else, abandon. */
4266 		ret = contract_cookie(ctid, &cookie);
4267 
4268 		if (ret == 0) {
4269 			if (cookie == STARTD_COOKIE &&
4270 			    do_restart_startd) {
4271 				if (smf_debug)
4272 					console(B_TRUE, "Restarting "
4273 					    "svc.startd.\n");
4274 
4275 				/*
4276 				 * Account for the failure.  If the failure rate
4277 				 * exceeds a threshold, then drop to maintenance
4278 				 * mode.
4279 				 */
4280 				startd_record_failure();
4281 				if (startd_failure_rate_critical())
4282 					enter_maintenance();
4283 
4284 				if (startd_tmpl < 0)
4285 					console(B_TRUE,
4286 					    "Restarting svc.startd in "
4287 					    "improper contract (bad "
4288 					    "template).\n");
4289 
4290 				(void) startd_run(startd_cline, startd_tmpl,
4291 				    ctid);
4292 
4293 				abandon = 0;
4294 			}
4295 		}
4296 
4297 		if (abandon && (err = contract_abandon_id(ctid))) {
4298 			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4299 			    ctid, strerror(err));
4300 		}
4301 
4302 		/*
4303 		 * No need to acknowledge the event since either way the
4304 		 * originating contract should be abandoned.
4305 		 */
4306 	} else {
4307 		console(B_TRUE,
4308 		    "Received contract event of unexpected type %d from "
4309 		    "contract %ld.\n", ct_event_get_type(e), ctid);
4310 
4311 		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4312 			/* Allow unexpected critical events to be released. */
4313 			contract_ack(e);
4314 	}
4315 
4316 	ct_event_free(e);
4317 }
4318 
4319 /*
4320  * svc.startd(8) Management
4321  */
4322 
4323 /*
4324  * (Re)start svc.startd(8).  old_ctid should be the contract ID of the old
4325  * contract, or 0 if we're starting it for the first time.  If wait is true
4326  * we'll wait for and return the exit value of the child.
4327  */
4328 static int
4329 startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4330 {
4331 	int err, i, ret, did_activate;
4332 	pid_t pid;
4333 	struct stat sb;
4334 
4335 	if (cline[0] == '\0')
4336 		return (-1);
4337 
4338 	/*
4339 	 * Don't restart startd if the system is rebooting or shutting down.
4340 	 */
4341 	do {
4342 		ret = stat("/etc/svc/volatile/resetting", &sb);
4343 	} while (ret == -1 && errno == EINTR);
4344 
4345 	if (ret == 0) {
4346 		if (smf_debug)
4347 			console(B_TRUE, "Quiescing for reboot.\n");
4348 		(void) pause();
4349 		return (-1);
4350 	}
4351 
4352 	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4353 	if (err == EINVAL) {
4354 		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4355 		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4356 		    CT_PR_EV_HWERR, STARTD_COOKIE);
4357 
4358 		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4359 	}
4360 	if (err != 0) {
4361 		console(B_TRUE,
4362 		    "Couldn't set transfer parameter of contract template: "
4363 		    "%s.\n", strerror(err));
4364 	}
4365 
4366 	if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
4367 	    SCF_SERVICE_STARTD)) != 0)
4368 		console(B_TRUE,
4369 		    "Can not set svc_fmri in contract template: %s\n",
4370 		    strerror(err));
4371 	if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
4372 	    startd_svc_aux)) != 0)
4373 		console(B_TRUE,
4374 		    "Can not set svc_aux in contract template: %s\n",
4375 		    strerror(err));
4376 	did_activate = !(ct_tmpl_activate(tmpl));
4377 	if (!did_activate)
4378 		console(B_TRUE,
4379 		    "Template activation failed; not starting \"%s\" in "
4380 		    "proper contract.\n", cline);
4381 
4382 	/* Hold SIGCLD so we can wait if necessary. */
4383 	(void) sighold(SIGCLD);
4384 
4385 	while ((pid = fork()) < 0) {
4386 		if (errno == EPERM) {
4387 			console(B_TRUE, "Insufficient permission to fork.\n");
4388 
4389 			/* Now that's a doozy. */
4390 			exit(1);
4391 		}
4392 
4393 		console(B_TRUE,
4394 		    "fork() for svc.startd failed: %s.  Will retry in 1 "
4395 		    "second...\n", strerror(errno));
4396 
4397 		(void) sleep(1);
4398 
4399 		/* Eventually give up? */
4400 	}
4401 
4402 	if (pid == 0) {
4403 		/* child */
4404 
4405 		/* See the comment in efork() */
4406 		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4407 			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4408 				(void) sigset(i, SIG_IGN);
4409 			else
4410 				(void) sigset(i, SIG_DFL);
4411 		}
4412 
4413 		if (smf_options != NULL) {
4414 			/* Put smf_options in the environment. */
4415 			glob_envp[glob_envn] =
4416 			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
4417 			    strlen(smf_options) + 1);
4418 
4419 			if (glob_envp[glob_envn] != NULL) {
4420 				/* LINTED */
4421 				(void) sprintf(glob_envp[glob_envn],
4422 				    "SMF_OPTIONS=%s", smf_options);
4423 				glob_envp[glob_envn+1] = NULL;
4424 			} else {
4425 				console(B_TRUE,
4426 				    "Could not set SMF_OPTIONS (%s).\n",
4427 				    strerror(errno));
4428 			}
4429 		}
4430 
4431 		if (smf_debug)
4432 			console(B_TRUE, "Executing svc.startd\n");
4433 
4434 		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4435 
4436 		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4437 		    strerror(errno));
4438 
4439 		exit(1);
4440 	}
4441 
4442 	/* parent */
4443 
4444 	if (did_activate) {
4445 		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4446 			(void) ct_tmpl_clear(tmpl);
4447 	}
4448 
4449 	/* Clear the old_ctid reference so the kernel can reclaim it. */
4450 	if (old_ctid != 0)
4451 		(void) ct_pr_tmpl_set_transfer(tmpl, 0);
4452 
4453 	(void) sigrelse(SIGCLD);
4454 
4455 	return (0);
4456 }
4457 
4458 /*
4459  * void startd_record_failure(void)
4460  *   Place the current time in our circular array of svc.startd failures.
4461  */
4462 void
4463 startd_record_failure()
4464 {
4465 	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4466 
4467 	startd_failure_time[index] = gethrtime();
4468 }
4469 
4470 /*
4471  * int startd_failure_rate_critical(void)
4472  *   Return true if the average failure interval is less than the permitted
4473  *   interval.  Implicit success if insufficient measurements for an average
4474  *   exist.
4475  */
4476 int
4477 startd_failure_rate_critical()
4478 {
4479 	int n = startd_failure_index;
4480 	hrtime_t avg_ns = 0;
4481 
4482 	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4483 		return (0);
4484 
4485 	avg_ns =
4486 	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4487 	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4488 	    NSTARTD_FAILURE_TIMES;
4489 
4490 	return (avg_ns < STARTD_FAILURE_RATE_NS);
4491 }
4492 
4493 /*
4494  * returns string that must be free'd
4495  */
4496 
4497 static char
4498 *audit_boot_msg()
4499 {
4500 	char		*b, *p;
4501 	char		desc[] = "booted";
4502 	zoneid_t	zid = getzoneid();
4503 
4504 	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4505 	if (b == NULL)
4506 		return (b);
4507 
4508 	p = b;
4509 	p += strlcpy(p, desc, sizeof (desc));
4510 	if (zid != GLOBAL_ZONEID) {
4511 		p += strlcpy(p, ": ", 3);
4512 		(void) getzonenamebyid(zid, p, MAXNAMELEN);
4513 	}
4514 	return (b);
4515 }
4516 
4517 /*
4518  * Generate AUE_init_solaris audit record.  Return 1 if
4519  * auditing is enabled in case the caller cares.
4520  *
4521  * In the case of userint() or a local zone invocation of
4522  * one_true_init, the process initially contains the audit
4523  * characteristics of the process that invoked init.  The first pass
4524  * through here uses those characteristics then for the case of
4525  * one_true_init in a local zone, clears them so subsequent system
4526  * state changes won't be attributed to the person who booted the
4527  * zone.
4528  */
4529 static int
4530 audit_put_record(int pass_fail, int status, char *msg)
4531 {
4532 	adt_session_data_t	*ah;
4533 	adt_event_data_t	*event;
4534 
4535 	if (!adt_audit_enabled())
4536 		return (0);
4537 
4538 	/*
4539 	 * the PROC_DATA picks up the context to tell whether this is
4540 	 * an attributed record (auid = -2 is unattributed)
4541 	 */
4542 	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4543 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4544 		return (1);
4545 	}
4546 	event = adt_alloc_event(ah, ADT_init_solaris);
4547 	if (event == NULL) {
4548 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4549 		(void) adt_end_session(ah);
4550 		return (1);
4551 	}
4552 	event->adt_init_solaris.info = msg;	/* NULL is ok here */
4553 
4554 	if (adt_put_event(event, pass_fail, status)) {
4555 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4556 		(void) adt_end_session(ah);
4557 		return (1);
4558 	}
4559 	adt_free_event(event);
4560 
4561 	(void) adt_end_session(ah);
4562 
4563 	return (1);
4564 }
4565