xref: /titanic_50/usr/src/cmd/cmd-inet/usr.lib/inetd/inetd.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * NOTES: To be expanded.
31  *
32  * The SMF inetd.
33  *
34  * Below are some high level notes of the operation of the SMF inetd. The
35  * notes don't go into any real detail, and the viewer of this file is
36  * encouraged to look at the code and its associated comments to better
37  * understand inetd's operation. This saves the potential for the code
38  * and these notes diverging over time.
39  *
40  * Inetd's major work is done from the context of event_loop(). Within this
41  * loop, inetd polls for events arriving from a number of different file
42  * descriptors, representing the following event types, and initiates
43  * any necessary event processing:
44  * - incoming network connections/datagrams.
45  * - notification of terminated processes (discovered via contract events).
46  * - instance specific events originating from the SMF master restarter.
47  * - stop/refresh requests from the inetd method processes (coming in on a
48  *   Unix Domain socket).
49  * There's also a timeout set for the poll, which is set to the nearest
50  * scheduled timer in a timer queue that inetd uses to perform delayed
51  * processing, such as bind retries.
52  * The SIGHUP and SIGINT signals can also interrupt the poll, and will
53  * result in inetd being refreshed or stopped respectively, as was the
54  * behavior with the old inetd.
55  *
56  * Inetd implements a state machine for each instance. The states within the
57  * machine are: offline, online, disabled, maintenance, uninitialized and
58  * specializations of the offline state for when an instance exceeds one of
59  * its DOS limits. The state of an instance can be changed as a
60  * result/side-effect of one of the above events occurring, or inetd being
61  * started up. The ongoing state of an instance is stored in the SMF
62  * repository, as required of SMF restarters. This enables an administrator
63  * to view the state of each instance, and, if inetd was to terminate
64  * unexpectedly, it could use the stored state to re-commence where it left off.
65  *
66  * Within the state machine a number of methods are run (if provided) as part
67  * of a state transition to aid/effect a change in an instance's state. The
68  * supported methods are: offline, online, disable, refresh and start. The
69  * latter of these is the equivalent of the server program and its arguments
70  * in the old inetd.
71  *
72  * Events from the SMF master restarter come in on a number of threads
73  * created in the registration routine of librestart, the delegated restarter
74  * library. These threads call into the restarter_event_proxy() function
75  * when an event arrives. To serialize the processing of instances, these events
76  * are then written down a pipe to the process's main thread, which listens
77  * for these events via a poll call, with the file descriptor of the other
78  * end of the pipe in its read set, and processes the event appropriately.
79  * When the event has been processed (which may be delayed if the instance
80  * that the event is for is in the process of executing one of its methods
81  * as part of a state transition) it writes an acknowledgement back down the
82  * pipe the event was received on. The thread in restarter_event_proxy() that
83  * wrote the event will read the acknowledgement it was blocked upon, and will
84  * then be able to return to its caller, thus implicitly acknowledging the
85  * event, and allowing another event to be written down the pipe for the main
86  * thread to process.
87  */
88 
89 
90 #include <netdb.h>
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <strings.h>
94 #include <unistd.h>
95 #include <assert.h>
96 #include <sys/types.h>
97 #include <sys/socket.h>
98 #include <netinet/in.h>
99 #include <fcntl.h>
100 #include <signal.h>
101 #include <errno.h>
102 #include <locale.h>
103 #include <syslog.h>
104 #include <libintl.h>
105 #include <librestart.h>
106 #include <pthread.h>
107 #include <sys/stat.h>
108 #include <time.h>
109 #include <limits.h>
110 #include <libgen.h>
111 #include <tcpd.h>
112 #include <libscf.h>
113 #include <libuutil.h>
114 #include <stddef.h>
115 #include <bsm/adt_event.h>
116 #include "inetd_impl.h"
117 
/* Location of the inetd executable. */
#define	INETD_PATH	"/usr/lib/inet/inetd"

/* Method-selection arguments that may be passed to this binary. */
#define	START_METHOD_ARG	"start"
#define	STOP_METHOD_ARG		"stop"
#define	REFRESH_METHOD_ARG	"refresh"

/*
 * Default configuration file paths for inetd. The primary path is the one
 * that is checked first; the secondary path is checked after it.
 */
#define	PRIMARY_DEFAULT_CONF_FILE	"/etc/inet/inetd.conf"
#define	SECONDARY_DEFAULT_CONF_FILE	"/etc/inetd.conf"

/* Connection backlog for the unix domain socket. */
#define	UDS_BACKLOG	2

/* How many times to retry recv() of a request on the UDS before giving up. */
#define	UDS_RECV_RETRIES	10
138 
/*
 * Names for the two ends of a pipe; used as indices into a pipe's pair of
 * file descriptors (e.g. rst_event_pipe[PE_PRODUCER]).
 */
enum pipe_end {
	PE_CONSUMER = 0,
	PE_PRODUCER = 1
};
144 
145 typedef struct {
146 	internal_inst_state_t		istate;
147 	const char			*name;
148 	restarter_instance_state_t	smf_state;
149 	instance_method_t		method_running;
150 } state_info_t;
151 
152 
153 /*
154  * Collection of information for each state.
155  * NOTE:  This table is indexed into using the internal_inst_state_t
156  * enumeration, so the ordering needs to be kept in synch.
157  */
158 static state_info_t states[] = {
159 	{IIS_UNINITIALIZED, "uninitialized", RESTARTER_STATE_UNINIT,
160 	    IM_NONE},
161 	{IIS_ONLINE, "online", RESTARTER_STATE_ONLINE, IM_START},
162 	{IIS_IN_ONLINE_METHOD, "online_method", RESTARTER_STATE_OFFLINE,
163 	    IM_ONLINE},
164 	{IIS_OFFLINE, "offline", RESTARTER_STATE_OFFLINE, IM_NONE},
165 	{IIS_IN_OFFLINE_METHOD, "offline_method", RESTARTER_STATE_OFFLINE,
166 	    IM_OFFLINE},
167 	{IIS_DISABLED, "disabled", RESTARTER_STATE_DISABLED, IM_NONE},
168 	{IIS_IN_DISABLE_METHOD, "disabled_method", RESTARTER_STATE_OFFLINE,
169 	    IM_DISABLE},
170 	{IIS_IN_REFRESH_METHOD, "refresh_method", RESTARTER_STATE_ONLINE,
171 	    IM_REFRESH},
172 	{IIS_MAINTENANCE, "maintenance", RESTARTER_STATE_MAINT, IM_NONE},
173 	{IIS_OFFLINE_CONRATE, "cr_offline", RESTARTER_STATE_OFFLINE, IM_NONE},
174 	{IIS_OFFLINE_BIND, "bind_offline", RESTARTER_STATE_OFFLINE, IM_NONE},
175 	{IIS_OFFLINE_COPIES, "copies_offline", RESTARTER_STATE_OFFLINE,
176 	    IM_NONE},
177 	{IIS_DEGRADED, "degraded", RESTARTER_STATE_DEGRADED, IM_NONE},
178 	{IIS_NONE, "none", RESTARTER_STATE_NONE, IM_NONE}
179 };
180 
181 /*
182  * Pipe used to send events from the threads created by restarter_bind_handle()
183  * to the main thread of control.
184  */
185 static int			rst_event_pipe[] = {-1, -1};
186 /*
187  * Used to protect the critical section of code in restarter_event_proxy() that
188  * involves writing an event down the event pipe and reading an acknowledgement.
189  */
190 static pthread_mutex_t		rst_event_pipe_mtx = PTHREAD_MUTEX_INITIALIZER;
191 
192 /* handle used in communication with the master restarter */
193 static restarter_event_handle_t *rst_event_handle = NULL;
194 
195 /* set to indicate a refresh of inetd is requested */
196 static boolean_t		refresh_inetd_requested = B_FALSE;
197 
198 /* set by the SIGTERM handler to flag we got a SIGTERM */
199 static boolean_t		got_sigterm = B_FALSE;
200 
201 /*
202  * Timer queue used to store timers for delayed event processing, such as
203  * bind retries.
204  */
205 iu_tq_t				*timer_queue = NULL;
206 
207 /*
208  * fd of Unix Domain socket used to communicate stop and refresh requests
209  * to the inetd start method process.
210  */
211 static int			uds_fd = -1;
212 
213 /*
214  * List of inetd's currently managed instances; each containing its state,
215  * and in certain states its configuration.
216  */
217 static uu_list_pool_t		*instance_pool = NULL;
218 uu_list_t			*instance_list = NULL;
219 
220 /* set to indicate we're being stopped */
221 boolean_t			inetd_stopping = B_FALSE;
222 
223 /* TCP wrappers syslog globals. Consumed by libwrap. */
224 int				allow_severity = LOG_INFO;
225 int				deny_severity = LOG_WARNING;
226 
227 /* path of the configuration file being monitored by check_conf_file() */
228 static char			*conf_file = NULL;
229 
230 /* Auditing session handle */
231 static adt_session_data_t	*audit_handle;
232 
233 static void uds_fini(void);
234 static int uds_init(void);
235 static int run_method(instance_t *, instance_method_t, const proto_info_t *);
236 static void create_bound_fds(instance_t *);
237 static void destroy_bound_fds(instance_t *);
238 static void destroy_instance(instance_t *);
239 static void inetd_stop(void);
240 
241 /*
242  * The following two functions are callbacks that libumem uses to determine
243  * inetd's desired debugging/logging levels. The interface they consume is
244  * exported by FMA and is consolidation private. The comments in the two
245  * functions give the environment variable that will effectively be set to
246  * their returned value, and thus whose behavior for this value, described in
247  * umem_debug(3MALLOC), will be followed.
248  */
249 
const char *
_umem_debug_init(void)
{
	/* Value libumem is to use as its UMEM_DEBUG setting. */
	static const char umem_debug_setting[] = "default,verbose";

	return (umem_debug_setting);
}
255 
const char *
_umem_logging_init(void)
{
	/* Value libumem is to use as its UMEM_LOGGING setting. */
	static const char umem_logging_setting[] = "fail,contents";

	return (umem_logging_setting);
}
261 
/*
 * Emit an error message stating that the configuration of the instance
 * named by 'fmri' is invalid and that it is being placed in maintenance.
 */
static void
log_invalid_cfg(const char *fmri)
{
	const char *fmt = gettext(
	    "Invalid configuration for instance %s, placing in maintenance");

	error_msg(fmt, fmri);
}
269 
270 /*
271  * Returns B_TRUE if the instance is in a suitable state for inetd to stop.
272  */
273 static boolean_t
274 instance_stopped(const instance_t *inst)
275 {
276 	return ((inst->cur_istate == IIS_OFFLINE) ||
277 	    (inst->cur_istate == IIS_MAINTENANCE) ||
278 	    (inst->cur_istate == IIS_DISABLED) ||
279 	    (inst->cur_istate == IIS_UNINITIALIZED));
280 }
281 
282 /*
283  * Updates the current and next repository states of instance 'inst'. If
284  * any errors occur an error message is output.
285  */
286 static void
287 update_instance_states(instance_t *inst, internal_inst_state_t new_cur_state,
288     internal_inst_state_t new_next_state, restarter_error_t err)
289 {
290 	internal_inst_state_t	old_cur = inst->cur_istate;
291 	internal_inst_state_t	old_next = inst->next_istate;
292 	scf_error_t		sret;
293 	int			ret;
294 
295 	debug_msg("Entering update_instance_states: oldcur: %s, newcur: %s "
296 	    "oldnext: %s, newnext: %s", states[old_cur].name,
297 	    states[new_cur_state].name, states[old_next].name,
298 	    states[new_next_state].name);
299 
300 
301 	/* update the repository/cached internal state */
302 	inst->cur_istate = new_cur_state;
303 	inst->next_istate = new_next_state;
304 	(void) set_single_rep_val(inst->cur_istate_rep,
305 	    (int64_t)new_cur_state);
306 	(void) set_single_rep_val(inst->next_istate_rep,
307 	    (int64_t)new_next_state);
308 
309 	if (((sret = store_rep_vals(inst->cur_istate_rep, inst->fmri,
310 	    PR_NAME_CUR_INT_STATE)) != 0) ||
311 	    ((sret = store_rep_vals(inst->next_istate_rep, inst->fmri,
312 	    PR_NAME_NEXT_INT_STATE)) != 0))
313 		error_msg(gettext("Failed to update state of instance %s in "
314 		    "repository: %s"), inst->fmri, scf_strerror(sret));
315 
316 	/* update the repository SMF state */
317 	if ((ret = restarter_set_states(rst_event_handle, inst->fmri,
318 	    states[old_cur].smf_state, states[new_cur_state].smf_state,
319 	    states[old_next].smf_state, states[new_next_state].smf_state,
320 	    err, 0)) != 0)
321 		error_msg(gettext("Failed to update state of instance %s in "
322 		    "repository: %s"), inst->fmri, strerror(ret));
323 
324 }
325 
326 void
327 update_state(instance_t *inst, internal_inst_state_t new_cur,
328     restarter_error_t err)
329 {
330 	update_instance_states(inst, new_cur, IIS_NONE, err);
331 }
332 
333 /*
334  * Sends a refresh event to the inetd start method process and returns
335  * SMF_EXIT_OK if it managed to send it. If it fails to send the request for
336  * some reason it returns SMF_EXIT_ERR_OTHER.
337  */
338 static int
339 refresh_method(void)
340 {
341 	uds_request_t   req = UR_REFRESH_INETD;
342 	int		fd;
343 
344 	debug_msg("Entering refresh_method");
345 
346 	if ((fd = connect_to_inetd()) < 0) {
347 		error_msg(gettext("Failed to connect to inetd: %s"),
348 		    strerror(errno));
349 		return (SMF_EXIT_ERR_OTHER);
350 	}
351 
352 	/* write the request and return success */
353 	if (safe_write(fd, &req, sizeof (req)) == -1) {
354 		error_msg(
355 		    gettext("Failed to send refresh request to inetd: %s"),
356 		    strerror(errno));
357 		(void) close(fd);
358 		return (SMF_EXIT_ERR_OTHER);
359 	}
360 
361 	(void) close(fd);
362 
363 	return (SMF_EXIT_OK);
364 }
365 
366 /*
367  * Sends a stop event to the inetd start method process and wait till it goes
368  * away. If inetd is determined to have stopped SMF_EXIT_OK is returned, else
369  * SMF_EXIT_ERR_OTHER is returned.
370  */
371 static int
372 stop_method(void)
373 {
374 	uds_request_t   req = UR_STOP_INETD;
375 	int		fd;
376 	char		c;
377 	ssize_t		ret;
378 
379 	debug_msg("Entering stop_method");
380 
381 	if ((fd = connect_to_inetd()) == -1) {
382 		debug_msg(gettext("Failed to connect to inetd: %s"),
383 		    strerror(errno));
384 		/*
385 		 * Assume connect_to_inetd() failed because inetd was already
386 		 * stopped, and return success.
387 		 */
388 		return (SMF_EXIT_OK);
389 	}
390 
391 	/*
392 	 * This is safe to do since we're fired off in a separate process
393 	 * than inetd and in the case we get wedged, the stop method timeout
394 	 * will occur and we'd be killed by our restarter.
395 	 */
396 	enable_blocking(fd);
397 
398 	/* write the stop request to inetd and wait till it goes away */
399 	if (safe_write(fd, &req, sizeof (req)) != 0) {
400 		error_msg(gettext("Failed to send stop request to inetd"));
401 		(void) close(fd);
402 		return (SMF_EXIT_ERR_OTHER);
403 	}
404 
405 	/* wait until remote end of socket is closed */
406 	while (((ret = recv(fd, &c, sizeof (c), 0)) != 0) && (errno == EINTR))
407 		;
408 
409 	(void) close(fd);
410 
411 	if (ret != 0) {
412 		error_msg(gettext("Failed to determine whether inetd stopped"));
413 		return (SMF_EXIT_ERR_OTHER);
414 	}
415 
416 	return (SMF_EXIT_OK);
417 }
418 
419 
420 /*
421  * This function is called to handle restarter events coming in from the
422  * master restarter. It is registered with the master restarter via
423  * restarter_bind_handle() and simply passes a pointer to the event down
424  * the event pipe, which will be discovered by the poll in the event loop
425  * and processed there. It waits for an acknowledgement to be written back down
426  * the pipe before returning.
427  * Writing a pointer to the function's 'event' parameter down the pipe will
428  * be safe, as the thread in restarter_event_proxy() doesn't return until
429  * the main thread has finished its processing of the passed event, thus
430  * the referenced event will remain around until the function returns.
431  * To impose the limit of only one event being in the pipe and processed
432  * at once, a lock is taken on entry to this function and returned on exit.
433  * Always returns 0.
434  */
435 static int
436 restarter_event_proxy(restarter_event_t *event)
437 {
438 	restarter_event_type_t  ev_type;
439 	boolean_t		processed;
440 
441 	debug_msg("Entering restarter_event_proxy");
442 	ev_type = restarter_event_get_type(event);
443 	debug_msg("event: %x, event type: %d", event, ev_type);
444 
445 	(void) pthread_mutex_lock(&rst_event_pipe_mtx);
446 
447 	/* write the event to the main worker thread down the pipe */
448 	if (safe_write(rst_event_pipe[PE_PRODUCER], &event,
449 	    sizeof (event)) != 0)
450 		goto pipe_error;
451 
452 	/*
453 	 * Wait for an acknowledgement that the event has been processed from
454 	 * the same pipe. In the case that inetd is stopping, any thread in
455 	 * this function will simply block on this read until inetd eventually
456 	 * exits. This will result in this function not returning success to
457 	 * its caller, and the event that was being processed when the
458 	 * function exited will be re-sent when inetd is next started.
459 	 */
460 	if (safe_read(rst_event_pipe[PE_PRODUCER], &processed,
461 	    sizeof (processed)) != 0)
462 		goto pipe_error;
463 
464 	(void) pthread_mutex_unlock(&rst_event_pipe_mtx);
465 
466 	return (processed ? 0 : EAGAIN);
467 
468 pipe_error:
469 	/*
470 	 * Something's seriously wrong with the event pipe. Notify the
471 	 * worker thread by closing this end of the event pipe and pause till
472 	 * inetd exits.
473 	 */
474 	error_msg(gettext("Can't process restarter events: %s"),
475 	    strerror(errno));
476 	(void) close(rst_event_pipe[PE_PRODUCER]);
477 	for (;;)
478 		(void) pause();
479 
480 	/* NOTREACHED */
481 }
482 
483 /*
484  * Let restarter_event_proxy() know we're finished with the event it's blocked
485  * upon. The 'processed' argument denotes whether we successfully processed the
486  * event.
487  */
488 static void
489 ack_restarter_event(boolean_t processed)
490 {
491 	debug_msg("Entering ack_restarter_event");
492 
493 	/*
494 	 * If safe_write returns -1 something's seriously wrong with the event
495 	 * pipe, so start the shutdown proceedings.
496 	 */
497 	if (safe_write(rst_event_pipe[PE_CONSUMER], &processed,
498 	    sizeof (processed)) == -1)
499 		inetd_stop();
500 }
501 
/*
 * Make 'ident' the identification string used for subsequent syslog
 * messages.
 */
static void
change_syslog_ident(const char *ident)
{
	const int	log_options = LOG_PID | LOG_CONS;

	debug_msg("Entering change_syslog_ident: ident: %s", ident);

	/* Close the current log, then reopen it under the new identity. */
	closelog();
	openlog(ident, log_options, LOG_DAEMON);
}
513 
514 /*
515  * Perform TCP wrappers checks on this instance. Due to the fact that the
516  * current wrappers code used in Solaris is taken untouched from the open
517  * source version, we're stuck with using the daemon name for the checks, as
518  * opposed to making use of instance FMRIs. Sigh.
519  * Returns B_TRUE if the check passed, else B_FALSE.
520  */
521 static boolean_t
522 tcp_wrappers_ok(instance_t *instance)
523 {
524 	boolean_t		rval = B_TRUE;
525 	char			*daemon_name;
526 	basic_cfg_t		*cfg = instance->config->basic;
527 	struct request_info	req;
528 
529 	debug_msg("Entering tcp_wrappers_ok, instance: %s", instance->fmri);
530 
531 	/*
532 	 * Wrap the service using libwrap functions. The code below implements
533 	 * the functionality of tcpd. This is done only for stream,nowait
534 	 * services, following the convention of other vendors.  udp/dgram and
535 	 * stream/wait can NOT be wrapped with this libwrap, so be wary of
536 	 * changing the test below.
537 	 */
538 	if (cfg->do_tcp_wrappers && !cfg->iswait && !cfg->istlx) {
539 
540 		daemon_name = instance->config->methods[
541 		    IM_START]->exec_args_we.we_wordv[0];
542 		if (*daemon_name == '/')
543 			daemon_name = strrchr(daemon_name, '/') + 1;
544 
545 		/*
546 		 * Change the syslog message identity to the name of the
547 		 * daemon being wrapped, as opposed to "inetd".
548 		 */
549 		change_syslog_ident(daemon_name);
550 
551 		(void) request_init(&req, RQ_DAEMON, daemon_name, RQ_FILE,
552 		    instance->conn_fd, NULL);
553 		fromhost(&req);
554 
555 		if (strcasecmp(eval_hostname(req.client), paranoid) == 0) {
556 			syslog(deny_severity,
557 			    "refused connect from %s (name/address mismatch)",
558 			    eval_client(&req));
559 			if (req.sink != NULL)
560 				req.sink(instance->conn_fd);
561 			rval = B_FALSE;
562 		} else if (!hosts_access(&req)) {
563 			syslog(deny_severity,
564 			    "refused connect from %s (access denied)",
565 			    eval_client(&req));
566 			if (req.sink != NULL)
567 				req.sink(instance->conn_fd);
568 			rval = B_FALSE;
569 		} else {
570 			syslog(allow_severity, "connect from %s",
571 			    eval_client(&req));
572 		}
573 
574 		/* Revert syslog identity back to "inetd". */
575 		change_syslog_ident(SYSLOG_IDENT);
576 	}
577 	return (rval);
578 }
579 
580 /*
581  * Handler registered with the timer queue code to remove an instance from
582  * the connection rate offline state when it has been there for its allotted
583  * time.
584  */
585 /* ARGSUSED */
586 static void
587 conn_rate_online(iu_tq_t *tq, void *arg)
588 {
589 	instance_t *instance = arg;
590 
591 	debug_msg("Entering conn_rate_online, instance: %s",
592 	    instance->fmri);
593 
594 	assert(instance->cur_istate == IIS_OFFLINE_CONRATE);
595 	instance->timer_id = -1;
596 	update_state(instance, IIS_OFFLINE, RERR_RESTART);
597 	process_offline_inst(instance);
598 }
599 
600 /*
601  * Check whether this instance in the offline state is in transition to
602  * another state and do the work to continue this transition.
603  */
604 void
605 process_offline_inst(instance_t *inst)
606 {
607 	debug_msg("Entering process_offline_inst");
608 
609 	if (inst->disable_req) {
610 		inst->disable_req = B_FALSE;
611 		(void) run_method(inst, IM_DISABLE, NULL);
612 	} else if (inst->maintenance_req) {
613 		inst->maintenance_req = B_FALSE;
614 		update_state(inst, IIS_MAINTENANCE, RERR_RESTART);
615 	/*
616 	 * If inetd is in the process of stopping, we don't want to enter
617 	 * any states but offline, disabled and maintenance.
618 	 */
619 	} else if (!inetd_stopping) {
620 		if (inst->conn_rate_exceeded) {
621 			basic_cfg_t *cfg = inst->config->basic;
622 
623 			inst->conn_rate_exceeded = B_FALSE;
624 			update_state(inst, IIS_OFFLINE_CONRATE, RERR_RESTART);
625 			/*
626 			 * Schedule a timer to bring the instance out of the
627 			 * connection rate offline state.
628 			 */
629 			inst->timer_id = iu_schedule_timer(timer_queue,
630 			    cfg->conn_rate_offline, conn_rate_online,
631 			    inst);
632 			if (inst->timer_id == -1) {
633 				error_msg(gettext("%s unable to set timer, "
634 				    "won't be brought on line after %d "
635 				    "seconds."), inst->fmri,
636 				    cfg->conn_rate_offline);
637 			}
638 
639 		} else if (copies_limit_exceeded(inst)) {
640 			update_state(inst, IIS_OFFLINE_COPIES, RERR_RESTART);
641 		}
642 	}
643 }
644 
645 /*
646  * Create a socket bound to the instance's configured address. If the
647  * bind fails, returns -1, else the fd of the bound socket.
648  */
649 static int
650 create_bound_socket(const char *fmri, socket_info_t *sock_info)
651 {
652 	int		fd;
653 	int		on = 1;
654 	rpc_info_t	*rpc = sock_info->pr_info.ri;
655 	const char	*proto = sock_info->pr_info.proto;
656 
657 	debug_msg("Entering create_bound_socket");
658 
659 	fd = socket(sock_info->local_addr.ss_family, sock_info->type,
660 	    sock_info->protocol);
661 	if (fd < 0) {
662 		error_msg(gettext(
663 		    "Socket creation failure for instance %s, proto %s: %s"),
664 		    fmri, proto, strerror(errno));
665 		return (-1);
666 	}
667 
668 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on)) == -1) {
669 		error_msg(gettext("setsockopt SO_REUSEADDR failed for service "
670 		    "instance %s, proto %s: %s"), fmri, proto, strerror(errno));
671 		(void) close(fd);
672 		return (-1);
673 	}
674 	if (sock_info->pr_info.v6only) {
675 		/* restrict socket to IPv6 communications only */
676 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on,
677 		    sizeof (on)) == -1) {
678 			error_msg(gettext("setsockopt IPV6_V6ONLY failed for "
679 			    "service instance %s, proto %s: %s"), fmri, proto,
680 			    strerror(errno));
681 			(void) close(fd);
682 			return (-1);
683 		}
684 	}
685 
686 	if (rpc != NULL)
687 		SS_SETPORT(sock_info->local_addr, 0);
688 
689 	if (bind(fd, (struct sockaddr *)&(sock_info->local_addr),
690 	    SS_ADDRLEN(sock_info->local_addr)) < 0) {
691 		error_msg(gettext(
692 		    "Failed to bind to the port of service instance %s, "
693 		    "proto %s: %s"), fmri, proto, strerror(errno));
694 		(void) close(fd);
695 		return (-1);
696 	}
697 
698 	/*
699 	 * Retrieve and store the address bound to for RPC services.
700 	 */
701 	if (rpc != NULL) {
702 		struct sockaddr_storage	ss;
703 		int			ss_size = sizeof (ss);
704 
705 		if (getsockname(fd, (struct sockaddr *)&ss, &ss_size) < 0) {
706 			error_msg(gettext("Failed getsockname for instance %s, "
707 			    "proto %s: %s"), fmri, proto, strerror(errno));
708 			(void) close(fd);
709 			return (-1);
710 		}
711 		(void) memcpy(rpc->netbuf.buf, &ss,
712 		    sizeof (struct sockaddr_storage));
713 		rpc->netbuf.len = SS_ADDRLEN(ss);
714 		rpc->netbuf.maxlen = SS_ADDRLEN(ss);
715 	}
716 
717 	if (sock_info->type == SOCK_STREAM)
718 		(void) listen(fd, CONNECTION_BACKLOG);
719 
720 	return (fd);
721 }
722 
723 /*
724  * Handler registered with the timer queue code to retry the creation
725  * of a bound fd.
726  */
727 /* ARGSUSED */
728 static void
729 retry_bind(iu_tq_t *tq, void *arg)
730 {
731 	instance_t *instance = arg;
732 
733 	debug_msg("Entering retry_bind, instance: %s", instance->fmri);
734 
735 	switch (instance->cur_istate) {
736 	case IIS_OFFLINE_BIND:
737 	case IIS_ONLINE:
738 	case IIS_DEGRADED:
739 	case IIS_IN_ONLINE_METHOD:
740 	case IIS_IN_REFRESH_METHOD:
741 		break;
742 	default:
743 #ifndef NDEBUG
744 		(void) fprintf(stderr, "%s:%d: Unknown instance state %d.\n",
745 		    __FILE__, __LINE__, instance->cur_istate);
746 #endif
747 		abort();
748 	}
749 
750 	instance->bind_timer_id = -1;
751 	create_bound_fds(instance);
752 }
753 
754 /*
755  * For each of the fds for the given instance that are bound, if 'listen' is
756  * set add them to the poll set, else remove them from it. If any additions
757  * fail, returns -1, else 0 on success.
758  */
759 int
760 poll_bound_fds(instance_t *instance, boolean_t listen)
761 {
762 	basic_cfg_t	*cfg = instance->config->basic;
763 	proto_info_t	*pi;
764 	int		ret = 0;
765 
766 	debug_msg("Entering poll_bound_fds: instance: %s, on: %d",
767 	    instance->fmri, listen);
768 
769 	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
770 	    pi = uu_list_next(cfg->proto_list, pi)) {
771 		if (pi->listen_fd != -1) {	/* fd bound */
772 			if (!listen) {
773 				clear_pollfd(pi->listen_fd);
774 			} else if (set_pollfd(pi->listen_fd, POLLIN) == -1) {
775 				ret = -1;
776 			}
777 		}
778 	}
779 
780 	return (ret);
781 }
782 
783 /*
784  * Handle the case were we either fail to create a bound fd or we fail
785  * to add a bound fd to the poll set for the given instance.
786  */
787 static void
788 handle_bind_failure(instance_t *instance)
789 {
790 	basic_cfg_t *cfg = instance->config->basic;
791 
792 	debug_msg("Entering handle_bind_failure: instance: %s", instance);
793 
794 	/*
795 	 * We must be being called as a result of a failed poll_bound_fds()
796 	 * as a bind retry is already scheduled. Just return and let it do
797 	 * the work.
798 	 */
799 	if (instance->bind_timer_id != -1)
800 		return;
801 
802 	/*
803 	 * Check if the rebind retries limit is operative and if so,
804 	 * if it has been reached.
805 	 */
806 	if (((cfg->bind_fail_interval <= 0) ||		/* no retries */
807 	    ((cfg->bind_fail_max >= 0) &&		/* limit reached */
808 	    (++instance->bind_fail_count > cfg->bind_fail_max))) ||
809 	    ((instance->bind_timer_id = iu_schedule_timer(timer_queue,
810 	    cfg->bind_fail_interval, retry_bind, instance)) == -1)) {
811 		proto_info_t *pi;
812 
813 		instance->bind_fail_count = 0;
814 
815 		switch (instance->cur_istate) {
816 		case IIS_DEGRADED:
817 		case IIS_ONLINE:
818 			/* check if any of the fds are being poll'd upon */
819 			for (pi = uu_list_first(cfg->proto_list); pi != NULL;
820 			    pi = uu_list_next(cfg->proto_list, pi)) {
821 				if ((pi->listen_fd != -1) &&
822 				    (find_pollfd(pi->listen_fd) != NULL))
823 					break;
824 			}
825 			if (pi != NULL)	{	/* polling on > 0 fds */
826 				warn_msg(gettext("Failed to bind on "
827 				    "all protocols for instance %s, "
828 				    "transitioning to degraded"),
829 				    instance->fmri);
830 				update_state(instance, IIS_DEGRADED, RERR_NONE);
831 				instance->bind_retries_exceeded = B_TRUE;
832 				break;
833 			}
834 
835 			destroy_bound_fds(instance);
836 			/*
837 			 * In the case we failed the 'bind' because set_pollfd()
838 			 * failed on all bound fds, use the offline handling.
839 			 */
840 			/* FALLTHROUGH */
841 		case IIS_OFFLINE:
842 		case IIS_OFFLINE_BIND:
843 			error_msg(gettext("Too many bind failures for instance "
844 			"%s, transitioning to maintenance"), instance->fmri);
845 			update_state(instance, IIS_MAINTENANCE,
846 			    RERR_FAULT);
847 			break;
848 		case IIS_IN_ONLINE_METHOD:
849 		case IIS_IN_REFRESH_METHOD:
850 			warn_msg(gettext("Failed to bind on all "
851 			    "protocols for instance %s, instance will go to "
852 			    "degraded"), instance->fmri);
853 			/*
854 			 * Set the retries exceeded flag so when the method
855 			 * completes the instance goes to the degraded state.
856 			 */
857 			instance->bind_retries_exceeded = B_TRUE;
858 			break;
859 		default:
860 #ifndef NDEBUG
861 			(void) fprintf(stderr,
862 			    "%s:%d: Unknown instance state %d.\n",
863 			    __FILE__, __LINE__, instance->cur_istate);
864 #endif
865 			abort();
866 		}
867 	} else if (instance->cur_istate == IIS_OFFLINE) {
868 		/*
869 		 * bind re-scheduled, so if we're offline reflect this in the
870 		 * state.
871 		 */
872 		update_state(instance, IIS_OFFLINE_BIND, RERR_NONE);
873 	}
874 }
875 
876 /*
877  * Independent of the transport, for each of the entries in the instance's
878  * proto list this function first attempts to create an associated network fd;
879  * for RPC services these are then bound to a kernel chosen port and the
880  * fd is registered with rpcbind; for non-RPC services the fds are bound
881  * to the port associated with the instance's service name. On any successful
882  * binds the instance is taken online. Failed binds are handled by
883  * handle_bind_failure().
884  */
885 void
886 create_bound_fds(instance_t *instance)
887 {
888 	basic_cfg_t	*cfg = instance->config->basic;
889 	boolean_t	failure = B_FALSE;
890 	boolean_t	success = B_FALSE;
891 	proto_info_t	*pi;
892 
893 	debug_msg("Entering create_bound_fd: instance: %s", instance->fmri);
894 
895 	/*
896 	 * Loop through and try and bind any unbound protos.
897 	 */
898 	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
899 	    pi = uu_list_next(cfg->proto_list, pi)) {
900 		if (pi->listen_fd != -1)
901 			continue;
902 		if (cfg->istlx) {
903 			pi->listen_fd = create_bound_endpoint(instance->fmri,
904 			    (tlx_info_t *)pi);
905 		} else {
906 			/*
907 			 * We cast pi to a void so we can then go on to cast
908 			 * it to a socket_info_t without lint complaining
909 			 * about alignment. This is done because the x86
910 			 * version of lint thinks a lint suppression directive
911 			 * is unnecessary and flags it as such, yet the sparc
912 			 * version complains if it's absent.
913 			 */
914 			void *p = pi;
915 			pi->listen_fd = create_bound_socket(instance->fmri,
916 			    (socket_info_t *)p);
917 		}
918 		if (pi->listen_fd == -1) {
919 			failure = B_TRUE;
920 			continue;
921 		}
922 
923 		if (pi->ri != NULL) {
924 			unregister_rpc_service(instance->fmri, pi->ri);
925 			if (register_rpc_service(instance->fmri, pi->ri) ==
926 			    -1) {
927 				close_net_fd(instance, pi->listen_fd);
928 				pi->listen_fd = -1;
929 				failure = B_TRUE;
930 				continue;
931 			}
932 		}
933 
934 		success = B_TRUE;
935 	}
936 
937 	switch (instance->cur_istate) {
938 	case IIS_OFFLINE:
939 	case IIS_OFFLINE_BIND:
940 		/*
941 		 * If we've managed to bind at least one proto lets run the
942 		 * online method, so we can start listening for it.
943 		 */
944 		if (success && run_method(instance, IM_ONLINE, NULL) == -1)
945 			return;	/* instance gone to maintenance */
946 		break;
947 	case IIS_ONLINE:
948 	case IIS_IN_REFRESH_METHOD:
949 		/*
950 		 * We're 'online', so start polling on any bound fds we're
951 		 * currently not.
952 		 */
953 		if (poll_bound_fds(instance, B_TRUE) != 0) {
954 			failure = B_TRUE;
955 		} else if (!failure) {
956 			/*
957 			 * We've successfully bound and poll'd upon all protos,
958 			 * so reset the failure count.
959 			 */
960 			instance->bind_fail_count = 0;
961 		}
962 		break;
963 	case IIS_IN_ONLINE_METHOD:
964 		/*
965 		 * Nothing to do here as the method completion code will start
966 		 * listening for any successfully bound fds.
967 		 */
968 		break;
969 	default:
970 #ifndef NDEBUG
971 		(void) fprintf(stderr, "%s:%d: Unknown instance state %d.\n",
972 		    __FILE__, __LINE__, instance->cur_istate);
973 #endif
974 		abort();
975 	}
976 
977 	if (failure)
978 		handle_bind_failure(instance);
979 }
980 
981 /*
982  * Counter to create_bound_fds(), for each of the bound network fds this
983  * function unregisters the instance from rpcbind if it's an RPC service,
984  * stops listening for new connections for it and then closes the listening fd.
985  */
986 static void
987 destroy_bound_fds(instance_t *instance)
988 {
989 	basic_cfg_t	*cfg = instance->config->basic;
990 	proto_info_t	*pi;
991 
992 	debug_msg("Entering destroy_bound_fds: instance: %s", instance->fmri);
993 
994 	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
995 	    pi = uu_list_next(cfg->proto_list, pi)) {
996 		if (pi->listen_fd != -1) {
997 			if (pi->ri != NULL)
998 				unregister_rpc_service(instance->fmri, pi->ri);
999 			clear_pollfd(pi->listen_fd);
1000 			close_net_fd(instance, pi->listen_fd);
1001 			pi->listen_fd = -1;
1002 		}
1003 	}
1004 
1005 	/* cancel any bind retries */
1006 	if (instance->bind_timer_id != -1)
1007 		cancel_bind_timer(instance);
1008 
1009 	instance->bind_retries_exceeded = B_FALSE;
1010 }
1011 
1012 /*
1013  * Perform %A address expansion and return a pointer to a static string
1014  * array containing crafted arguments. This expansion is provided for
1015  * compatibility with 4.2BSD daemons, and as such we've copied the logic of
1016  * the legacy inetd to maintain this compatibility as much as possible. This
1017  * logic is a bit scatty, but it dates back at least as far as SunOS 4.x.
1018  */
1019 static char **
1020 expand_address(instance_t *inst, const proto_info_t *pi)
1021 {
1022 	static char	addrbuf[sizeof ("ffffffff.65536")];
1023 	static char	*ret[3];
1024 	instance_cfg_t	*cfg = inst->config;
1025 	/*
1026 	 * We cast pi to a void so we can then go on to cast it to a
1027 	 * socket_info_t without lint complaining about alignment. This
1028 	 * is done because the x86 version of lint thinks a lint suppression
1029 	 * directive is unnecessary and flags it as such, yet the sparc
1030 	 * version complains if it's absent.
1031 	 */
1032 	const void	*p = pi;
1033 
1034 	debug_msg("Entering expand_address");
1035 
1036 	/* set ret[0] to the basename of exec path */
1037 	if ((ret[0] = strrchr(cfg->methods[IM_START]->exec_path, '/'))
1038 	    != NULL) {
1039 		ret[0]++;
1040 	} else {
1041 		ret[0] = cfg->methods[IM_START]->exec_path;
1042 	}
1043 
1044 	if (!cfg->basic->istlx &&
1045 	    (((socket_info_t *)p)->type == SOCK_DGRAM)) {
1046 		ret[1] = NULL;
1047 	} else {
1048 		addrbuf[0] = '\0';
1049 		if (!cfg->basic->iswait &&
1050 		    (inst->remote_addr.ss_family == AF_INET)) {
1051 			struct sockaddr_in *sp;
1052 
1053 			sp = (struct sockaddr_in *)&(inst->remote_addr);
1054 			(void) snprintf(addrbuf, sizeof (addrbuf), "%x.%hu",
1055 			    ntohl(sp->sin_addr.s_addr), ntohs(sp->sin_port));
1056 		}
1057 		ret[1] = addrbuf;
1058 		ret[2] = NULL;
1059 	}
1060 
1061 	return (ret);
1062 }
1063 
1064 /*
1065  * Returns the state associated with the supplied method being run for an
1066  * instance.
1067  */
1068 static internal_inst_state_t
1069 get_method_state(instance_method_t method)
1070 {
1071 	state_info_t *sip;
1072 
1073 	for (sip = states; sip->istate != IIS_NONE; sip++) {
1074 		if (sip->method_running == method)
1075 			break;
1076 	}
1077 	assert(sip->istate != IIS_NONE);
1078 
1079 	return (sip->istate);
1080 }
1081 
1082 /*
1083  * Store the method's PID and CID in the repository. If the store fails
1084  * we ignore it and just drive on.
1085  */
1086 static void
1087 add_method_ids(instance_t *ins, pid_t pid, ctid_t cid, instance_method_t mthd)
1088 {
1089 	debug_msg("Entering add_method_ids");
1090 
1091 	if (cid != -1)
1092 		(void) add_remove_contract(ins->fmri, B_TRUE, cid);
1093 
1094 	if (mthd == IM_START) {
1095 		if (add_rep_val(ins->start_pids, (int64_t)pid) == 0) {
1096 			(void) store_rep_vals(ins->start_pids, ins->fmri,
1097 			    PR_NAME_START_PIDS);
1098 		}
1099 	} else {
1100 		if (add_rep_val(ins->non_start_pid, (int64_t)pid) == 0) {
1101 			(void) store_rep_vals(ins->non_start_pid, ins->fmri,
1102 			    PR_NAME_NON_START_PID);
1103 		}
1104 	}
1105 }
1106 
1107 /*
1108  * Remove the method's PID and CID from the repository. If the removal
1109  * fails we ignore it and drive on.
1110  */
1111 void
1112 remove_method_ids(instance_t *inst, pid_t pid, ctid_t cid,
1113     instance_method_t mthd)
1114 {
1115 	debug_msg("Entering remove_method_ids");
1116 
1117 	if (cid != -1)
1118 		(void) add_remove_contract(inst->fmri, B_FALSE, cid);
1119 
1120 	if (mthd == IM_START) {
1121 		remove_rep_val(inst->start_pids, (int64_t)pid);
1122 		(void) store_rep_vals(inst->start_pids, inst->fmri,
1123 		    PR_NAME_START_PIDS);
1124 	} else {
1125 		remove_rep_val(inst->non_start_pid, (int64_t)pid);
1126 		(void) store_rep_vals(inst->non_start_pid, inst->fmri,
1127 		    PR_NAME_NON_START_PID);
1128 	}
1129 }
1130 
1131 static instance_t *
1132 create_instance(const char *fmri)
1133 {
1134 	instance_t *ret;
1135 
1136 	debug_msg("Entering create_instance, instance: %s", fmri);
1137 
1138 	if (((ret = calloc(1, sizeof (instance_t))) == NULL) ||
1139 	    ((ret->fmri = strdup(fmri)) == NULL))
1140 		goto alloc_fail;
1141 
1142 	ret->conn_fd = -1;
1143 
1144 	ret->copies = 0;
1145 
1146 	ret->conn_rate_count = 0;
1147 	ret->fail_rate_count = 0;
1148 	ret->bind_fail_count = 0;
1149 
1150 	if (((ret->non_start_pid = create_rep_val_list()) == NULL) ||
1151 	    ((ret->start_pids = create_rep_val_list()) == NULL))
1152 		goto alloc_fail;
1153 
1154 	ret->cur_istate = IIS_NONE;
1155 	ret->next_istate = IIS_NONE;
1156 
1157 	if (((ret->cur_istate_rep = create_rep_val_list()) == NULL) ||
1158 	    ((ret->next_istate_rep = create_rep_val_list()) == NULL))
1159 		goto alloc_fail;
1160 
1161 	ret->config = NULL;
1162 	ret->new_config = NULL;
1163 
1164 	ret->timer_id = -1;
1165 	ret->bind_timer_id = -1;
1166 
1167 	ret->disable_req = B_FALSE;
1168 	ret->maintenance_req = B_FALSE;
1169 	ret->conn_rate_exceeded = B_FALSE;
1170 	ret->bind_retries_exceeded = B_FALSE;
1171 
1172 	ret->pending_rst_event = RESTARTER_EVENT_TYPE_INVALID;
1173 
1174 	return (ret);
1175 
1176 alloc_fail:
1177 	error_msg(strerror(errno));
1178 	destroy_instance(ret);
1179 	return (NULL);
1180 }
1181 
1182 static void
1183 destroy_instance(instance_t *inst)
1184 {
1185 	debug_msg("Entering destroy_instance");
1186 
1187 	if (inst == NULL)
1188 		return;
1189 
1190 	destroy_instance_cfg(inst->config);
1191 	destroy_instance_cfg(inst->new_config);
1192 
1193 	destroy_rep_val_list(inst->cur_istate_rep);
1194 	destroy_rep_val_list(inst->next_istate_rep);
1195 
1196 	destroy_rep_val_list(inst->start_pids);
1197 	destroy_rep_val_list(inst->non_start_pid);
1198 
1199 	free(inst->fmri);
1200 
1201 	free(inst);
1202 }
1203 
1204 /*
1205  * Retrieves the current and next states internal states. Returns 0 on success,
1206  * else returns one of the following on error:
1207  * SCF_ERROR_NO_MEMORY if memory allocation failed.
1208  * SCF_ERROR_CONNECTION_BROKEN if the connection to the repository was broken.
1209  * SCF_ERROR_TYPE_MISMATCH if the property was of an unexpected type.
1210  * SCF_ERROR_NO_RESOURCES if the server doesn't have adequate resources.
1211  * SCF_ERROR_NO_SERVER if the server isn't running.
1212  */
1213 static scf_error_t
1214 retrieve_instance_state(instance_t *inst)
1215 {
1216 	scf_error_t	ret;
1217 
1218 	debug_msg("Entering retrieve_instance_state: instance: %s",
1219 	    inst->fmri);
1220 
1221 	/* retrieve internal states */
1222 	if (((ret = retrieve_rep_vals(inst->cur_istate_rep, inst->fmri,
1223 	    PR_NAME_CUR_INT_STATE)) != 0) ||
1224 	    ((ret = retrieve_rep_vals(inst->next_istate_rep, inst->fmri,
1225 	    PR_NAME_NEXT_INT_STATE)) != 0)) {
1226 		if (ret != SCF_ERROR_NOT_FOUND) {
1227 			error_msg(gettext(
1228 			    "Failed to read state of instance %s: %s"),
1229 			    inst->fmri, scf_strerror(scf_error()));
1230 			return (ret);
1231 		}
1232 
1233 		debug_msg("instance with no previous int state - "
1234 		    "setting state to uninitialized");
1235 
1236 		if ((set_single_rep_val(inst->cur_istate_rep,
1237 		    (int64_t)IIS_UNINITIALIZED) == -1) ||
1238 		    (set_single_rep_val(inst->next_istate_rep,
1239 		    (int64_t)IIS_NONE) == -1)) {
1240 			return (SCF_ERROR_NO_MEMORY);
1241 		}
1242 	}
1243 
1244 	/* update convenience states */
1245 	inst->cur_istate = get_single_rep_val(inst->cur_istate_rep);
1246 	inst->next_istate = get_single_rep_val(inst->next_istate_rep);
1247 	debug_msg("previous states: cur: %d, next: %d", inst->cur_istate,
1248 	    inst->next_istate);
1249 
1250 	return (0);
1251 }
1252 
1253 /*
1254  * Retrieve stored process ids and register each of them so we process their
1255  * termination.
1256  */
1257 static int
1258 retrieve_method_pids(instance_t *inst)
1259 {
1260 	rep_val_t	*rv;
1261 
1262 	debug_msg("Entering remove_method_pids");
1263 
1264 	switch (retrieve_rep_vals(inst->start_pids, inst->fmri,
1265 	    PR_NAME_START_PIDS)) {
1266 	case 0:
1267 		break;
1268 	case SCF_ERROR_NOT_FOUND:
1269 		return (0);
1270 	default:
1271 		error_msg(gettext("Failed to retrieve the start pids of "
1272 		    "instance %s from repository: %s"), inst->fmri,
1273 		    scf_strerror(scf_error()));
1274 		return (-1);
1275 	}
1276 
1277 	rv = uu_list_first(inst->start_pids);
1278 	while (rv != NULL) {
1279 		if (register_method(inst, (pid_t)rv->val, (ctid_t)-1,
1280 		    IM_START) == 0) {
1281 			inst->copies++;
1282 			rv = uu_list_next(inst->start_pids, rv);
1283 		} else if (errno == ENOENT) {
1284 			pid_t pid = (pid_t)rv->val;
1285 
1286 			/*
1287 			 * The process must have already terminated. Remove
1288 			 * it from the list.
1289 			 */
1290 			rv = uu_list_next(inst->start_pids, rv);
1291 			remove_rep_val(inst->start_pids, pid);
1292 		} else {
1293 			error_msg(gettext("Failed to listen for the completion "
1294 			    "of %s method of instance %s"), START_METHOD_NAME,
1295 			    inst->fmri);
1296 			rv = uu_list_next(inst->start_pids, rv);
1297 		}
1298 	}
1299 
1300 	/* synch the repository pid list to remove any terminated pids */
1301 	(void) store_rep_vals(inst->start_pids, inst->fmri, PR_NAME_START_PIDS);
1302 
1303 	return (0);
1304 }
1305 
1306 /*
1307  * Remove the passed instance from inetd control.
1308  */
1309 static void
1310 remove_instance(instance_t *instance)
1311 {
1312 	debug_msg("Entering remove_instance");
1313 
1314 	switch (instance->cur_istate) {
1315 	case IIS_ONLINE:
1316 	case IIS_DEGRADED:
1317 		/* stop listening for network connections */
1318 		destroy_bound_fds(instance);
1319 		break;
1320 	case IIS_OFFLINE_BIND:
1321 		cancel_bind_timer(instance);
1322 		break;
1323 	case IIS_OFFLINE_CONRATE:
1324 		cancel_inst_timer(instance);
1325 		break;
1326 	}
1327 
1328 	/* stop listening for terminated methods */
1329 	unregister_instance_methods(instance);
1330 
1331 	uu_list_remove(instance_list, instance);
1332 	destroy_instance(instance);
1333 }
1334 
1335 /*
1336  * Refresh the configuration of instance 'inst'. This method gets called as
1337  * a result of a refresh event for the instance from the master restarter, so
1338  * we can rely upon the instance's running snapshot having been updated from
1339  * its configuration snapshot.
1340  */
1341 void
1342 refresh_instance(instance_t *inst)
1343 {
1344 	instance_cfg_t	*cfg;
1345 
1346 	debug_msg("Entering refresh_instance: inst: %s", inst->fmri);
1347 
1348 	switch (inst->cur_istate) {
1349 	case IIS_MAINTENANCE:
1350 	case IIS_DISABLED:
1351 	case IIS_UNINITIALIZED:
1352 		/*
1353 		 * Ignore any possible changes, we'll re-read the configuration
1354 		 * automatically when we exit these states.
1355 		 */
1356 		break;
1357 
1358 	case IIS_OFFLINE_COPIES:
1359 	case IIS_OFFLINE_BIND:
1360 	case IIS_OFFLINE:
1361 	case IIS_OFFLINE_CONRATE:
1362 		destroy_instance_cfg(inst->config);
1363 		if ((inst->config = read_instance_cfg(inst->fmri)) == NULL) {
1364 			log_invalid_cfg(inst->fmri);
1365 			if (inst->cur_istate == IIS_OFFLINE_BIND) {
1366 				cancel_bind_timer(inst);
1367 			} else if (inst->cur_istate == IIS_OFFLINE_CONRATE) {
1368 				cancel_inst_timer(inst);
1369 			}
1370 			update_state(inst, IIS_MAINTENANCE, RERR_FAULT);
1371 		} else {
1372 			switch (inst->cur_istate) {
1373 			case IIS_OFFLINE_BIND:
1374 				if (copies_limit_exceeded(inst)) {
1375 					/* Cancel scheduled bind retries. */
1376 					cancel_bind_timer(inst);
1377 
1378 					/*
1379 					 * Take the instance to the copies
1380 					 * offline state, via the offline
1381 					 * state.
1382 					 */
1383 					update_state(inst, IIS_OFFLINE,
1384 					    RERR_RESTART);
1385 					process_offline_inst(inst);
1386 				}
1387 				break;
1388 
1389 			case IIS_OFFLINE:
1390 				process_offline_inst(inst);
1391 				break;
1392 
1393 			case IIS_OFFLINE_CONRATE:
1394 				/*
1395 				 * Since we're already in a DOS state,
1396 				 * don't bother evaluating the copies
1397 				 * limit. This will be evaluated when
1398 				 * we leave this state in
1399 				 * process_offline_inst().
1400 				 */
1401 				break;
1402 
1403 			case IIS_OFFLINE_COPIES:
1404 				/*
1405 				 * Check if the copies limit has been increased
1406 				 * above the current count.
1407 				 */
1408 				if (!copies_limit_exceeded(inst)) {
1409 					update_state(inst, IIS_OFFLINE,
1410 					    RERR_RESTART);
1411 					process_offline_inst(inst);
1412 				}
1413 				break;
1414 
1415 			default:
1416 				assert(0);
1417 			}
1418 		}
1419 		break;
1420 
1421 	case IIS_DEGRADED:
1422 	case IIS_ONLINE:
1423 		if ((cfg = read_instance_cfg(inst->fmri)) != NULL) {
1424 			instance_cfg_t *ocfg = inst->config;
1425 
1426 			/*
1427 			 * Try to avoid the overhead of taking an instance
1428 			 * offline and back on again. We do this by limiting
1429 			 * this behavior to two eventualities:
1430 			 * - there needs to be a re-bind to listen on behalf
1431 			 *   of the instance with its new configuration. This
1432 			 *   could be because for example its service has been
1433 			 *   associated with a different port, or because the
1434 			 *   v6only protocol option has been newly applied to
1435 			 *   the instance.
1436 			 * - one or both of the start or online methods of the
1437 			 *   instance have changed in the new configuration.
1438 			 *   Without taking the instance offline when the
1439 			 *   start method changed the instance may be running
1440 			 *   with unwanted parameters (or event an unwanted
1441 			 *   binary); and without taking the instance offline
1442 			 *   if its online method was to change, some part of
1443 			 *   its running environment may have changed and would
1444 			 *   not be picked up until the instance next goes
1445 			 *   offline for another reason.
1446 			 */
1447 			if ((!bind_config_equal(ocfg->basic, cfg->basic)) ||
1448 			    !method_info_equal(ocfg->methods[IM_ONLINE],
1449 			    cfg->methods[IM_ONLINE]) ||
1450 			    !method_info_equal(ocfg->methods[IM_START],
1451 			    cfg->methods[IM_START])) {
1452 				destroy_bound_fds(inst);
1453 
1454 				assert(inst->new_config == NULL);
1455 				inst->new_config = cfg;
1456 
1457 				(void) run_method(inst, IM_OFFLINE, NULL);
1458 			} else {	/* no bind config / method changes */
1459 
1460 				/*
1461 				 * swap the proto list over from the old
1462 				 * configuration to the new, so we retain
1463 				 * our set of network fds.
1464 				 */
1465 				destroy_proto_list(cfg->basic);
1466 				cfg->basic->proto_list =
1467 				    ocfg->basic->proto_list;
1468 				ocfg->basic->proto_list = NULL;
1469 				destroy_instance_cfg(ocfg);
1470 				inst->config = cfg;
1471 
1472 				/* re-evaluate copies limits based on new cfg */
1473 				if (copies_limit_exceeded(inst)) {
1474 					destroy_bound_fds(inst);
1475 					(void) run_method(inst, IM_OFFLINE,
1476 					    NULL);
1477 				} else {
1478 					/*
1479 					 * Since the instance isn't being
1480 					 * taken offline, where we assume it
1481 					 * would pick-up any configuration
1482 					 * changes automatically when it goes
1483 					 * back online, run its refresh method
1484 					 * to allow it to pick-up any changes
1485 					 * whilst still online.
1486 					 */
1487 					(void) run_method(inst, IM_REFRESH,
1488 					    NULL);
1489 				}
1490 			}
1491 		} else {
1492 			log_invalid_cfg(inst->fmri);
1493 
1494 			destroy_bound_fds(inst);
1495 
1496 			inst->maintenance_req = B_TRUE;
1497 			(void) run_method(inst, IM_OFFLINE, NULL);
1498 		}
1499 		break;
1500 
1501 	default:
1502 		debug_msg("Unhandled current state %d for instance in "
1503 		    "refresh_instance", inst->cur_istate);
1504 		assert(0);
1505 	}
1506 }
1507 
1508 /*
1509  * Called by process_restarter_event() to handle a restarter event for an
1510  * instance.
1511  */
1512 static void
1513 handle_restarter_event(instance_t *instance, restarter_event_type_t event,
1514     boolean_t send_ack)
1515 {
1516 	debug_msg("Entering handle_restarter_event: inst: %s, event: %d, "
1517 	    "curr state: %d", instance->fmri, event, instance->cur_istate);
1518 
1519 	switch (event) {
1520 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1521 		refresh_instance(instance);
1522 		goto done;
1523 	case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1524 		remove_instance(instance);
1525 		goto done;
1526 	case RESTARTER_EVENT_TYPE_STOP:
1527 		switch (instance->cur_istate) {
1528 		case IIS_OFFLINE_CONRATE:
1529 		case IIS_OFFLINE_BIND:
1530 		case IIS_OFFLINE_COPIES:
1531 			/*
1532 			 * inetd must be closing down as we wouldn't get this
1533 			 * event in one of these states from the master
1534 			 * restarter. Take the instance to the offline resting
1535 			 * state.
1536 			 */
1537 			if (instance->cur_istate == IIS_OFFLINE_BIND) {
1538 				cancel_bind_timer(instance);
1539 			} else if (instance->cur_istate ==
1540 			    IIS_OFFLINE_CONRATE) {
1541 				cancel_inst_timer(instance);
1542 			}
1543 			update_state(instance, IIS_OFFLINE, RERR_RESTART);
1544 			goto done;
1545 		}
1546 		break;
1547 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1548 		/*
1549 		 * We've got a restart event, so if the instance is online
1550 		 * in any way initiate taking it offline, and rely upon
1551 		 * our restarter to send us an online event to bring
1552 		 * it back online.
1553 		 */
1554 		switch (instance->cur_istate) {
1555 		case IIS_ONLINE:
1556 		case IIS_DEGRADED:
1557 			destroy_bound_fds(instance);
1558 			(void) run_method(instance, IM_OFFLINE, NULL);
1559 		}
1560 		goto done;
1561 	}
1562 
1563 	switch (instance->cur_istate) {
1564 	case IIS_OFFLINE:
1565 		switch (event) {
1566 		case RESTARTER_EVENT_TYPE_START:
1567 			/*
1568 			 * Dependencies are met, let's take the service online.
1569 			 * Only try and bind for a wait type service if
1570 			 * no process is running on its behalf. Otherwise, just
1571 			 * mark the service online and binding will be attempted
1572 			 * when the process exits.
1573 			 */
1574 			if (!(instance->config->basic->iswait &&
1575 			    (uu_list_first(instance->start_pids) != NULL))) {
1576 				create_bound_fds(instance);
1577 			} else {
1578 				update_state(instance, IIS_ONLINE, RERR_NONE);
1579 			}
1580 			break;
1581 		case RESTARTER_EVENT_TYPE_DISABLE:
1582 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1583 			/*
1584 			 * The instance should be disabled, so run the
1585 			 * instance's disabled method that will do the work
1586 			 * to take it there.
1587 			 */
1588 			(void) run_method(instance, IM_DISABLE, NULL);
1589 			break;
1590 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1591 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1592 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1593 			/*
1594 			 * The master restarter has requested the instance
1595 			 * go to maintenance; since we're already offline
1596 			 * just update the state to the maintenance state.
1597 			 */
1598 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1599 			break;
1600 		}
1601 		break;
1602 
1603 	case IIS_OFFLINE_BIND:
1604 		switch (event) {
1605 		case RESTARTER_EVENT_TYPE_DISABLE:
1606 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1607 			/*
1608 			 * The instance should be disabled. Firstly, as for
1609 			 * the above dependencies unmet comment, cancel
1610 			 * the bind retry timer and update the state to
1611 			 * offline. Then, run the disable method to do the
1612 			 * work to take the instance from offline to
1613 			 * disabled.
1614 			 */
1615 			cancel_bind_timer(instance);
1616 			update_state(instance, IIS_OFFLINE, RERR_RESTART);
1617 			(void) run_method(instance, IM_DISABLE, NULL);
1618 			break;
1619 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1620 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1621 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1622 			/*
1623 			 * The master restarter has requested the instance
1624 			 * be placed in the maintenance state. Cancel the
1625 			 * outstanding retry timer, and since we're already
1626 			 * offline, update the state to maintenance.
1627 			 */
1628 			cancel_bind_timer(instance);
1629 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1630 			break;
1631 		}
1632 		break;
1633 
1634 	case IIS_DEGRADED:
1635 	case IIS_ONLINE:
1636 		switch (event) {
1637 		case RESTARTER_EVENT_TYPE_DISABLE:
1638 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1639 			/*
1640 			 * The instance needs to be disabled. Do the same work
1641 			 * as for the dependencies unmet event below to
1642 			 * take the instance offline.
1643 			 */
1644 			destroy_bound_fds(instance);
1645 			/*
1646 			 * Indicate that the offline method is being run
1647 			 * as part of going to the disabled state, and to
1648 			 * carry on this transition.
1649 			 */
1650 			instance->disable_req = B_TRUE;
1651 			(void) run_method(instance, IM_OFFLINE, NULL);
1652 			break;
1653 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1654 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1655 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1656 			/*
1657 			 * The master restarter has requested the instance be
1658 			 * placed in the maintenance state. This involves
1659 			 * firstly taking the service offline, so do the
1660 			 * same work as for the dependencies unmet event
1661 			 * below. We set the maintenance_req flag to
1662 			 * indicate that when we get to the offline state
1663 			 * we should be placed directly into the maintenance
1664 			 * state.
1665 			 */
1666 			instance->maintenance_req = B_TRUE;
1667 			/* FALLTHROUGH */
1668 		case RESTARTER_EVENT_TYPE_STOP:
1669 			/*
1670 			 * Dependencies have become unmet. Close and
1671 			 * stop listening on the instance's network file
1672 			 * descriptor, and run the offline method to do
1673 			 * any work required to take us to the offline state.
1674 			 */
1675 			destroy_bound_fds(instance);
1676 			(void) run_method(instance, IM_OFFLINE, NULL);
1677 		}
1678 		break;
1679 
1680 	case IIS_UNINITIALIZED:
1681 		if (event == RESTARTER_EVENT_TYPE_DISABLE ||
1682 		    event == RESTARTER_EVENT_TYPE_ADMIN_DISABLE) {
1683 			update_state(instance, IIS_DISABLED, RERR_NONE);
1684 			break;
1685 		} else if (event != RESTARTER_EVENT_TYPE_ENABLE) {
1686 			/*
1687 			 * Ignore other events until we know whether we're
1688 			 * enabled or not.
1689 			 */
1690 			break;
1691 		}
1692 
1693 		/*
1694 		 * We've got an enabled event; make use of the handling in the
1695 		 * disable case.
1696 		 */
1697 		/* FALLTHROUGH */
1698 
1699 	case IIS_DISABLED:
1700 		switch (event) {
1701 		case RESTARTER_EVENT_TYPE_ENABLE:
1702 			/*
1703 			 * The instance needs enabling. Commence reading its
1704 			 * configuration and if successful place the instance
1705 			 * in the offline state and let process_offline_inst()
1706 			 * take it from there.
1707 			 */
1708 			destroy_instance_cfg(instance->config);
1709 			instance->config = read_instance_cfg(instance->fmri);
1710 			if (instance->config != NULL) {
1711 				update_state(instance, IIS_OFFLINE,
1712 				    RERR_RESTART);
1713 				process_offline_inst(instance);
1714 			} else {
1715 				log_invalid_cfg(instance->fmri);
1716 				update_state(instance, IIS_MAINTENANCE,
1717 				    RERR_RESTART);
1718 			}
1719 
1720 			break;
1721 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1722 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1723 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1724 			/*
1725 			 * The master restarter has requested the instance be
1726 			 * placed in the maintenance state, so just update its
1727 			 * state to maintenance.
1728 			 */
1729 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1730 			break;
1731 		}
1732 		break;
1733 
1734 	case IIS_MAINTENANCE:
1735 		switch (event) {
1736 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1737 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1738 			/*
1739 			 * The master restarter has requested that the instance
1740 			 * be taken out of maintenance. Read its configuration,
1741 			 * and if successful place the instance in the offline
1742 			 * state and call process_offline_inst() to take it
1743 			 * from there.
1744 			 */
1745 			destroy_instance_cfg(instance->config);
1746 			instance->config = read_instance_cfg(instance->fmri);
1747 			if (instance->config != NULL) {
1748 				update_state(instance, IIS_OFFLINE,
1749 				    RERR_RESTART);
1750 				process_offline_inst(instance);
1751 			} else {
1752 				boolean_t enabled;
1753 
1754 				/*
1755 				 * The configuration was invalid. If the
1756 				 * service has disabled requested, let's
1757 				 * just place the instance in disabled even
1758 				 * though we haven't been able to run its
1759 				 * disable method, as the slightly incorrect
1760 				 * state is likely to be less of an issue to
1761 				 * an administrator than refusing to move an
1762 				 * instance to disabled. If disable isn't
1763 				 * requested, re-mark the service's state
1764 				 * as maintenance, so the administrator can
1765 				 * see the request was processed.
1766 				 */
1767 				if ((read_enable_merged(instance->fmri,
1768 				    &enabled) == 0) && !enabled) {
1769 					update_state(instance, IIS_DISABLED,
1770 					    RERR_RESTART);
1771 				} else {
1772 					log_invalid_cfg(instance->fmri);
1773 					update_state(instance, IIS_MAINTENANCE,
1774 					    RERR_FAULT);
1775 				}
1776 			}
1777 			break;
1778 		}
1779 		break;
1780 
1781 	case IIS_OFFLINE_CONRATE:
1782 		switch (event) {
1783 		case RESTARTER_EVENT_TYPE_DISABLE:
1784 			/*
1785 			 * The instance wants disabling. Take the instance
1786 			 * offline as for the dependencies unmet event above,
1787 			 * and then from there run the disable method to do
1788 			 * the work to take the instance to the disabled state.
1789 			 */
1790 			cancel_inst_timer(instance);
1791 			update_state(instance, IIS_OFFLINE, RERR_RESTART);
1792 			(void) run_method(instance, IM_DISABLE, NULL);
1793 			break;
1794 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1795 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1796 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1797 			/*
1798 			 * The master restarter has requested the instance
1799 			 * be taken to maintenance. Cancel the timer setup
1800 			 * when we entered this state, and go directly to
1801 			 * maintenance.
1802 			 */
1803 			cancel_inst_timer(instance);
1804 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1805 			break;
1806 		}
1807 		break;
1808 
1809 	case IIS_OFFLINE_COPIES:
1810 		switch (event) {
1811 		case RESTARTER_EVENT_TYPE_DISABLE:
1812 			/*
1813 			 * The instance wants disabling. Update the state
1814 			 * to offline, and run the disable method to do the
1815 			 * work to take it to the disabled state.
1816 			 */
1817 			update_state(instance, IIS_OFFLINE, RERR_RESTART);
1818 			(void) run_method(instance, IM_DISABLE, NULL);
1819 			break;
1820 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1821 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1822 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1823 			/*
1824 			 * The master restarter has requested the instance be
1825 			 * placed in maintenance. Since it's already offline
1826 			 * simply update the state.
1827 			 */
1828 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1829 			break;
1830 		}
1831 		break;
1832 
1833 	default:
1834 		debug_msg("handle_restarter_event: instance in an "
1835 		    "unexpected state");
1836 		assert(0);
1837 	}
1838 
1839 done:
1840 	if (send_ack)
1841 		ack_restarter_event(B_TRUE);
1842 }
1843 
1844 /*
1845  * Tries to read and process an event from the event pipe. If there isn't one
1846  * or an error occurred processing the event it returns -1. Else, if the event
1847  * is for an instance we're not already managing we read its state, add it to
1848  * our list to manage, and if appropriate read its configuration. Whether it's
1849  * new to us or not, we then handle the specific event.
1850  * Returns 0 if an event was read and processed successfully, else -1.
1851  */
1852 static int
1853 process_restarter_event(void)
1854 {
1855 	char			*fmri;
1856 	size_t			fmri_size;
1857 	restarter_event_type_t  event_type;
1858 	instance_t		*instance;
1859 	restarter_event_t	*event;
1860 	ssize_t			sz;
1861 
1862 	debug_msg("Entering process_restarter_event");
1863 
1864 	/*
1865 	 * Try to read an event pointer from the event pipe.
1866 	 */
1867 	errno = 0;
1868 	switch (safe_read(rst_event_pipe[PE_CONSUMER], &event,
1869 	    sizeof (event))) {
1870 	case 0:
1871 		break;
1872 	case  1:
1873 		if (errno == EAGAIN)	/* no event to read */
1874 			return (-1);
1875 
1876 		/* other end of pipe closed */
1877 
1878 		/* FALLTHROUGH */
1879 	default:			/* unexpected read error */
1880 		/*
1881 		 * There's something wrong with the event pipe. Let's
1882 		 * shutdown and be restarted.
1883 		 */
1884 		inetd_stop();
1885 		return (-1);
1886 	}
1887 
1888 	/*
1889 	 * Check if we're currently managing the instance which the event
1890 	 * pertains to. If not, read its complete state and add it to our
1891 	 * list to manage.
1892 	 */
1893 
1894 	fmri_size = scf_limit(SCF_LIMIT_MAX_FMRI_LENGTH);
1895 	if ((fmri = malloc(fmri_size)) == NULL) {
1896 		error_msg(strerror(errno));
1897 		goto fail;
1898 	}
1899 	sz = restarter_event_get_instance(event, fmri, fmri_size);
1900 	if (sz >= fmri_size)
1901 		assert(0);
1902 
1903 	for (instance = uu_list_first(instance_list); instance != NULL;
1904 	    instance = uu_list_next(instance_list, instance)) {
1905 		if (strcmp(instance->fmri, fmri) == 0)
1906 			break;
1907 	}
1908 
1909 	if (instance == NULL) {
1910 		int err;
1911 
1912 		debug_msg("New instance to manage: %s", fmri);
1913 
1914 		if (((instance = create_instance(fmri)) == NULL) ||
1915 		    (retrieve_instance_state(instance) != 0) ||
1916 		    (retrieve_method_pids(instance) != 0)) {
1917 			destroy_instance(instance);
1918 			free(fmri);
1919 			goto fail;
1920 		}
1921 
1922 		if (((err = iterate_repository_contracts(instance->fmri, 0))
1923 		    != 0) && (err != ENOENT)) {
1924 			error_msg(gettext(
1925 			    "Failed to adopt contracts of instance %s: %s"),
1926 			    instance->fmri, strerror(err));
1927 			destroy_instance(instance);
1928 			free(fmri);
1929 			goto fail;
1930 		}
1931 
1932 		uu_list_node_init(instance, &instance->link, instance_pool);
1933 		(void) uu_list_insert_after(instance_list, NULL, instance);
1934 
1935 		/*
1936 		 * Only read configuration for instances that aren't in any of
1937 		 * the disabled, maintenance or uninitialized states, since
1938 		 * they'll read it on state exit.
1939 		 */
1940 		if ((instance->cur_istate != IIS_DISABLED) &&
1941 		    (instance->cur_istate != IIS_MAINTENANCE) &&
1942 		    (instance->cur_istate != IIS_UNINITIALIZED)) {
1943 			instance->config = read_instance_cfg(instance->fmri);
1944 			if (instance->config == NULL) {
1945 				log_invalid_cfg(instance->fmri);
1946 				update_state(instance, IIS_MAINTENANCE,
1947 				    RERR_FAULT);
1948 			}
1949 		}
1950 	}
1951 
1952 	free(fmri);
1953 
1954 	event_type = restarter_event_get_type(event);
1955 	debug_msg("Event type: %d for instance: %s", event_type,
1956 	    instance->fmri);
1957 
1958 	/*
1959 	 * If the instance is currently running a method, don't process the
1960 	 * event now, but attach it to the instance for processing when
1961 	 * the instance finishes its transition.
1962 	 */
1963 	if (INST_IN_TRANSITION(instance)) {
1964 		debug_msg("storing event %d for instance %s", event_type,
1965 		    instance->fmri);
1966 		instance->pending_rst_event = event_type;
1967 	} else {
1968 		handle_restarter_event(instance, event_type, B_TRUE);
1969 	}
1970 
1971 	return (0);
1972 
1973 fail:
1974 	ack_restarter_event(B_FALSE);
1975 	return (-1);
1976 }
1977 
1978 /*
1979  * Do the state machine processing associated with the termination of instance
1980  * 'inst''s start method.
1981  */
1982 void
1983 process_start_term(instance_t *inst)
1984 {
1985 	basic_cfg_t	*cfg;
1986 
1987 	debug_msg("Entering process_start_term: inst: %s", inst->fmri);
1988 
1989 	inst->copies--;
1990 
1991 	if ((inst->cur_istate == IIS_MAINTENANCE) ||
1992 	    (inst->cur_istate == IIS_DISABLED)) {
1993 		/* do any further processing/checks when we exit these states */
1994 		return;
1995 	}
1996 
1997 	cfg = inst->config->basic;
1998 
1999 	if (cfg->iswait) {
2000 		proto_info_t	*pi;
2001 
2002 		switch (inst->cur_istate) {
2003 		case IIS_ONLINE:
2004 		case IIS_DEGRADED:
2005 		case IIS_IN_REFRESH_METHOD:
2006 			/*
2007 			 * A wait type service's start method has exited.
2008 			 * Check if the method was fired off in this inetd's
2009 			 * lifetime, or a previous one; if the former,
2010 			 * re-commence listening on the service's behalf; if
2011 			 * the latter, mark the service offline and let bind
2012 			 * attempts commence.
2013 			 */
2014 			for (pi = uu_list_first(cfg->proto_list); pi != NULL;
2015 			    pi = uu_list_next(cfg->proto_list, pi)) {
2016 				/*
2017 				 * If a bound fd exists, the method was fired
2018 				 * off during this inetd's lifetime.
2019 				 */
2020 				if (pi->listen_fd != -1)
2021 					break;
2022 			}
2023 			if (pi != NULL) {
2024 				if (poll_bound_fds(inst, B_TRUE) != 0)
2025 					handle_bind_failure(inst);
2026 			} else {
2027 				update_state(inst, IIS_OFFLINE, RERR_RESTART);
2028 				create_bound_fds(inst);
2029 			}
2030 		}
2031 	} else {
2032 		/*
2033 		 * Check if a nowait service should be brought back online
2034 		 * after exceeding its copies limit.
2035 		 */
2036 		if ((inst->cur_istate == IIS_OFFLINE_COPIES) &&
2037 		    !copies_limit_exceeded(inst)) {
2038 			update_state(inst, IIS_OFFLINE, RERR_NONE);
2039 			process_offline_inst(inst);
2040 		}
2041 	}
2042 }
2043 
2044 /*
2045  * If the instance has a pending event process it and initiate the
2046  * acknowledgement.
2047  */
2048 static void
2049 process_pending_rst_event(instance_t *inst)
2050 {
2051 	if (inst->pending_rst_event != RESTARTER_EVENT_TYPE_INVALID) {
2052 		restarter_event_type_t re;
2053 
2054 		debug_msg("Injecting pending event %d for instance %s",
2055 		    inst->pending_rst_event, inst->fmri);
2056 		re = inst->pending_rst_event;
2057 		inst->pending_rst_event = RESTARTER_EVENT_TYPE_INVALID;
2058 		handle_restarter_event(inst, re, B_TRUE);
2059 	}
2060 }
2061 
2062 /*
2063  * Do the state machine processing associated with the termination
2064  * of the specified instance's non-start method with the specified status.
2065  * Once the processing of the termination is done, the function also picks up
2066  * any processing that was blocked on the method running.
2067  */
2068 void
2069 process_non_start_term(instance_t *inst, int status)
2070 {
2071 	boolean_t ran_online_method = B_FALSE;
2072 
2073 	debug_msg("Entering process_non_start_term: inst: %s, method: %s",
2074 	    inst->fmri, methods[states[inst->cur_istate].method_running].name);
2075 
2076 	if (status == IMRET_FAILURE) {
2077 		error_msg(gettext("The %s method of instance %s failed, "
2078 		    "transitioning to maintenance"),
2079 		    methods[states[inst->cur_istate].method_running].name,
2080 		    inst->fmri);
2081 
2082 		if ((inst->cur_istate == IIS_IN_ONLINE_METHOD) ||
2083 		    (inst->cur_istate == IIS_IN_REFRESH_METHOD))
2084 			destroy_bound_fds(inst);
2085 
2086 		update_state(inst, IIS_MAINTENANCE, RERR_FAULT);
2087 
2088 		inst->maintenance_req = B_FALSE;
2089 		inst->conn_rate_exceeded = B_FALSE;
2090 
2091 		if (inst->new_config != NULL) {
2092 			destroy_instance_cfg(inst->new_config);
2093 			inst->new_config = NULL;
2094 		}
2095 
2096 		if (!inetd_stopping)
2097 			process_pending_rst_event(inst);
2098 
2099 		return;
2100 	}
2101 
2102 	/* non-failure method return */
2103 
2104 	if (status != IMRET_SUCCESS) {
2105 		/*
2106 		 * An instance method never returned a supported return code.
2107 		 * We'll assume this means the method succeeded for now whilst
2108 		 * non-GL-cognizant methods are used - eg. pkill.
2109 		 */
2110 		debug_msg("The %s method of instance %s returned "
2111 		    "non-compliant exit code: %d, assuming success",
2112 		    methods[states[inst->cur_istate].method_running].name,
2113 		    inst->fmri, status);
2114 	}
2115 
2116 	/*
2117 	 * Update the state from the in-transition state.
2118 	 */
2119 	switch (inst->cur_istate) {
2120 	case IIS_IN_ONLINE_METHOD:
2121 		ran_online_method = B_TRUE;
2122 		/* FALLTHROUGH */
2123 	case IIS_IN_REFRESH_METHOD:
2124 		/*
2125 		 * If we've exhausted the bind retries, flag that by setting
2126 		 * the instance's state to degraded.
2127 		 */
2128 		if (inst->bind_retries_exceeded) {
2129 			update_state(inst, IIS_DEGRADED, RERR_NONE);
2130 			break;
2131 		}
2132 		/* FALLTHROUGH */
2133 	default:
2134 		update_state(inst,
2135 		    methods[states[inst->cur_istate].method_running].dst_state,
2136 		    RERR_NONE);
2137 	}
2138 
2139 	if (inst->cur_istate == IIS_OFFLINE) {
2140 		if (inst->new_config != NULL) {
2141 			/*
2142 			 * This instance was found during refresh to need
2143 			 * taking offline because its newly read configuration
2144 			 * was sufficiently different. Now we're offline,
2145 			 * activate this new configuration.
2146 			 */
2147 			destroy_instance_cfg(inst->config);
2148 			inst->config = inst->new_config;
2149 			inst->new_config = NULL;
2150 		}
2151 
2152 		/* continue/complete any transitions that are in progress */
2153 		process_offline_inst(inst);
2154 
2155 	} else if (ran_online_method) {
2156 		/*
2157 		 * We've just successfully executed the online method. We have
2158 		 * a set of bound network fds that were created before running
2159 		 * this method, so now we're online start listening for
2160 		 * connections on them.
2161 		 */
2162 		if (poll_bound_fds(inst, B_TRUE) != 0)
2163 			handle_bind_failure(inst);
2164 	}
2165 
2166 	/*
2167 	 * If we're now out of transition (process_offline_inst() could have
2168 	 * fired off another method), carry out any jobs that were blocked by
2169 	 * us being in transition.
2170 	 */
2171 	if (!INST_IN_TRANSITION(inst)) {
2172 		if (inetd_stopping) {
2173 			if (!instance_stopped(inst)) {
2174 				/*
2175 				 * inetd is stopping, and this instance hasn't
2176 				 * been stopped. Inject a stop event.
2177 				 */
2178 				handle_restarter_event(inst,
2179 				    RESTARTER_EVENT_TYPE_STOP, B_FALSE);
2180 			}
2181 		} else {
2182 			process_pending_rst_event(inst);
2183 		}
2184 	}
2185 }
2186 
2187 /*
2188  * Check if configuration file specified is readable. If not return B_FALSE,
2189  * else return B_TRUE.
2190  */
2191 static boolean_t
2192 can_read_file(const char *path)
2193 {
2194 	int	ret;
2195 	int	serrno;
2196 
2197 	debug_msg("Entering can_read_file");
2198 	do {
2199 		ret = access(path, R_OK);
2200 	} while ((ret < 0) && (errno == EINTR));
2201 	if (ret < 0) {
2202 		if (errno != ENOENT) {
2203 			serrno = errno;
2204 			error_msg(gettext("Failed to access configuration "
2205 			    "file %s for performing modification checks: %s"),
2206 			    path, strerror(errno));
2207 			errno = serrno;
2208 		}
2209 		return (B_FALSE);
2210 	}
2211 	return (B_TRUE);
2212 }
2213 
2214 /*
2215  * Check whether the configuration file has changed contents since inetd
2216  * was last started/refreshed, and if so, log a message indicating that
2217  * inetconv needs to be run.
2218  */
2219 static void
2220 check_conf_file(void)
2221 {
2222 	char		*new_hash;
2223 	char		*old_hash = NULL;
2224 	scf_error_t	ret;
2225 	const char	*file;
2226 
2227 	debug_msg("Entering check_conf_file");
2228 
2229 	if (conf_file == NULL) {
2230 		/*
2231 		 * No explicit config file specified, so see if one of the
2232 		 * default two are readable, checking the primary one first
2233 		 * followed by the secondary.
2234 		 */
2235 		if (can_read_file(PRIMARY_DEFAULT_CONF_FILE)) {
2236 			file = PRIMARY_DEFAULT_CONF_FILE;
2237 		} else if ((errno == ENOENT) &&
2238 		    can_read_file(SECONDARY_DEFAULT_CONF_FILE)) {
2239 			file = SECONDARY_DEFAULT_CONF_FILE;
2240 		} else {
2241 			return;
2242 		}
2243 	} else {
2244 		file = conf_file;
2245 		if (!can_read_file(file))
2246 			return;
2247 	}
2248 
2249 	if (calculate_hash(file, &new_hash) == 0) {
2250 		ret = retrieve_inetd_hash(&old_hash);
2251 		if (((ret == SCF_ERROR_NONE) &&
2252 		    (strcmp(old_hash, new_hash) != 0))) {
2253 			/* modified config file */
2254 			warn_msg(gettext(
2255 			    "Configuration file %s has been modified since "
2256 			    "inetconv was last run. \"inetconv -i %s\" must be "
2257 			    "run to apply any changes to the SMF"), file, file);
2258 		} else if ((ret != SCF_ERROR_NOT_FOUND) &&
2259 		    (ret != SCF_ERROR_NONE)) {
2260 			/* No message if hash not yet computed */
2261 			error_msg(gettext("Failed to check whether "
2262 			    "configuration file %s has been modified: %s"),
2263 			    file, scf_strerror(ret));
2264 		}
2265 		free(old_hash);
2266 		free(new_hash);
2267 	} else {
2268 		error_msg(gettext("Failed to check whether configuration file "
2269 		    "%s has been modified: %s"), file, strerror(errno));
2270 	}
2271 }
2272 
2273 /*
2274  * Refresh all inetd's managed instances and check the configuration file
2275  * for any updates since inetconv was last run, logging a message if there
2276  * are. We call the SMF refresh function to refresh each instance so that
2277  * the refresh request goes through the framework, and thus results in the
2278  * running snapshot of each instance being updated from the configuration
2279  * snapshot.
2280  */
2281 static void
2282 inetd_refresh(void)
2283 {
2284 	instance_t	*inst;
2285 
2286 	debug_msg("Entering inetd_refresh");
2287 
2288 	/* call libscf to send refresh requests for all managed instances */
2289 	for (inst = uu_list_first(instance_list); inst != NULL;
2290 	    inst = uu_list_next(instance_list, inst)) {
2291 		if (smf_refresh_instance(inst->fmri) < 0) {
2292 			error_msg(gettext("Failed to refresh instance %s: %s"),
2293 			    inst->fmri, scf_strerror(scf_error()));
2294 		}
2295 	}
2296 
2297 	/*
2298 	 * Log a message if the configuration file has changed since inetconv
2299 	 * was last run.
2300 	 */
2301 	check_conf_file();
2302 }
2303 
2304 /*
2305  * Initiate inetd's shutdown.
2306  */
2307 static void
2308 inetd_stop(void)
2309 {
2310 	instance_t *inst;
2311 
2312 	debug_msg("Entering inetd_stop");
2313 
2314 	/* Block handling signals for stop and refresh */
2315 	(void) sighold(SIGHUP);
2316 	(void) sighold(SIGTERM);
2317 
2318 	/* Indicate inetd is coming down */
2319 	inetd_stopping = B_TRUE;
2320 
2321 	/* Stop polling on restarter events. */
2322 	clear_pollfd(rst_event_pipe[PE_CONSUMER]);
2323 
2324 	/* Stop polling for any more stop/refresh requests. */
2325 	clear_pollfd(uds_fd);
2326 
2327 	/*
2328 	 * Send a stop event to all currently unstopped instances that
2329 	 * aren't in transition. For those that are in transition, the
2330 	 * event will get sent when the transition completes.
2331 	 */
2332 	for (inst = uu_list_first(instance_list); inst != NULL;
2333 	    inst = uu_list_next(instance_list, inst)) {
2334 		if (!instance_stopped(inst) && !INST_IN_TRANSITION(inst))
2335 			handle_restarter_event(inst,
2336 			    RESTARTER_EVENT_TYPE_STOP, B_FALSE);
2337 	}
2338 }
2339 
2340 /*
2341  * Sets up the intra-inetd-process Unix Domain Socket.
2342  * Returns -1 on error, else 0.
2343  */
2344 static int
2345 uds_init(void)
2346 {
2347 	struct sockaddr_un addr;
2348 
2349 	debug_msg("Entering uds_init");
2350 
2351 	if ((uds_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
2352 		error_msg("socket: %s", strerror(errno));
2353 		return (-1);
2354 	}
2355 
2356 	disable_blocking(uds_fd);
2357 
2358 	(void) unlink(INETD_UDS_PATH);  /* clean-up any stale files */
2359 
2360 	(void) memset(&addr, 0, sizeof (addr));
2361 	addr.sun_family = AF_UNIX;
2362 	/* CONSTCOND */
2363 	assert(sizeof (INETD_UDS_PATH) <= sizeof (addr.sun_path));
2364 	(void) strlcpy(addr.sun_path, INETD_UDS_PATH, sizeof (addr.sun_path));
2365 
2366 	if (bind(uds_fd, (struct sockaddr *)(&addr), sizeof (addr)) < 0) {
2367 		error_msg(gettext("Failed to bind socket to %s: %s"),
2368 		    INETD_UDS_PATH, strerror(errno));
2369 		(void) close(uds_fd);
2370 		return (-1);
2371 	}
2372 
2373 	(void) listen(uds_fd, UDS_BACKLOG);
2374 
2375 	if ((set_pollfd(uds_fd, POLLIN)) == -1) {
2376 		(void) close(uds_fd);
2377 		(void) unlink(INETD_UDS_PATH);
2378 		return (-1);
2379 	}
2380 
2381 	return (0);
2382 }
2383 
2384 static void
2385 uds_fini(void)
2386 {
2387 	if (uds_fd != -1)
2388 		(void) close(uds_fd);
2389 	(void) unlink(INETD_UDS_PATH);
2390 }
2391 
2392 /*
2393  * Handle an incoming request on the Unix Domain Socket. Returns -1 if there
2394  * was an error handling the event, else 0.
2395  */
2396 static int
2397 process_uds_event(void)
2398 {
2399 	uds_request_t		req;
2400 	int			fd;
2401 	struct sockaddr_un	addr;
2402 	socklen_t		len = sizeof (addr);
2403 	int			ret;
2404 	uint_t			retries = 0;
2405 
2406 	debug_msg("Entering process_uds_event");
2407 
2408 	do {
2409 		fd = accept(uds_fd, (struct sockaddr *)&addr, &len);
2410 	} while ((fd < 0) && (errno == EINTR));
2411 	if (fd < 0) {
2412 		if (errno != EWOULDBLOCK)
2413 			error_msg("accept failed: %s", strerror(errno));
2414 		return (-1);
2415 	}
2416 
2417 	for (retries = 0; retries < UDS_RECV_RETRIES; retries++) {
2418 		if (((ret = safe_read(fd, &req, sizeof (req))) != 1) ||
2419 		    (errno != EAGAIN))
2420 			break;
2421 
2422 		(void) poll(NULL, 0, 100);	/* 100ms pause */
2423 	}
2424 
2425 	if (ret != 0) {
2426 		error_msg(gettext("Failed read: %s"), strerror(errno));
2427 		(void) close(fd);
2428 		return (-1);
2429 	}
2430 
2431 	switch (req) {
2432 	case UR_REFRESH_INETD:
2433 		/* flag the request for event_loop() to process */
2434 		refresh_inetd_requested = B_TRUE;
2435 		(void) close(fd);
2436 		break;
2437 	case UR_STOP_INETD:
2438 		inetd_stop();
2439 		break;
2440 	default:
2441 		error_msg("unexpected UDS request");
2442 		(void) close(fd);
2443 		return (-1);
2444 	}
2445 
2446 	return (0);
2447 }
2448 
2449 /*
2450  * Perform checks for common exec string errors. We limit the checks to
2451  * whether the file exists, is a regular file, and has at least one execute
2452  * bit set. We leave the core security checks to exec() so as not to duplicate
2453  * and thus incur the associated drawbacks, but hope to catch the common
2454  * errors here.
2455  */
2456 static boolean_t
2457 passes_basic_exec_checks(const char *instance, const char *method,
2458     const char *path)
2459 {
2460 	struct stat	sbuf;
2461 
2462 	debug_msg("Entering passes_basic_exec_checks");
2463 
2464 	/* check the file exists */
2465 	while (stat(path, &sbuf) == -1) {
2466 		if (errno != EINTR) {
2467 			error_msg(gettext(
2468 			    "Can't stat the %s method of instance %s: %s"),
2469 			    method, instance, strerror(errno));
2470 			return (B_FALSE);
2471 		}
2472 	}
2473 
2474 	/*
2475 	 * Check if the file is a regular file and has at least one execute
2476 	 * bit set.
2477 	 */
2478 	if ((sbuf.st_mode & S_IFMT) != S_IFREG) {
2479 		error_msg(gettext(
2480 		    "The %s method of instance %s isn't a regular file"),
2481 		    method, instance);
2482 		return (B_FALSE);
2483 	} else if ((sbuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
2484 		error_msg(gettext("The %s method instance %s doesn't have "
2485 		    "any execute permissions set"), method, instance);
2486 		return (B_FALSE);
2487 	}
2488 
2489 	return (B_TRUE);
2490 }
2491 
2492 static void
2493 exec_method(instance_t *instance, instance_method_t method, method_info_t *mi,
2494     struct method_context *mthd_ctxt, const proto_info_t *pi)
2495 {
2496 	char		**args;
2497 	char 		**env;
2498 	const char	*errf;
2499 	int		serrno;
2500 	basic_cfg_t	*cfg = instance->config->basic;
2501 
2502 	if (method == IM_START) {
2503 		/*
2504 		 * If wrappers checks fail, pretend the method was exec'd and
2505 		 * failed.
2506 		 */
2507 		if (!tcp_wrappers_ok(instance))
2508 			exit(IMRET_FAILURE);
2509 	}
2510 
2511 	/*
2512 	 * Revert the disposition of handled signals and ignored signals to
2513 	 * their defaults, unblocking any blocked ones as a side effect.
2514 	 */
2515 	(void) sigset(SIGHUP, SIG_DFL);
2516 	(void) sigset(SIGTERM, SIG_DFL);
2517 	(void) sigset(SIGINT, SIG_DFL);
2518 
2519 	/*
2520 	 * Setup exec arguments. Do this before the fd setup below, so our
2521 	 * logging related file fd doesn't get taken over before we call
2522 	 * expand_address().
2523 	 */
2524 	if ((method == IM_START) &&
2525 	    (strcmp(mi->exec_args_we.we_wordv[0], "%A") == 0)) {
2526 		args = expand_address(instance, pi);
2527 	} else {
2528 		args = mi->exec_args_we.we_wordv;
2529 	}
2530 
2531 	/* Generate audit trail for start operations */
2532 	if (method == IM_START) {
2533 		adt_event_data_t *ae;
2534 		struct sockaddr_storage ss;
2535 		priv_set_t *privset;
2536 		socklen_t sslen = sizeof (ss);
2537 
2538 		if ((ae = adt_alloc_event(audit_handle, ADT_inetd_connect))
2539 		    == NULL) {
2540 			error_msg(gettext("Unable to allocate audit event for "
2541 			    "the %s method of instance %s"),
2542 			    methods[method].name, instance->fmri);
2543 			exit(IMRET_FAILURE);
2544 		}
2545 
2546 		/*
2547 		 * The inetd_connect audit record consists of:
2548 		 *	Service name
2549 		 *	Execution path
2550 		 *	Remote address and port
2551 		 *	Local port
2552 		 *	Process privileges
2553 		 */
2554 		ae->adt_inetd_connect.service_name = cfg->svc_name;
2555 		ae->adt_inetd_connect.cmd = mi->exec_path;
2556 
2557 		if (instance->remote_addr.ss_family == AF_INET) {
2558 			struct in_addr *in = SS_SINADDR(instance->remote_addr);
2559 			ae->adt_inetd_connect.ip_adr[0] = in->s_addr;
2560 			ae->adt_inetd_connect.ip_type = ADT_IPv4;
2561 		} else {
2562 			uint32_t *addr6;
2563 			int i;
2564 
2565 			ae->adt_inetd_connect.ip_type = ADT_IPv6;
2566 			addr6 = (uint32_t *)SS_SINADDR(instance->remote_addr);
2567 			for (i = 0; i < 4; ++i)
2568 				ae->adt_inetd_connect.ip_adr[i] = addr6[i];
2569 		}
2570 
2571 		ae->adt_inetd_connect.ip_remote_port =
2572 		    ntohs(SS_PORT(instance->remote_addr));
2573 
2574 		if (getsockname(instance->conn_fd, (struct sockaddr *)&ss,
2575 		    &sslen) == 0)
2576 			ae->adt_inetd_connect.ip_local_port =
2577 			    ntohs(SS_PORT(ss));
2578 
2579 		privset = mthd_ctxt->priv_set;
2580 		if (privset == NULL) {
2581 			privset = priv_allocset();
2582 			if (privset != NULL &&
2583 			    getppriv(PRIV_EFFECTIVE, privset) != 0) {
2584 				priv_freeset(privset);
2585 				privset = NULL;
2586 			}
2587 		}
2588 
2589 		ae->adt_inetd_connect.privileges = privset;
2590 
2591 		(void) adt_put_event(ae, ADT_SUCCESS, ADT_SUCCESS);
2592 		adt_free_event(ae);
2593 
2594 		if (privset != NULL && mthd_ctxt->priv_set == NULL)
2595 			priv_freeset(privset);
2596 	}
2597 
2598 	/*
2599 	 * Set method context before the fd setup below so we can output an
2600 	 * error message if it fails.
2601 	 */
2602 	if ((errno = restarter_set_method_context(mthd_ctxt, &errf)) != 0) {
2603 		const char *msg;
2604 
2605 		if (errno == -1) {
2606 			if (strcmp(errf, "core_set_process_path") == 0) {
2607 				msg = gettext("Failed to set the corefile path "
2608 				    "for the %s method of instance %s");
2609 			} else if (strcmp(errf, "setproject") == 0) {
2610 				msg = gettext("Failed to assign a resource "
2611 				    "control for the %s method of instance %s");
2612 			} else if (strcmp(errf, "pool_set_binding") == 0) {
2613 				msg = gettext("Failed to bind the %s method of "
2614 				    "instance %s to a pool due to a system "
2615 				    "error");
2616 			} else {
2617 				assert(0);
2618 				abort();
2619 			}
2620 
2621 			error_msg(msg, methods[method].name, instance->fmri);
2622 
2623 			exit(IMRET_FAILURE);
2624 		}
2625 
2626 		if (errf != NULL && strcmp(errf, "pool_set_binding") == 0) {
2627 			switch (errno) {
2628 			case ENOENT:
2629 				msg = gettext("Failed to find resource pool "
2630 				    "for the %s method of instance %s");
2631 				break;
2632 
2633 			case EBADF:
2634 				msg = gettext("Failed to bind the %s method of "
2635 				    "instance %s to a pool due to invalid "
2636 				    "configuration");
2637 				break;
2638 
2639 			default:
2640 				assert(0);
2641 				abort();
2642 			}
2643 
2644 			exit(IMRET_FAILURE);
2645 		}
2646 
2647 		if (errf != NULL) {
2648 			error_msg(gettext("Failed to set credentials for the "
2649 			    "%s method of instance %s (%s: %s)"),
2650 			    methods[method].name, instance->fmri, errf,
2651 			    strerror(errno));
2652 			exit(IMRET_FAILURE);
2653 		}
2654 
2655 		switch (errno) {
2656 		case ENOMEM:
2657 			msg = gettext("Failed to set credentials for the %s "
2658 			    "method of instance %s (out of memory)");
2659 			break;
2660 
2661 		case ENOENT:
2662 			msg = gettext("Failed to set credentials for the %s "
2663 			    "method of instance %s (no passwd or shadow "
2664 			    "entry for user)");
2665 			break;
2666 
2667 		default:
2668 			assert(0);
2669 			abort();
2670 		}
2671 
2672 		error_msg(msg, methods[method].name, instance->fmri);
2673 		exit(IMRET_FAILURE);
2674 	}
2675 
2676 	/* let exec() free mthd_ctxt */
2677 
2678 	/* setup standard fds */
2679 	if (method == IM_START) {
2680 		(void) dup2(instance->conn_fd, STDIN_FILENO);
2681 	} else {
2682 		(void) close(STDIN_FILENO);
2683 		(void) open("/dev/null", O_RDONLY);
2684 	}
2685 	(void) dup2(STDIN_FILENO, STDOUT_FILENO);
2686 	(void) dup2(STDIN_FILENO, STDERR_FILENO);
2687 
2688 	closefrom(STDERR_FILENO + 1);
2689 
2690 	method_preexec();
2691 
2692 	env = set_smf_env(mthd_ctxt, instance, methods[method].name);
2693 
2694 	if (env != NULL) {
2695 		do {
2696 			(void) execve(mi->exec_path, args, env);
2697 		} while (errno == EINTR);
2698 	}
2699 
2700 	serrno = errno;
2701 	/* start up logging again to report the error */
2702 	msg_init();
2703 	errno = serrno;
2704 
2705 	error_msg(
2706 	    gettext("Failed to exec %s method of instance %s: %s"),
2707 	    methods[method].name, instance->fmri, strerror(errno));
2708 
2709 	if ((method == IM_START) && (instance->config->basic->iswait)) {
2710 		/*
2711 		 * We couldn't exec the start method for a wait type service.
2712 		 * Eat up data from the endpoint, so that hopefully the
2713 		 * service's fd won't wake poll up on the next time round
2714 		 * event_loop(). This behavior is carried over from the old
2715 		 * inetd, and it seems somewhat arbitrary that it isn't
2716 		 * also done in the case of fork failures; but I guess
2717 		 * it assumes an exec failure is less likely to be the result
2718 		 * of a resource shortage, and is thus not worth retrying.
2719 		 */
2720 		consume_wait_data(instance, 0);
2721 	}
2722 
2723 	exit(IMRET_FAILURE);
2724 }
2725 
2726 static restarter_error_t
2727 get_method_error_success(instance_method_t method)
2728 {
2729 	switch (method) {
2730 	case IM_OFFLINE:
2731 		return (RERR_RESTART);
2732 	case IM_ONLINE:
2733 		return (RERR_RESTART);
2734 	case IM_DISABLE:
2735 		return (RERR_RESTART);
2736 	case IM_REFRESH:
2737 		return (RERR_REFRESH);
2738 	case IM_START:
2739 		return (RERR_RESTART);
2740 	}
2741 	/* NOTREACHED */
2742 }
2743 
2744 /*
2745  * Runs the specified method of the specified service instance.
2746  * If the method was never specified, we handle it the same as if the
2747  * method was called and returned success, carrying on any transition the
2748  * instance may be in the midst of.
2749  * If the method isn't executable in its specified profile or an error occurs
2750  * forking a process to run the method in the function returns -1.
2751  * If a method binary is successfully executed, the function switches the
2752  * instance's cur state to the method's associated 'run' state and the next
2753  * state to the methods associated next state.
2754  * Returns -1 if there's an error before forking, else 0.
2755  */
2756 int
2757 run_method(instance_t *instance, instance_method_t method,
2758     const proto_info_t *start_info)
2759 {
2760 	pid_t			child_pid;
2761 	method_info_t		*mi;
2762 	struct method_context	*mthd_ctxt = NULL;
2763 	const char		*errstr;
2764 	int			sig;
2765 	int			ret;
2766 	instance_cfg_t		*cfg = instance->config;
2767 	ctid_t			cid;
2768 	boolean_t		trans_failure = B_TRUE;
2769 	int			serrno;
2770 
2771 	debug_msg("Entering run_method, instance: %s, method: %s",
2772 	    instance->fmri, methods[method].name);
2773 
2774 	/*
2775 	 * Don't bother updating the instance's state for the start method
2776 	 * as there isn't a separate start method state.
2777 	 */
2778 	if (method != IM_START)
2779 		update_instance_states(instance, get_method_state(method),
2780 		    methods[method].dst_state,
2781 		    get_method_error_success(method));
2782 
2783 	if ((mi = cfg->methods[method]) == NULL) {
2784 		/*
2785 		 * An unspecified method. Since the absence of this method
2786 		 * must be valid (otherwise it would have been caught
2787 		 * during configuration validation), simply pretend the method
2788 		 * ran and returned success.
2789 		 */
2790 		process_non_start_term(instance, IMRET_SUCCESS);
2791 		return (0);
2792 	}
2793 
2794 	/* Handle special method tokens, not allowed on start */
2795 	if (method != IM_START) {
2796 		if (restarter_is_null_method(mi->exec_path)) {
2797 			/* :true means nothing should be done */
2798 			process_non_start_term(instance, IMRET_SUCCESS);
2799 			return (0);
2800 		}
2801 
2802 		if ((sig = restarter_is_kill_method(mi->exec_path)) >= 0) {
2803 			/* Carry out contract assassination */
2804 			ret = iterate_repository_contracts(instance->fmri, sig);
2805 			/* ENOENT means we didn't find any contracts */
2806 			if (ret != 0 && ret != ENOENT) {
2807 				error_msg(gettext("Failed to send signal %d "
2808 				    "to contracts of instance %s: %s"), sig,
2809 				    instance->fmri, strerror(ret));
2810 				goto prefork_failure;
2811 			} else {
2812 				process_non_start_term(instance, IMRET_SUCCESS);
2813 				return (0);
2814 			}
2815 		}
2816 
2817 		if ((sig = restarter_is_kill_proc_method(mi->exec_path)) >= 0) {
2818 			/* Carry out process assassination */
2819 			rep_val_t	*rv;
2820 
2821 			ret = IMRET_SUCCESS;
2822 			for (rv = uu_list_first(instance->start_pids);
2823 			    rv != NULL;
2824 			    rv = uu_list_next(instance->start_pids, rv)) {
2825 				if ((kill((pid_t)rv->val, sig) != 0) &&
2826 				    (errno != ESRCH)) {
2827 					ret = IMRET_FAILURE;
2828 					error_msg(gettext("Unable to signal "
2829 					    "start process of instance %s: %s"),
2830 					    instance->fmri, strerror(errno));
2831 				}
2832 			}
2833 
2834 			process_non_start_term(instance, ret);
2835 			return (0);
2836 		}
2837 	}
2838 
2839 	/*
2840 	 * Get the associated method context before the fork so we can
2841 	 * modify the instances state if things go wrong.
2842 	 */
2843 	if ((mthd_ctxt = read_method_context(instance->fmri,
2844 	    methods[method].name, mi->exec_path, &errstr)) == NULL) {
2845 		error_msg(gettext("Failed to retrieve method context for the "
2846 		    "%s method of instance %s: %s"), methods[method].name,
2847 		    instance->fmri, errstr);
2848 		goto prefork_failure;
2849 	}
2850 
2851 	/*
2852 	 * Perform some basic checks before we fork to limit the possibility
2853 	 * of exec failures, so we can modify the instance state if necessary.
2854 	 */
2855 	if (!passes_basic_exec_checks(instance->fmri, methods[method].name,
2856 	    mi->exec_path)) {
2857 		trans_failure = B_FALSE;
2858 		goto prefork_failure;
2859 	}
2860 
2861 	if (contract_prefork() == -1)
2862 		goto prefork_failure;
2863 	child_pid = fork();
2864 	serrno = errno;
2865 	contract_postfork();
2866 
2867 	switch (child_pid) {
2868 	case -1:
2869 		error_msg(gettext(
2870 		    "Unable to fork %s method of instance %s: %s"),
2871 		    methods[method].name, instance->fmri, strerror(serrno));
2872 		if ((serrno != EAGAIN) && (serrno != ENOMEM))
2873 			trans_failure = B_FALSE;
2874 		goto prefork_failure;
2875 	case 0:				/* child */
2876 		exec_method(instance, method, mi, mthd_ctxt, start_info);
2877 		break;
2878 	default:			/* parent */
2879 		restarter_free_method_context(mthd_ctxt);
2880 		mthd_ctxt = NULL;
2881 
2882 		if (get_latest_contract(&cid) < 0)
2883 			cid = -1;
2884 
2885 		/*
2886 		 * Register this method so its termination is noticed and
2887 		 * the state transition this method participates in is
2888 		 * continued.
2889 		 */
2890 		if (register_method(instance, child_pid, cid, method) != 0) {
2891 			/*
2892 			 * Since we will never find out about the termination
2893 			 * of this method, if it's a non-start method treat
2894 			 * is as a failure so we don't block restarter event
2895 			 * processing on it whilst it languishes in a method
2896 			 * running state.
2897 			 */
2898 			error_msg(gettext("Failed to monitor status of "
2899 			    "%s method of instance %s"), methods[method].name,
2900 			    instance->fmri);
2901 			if (method != IM_START)
2902 				process_non_start_term(instance, IMRET_FAILURE);
2903 		}
2904 
2905 		add_method_ids(instance, child_pid, cid, method);
2906 
2907 		/* do tcp tracing for those nowait instances that request it */
2908 		if ((method == IM_START) && cfg->basic->do_tcp_trace &&
2909 		    !cfg->basic->iswait) {
2910 			char buf[INET6_ADDRSTRLEN];
2911 
2912 			syslog(LOG_NOTICE, "%s[%d] from %s %d",
2913 			    cfg->basic->svc_name, child_pid,
2914 			    inet_ntop_native(instance->remote_addr.ss_family,
2915 			    SS_SINADDR(instance->remote_addr), buf,
2916 			    sizeof (buf)),
2917 			    ntohs(SS_PORT(instance->remote_addr)));
2918 		}
2919 	}
2920 
2921 	return (0);
2922 
2923 prefork_failure:
2924 	if (mthd_ctxt != NULL) {
2925 		restarter_free_method_context(mthd_ctxt);
2926 		mthd_ctxt = NULL;
2927 	}
2928 
2929 	if (method == IM_START) {
2930 		/*
2931 		 * Only place a start method in maintenance if we're sure
2932 		 * that the failure was non-transient.
2933 		 */
2934 		if (!trans_failure) {
2935 			destroy_bound_fds(instance);
2936 			update_state(instance, IIS_MAINTENANCE, RERR_FAULT);
2937 		}
2938 	} else {
2939 		/* treat the failure as if the method ran and failed */
2940 		process_non_start_term(instance, IMRET_FAILURE);
2941 	}
2942 
2943 	return (-1);
2944 }
2945 
2946 static int
2947 accept_connection(instance_t *instance, proto_info_t *pi)
2948 {
2949 	int		fd;
2950 	socklen_t	size;
2951 
2952 	debug_msg("Entering accept_connection");
2953 
2954 	if (instance->config->basic->istlx) {
2955 		fd = tlx_accept(instance->fmri, (tlx_info_t *)pi,
2956 		    &(instance->remote_addr));
2957 	} else {
2958 		size = sizeof (instance->remote_addr);
2959 		fd = accept(pi->listen_fd,
2960 		    (struct sockaddr *)&(instance->remote_addr), &size);
2961 		if (fd < 0)
2962 			error_msg("accept: %s", strerror(errno));
2963 	}
2964 
2965 	return (fd);
2966 }
2967 
2968 /*
2969  * Handle an incoming connection request for a nowait service.
2970  * This involves accepting the incoming connection on a new fd. Connection
2971  * rate checks are then performed, transitioning the service to the
2972  * conrate offline state if these fail. Otherwise, the service's start method
2973  * is run (performing TCP wrappers checks if applicable as we do), and on
2974  * success concurrent copies checking is done, transitioning the service to the
2975  * copies offline state if this fails.
2976  */
2977 static void
2978 process_nowait_request(instance_t *instance, proto_info_t *pi)
2979 {
2980 	basic_cfg_t		*cfg = instance->config->basic;
2981 	int			ret;
2982 	adt_event_data_t	*ae;
2983 	char			buf[BUFSIZ];
2984 
2985 	debug_msg("Entering process_nowait_req");
2986 
2987 	/* accept nowait service connections on a new fd */
2988 	if ((instance->conn_fd = accept_connection(instance, pi)) == -1) {
2989 		/*
2990 		 * Failed accept. Return and allow the event loop to initiate
2991 		 * another attempt later if the request is still present.
2992 		 */
2993 		return;
2994 	}
2995 
2996 	/*
2997 	 * Limit connection rate of nowait services. If either conn_rate_max
2998 	 * or conn_rate_offline are <= 0, no connection rate limit checking
2999 	 * is done. If the configured rate is exceeded, the instance is taken
3000 	 * to the connrate_offline state and a timer scheduled to try and
3001 	 * bring the instance back online after the configured offline time.
3002 	 */
3003 	if ((cfg->conn_rate_max > 0) && (cfg->conn_rate_offline > 0)) {
3004 		if (instance->conn_rate_count++ == 0) {
3005 			instance->conn_rate_start = time(NULL);
3006 		} else if (instance->conn_rate_count >
3007 		    cfg->conn_rate_max) {
3008 			time_t now = time(NULL);
3009 
3010 			if ((now - instance->conn_rate_start) > 1) {
3011 				instance->conn_rate_start = now;
3012 				instance->conn_rate_count = 1;
3013 			} else {
3014 				/* Generate audit record */
3015 				if ((ae = adt_alloc_event(audit_handle,
3016 				    ADT_inetd_ratelimit)) == NULL) {
3017 					error_msg(gettext("Unable to allocate "
3018 					    "rate limit audit event"));
3019 				} else {
3020 					adt_inetd_ratelimit_t *rl =
3021 					    &ae->adt_inetd_ratelimit;
3022 					/*
3023 					 * The inetd_ratelimit audit
3024 					 * record consists of:
3025 					 * 	Service name
3026 					 *	Connection rate limit
3027 					 */
3028 					rl->service_name = cfg->svc_name;
3029 					(void) snprintf(buf, sizeof (buf),
3030 					    "limit=%lld", cfg->conn_rate_max);
3031 					rl->limit = buf;
3032 					(void) adt_put_event(ae, ADT_SUCCESS,
3033 					    ADT_SUCCESS);
3034 					adt_free_event(ae);
3035 				}
3036 
3037 				error_msg(gettext(
3038 				    "Instance %s has exceeded its configured "
3039 				    "connection rate, additional connections "
3040 				    "will not be accepted for %d seconds"),
3041 				    instance->fmri, cfg->conn_rate_offline);
3042 
3043 				close_net_fd(instance, instance->conn_fd);
3044 				instance->conn_fd = -1;
3045 
3046 				destroy_bound_fds(instance);
3047 
3048 				instance->conn_rate_count = 0;
3049 
3050 				instance->conn_rate_exceeded = B_TRUE;
3051 				(void) run_method(instance, IM_OFFLINE, NULL);
3052 
3053 				return;
3054 			}
3055 		}
3056 	}
3057 
3058 	ret = run_method(instance, IM_START, pi);
3059 
3060 	close_net_fd(instance, instance->conn_fd);
3061 	instance->conn_fd = -1;
3062 
3063 	if (ret == -1) /* the method wasn't forked  */
3064 		return;
3065 
3066 	instance->copies++;
3067 
3068 	/*
3069 	 * Limit concurrent connections of nowait services.
3070 	 */
3071 	if (copies_limit_exceeded(instance)) {
3072 		/* Generate audit record */
3073 		if ((ae = adt_alloc_event(audit_handle, ADT_inetd_copylimit))
3074 		    == NULL) {
3075 			error_msg(gettext("Unable to allocate copy limit "
3076 			    "audit event"));
3077 		} else {
3078 			/*
3079 			 * The inetd_copylimit audit record consists of:
3080 			 *	Service name
3081 			 * 	Copy limit
3082 			 */
3083 			ae->adt_inetd_copylimit.service_name = cfg->svc_name;
3084 			(void) snprintf(buf, sizeof (buf), "limit=%lld",
3085 			    cfg->max_copies);
3086 			ae->adt_inetd_copylimit.limit = buf;
3087 			(void) adt_put_event(ae, ADT_SUCCESS, ADT_SUCCESS);
3088 			adt_free_event(ae);
3089 		}
3090 
3091 		warn_msg(gettext("Instance %s has reached its maximum "
3092 		    "configured copies, no new connections will be accepted"),
3093 		    instance->fmri);
3094 		destroy_bound_fds(instance);
3095 		(void) run_method(instance, IM_OFFLINE, NULL);
3096 	}
3097 }
3098 
3099 /*
3100  * Handle an incoming request for a wait type service.
3101  * Failure rate checking is done first, taking the service to the maintenance
3102  * state if the checks fail. Following this, the service's start method is run,
3103  * and on success, we stop listening for new requests for this service.
3104  */
3105 static void
3106 process_wait_request(instance_t *instance, const proto_info_t *pi)
3107 {
3108 	basic_cfg_t		*cfg = instance->config->basic;
3109 	int			ret;
3110 	adt_event_data_t	*ae;
3111 	char			buf[BUFSIZ];
3112 
3113 	debug_msg("Entering process_wait_request");
3114 
3115 	instance->conn_fd = pi->listen_fd;
3116 
3117 	/*
3118 	 * Detect broken servers and transition them to maintenance. If a
3119 	 * wait type service exits without accepting the connection or
3120 	 * consuming (reading) the datagram, that service's descriptor will
3121 	 * select readable again, and inetd will fork another instance of
3122 	 * the server. If either wait_fail_cnt or wait_fail_interval are <= 0,
3123 	 * no failure rate detection is done.
3124 	 */
3125 	if ((cfg->wait_fail_cnt > 0) && (cfg->wait_fail_interval > 0)) {
3126 		if (instance->fail_rate_count++ == 0) {
3127 			instance->fail_rate_start = time(NULL);
3128 		} else if (instance->fail_rate_count > cfg->wait_fail_cnt) {
3129 			time_t now = time(NULL);
3130 
3131 			if ((now - instance->fail_rate_start) >
3132 			    cfg->wait_fail_interval) {
3133 				instance->fail_rate_start = now;
3134 				instance->fail_rate_count = 1;
3135 			} else {
3136 				/* Generate audit record */
3137 				if ((ae = adt_alloc_event(audit_handle,
3138 				    ADT_inetd_failrate)) == NULL) {
3139 					error_msg(gettext("Unable to allocate "
3140 					    "failure rate audit event"));
3141 				} else {
3142 					adt_inetd_failrate_t *fr =
3143 					    &ae->adt_inetd_failrate;
3144 					/*
3145 					 * The inetd_failrate audit record
3146 					 * consists of:
3147 					 * 	Service name
3148 					 * 	Failure rate
3149 					 *	Interval
3150 					 * Last two are expressed as k=v pairs
3151 					 * in the values field.
3152 					 */
3153 					fr->service_name = cfg->svc_name;
3154 					(void) snprintf(buf, sizeof (buf),
3155 					    "limit=%lld,interval=%d",
3156 					    cfg->wait_fail_cnt,
3157 					    cfg->wait_fail_interval);
3158 					fr->values = buf;
3159 					(void) adt_put_event(ae, ADT_SUCCESS,
3160 					    ADT_SUCCESS);
3161 					adt_free_event(ae);
3162 				}
3163 
3164 				error_msg(gettext(
3165 				    "Instance %s has exceeded its configured "
3166 				    "failure rate, transitioning to "
3167 				    "maintenance"), instance->fmri);
3168 				instance->fail_rate_count = 0;
3169 
3170 				destroy_bound_fds(instance);
3171 
3172 				instance->maintenance_req = B_TRUE;
3173 				(void) run_method(instance, IM_OFFLINE, NULL);
3174 				return;
3175 			}
3176 		}
3177 	}
3178 
3179 	ret = run_method(instance, IM_START, pi);
3180 
3181 	instance->conn_fd = -1;
3182 
3183 	if (ret == 0) {
3184 		/*
3185 		 * Stop listening for connections now we've fired off the
3186 		 * server for a wait type instance.
3187 		 */
3188 		(void) poll_bound_fds(instance, B_FALSE);
3189 	}
3190 }
3191 
3192 /*
3193  * Process any networks requests for each proto for each instance.
3194  */
3195 void
3196 process_network_events(void)
3197 {
3198 	instance_t	*instance;
3199 
3200 	debug_msg("Entering process_network_events");
3201 
3202 	for (instance = uu_list_first(instance_list); instance != NULL;
3203 	    instance = uu_list_next(instance_list, instance)) {
3204 		basic_cfg_t	*cfg;
3205 		proto_info_t	*pi;
3206 
3207 		/*
3208 		 * Ignore instances in states that definitely don't have any
3209 		 * listening fds.
3210 		 */
3211 		switch (instance->cur_istate) {
3212 		case IIS_ONLINE:
3213 		case IIS_DEGRADED:
3214 		case IIS_IN_REFRESH_METHOD:
3215 			break;
3216 		default:
3217 			continue;
3218 		}
3219 
3220 		cfg = instance->config->basic;
3221 
3222 		for (pi = uu_list_first(cfg->proto_list); pi != NULL;
3223 		    pi = uu_list_next(cfg->proto_list, pi)) {
3224 			if ((pi->listen_fd != -1) &&
3225 			    isset_pollfd(pi->listen_fd)) {
3226 				if (cfg->iswait) {
3227 					process_wait_request(instance, pi);
3228 				} else {
3229 					process_nowait_request(instance, pi);
3230 				}
3231 			}
3232 		}
3233 	}
3234 }
3235 
3236 /* ARGSUSED0 */
3237 static void
3238 sigterm_handler(int sig)
3239 {
3240 	debug_msg("Entering sigterm_handler");
3241 
3242 	got_sigterm = B_TRUE;
3243 }
3244 
3245 /* ARGSUSED0 */
3246 static void
3247 sighup_handler(int sig)
3248 {
3249 	debug_msg("Entering sighup_handler");
3250 
3251 	refresh_inetd_requested = B_TRUE;
3252 }
3253 
3254 /*
3255  * inetd's major work loop. This function sits in poll waiting for events
3256  * to occur, processing them when they do. The possible events are
3257  * master restarter requests, expired timer queue timers, stop/refresh signal
3258  * requests, contract events indicating process termination, stop/refresh
3259  * requests originating from one of the stop/refresh inetd processes and
3260  * network events.
3261  * The loop is exited when a stop request is received and processed, and
3262  * all the instances have reached a suitable 'stopping' state.
3263  */
3264 static void
3265 event_loop(void)
3266 {
3267 	instance_t		*instance;
3268 	int			timeout;
3269 
3270 	debug_msg("Entering event_loop");
3271 
3272 	for (;;) {
3273 		int	pret = -1;
3274 
3275 		timeout = iu_earliest_timer(timer_queue);
3276 
3277 		debug_msg("Doing signal check/poll");
3278 		if (!got_sigterm && !refresh_inetd_requested) {
3279 			pret = poll(poll_fds, num_pollfds, timeout);
3280 			if ((pret == -1) && (errno != EINTR)) {
3281 				error_msg(gettext("poll failure: %s"),
3282 				    strerror(errno));
3283 				continue;
3284 			}
3285 			debug_msg("Exiting poll, returned: %d", pret);
3286 		}
3287 
3288 		if (got_sigterm) {
3289 			msg_fini();
3290 			inetd_stop();
3291 			got_sigterm = B_FALSE;
3292 			goto check_if_stopped;
3293 		}
3294 
3295 		/*
3296 		 * Process any stop/refresh requests from the Unix Domain
3297 		 * Socket.
3298 		 */
3299 		if ((pret != -1) && isset_pollfd(uds_fd)) {
3300 			while (process_uds_event() == 0)
3301 				;
3302 		}
3303 
3304 		/*
3305 		 * Process refresh request. We do this check after the UDS
3306 		 * event check above, as it would be wasted processing if we
3307 		 * started refreshing inetd based on a SIGHUP, and then were
3308 		 * told to shut-down via a UDS event.
3309 		 */
3310 		if (refresh_inetd_requested) {
3311 			refresh_inetd_requested = B_FALSE;
3312 			if (!inetd_stopping)
3313 				inetd_refresh();
3314 		}
3315 
3316 		/*
3317 		 * We were interrupted by a signal. Don't waste any more
3318 		 * time processing a potentially inaccurate poll return.
3319 		 */
3320 		if (pret == -1)
3321 			continue;
3322 
3323 		/*
3324 		 * Process any instance restarter events.
3325 		 */
3326 		if (isset_pollfd(rst_event_pipe[PE_CONSUMER])) {
3327 			while (process_restarter_event() == 0)
3328 				;
3329 		}
3330 
3331 		/*
3332 		 * Process any expired timers (bind retry, con-rate offline,
3333 		 * method timeouts).
3334 		 */
3335 		(void) iu_expire_timers(timer_queue);
3336 
3337 		process_terminated_methods();
3338 
3339 		/*
3340 		 * If inetd is stopping, check whether all our managed
3341 		 * instances have been stopped and we can return.
3342 		 */
3343 		if (inetd_stopping) {
3344 check_if_stopped:
3345 			for (instance = uu_list_first(instance_list);
3346 			    instance != NULL;
3347 			    instance = uu_list_next(instance_list, instance)) {
3348 				if (!instance_stopped(instance)) {
3349 					debug_msg("%s not yet stopped",
3350 					    instance->fmri);
3351 					break;
3352 				}
3353 			}
3354 			/* if all instances are stopped, return */
3355 			if (instance == NULL)
3356 				return;
3357 		}
3358 
3359 		process_network_events();
3360 	}
3361 }
3362 
3363 static void
3364 fini(void)
3365 {
3366 	debug_msg("Entering fini");
3367 
3368 	method_fini();
3369 	uds_fini();
3370 	if (timer_queue != NULL)
3371 		iu_tq_destroy(timer_queue);
3372 	if (rst_event_handle != NULL)
3373 		restarter_unbind_handle(rst_event_handle);
3374 
3375 	/*
3376 	 * We don't explicitly close the event pipe as restarter_event_proxy()
3377 	 * doesn't anticipate the pipe being closed; in the case it was trying
3378 	 * to write to it it would get a SIGPIPE and result in an ungraceful
3379 	 * shutdown, and in the case it was trying to read from it, safe_read()
3380 	 * would return, and it would have to block itself in some way until the
3381 	 * process exited. Not closing this end of the pipe prevents the first
3382 	 * problem from occurring, and allows the thread in
3383 	 * restarter_event_proxy() to block on the read till the process exits.
3384 	 */
3385 
3386 	if (instance_list != NULL) {
3387 		void		*cookie = NULL;
3388 		instance_t	*inst;
3389 
3390 		while ((inst = uu_list_teardown(instance_list, &cookie)) !=
3391 		    NULL)
3392 			destroy_instance(inst);
3393 		uu_list_destroy(instance_list);
3394 	}
3395 	if (instance_pool != NULL)
3396 		uu_list_pool_destroy(instance_pool);
3397 	tlx_fini();
3398 	config_fini();
3399 	repval_fini();
3400 	poll_fini();
3401 
3402 	/* Close audit session */
3403 	(void) adt_end_session(audit_handle);
3404 }
3405 
3406 static int
3407 init(void)
3408 {
3409 	int err;
3410 
3411 	debug_msg("Entering init");
3412 
3413 	if (repval_init() < 0)
3414 		goto failed;
3415 
3416 	if (config_init() < 0)
3417 		goto failed;
3418 
3419 	if (tlx_init() < 0)
3420 		goto failed;
3421 
3422 	/* Setup instance list. */
3423 	if ((instance_pool = uu_list_pool_create("instance_pool",
3424 	    sizeof (instance_t), offsetof(instance_t, link), NULL,
3425 	    UU_LIST_POOL_DEBUG)) == NULL) {
3426 		error_msg("%s: %s",
3427 		    gettext("Failed to create instance pool"),
3428 		    uu_strerror(uu_error()));
3429 		goto failed;
3430 	}
3431 	if ((instance_list = uu_list_create(instance_pool, NULL, 0)) == NULL) {
3432 		error_msg("%s: %s",
3433 		    gettext("Failed to create instance list"),
3434 		    uu_strerror(uu_error()));
3435 		goto failed;
3436 	}
3437 
3438 	/*
3439 	 * Create event pipe to communicate events with the main event
3440 	 * loop and add it to the event loop's fdset.
3441 	 */
3442 	if (pipe(rst_event_pipe) < 0) {
3443 		error_msg("pipe: %s", strerror(errno));
3444 		goto failed;
3445 	}
3446 	/*
3447 	 * We only leave the producer end to block on reads/writes as we
3448 	 * can't afford to block in the main thread, yet need to in
3449 	 * the restarter event thread, so it can sit and wait for an
3450 	 * acknowledgement to be written to the pipe.
3451 	 */
3452 	disable_blocking(rst_event_pipe[PE_CONSUMER]);
3453 	if ((set_pollfd(rst_event_pipe[PE_CONSUMER], POLLIN)) == -1)
3454 		goto failed;
3455 
3456 	/*
3457 	 * Register with master restarter for managed service events. This
3458 	 * will fail, amongst other reasons, if inetd is already running.
3459 	 */
3460 	if ((err = restarter_bind_handle(RESTARTER_EVENT_VERSION,
3461 	    INETD_INSTANCE_FMRI, restarter_event_proxy, 0,
3462 	    &rst_event_handle)) != 0) {
3463 		error_msg(gettext(
3464 		    "Failed to register for restarter events: %s"),
3465 		    strerror(err));
3466 		goto failed;
3467 	}
3468 
3469 	if (contract_init() < 0)
3470 		goto failed;
3471 
3472 	if ((timer_queue = iu_tq_create()) == NULL) {
3473 		error_msg(gettext("Failed to create timer queue."));
3474 		goto failed;
3475 	}
3476 
3477 	if (uds_init() < 0)
3478 		goto failed;
3479 
3480 	if (method_init() < 0)
3481 		goto failed;
3482 
3483 	/* Initialize auditing session */
3484 	if (adt_start_session(&audit_handle, NULL, ADT_USE_PROC_DATA) != 0) {
3485 		error_msg(gettext("Unable to start audit session"));
3486 	}
3487 
3488 	/*
3489 	 * Initialize signal dispositions/masks
3490 	 */
3491 	(void) sigset(SIGHUP, sighup_handler);
3492 	(void) sigset(SIGTERM, sigterm_handler);
3493 	(void) sigignore(SIGINT);
3494 
3495 	return (0);
3496 
3497 failed:
3498 	fini();
3499 	return (-1);
3500 }
3501 
3502 static int
3503 start_method(void)
3504 {
3505 	int	i;
3506 	int	pipe_fds[2];
3507 	int	child;
3508 
3509 	debug_msg("ENTERING START_METHOD:");
3510 
3511 	/* Create pipe for child to notify parent of initialization success. */
3512 	if (pipe(pipe_fds) < 0) {
3513 		debug_msg("pipe: %s", strerror(errno));
3514 		return (SMF_EXIT_ERR_OTHER);
3515 	}
3516 
3517 	if ((child = fork()) == -1) {
3518 		debug_msg("fork: %s", strerror(errno));
3519 		(void) close(pipe_fds[PE_CONSUMER]);
3520 		(void) close(pipe_fds[PE_PRODUCER]);
3521 		return (SMF_EXIT_ERR_OTHER);
3522 	} else if (child > 0) {			/* parent */
3523 
3524 		/* Wait on child to return success of initialization. */
3525 		(void) close(pipe_fds[PE_PRODUCER]);
3526 		if ((safe_read(pipe_fds[PE_CONSUMER], &i, sizeof (i)) != 0) ||
3527 		    (i < 0)) {
3528 			error_msg(gettext(
3529 			    "Initialization failed, unable to start"));
3530 			(void) close(pipe_fds[PE_CONSUMER]);
3531 			/*
3532 			 * Batch all initialization errors as 'other' errors,
3533 			 * resulting in retries being attempted.
3534 			 */
3535 			return (SMF_EXIT_ERR_OTHER);
3536 		} else {
3537 			(void) close(pipe_fds[PE_CONSUMER]);
3538 			return (SMF_EXIT_OK);
3539 		}
3540 	} else {				/* child */
3541 		/*
3542 		 * Perform initialization and return success code down
3543 		 * the pipe.
3544 		 */
3545 		(void) close(pipe_fds[PE_CONSUMER]);
3546 		i = init();
3547 		if ((safe_write(pipe_fds[PE_PRODUCER], &i, sizeof (i)) < 0) ||
3548 		    (i < 0)) {
3549 			error_msg(gettext("pipe write failure: %s"),
3550 			    strerror(errno));
3551 			exit(1);
3552 		}
3553 		(void) close(pipe_fds[PE_PRODUCER]);
3554 
3555 		(void) setsid();
3556 
3557 		/*
3558 		 * Log a message if the configuration file has changed since
3559 		 * inetconv was last run.
3560 		 */
3561 		check_conf_file();
3562 
3563 		event_loop();
3564 
3565 		fini();
3566 		debug_msg("inetd stopped");
3567 		msg_fini();
3568 		exit(0);
3569 	}
3570 	/* NOTREACHED */
3571 }
3572 
3573 /*
3574  * When inetd is run from outside the SMF, this message is output to provide
3575  * the person invoking inetd with further information that will help them
3576  * understand how to start and stop inetd, and to achieve the other
3577  * behaviors achievable with the legacy inetd command line interface, if
3578  * it is possible.
3579  */
3580 static void
3581 legacy_usage(void)
3582 {
3583 	(void) fprintf(stderr,
3584 	    "inetd is now an smf(5) managed service and can no longer be run "
3585 	    "from the\n"
3586 	    "command line. To enable or disable inetd refer to svcadm(1M) on\n"
3587 	    "how to enable \"%s\", the inetd instance.\n"
3588 	    "\n"
3589 	    "The traditional inetd command line option mappings are:\n"
3590 	    "\t-d : there is no supported debug output\n"
3591 	    "\t-s : inetd is only runnable from within the SMF\n"
3592 	    "\t-t : See inetadm(1M) on how to enable TCP tracing\n"
3593 	    "\t-r : See inetadm(1M) on how to set a failure rate\n"
3594 	    "\n"
3595 	    "To specify an alternative configuration file see svccfg(1M)\n"
3596 	    "for how to modify the \"%s/%s\" string type property of\n"
3597 	    "the inetd instance, and modify it according to the syntax:\n"
3598 	    "\"%s [alt_config_file] %%m\".\n"
3599 	    "\n"
3600 	    "For further information on inetd see inetd(1M).\n",
3601 	    INETD_INSTANCE_FMRI, START_METHOD_ARG, SCF_PROPERTY_EXEC,
3602 	    INETD_PATH);
3603 }
3604 
3605 /*
3606  * Usage message printed out for usage errors when running under the SMF.
3607  */
3608 static void
3609 smf_usage(const char *arg0)
3610 {
3611 	error_msg("Usage: %s [alt_conf_file] %s|%s|%s", arg0, START_METHOD_ARG,
3612 	    STOP_METHOD_ARG, REFRESH_METHOD_ARG);
3613 }
3614 
3615 /*
3616  * Returns B_TRUE if we're being run from within the SMF, else B_FALSE.
3617  */
3618 static boolean_t
3619 run_through_smf(void)
3620 {
3621 	char *fmri;
3622 
3623 	/*
3624 	 * check if the instance fmri environment variable has been set by
3625 	 * our restarter.
3626 	 */
3627 	return (((fmri = getenv("SMF_FMRI")) != NULL) &&
3628 	    (strcmp(fmri, INETD_INSTANCE_FMRI) == 0));
3629 }
3630 
3631 int
3632 main(int argc, char *argv[])
3633 {
3634 	char		*method;
3635 	int		ret;
3636 
3637 #if	!defined(TEXT_DOMAIN)
3638 #define	TEXT_DOMAIN "SYS_TEST"
3639 #endif
3640 	(void) textdomain(TEXT_DOMAIN);
3641 	(void) setlocale(LC_ALL, "");
3642 
3643 	if (!run_through_smf()) {
3644 		legacy_usage();
3645 		return (SMF_EXIT_ERR_NOSMF);
3646 	}
3647 
3648 	msg_init();	/* setup logging */
3649 
3650 	/* inetd invocation syntax is inetd [alt_conf_file] method_name */
3651 
3652 	switch (argc) {
3653 	case 2:
3654 		method = argv[1];
3655 		break;
3656 	case 3:
3657 		conf_file = argv[1];
3658 		method = argv[2];
3659 		break;
3660 	default:
3661 		smf_usage(argv[0]);
3662 		return (SMF_EXIT_ERR_CONFIG);
3663 
3664 	}
3665 
3666 	if (strcmp(method, START_METHOD_ARG) == 0) {
3667 		ret = start_method();
3668 	} else if (strcmp(method, STOP_METHOD_ARG) == 0) {
3669 		ret = stop_method();
3670 	} else if (strcmp(method, REFRESH_METHOD_ARG) == 0) {
3671 		ret = refresh_method();
3672 	} else {
3673 		smf_usage(argv[0]);
3674 		return (SMF_EXIT_ERR_CONFIG);
3675 	}
3676 
3677 	return (ret);
3678 }
3679