xref: /titanic_51/usr/src/cmd/cmd-inet/usr.lib/inetd/inetd.c (revision 1d842814faabbb6af3e2fe30a4bd61aa4a70eeb3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * NOTES: To be expanded.
30  *
31  * The SMF inetd.
32  *
33  * Below are some high level notes of the operation of the SMF inetd. The
34  * notes don't go into any real detail, and the viewer of this file is
35  * encouraged to look at the code and its associated comments to better
36  * understand inetd's operation. This saves the potential for the code
37  * and these notes diverging over time.
38  *
39  * Inetd's major work is done from the context of event_loop(). Within this
40  * loop, inetd polls for events arriving from a number of different file
41  * descriptors, representing the following event types, and initiates
42  * any necessary event processing:
43  * - incoming network connections/datagrams.
44  * - notification of terminated processes (discovered via contract events).
45  * - instance specific events originating from the SMF master restarter.
46  * - stop/refresh requests from the inetd method processes (coming in on a
47  *   Unix Domain socket).
48  * There's also a timeout set for the poll, which is set to the nearest
49  * scheduled timer in a timer queue that inetd uses to perform delayed
50  * processing, such as bind retries.
51  * The SIGHUP and SIGINT signals can also interrupt the poll, and will
52  * result in inetd being refreshed or stopped respectively, as was the
53  * behavior with the old inetd.
54  *
55  * Inetd implements a state machine for each instance. The states within the
56  * machine are: offline, online, disabled, maintenance, uninitialized and
57  * specializations of the offline state for when an instance exceeds one of
58  * its DOS limits. The state of an instance can be changed as a
59  * result/side-effect of one of the above events occurring, or inetd being
60  * started up. The ongoing state of an instance is stored in the SMF
61  * repository, as required of SMF restarters. This enables an administrator
62  * to view the state of each instance, and, if inetd was to terminate
63  * unexpectedly, it could use the stored state to re-commence where it left off.
64  *
65  * Within the state machine a number of methods are run (if provided) as part
66  * of a state transition to aid/ effect a change in an instance's state. The
67  * supported methods are: offline, online, disable, refresh and start. The
68  * latter of these is the equivalent of the server program and its arguments
69  * in the old inetd.
70  *
 * Events from the SMF master restarter come in on a number of threads
 * created in the registration routine of librestart, the delegated restarter
 * library. These threads call into the restarter_event_proxy() function
 * when an event arrives. To serialize the processing of instances, these events
 * are then written down a pipe to the process's main thread, which listens
 * for these events via a poll call, with the file descriptor of the other
 * end of the pipe in its read set, and processes the event appropriately.
 * When the event has been processed (which may be delayed if the instance
 * that the event is for is in the process of executing one of its methods
 * as part of a state transition) it writes an acknowledgement back down the
 * pipe the event was received on. The thread in restarter_event_proxy() that
 * wrote the event will read the acknowledgement it was blocked upon, and will
 * then be able to return to its caller, thus implicitly acknowledging the
 * event, and allowing another event to be written down the pipe for the main
 * thread to process.
86  */
87 
88 
89 #include <netdb.h>
90 #include <stdio.h>
91 #include <stdio_ext.h>
92 #include <stdlib.h>
93 #include <strings.h>
94 #include <unistd.h>
95 #include <assert.h>
96 #include <sys/types.h>
97 #include <sys/socket.h>
98 #include <netinet/in.h>
99 #include <fcntl.h>
100 #include <signal.h>
101 #include <errno.h>
102 #include <locale.h>
103 #include <syslog.h>
104 #include <libintl.h>
105 #include <librestart.h>
106 #include <pthread.h>
107 #include <sys/stat.h>
108 #include <time.h>
109 #include <limits.h>
110 #include <libgen.h>
111 #include <tcpd.h>
112 #include <libscf.h>
113 #include <libuutil.h>
114 #include <stddef.h>
115 #include <bsm/adt_event.h>
116 #include "inetd_impl.h"
117 
118 /* path to inetd's binary */
119 #define	INETD_PATH	"/usr/lib/inet/inetd"
120 
/*
 * inetd's default configuration file paths. /etc/inet/inetd.conf is set
 * to be the primary file, so it is checked before /etc/inetd.conf.
 */
125 #define	PRIMARY_DEFAULT_CONF_FILE	"/etc/inet/inetd.conf"
126 #define	SECONDARY_DEFAULT_CONF_FILE	"/etc/inetd.conf"
127 
128 /* Arguments passed to this binary to request which method to execute. */
129 #define	START_METHOD_ARG	"start"
130 #define	STOP_METHOD_ARG		"stop"
131 #define	REFRESH_METHOD_ARG	"refresh"
132 
133 /* connection backlog for unix domain socket */
134 #define	UDS_BACKLOG	2
135 
136 /* number of retries to recv() a request on the UDS socket before giving up */
137 #define	UDS_RECV_RETRIES	10
138 
/*
 * Indices into a two-element pipe fd array (such as rst_event_pipe), naming
 * which end of the pipe each slot holds.
 */
enum pipe_end {
	PE_CONSUMER = 0,	/* end used by the main thread */
	PE_PRODUCER = 1		/* end used by the event proxy threads */
};
144 
145 typedef struct {
146 	internal_inst_state_t		istate;
147 	const char			*name;
148 	restarter_instance_state_t	smf_state;
149 	instance_method_t		method_running;
150 } state_info_t;
151 
152 
153 /*
154  * Collection of information for each state.
155  * NOTE:  This table is indexed into using the internal_inst_state_t
156  * enumeration, so the ordering needs to be kept in synch.
157  */
158 static state_info_t states[] = {
159 	{IIS_UNINITIALIZED, "uninitialized", RESTARTER_STATE_UNINIT,
160 	    IM_NONE},
161 	{IIS_ONLINE, "online", RESTARTER_STATE_ONLINE, IM_START},
162 	{IIS_IN_ONLINE_METHOD, "online_method", RESTARTER_STATE_OFFLINE,
163 	    IM_ONLINE},
164 	{IIS_OFFLINE, "offline", RESTARTER_STATE_OFFLINE, IM_NONE},
165 	{IIS_IN_OFFLINE_METHOD, "offline_method", RESTARTER_STATE_OFFLINE,
166 	    IM_OFFLINE},
167 	{IIS_DISABLED, "disabled", RESTARTER_STATE_DISABLED, IM_NONE},
168 	{IIS_IN_DISABLE_METHOD, "disabled_method", RESTARTER_STATE_OFFLINE,
169 	    IM_DISABLE},
170 	{IIS_IN_REFRESH_METHOD, "refresh_method", RESTARTER_STATE_ONLINE,
171 	    IM_REFRESH},
172 	{IIS_MAINTENANCE, "maintenance", RESTARTER_STATE_MAINT, IM_NONE},
173 	{IIS_OFFLINE_CONRATE, "cr_offline", RESTARTER_STATE_OFFLINE, IM_NONE},
174 	{IIS_OFFLINE_BIND, "bind_offline", RESTARTER_STATE_OFFLINE, IM_NONE},
175 	{IIS_OFFLINE_COPIES, "copies_offline", RESTARTER_STATE_OFFLINE,
176 	    IM_NONE},
177 	{IIS_DEGRADED, "degraded", RESTARTER_STATE_DEGRADED, IM_NONE},
178 	{IIS_NONE, "none", RESTARTER_STATE_NONE, IM_NONE}
179 };
180 
181 /*
182  * Pipe used to send events from the threads created by restarter_bind_handle()
183  * to the main thread of control.
184  */
185 static int			rst_event_pipe[] = {-1, -1};
186 /*
187  * Used to protect the critical section of code in restarter_event_proxy() that
188  * involves writing an event down the event pipe and reading an acknowledgement.
189  */
190 static pthread_mutex_t		rst_event_pipe_mtx = PTHREAD_MUTEX_INITIALIZER;
191 
192 /* handle used in communication with the master restarter */
193 static restarter_event_handle_t *rst_event_handle = NULL;
194 
195 /* set to indicate a refresh of inetd is requested */
196 static boolean_t		refresh_inetd_requested = B_FALSE;
197 
198 /* set by the SIGTERM handler to flag we got a SIGTERM */
199 static boolean_t		got_sigterm = B_FALSE;
200 
201 /*
202  * Timer queue used to store timers for delayed event processing, such as
203  * bind retries.
204  */
205 iu_tq_t				*timer_queue = NULL;
206 
207 /*
208  * fd of Unix Domain socket used to communicate stop and refresh requests
209  * to the inetd start method process.
210  */
211 static int			uds_fd = -1;
212 
213 /*
214  * List of inetd's currently managed instances; each containing its state,
215  * and in certain states its configuration.
216  */
217 static uu_list_pool_t		*instance_pool = NULL;
218 uu_list_t			*instance_list = NULL;
219 
220 /* set to indicate we're being stopped */
221 boolean_t			inetd_stopping = B_FALSE;
222 
223 /* TCP wrappers syslog globals. Consumed by libwrap. */
224 int				allow_severity = LOG_INFO;
225 int				deny_severity = LOG_WARNING;
226 
227 /* path of the configuration file being monitored by check_conf_file() */
228 static char			*conf_file = NULL;
229 
230 /* Auditing session handle */
231 static adt_session_data_t	*audit_handle;
232 
233 static void uds_fini(void);
234 static int uds_init(void);
235 static int run_method(instance_t *, instance_method_t, const proto_info_t *);
236 static void create_bound_fds(instance_t *);
237 static void destroy_bound_fds(instance_t *);
238 static void destroy_instance(instance_t *);
239 static void inetd_stop(void);
240 static void
241 exec_method(instance_t *instance, instance_method_t method, method_info_t *mi,
242     struct method_context *mthd_ctxt, const proto_info_t *pi) __NORETURN;
243 
244 /*
245  * The following two functions are callbacks that libumem uses to determine
246  * inetd's desired debugging/logging levels. The interface they consume is
247  * exported by FMA and is consolidation private. The comments in the two
248  * functions give the environment variable that will effectively be set to
249  * their returned value, and thus whose behavior for this value, described in
250  * umem_debug(3MALLOC), will be followed.
251  */
252 
const char *
_umem_debug_init(void)
{
	/* UMEM_DEBUG setting */
	static const char umem_debug_setting[] = "default,verbose";

	return (umem_debug_setting);
}
258 
const char *
_umem_logging_init(void)
{
	/* UMEM_LOGGING setting */
	static const char umem_logging_setting[] = "fail,contents";

	return (umem_logging_setting);
}
264 
/*
 * Reports, via error_msg(), that the configuration of instance 'fmri' is
 * invalid and that the instance is being placed in maintenance.
 */
static void
log_invalid_cfg(const char *fmri)
{
	const char *fmt = gettext(
	    "Invalid configuration for instance %s, placing in maintenance");

	error_msg(fmt, fmri);
}
272 
273 /*
274  * Returns B_TRUE if the instance is in a suitable state for inetd to stop.
275  */
276 static boolean_t
277 instance_stopped(const instance_t *inst)
278 {
279 	return ((inst->cur_istate == IIS_OFFLINE) ||
280 	    (inst->cur_istate == IIS_MAINTENANCE) ||
281 	    (inst->cur_istate == IIS_DISABLED) ||
282 	    (inst->cur_istate == IIS_UNINITIALIZED));
283 }
284 
285 /*
286  * Updates the current and next repository states of instance 'inst'. If
287  * any errors occur an error message is output.
288  */
289 static void
290 update_instance_states(instance_t *inst, internal_inst_state_t new_cur_state,
291     internal_inst_state_t new_next_state, restarter_error_t err)
292 {
293 	internal_inst_state_t	old_cur = inst->cur_istate;
294 	internal_inst_state_t	old_next = inst->next_istate;
295 	scf_error_t		sret;
296 	int			ret;
297 
298 	debug_msg("Entering update_instance_states: oldcur: %s, newcur: %s "
299 	    "oldnext: %s, newnext: %s", states[old_cur].name,
300 	    states[new_cur_state].name, states[old_next].name,
301 	    states[new_next_state].name);
302 
303 
304 	/* update the repository/cached internal state */
305 	inst->cur_istate = new_cur_state;
306 	inst->next_istate = new_next_state;
307 	(void) set_single_rep_val(inst->cur_istate_rep,
308 	    (int64_t)new_cur_state);
309 	(void) set_single_rep_val(inst->next_istate_rep,
310 	    (int64_t)new_next_state);
311 
312 	if (((sret = store_rep_vals(inst->cur_istate_rep, inst->fmri,
313 	    PR_NAME_CUR_INT_STATE)) != 0) ||
314 	    ((sret = store_rep_vals(inst->next_istate_rep, inst->fmri,
315 	    PR_NAME_NEXT_INT_STATE)) != 0))
316 		error_msg(gettext("Failed to update state of instance %s in "
317 		    "repository: %s"), inst->fmri, scf_strerror(sret));
318 
319 	/* update the repository SMF state */
320 	if ((ret = restarter_set_states(rst_event_handle, inst->fmri,
321 	    states[old_cur].smf_state, states[new_cur_state].smf_state,
322 	    states[old_next].smf_state, states[new_next_state].smf_state,
323 	    err, 0)) != 0)
324 		error_msg(gettext("Failed to update state of instance %s in "
325 		    "repository: %s"), inst->fmri, strerror(ret));
326 
327 }
328 
329 void
330 update_state(instance_t *inst, internal_inst_state_t new_cur,
331     restarter_error_t err)
332 {
333 	update_instance_states(inst, new_cur, IIS_NONE, err);
334 }
335 
336 /*
337  * Sends a refresh event to the inetd start method process and returns
338  * SMF_EXIT_OK if it managed to send it. If it fails to send the request for
339  * some reason it returns SMF_EXIT_ERR_OTHER.
340  */
341 static int
342 refresh_method(void)
343 {
344 	uds_request_t   req = UR_REFRESH_INETD;
345 	int		fd;
346 
347 	debug_msg("Entering refresh_method");
348 
349 	if ((fd = connect_to_inetd()) < 0) {
350 		error_msg(gettext("Failed to connect to inetd: %s"),
351 		    strerror(errno));
352 		return (SMF_EXIT_ERR_OTHER);
353 	}
354 
355 	/* write the request and return success */
356 	if (safe_write(fd, &req, sizeof (req)) == -1) {
357 		error_msg(
358 		    gettext("Failed to send refresh request to inetd: %s"),
359 		    strerror(errno));
360 		(void) close(fd);
361 		return (SMF_EXIT_ERR_OTHER);
362 	}
363 
364 	(void) close(fd);
365 
366 	return (SMF_EXIT_OK);
367 }
368 
369 /*
370  * Sends a stop event to the inetd start method process and wait till it goes
371  * away. If inetd is determined to have stopped SMF_EXIT_OK is returned, else
372  * SMF_EXIT_ERR_OTHER is returned.
373  */
374 static int
375 stop_method(void)
376 {
377 	uds_request_t   req = UR_STOP_INETD;
378 	int		fd;
379 	char		c;
380 	ssize_t		ret;
381 
382 	debug_msg("Entering stop_method");
383 
384 	if ((fd = connect_to_inetd()) == -1) {
385 		debug_msg(gettext("Failed to connect to inetd: %s"),
386 		    strerror(errno));
387 		/*
388 		 * Assume connect_to_inetd() failed because inetd was already
389 		 * stopped, and return success.
390 		 */
391 		return (SMF_EXIT_OK);
392 	}
393 
394 	/*
395 	 * This is safe to do since we're fired off in a separate process
396 	 * than inetd and in the case we get wedged, the stop method timeout
397 	 * will occur and we'd be killed by our restarter.
398 	 */
399 	enable_blocking(fd);
400 
401 	/* write the stop request to inetd and wait till it goes away */
402 	if (safe_write(fd, &req, sizeof (req)) != 0) {
403 		error_msg(gettext("Failed to send stop request to inetd"));
404 		(void) close(fd);
405 		return (SMF_EXIT_ERR_OTHER);
406 	}
407 
408 	/* wait until remote end of socket is closed */
409 	while (((ret = recv(fd, &c, sizeof (c), 0)) != 0) && (errno == EINTR))
410 		;
411 
412 	(void) close(fd);
413 
414 	if (ret != 0) {
415 		error_msg(gettext("Failed to determine whether inetd stopped"));
416 		return (SMF_EXIT_ERR_OTHER);
417 	}
418 
419 	return (SMF_EXIT_OK);
420 }
421 
422 
423 /*
424  * This function is called to handle restarter events coming in from the
425  * master restarter. It is registered with the master restarter via
426  * restarter_bind_handle() and simply passes a pointer to the event down
427  * the event pipe, which will be discovered by the poll in the event loop
428  * and processed there. It waits for an acknowledgement to be written back down
429  * the pipe before returning.
430  * Writing a pointer to the function's 'event' parameter down the pipe will
431  * be safe, as the thread in restarter_event_proxy() doesn't return until
432  * the main thread has finished its processing of the passed event, thus
433  * the referenced event will remain around until the function returns.
434  * To impose the limit of only one event being in the pipe and processed
435  * at once, a lock is taken on entry to this function and returned on exit.
436  * Always returns 0.
437  */
438 static int
439 restarter_event_proxy(restarter_event_t *event)
440 {
441 	restarter_event_type_t  ev_type;
442 	boolean_t		processed;
443 
444 	debug_msg("Entering restarter_event_proxy");
445 	ev_type = restarter_event_get_type(event);
446 	debug_msg("event: %x, event type: %d", event, ev_type);
447 
448 	(void) pthread_mutex_lock(&rst_event_pipe_mtx);
449 
450 	/* write the event to the main worker thread down the pipe */
451 	if (safe_write(rst_event_pipe[PE_PRODUCER], &event,
452 	    sizeof (event)) != 0)
453 		goto pipe_error;
454 
455 	/*
456 	 * Wait for an acknowledgement that the event has been processed from
457 	 * the same pipe. In the case that inetd is stopping, any thread in
458 	 * this function will simply block on this read until inetd eventually
459 	 * exits. This will result in this function not returning success to
460 	 * its caller, and the event that was being processed when the
461 	 * function exited will be re-sent when inetd is next started.
462 	 */
463 	if (safe_read(rst_event_pipe[PE_PRODUCER], &processed,
464 	    sizeof (processed)) != 0)
465 		goto pipe_error;
466 
467 	(void) pthread_mutex_unlock(&rst_event_pipe_mtx);
468 
469 	return (processed ? 0 : EAGAIN);
470 
471 pipe_error:
472 	/*
473 	 * Something's seriously wrong with the event pipe. Notify the
474 	 * worker thread by closing this end of the event pipe and pause till
475 	 * inetd exits.
476 	 */
477 	error_msg(gettext("Can't process restarter events: %s"),
478 	    strerror(errno));
479 	(void) close(rst_event_pipe[PE_PRODUCER]);
480 	for (;;)
481 		(void) pause();
482 
483 	/* NOTREACHED */
484 }
485 
486 /*
487  * Let restarter_event_proxy() know we're finished with the event it's blocked
488  * upon. The 'processed' argument denotes whether we successfully processed the
489  * event.
490  */
491 static void
492 ack_restarter_event(boolean_t processed)
493 {
494 	debug_msg("Entering ack_restarter_event");
495 
496 	/*
497 	 * If safe_write returns -1 something's seriously wrong with the event
498 	 * pipe, so start the shutdown proceedings.
499 	 */
500 	if (safe_write(rst_event_pipe[PE_CONSUMER], &processed,
501 	    sizeof (processed)) == -1)
502 		inetd_stop();
503 }
504 
/*
 * Re-opens the syslog connection so that subsequent messages are tagged with
 * the identification string 'ident'.
 */
static void
change_syslog_ident(const char *ident)
{
	const int log_options = LOG_PID|LOG_CONS;

	debug_msg("Entering change_syslog_ident: ident: %s", ident);

	closelog();
	openlog(ident, log_options, LOG_DAEMON);
}
516 
517 /*
518  * Perform TCP wrappers checks on this instance. Due to the fact that the
519  * current wrappers code used in Solaris is taken untouched from the open
520  * source version, we're stuck with using the daemon name for the checks, as
521  * opposed to making use of instance FMRIs. Sigh.
522  * Returns B_TRUE if the check passed, else B_FALSE.
523  */
524 static boolean_t
525 tcp_wrappers_ok(instance_t *instance)
526 {
527 	boolean_t		rval = B_TRUE;
528 	char			*daemon_name;
529 	basic_cfg_t		*cfg = instance->config->basic;
530 	struct request_info	req;
531 
532 	debug_msg("Entering tcp_wrappers_ok, instance: %s", instance->fmri);
533 
534 	/*
535 	 * Wrap the service using libwrap functions. The code below implements
536 	 * the functionality of tcpd. This is done only for stream,nowait
537 	 * services, following the convention of other vendors.  udp/dgram and
538 	 * stream/wait can NOT be wrapped with this libwrap, so be wary of
539 	 * changing the test below.
540 	 */
541 	if (cfg->do_tcp_wrappers && !cfg->iswait && !cfg->istlx) {
542 
543 		daemon_name = instance->config->methods[
544 		    IM_START]->exec_args_we.we_wordv[0];
545 		if (*daemon_name == '/')
546 			daemon_name = strrchr(daemon_name, '/') + 1;
547 
548 		/*
549 		 * Change the syslog message identity to the name of the
550 		 * daemon being wrapped, as opposed to "inetd".
551 		 */
552 		change_syslog_ident(daemon_name);
553 
554 		(void) request_init(&req, RQ_DAEMON, daemon_name, RQ_FILE,
555 		    instance->conn_fd, NULL);
556 		fromhost(&req);
557 
558 		if (strcasecmp(eval_hostname(req.client), paranoid) == 0) {
559 			syslog(deny_severity,
560 			    "refused connect from %s (name/address mismatch)",
561 			    eval_client(&req));
562 			if (req.sink != NULL)
563 				req.sink(instance->conn_fd);
564 			rval = B_FALSE;
565 		} else if (!hosts_access(&req)) {
566 			syslog(deny_severity,
567 			    "refused connect from %s (access denied)",
568 			    eval_client(&req));
569 			if (req.sink != NULL)
570 				req.sink(instance->conn_fd);
571 			rval = B_FALSE;
572 		} else {
573 			syslog(allow_severity, "connect from %s",
574 			    eval_client(&req));
575 		}
576 
577 		/* Revert syslog identity back to "inetd". */
578 		change_syslog_ident(SYSLOG_IDENT);
579 	}
580 	return (rval);
581 }
582 
583 /*
584  * Handler registered with the timer queue code to remove an instance from
585  * the connection rate offline state when it has been there for its allotted
586  * time.
587  */
588 /* ARGSUSED */
589 static void
590 conn_rate_online(iu_tq_t *tq, void *arg)
591 {
592 	instance_t *instance = arg;
593 
594 	debug_msg("Entering conn_rate_online, instance: %s",
595 	    instance->fmri);
596 
597 	assert(instance->cur_istate == IIS_OFFLINE_CONRATE);
598 	instance->timer_id = -1;
599 	update_state(instance, IIS_OFFLINE, RERR_RESTART);
600 	process_offline_inst(instance);
601 }
602 
603 /*
604  * Check whether this instance in the offline state is in transition to
605  * another state and do the work to continue this transition.
606  */
607 void
608 process_offline_inst(instance_t *inst)
609 {
610 	debug_msg("Entering process_offline_inst");
611 
612 	if (inst->disable_req) {
613 		inst->disable_req = B_FALSE;
614 		(void) run_method(inst, IM_DISABLE, NULL);
615 	} else if (inst->maintenance_req) {
616 		inst->maintenance_req = B_FALSE;
617 		update_state(inst, IIS_MAINTENANCE, RERR_RESTART);
618 	/*
619 	 * If inetd is in the process of stopping, we don't want to enter
620 	 * any states but offline, disabled and maintenance.
621 	 */
622 	} else if (!inetd_stopping) {
623 		if (inst->conn_rate_exceeded) {
624 			basic_cfg_t *cfg = inst->config->basic;
625 
626 			inst->conn_rate_exceeded = B_FALSE;
627 			update_state(inst, IIS_OFFLINE_CONRATE, RERR_RESTART);
628 			/*
629 			 * Schedule a timer to bring the instance out of the
630 			 * connection rate offline state.
631 			 */
632 			inst->timer_id = iu_schedule_timer(timer_queue,
633 			    cfg->conn_rate_offline, conn_rate_online,
634 			    inst);
635 			if (inst->timer_id == -1) {
636 				error_msg(gettext("%s unable to set timer, "
637 				    "won't be brought on line after %d "
638 				    "seconds."), inst->fmri,
639 				    cfg->conn_rate_offline);
640 			}
641 
642 		} else if (copies_limit_exceeded(inst)) {
643 			update_state(inst, IIS_OFFLINE_COPIES, RERR_RESTART);
644 		}
645 	}
646 }
647 
648 /*
649  * Create a socket bound to the instance's configured address. If the
650  * bind fails, returns -1, else the fd of the bound socket.
651  */
652 static int
653 create_bound_socket(const char *fmri, socket_info_t *sock_info)
654 {
655 	int		fd;
656 	int		on = 1;
657 	rpc_info_t	*rpc = sock_info->pr_info.ri;
658 	const char	*proto = sock_info->pr_info.proto;
659 
660 	debug_msg("Entering create_bound_socket");
661 
662 	fd = socket(sock_info->local_addr.ss_family, sock_info->type,
663 	    sock_info->protocol);
664 	if (fd < 0) {
665 		error_msg(gettext(
666 		    "Socket creation failure for instance %s, proto %s: %s"),
667 		    fmri, proto, strerror(errno));
668 		return (-1);
669 	}
670 
671 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on)) == -1) {
672 		error_msg(gettext("setsockopt SO_REUSEADDR failed for service "
673 		    "instance %s, proto %s: %s"), fmri, proto, strerror(errno));
674 		(void) close(fd);
675 		return (-1);
676 	}
677 	if (sock_info->pr_info.v6only) {
678 		/* restrict socket to IPv6 communications only */
679 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on,
680 		    sizeof (on)) == -1) {
681 			error_msg(gettext("setsockopt IPV6_V6ONLY failed for "
682 			    "service instance %s, proto %s: %s"), fmri, proto,
683 			    strerror(errno));
684 			(void) close(fd);
685 			return (-1);
686 		}
687 	}
688 
689 	if (rpc != NULL)
690 		SS_SETPORT(sock_info->local_addr, 0);
691 
692 	if (bind(fd, (struct sockaddr *)&(sock_info->local_addr),
693 	    SS_ADDRLEN(sock_info->local_addr)) < 0) {
694 		error_msg(gettext(
695 		    "Failed to bind to the port of service instance %s, "
696 		    "proto %s: %s"), fmri, proto, strerror(errno));
697 		(void) close(fd);
698 		return (-1);
699 	}
700 
701 	/*
702 	 * Retrieve and store the address bound to for RPC services.
703 	 */
704 	if (rpc != NULL) {
705 		struct sockaddr_storage	ss;
706 		int			ss_size = sizeof (ss);
707 
708 		if (getsockname(fd, (struct sockaddr *)&ss, &ss_size) < 0) {
709 			error_msg(gettext("Failed getsockname for instance %s, "
710 			    "proto %s: %s"), fmri, proto, strerror(errno));
711 			(void) close(fd);
712 			return (-1);
713 		}
714 		(void) memcpy(rpc->netbuf.buf, &ss,
715 		    sizeof (struct sockaddr_storage));
716 		rpc->netbuf.len = SS_ADDRLEN(ss);
717 		rpc->netbuf.maxlen = SS_ADDRLEN(ss);
718 	}
719 
720 	if (sock_info->type == SOCK_STREAM)
721 		(void) listen(fd, CONNECTION_BACKLOG);
722 
723 	return (fd);
724 }
725 
726 /*
727  * Handler registered with the timer queue code to retry the creation
728  * of a bound fd.
729  */
730 /* ARGSUSED */
731 static void
732 retry_bind(iu_tq_t *tq, void *arg)
733 {
734 	instance_t *instance = arg;
735 
736 	debug_msg("Entering retry_bind, instance: %s", instance->fmri);
737 
738 	switch (instance->cur_istate) {
739 	case IIS_OFFLINE_BIND:
740 	case IIS_ONLINE:
741 	case IIS_DEGRADED:
742 	case IIS_IN_ONLINE_METHOD:
743 	case IIS_IN_REFRESH_METHOD:
744 		break;
745 	default:
746 #ifndef NDEBUG
747 		(void) fprintf(stderr, "%s:%d: Unknown instance state %d.\n",
748 		    __FILE__, __LINE__, instance->cur_istate);
749 #endif
750 		abort();
751 	}
752 
753 	instance->bind_timer_id = -1;
754 	create_bound_fds(instance);
755 }
756 
757 /*
758  * For each of the fds for the given instance that are bound, if 'listen' is
759  * set add them to the poll set, else remove them from it. If any additions
760  * fail, returns -1, else 0 on success.
761  */
762 int
763 poll_bound_fds(instance_t *instance, boolean_t listen)
764 {
765 	basic_cfg_t	*cfg = instance->config->basic;
766 	proto_info_t	*pi;
767 	int		ret = 0;
768 
769 	debug_msg("Entering poll_bound_fds: instance: %s, on: %d",
770 	    instance->fmri, listen);
771 
772 	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
773 	    pi = uu_list_next(cfg->proto_list, pi)) {
774 		if (pi->listen_fd != -1) {	/* fd bound */
775 			if (!listen) {
776 				clear_pollfd(pi->listen_fd);
777 			} else if (set_pollfd(pi->listen_fd, POLLIN) == -1) {
778 				ret = -1;
779 			}
780 		}
781 	}
782 
783 	return (ret);
784 }
785 
786 /*
787  * Handle the case were we either fail to create a bound fd or we fail
788  * to add a bound fd to the poll set for the given instance.
789  */
790 static void
791 handle_bind_failure(instance_t *instance)
792 {
793 	basic_cfg_t *cfg = instance->config->basic;
794 
795 	debug_msg("Entering handle_bind_failure: instance: %s", instance);
796 
797 	/*
798 	 * We must be being called as a result of a failed poll_bound_fds()
799 	 * as a bind retry is already scheduled. Just return and let it do
800 	 * the work.
801 	 */
802 	if (instance->bind_timer_id != -1)
803 		return;
804 
805 	/*
806 	 * Check if the rebind retries limit is operative and if so,
807 	 * if it has been reached.
808 	 */
809 	if (((cfg->bind_fail_interval <= 0) ||		/* no retries */
810 	    ((cfg->bind_fail_max >= 0) &&		/* limit reached */
811 	    (++instance->bind_fail_count > cfg->bind_fail_max))) ||
812 	    ((instance->bind_timer_id = iu_schedule_timer(timer_queue,
813 	    cfg->bind_fail_interval, retry_bind, instance)) == -1)) {
814 		proto_info_t *pi;
815 
816 		instance->bind_fail_count = 0;
817 
818 		switch (instance->cur_istate) {
819 		case IIS_DEGRADED:
820 		case IIS_ONLINE:
821 			/* check if any of the fds are being poll'd upon */
822 			for (pi = uu_list_first(cfg->proto_list); pi != NULL;
823 			    pi = uu_list_next(cfg->proto_list, pi)) {
824 				if ((pi->listen_fd != -1) &&
825 				    (find_pollfd(pi->listen_fd) != NULL))
826 					break;
827 			}
828 			if (pi != NULL)	{	/* polling on > 0 fds */
829 				warn_msg(gettext("Failed to bind on "
830 				    "all protocols for instance %s, "
831 				    "transitioning to degraded"),
832 				    instance->fmri);
833 				update_state(instance, IIS_DEGRADED, RERR_NONE);
834 				instance->bind_retries_exceeded = B_TRUE;
835 				break;
836 			}
837 
838 			destroy_bound_fds(instance);
839 			/*
840 			 * In the case we failed the 'bind' because set_pollfd()
841 			 * failed on all bound fds, use the offline handling.
842 			 */
843 			/* FALLTHROUGH */
844 		case IIS_OFFLINE:
845 		case IIS_OFFLINE_BIND:
846 			error_msg(gettext("Too many bind failures for instance "
847 			"%s, transitioning to maintenance"), instance->fmri);
848 			update_state(instance, IIS_MAINTENANCE,
849 			    RERR_FAULT);
850 			break;
851 		case IIS_IN_ONLINE_METHOD:
852 		case IIS_IN_REFRESH_METHOD:
853 			warn_msg(gettext("Failed to bind on all "
854 			    "protocols for instance %s, instance will go to "
855 			    "degraded"), instance->fmri);
856 			/*
857 			 * Set the retries exceeded flag so when the method
858 			 * completes the instance goes to the degraded state.
859 			 */
860 			instance->bind_retries_exceeded = B_TRUE;
861 			break;
862 		default:
863 #ifndef NDEBUG
864 			(void) fprintf(stderr,
865 			    "%s:%d: Unknown instance state %d.\n",
866 			    __FILE__, __LINE__, instance->cur_istate);
867 #endif
868 			abort();
869 		}
870 	} else if (instance->cur_istate == IIS_OFFLINE) {
871 		/*
872 		 * bind re-scheduled, so if we're offline reflect this in the
873 		 * state.
874 		 */
875 		update_state(instance, IIS_OFFLINE_BIND, RERR_NONE);
876 	}
877 }
878 
879 
880 /*
881  * Check if two transport protocols for RPC conflict.
882  */
883 
884 boolean_t
885 is_rpc_proto_conflict(const char *proto0, const char *proto1) {
886 	if (strcmp(proto0, "tcp") == 0) {
887 		if (strcmp(proto1, "tcp") == 0)
888 			return (B_TRUE);
889 		if (strcmp(proto1, "tcp6") == 0)
890 			return (B_TRUE);
891 		return (B_FALSE);
892 	}
893 
894 	if (strcmp(proto0, "tcp6") == 0) {
895 		if (strcmp(proto1, "tcp") == 0)
896 			return (B_TRUE);
897 		if (strcmp(proto1, "tcp6only") == 0)
898 			return (B_TRUE);
899 		if (strcmp(proto1, "tcp6") == 0)
900 			return (B_TRUE);
901 		return (B_FALSE);
902 	}
903 
904 	if (strcmp(proto0, "tcp6only") == 0) {
905 		if (strcmp(proto1, "tcp6only") == 0)
906 			return (B_TRUE);
907 		if (strcmp(proto1, "tcp6") == 0)
908 			return (B_TRUE);
909 		return (B_FALSE);
910 	}
911 
912 	if (strcmp(proto0, "udp") == 0) {
913 		if (strcmp(proto1, "udp") == 0)
914 			return (B_TRUE);
915 		if (strcmp(proto1, "udp6") == 0)
916 			return (B_TRUE);
917 		return (B_FALSE);
918 	}
919 
920 	if (strcmp(proto0, "udp6") == 0) {
921 
922 		if (strcmp(proto1, "udp") == 0)
923 			return (B_TRUE);
924 		if (strcmp(proto1, "udp6only") == 0)
925 			return (B_TRUE);
926 		if (strcmp(proto1, "udp6") == 0)
927 			return (B_TRUE);
928 		return (B_FALSE);
929 	}
930 
931 	if (strcmp(proto0, "udp6only") == 0) {
932 
933 		if (strcmp(proto1, "udp6only") == 0)
934 			return (B_TRUE);
935 		if (strcmp(proto1, "udp6") == 0)
936 			return (B_TRUE);
937 		return (0);
938 	}
939 
940 	/*
941 	 * If the protocol isn't TCP/IP or UDP/IP assume that it has its own
942 	 * port namepsace and that conflicts can be detected by literal string
943 	 * comparison.
944 	 */
945 
946 	if (strcmp(proto0, proto1))
947 		return (FALSE);
948 
949 	return (B_TRUE);
950 }
951 
952 
953 /*
954  * Check if inetd thinks this RPC program number is already registered.
955  *
956  * An RPC protocol conflict occurs if
957  * 	a) the program numbers are the same and,
958  * 	b) the version numbers overlap,
959  * 	c) the protocols (TCP vs UDP vs tic*) are the same.
960  */
961 
962 boolean_t
963 is_rpc_num_in_use(int rpc_n, char *proto, int lowver, int highver) {
964 	instance_t *i;
965 	basic_cfg_t *cfg;
966 	proto_info_t *pi;
967 
968 	for (i = uu_list_first(instance_list); i != NULL;
969 	    i = uu_list_next(instance_list, i)) {
970 
971 		if (i->cur_istate != IIS_ONLINE)
972 			continue;
973 		cfg = i->config->basic;
974 
975 		for (pi = uu_list_first(cfg->proto_list); pi != NULL;
976 		    pi = uu_list_next(cfg->proto_list, pi)) {
977 
978 			if (pi->ri == NULL)
979 				continue;
980 			if (pi->ri->prognum != rpc_n)
981 				continue;
982 			if (!is_rpc_proto_conflict(pi->proto, proto))
983 				continue;
984 			if ((lowver < pi->ri->lowver &&
985 			    highver < pi->ri->lowver) ||
986 			    (lowver > pi->ri->highver &&
987 			    highver > pi->ri->highver))
988 				continue;
989 			return (B_TRUE);
990 		}
991 	}
992 	return (B_FALSE);
993 }
994 
995 
996 /*
997  * Independent of the transport, for each of the entries in the instance's
998  * proto list this function first attempts to create an associated network fd;
999  * for RPC services these are then bound to a kernel chosen port and the
1000  * fd is registered with rpcbind; for non-RPC services the fds are bound
1001  * to the port associated with the instance's service name. On any successful
1002  * binds the instance is taken online. Failed binds are handled by
1003  * handle_bind_failure().
1004  */
1005 void
1006 create_bound_fds(instance_t *instance)
1007 {
1008 	basic_cfg_t	*cfg = instance->config->basic;
1009 	boolean_t	failure = B_FALSE;
1010 	boolean_t	success = B_FALSE;
1011 	proto_info_t	*pi;
1012 
1013 	debug_msg("Entering create_bound_fd: instance: %s", instance->fmri);
1014 
1015 	/*
1016 	 * Loop through and try and bind any unbound protos.
1017 	 */
1018 	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
1019 	    pi = uu_list_next(cfg->proto_list, pi)) {
1020 		if (pi->listen_fd != -1)
1021 			continue;
1022 		if (cfg->istlx) {
1023 			pi->listen_fd = create_bound_endpoint(instance->fmri,
1024 			    (tlx_info_t *)pi);
1025 		} else {
1026 			/*
1027 			 * We cast pi to a void so we can then go on to cast
1028 			 * it to a socket_info_t without lint complaining
1029 			 * about alignment. This is done because the x86
1030 			 * version of lint thinks a lint suppression directive
1031 			 * is unnecessary and flags it as such, yet the sparc
1032 			 * version complains if it's absent.
1033 			 */
1034 			void *p = pi;
1035 			pi->listen_fd = create_bound_socket(instance->fmri,
1036 			    (socket_info_t *)p);
1037 		}
1038 		if (pi->listen_fd == -1) {
1039 			failure = B_TRUE;
1040 			continue;
1041 		}
1042 
1043 		if (pi->ri != NULL) {
1044 
1045 			/*
1046 			 * Don't register the same RPC program number twice.
1047 			 * Doing so silently discards the old service
1048 			 * without causing an error.
1049 			 */
1050 			if (is_rpc_num_in_use(pi->ri->prognum, pi->proto,
1051 				pi->ri->lowver, pi->ri->highver)) {
1052 				failure = B_TRUE;
1053 				close_net_fd(instance, pi->listen_fd);
1054 				pi->listen_fd = -1;
1055 				continue;
1056 			}
1057 
1058 			unregister_rpc_service(instance->fmri, pi->ri);
1059 			if (register_rpc_service(instance->fmri, pi->ri) ==
1060 			    -1) {
1061 				close_net_fd(instance, pi->listen_fd);
1062 				pi->listen_fd = -1;
1063 				failure = B_TRUE;
1064 				continue;
1065 			}
1066 		}
1067 
1068 		success = B_TRUE;
1069 	}
1070 
1071 	switch (instance->cur_istate) {
1072 	case IIS_OFFLINE:
1073 	case IIS_OFFLINE_BIND:
1074 		/*
1075 		 * If we've managed to bind at least one proto lets run the
1076 		 * online method, so we can start listening for it.
1077 		 */
1078 		if (success && run_method(instance, IM_ONLINE, NULL) == -1)
1079 			return;	/* instance gone to maintenance */
1080 		break;
1081 	case IIS_ONLINE:
1082 	case IIS_IN_REFRESH_METHOD:
1083 		/*
1084 		 * We're 'online', so start polling on any bound fds we're
1085 		 * currently not.
1086 		 */
1087 		if (poll_bound_fds(instance, B_TRUE) != 0) {
1088 			failure = B_TRUE;
1089 		} else if (!failure) {
1090 			/*
1091 			 * We've successfully bound and poll'd upon all protos,
1092 			 * so reset the failure count.
1093 			 */
1094 			instance->bind_fail_count = 0;
1095 		}
1096 		break;
1097 	case IIS_IN_ONLINE_METHOD:
1098 		/*
1099 		 * Nothing to do here as the method completion code will start
1100 		 * listening for any successfully bound fds.
1101 		 */
1102 		break;
1103 	default:
1104 #ifndef NDEBUG
1105 		(void) fprintf(stderr, "%s:%d: Unknown instance state %d.\n",
1106 		    __FILE__, __LINE__, instance->cur_istate);
1107 #endif
1108 		abort();
1109 	}
1110 
1111 	if (failure)
1112 		handle_bind_failure(instance);
1113 }
1114 
1115 /*
1116  * Counter to create_bound_fds(), for each of the bound network fds this
1117  * function unregisters the instance from rpcbind if it's an RPC service,
1118  * stops listening for new connections for it and then closes the listening fd.
1119  */
1120 static void
1121 destroy_bound_fds(instance_t *instance)
1122 {
1123 	basic_cfg_t	*cfg = instance->config->basic;
1124 	proto_info_t	*pi;
1125 
1126 	debug_msg("Entering destroy_bound_fds: instance: %s", instance->fmri);
1127 
1128 	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
1129 	    pi = uu_list_next(cfg->proto_list, pi)) {
1130 		if (pi->listen_fd != -1) {
1131 			if (pi->ri != NULL)
1132 				unregister_rpc_service(instance->fmri, pi->ri);
1133 			clear_pollfd(pi->listen_fd);
1134 			close_net_fd(instance, pi->listen_fd);
1135 			pi->listen_fd = -1;
1136 		}
1137 	}
1138 
1139 	/* cancel any bind retries */
1140 	if (instance->bind_timer_id != -1)
1141 		cancel_bind_timer(instance);
1142 
1143 	instance->bind_retries_exceeded = B_FALSE;
1144 }
1145 
1146 /*
1147  * Perform %A address expansion and return a pointer to a static string
1148  * array containing crafted arguments. This expansion is provided for
1149  * compatibility with 4.2BSD daemons, and as such we've copied the logic of
1150  * the legacy inetd to maintain this compatibility as much as possible. This
1151  * logic is a bit scatty, but it dates back at least as far as SunOS 4.x.
1152  */
1153 static char **
1154 expand_address(instance_t *inst, const proto_info_t *pi)
1155 {
1156 	static char	addrbuf[sizeof ("ffffffff.65536")];
1157 	static char	*ret[3];
1158 	instance_cfg_t	*cfg = inst->config;
1159 	/*
1160 	 * We cast pi to a void so we can then go on to cast it to a
1161 	 * socket_info_t without lint complaining about alignment. This
1162 	 * is done because the x86 version of lint thinks a lint suppression
1163 	 * directive is unnecessary and flags it as such, yet the sparc
1164 	 * version complains if it's absent.
1165 	 */
1166 	const void	*p = pi;
1167 
1168 	debug_msg("Entering expand_address");
1169 
1170 	/* set ret[0] to the basename of exec path */
1171 	if ((ret[0] = strrchr(cfg->methods[IM_START]->exec_path, '/'))
1172 	    != NULL) {
1173 		ret[0]++;
1174 	} else {
1175 		ret[0] = cfg->methods[IM_START]->exec_path;
1176 	}
1177 
1178 	if (!cfg->basic->istlx &&
1179 	    (((socket_info_t *)p)->type == SOCK_DGRAM)) {
1180 		ret[1] = NULL;
1181 	} else {
1182 		addrbuf[0] = '\0';
1183 		if (!cfg->basic->iswait &&
1184 		    (inst->remote_addr.ss_family == AF_INET)) {
1185 			struct sockaddr_in *sp;
1186 
1187 			sp = (struct sockaddr_in *)&(inst->remote_addr);
1188 			(void) snprintf(addrbuf, sizeof (addrbuf), "%x.%hu",
1189 			    ntohl(sp->sin_addr.s_addr), ntohs(sp->sin_port));
1190 		}
1191 		ret[1] = addrbuf;
1192 		ret[2] = NULL;
1193 	}
1194 
1195 	return (ret);
1196 }
1197 
1198 /*
1199  * Returns the state associated with the supplied method being run for an
1200  * instance.
1201  */
1202 static internal_inst_state_t
1203 get_method_state(instance_method_t method)
1204 {
1205 	state_info_t *sip;
1206 
1207 	for (sip = states; sip->istate != IIS_NONE; sip++) {
1208 		if (sip->method_running == method)
1209 			break;
1210 	}
1211 	assert(sip->istate != IIS_NONE);
1212 
1213 	return (sip->istate);
1214 }
1215 
1216 /*
1217  * Store the method's PID and CID in the repository. If the store fails
1218  * we ignore it and just drive on.
1219  */
1220 static void
1221 add_method_ids(instance_t *ins, pid_t pid, ctid_t cid, instance_method_t mthd)
1222 {
1223 	debug_msg("Entering add_method_ids");
1224 
1225 	if (cid != -1)
1226 		(void) add_remove_contract(ins, B_TRUE, cid);
1227 
1228 	if (mthd == IM_START) {
1229 		if (add_rep_val(ins->start_pids, (int64_t)pid) == 0) {
1230 			(void) store_rep_vals(ins->start_pids, ins->fmri,
1231 			    PR_NAME_START_PIDS);
1232 		}
1233 	} else {
1234 		if (add_rep_val(ins->non_start_pid, (int64_t)pid) == 0) {
1235 			(void) store_rep_vals(ins->non_start_pid, ins->fmri,
1236 			    PR_NAME_NON_START_PID);
1237 		}
1238 	}
1239 }
1240 
1241 /*
1242  * Remove the method's PID and CID from the repository. If the removal
1243  * fails we ignore it and drive on.
1244  */
1245 void
1246 remove_method_ids(instance_t *inst, pid_t pid, ctid_t cid,
1247     instance_method_t mthd)
1248 {
1249 	debug_msg("Entering remove_method_ids");
1250 
1251 	if (cid != -1)
1252 		(void) add_remove_contract(inst, B_FALSE, cid);
1253 
1254 	if (mthd == IM_START) {
1255 		remove_rep_val(inst->start_pids, (int64_t)pid);
1256 		(void) store_rep_vals(inst->start_pids, inst->fmri,
1257 		    PR_NAME_START_PIDS);
1258 	} else {
1259 		remove_rep_val(inst->non_start_pid, (int64_t)pid);
1260 		(void) store_rep_vals(inst->non_start_pid, inst->fmri,
1261 		    PR_NAME_NON_START_PID);
1262 	}
1263 }
1264 
1265 static instance_t *
1266 create_instance(const char *fmri)
1267 {
1268 	instance_t *ret;
1269 
1270 	debug_msg("Entering create_instance, instance: %s", fmri);
1271 
1272 	if (((ret = calloc(1, sizeof (instance_t))) == NULL) ||
1273 	    ((ret->fmri = strdup(fmri)) == NULL))
1274 		goto alloc_fail;
1275 
1276 	ret->conn_fd = -1;
1277 
1278 	ret->copies = 0;
1279 
1280 	ret->conn_rate_count = 0;
1281 	ret->fail_rate_count = 0;
1282 	ret->bind_fail_count = 0;
1283 
1284 	if (((ret->non_start_pid = create_rep_val_list()) == NULL) ||
1285 	    ((ret->start_pids = create_rep_val_list()) == NULL) ||
1286 	    ((ret->start_ctids = create_rep_val_list()) == NULL))
1287 		goto alloc_fail;
1288 
1289 	ret->cur_istate = IIS_NONE;
1290 	ret->next_istate = IIS_NONE;
1291 
1292 	if (((ret->cur_istate_rep = create_rep_val_list()) == NULL) ||
1293 	    ((ret->next_istate_rep = create_rep_val_list()) == NULL))
1294 		goto alloc_fail;
1295 
1296 	ret->config = NULL;
1297 	ret->new_config = NULL;
1298 
1299 	ret->timer_id = -1;
1300 	ret->bind_timer_id = -1;
1301 
1302 	ret->disable_req = B_FALSE;
1303 	ret->maintenance_req = B_FALSE;
1304 	ret->conn_rate_exceeded = B_FALSE;
1305 	ret->bind_retries_exceeded = B_FALSE;
1306 
1307 	ret->pending_rst_event = RESTARTER_EVENT_TYPE_INVALID;
1308 
1309 	return (ret);
1310 
1311 alloc_fail:
1312 	error_msg(strerror(errno));
1313 	destroy_instance(ret);
1314 	return (NULL);
1315 }
1316 
1317 static void
1318 destroy_instance(instance_t *inst)
1319 {
1320 	debug_msg("Entering destroy_instance");
1321 
1322 	if (inst == NULL)
1323 		return;
1324 
1325 	destroy_instance_cfg(inst->config);
1326 	destroy_instance_cfg(inst->new_config);
1327 
1328 	destroy_rep_val_list(inst->cur_istate_rep);
1329 	destroy_rep_val_list(inst->next_istate_rep);
1330 
1331 	destroy_rep_val_list(inst->start_pids);
1332 	destroy_rep_val_list(inst->non_start_pid);
1333 	destroy_rep_val_list(inst->start_ctids);
1334 
1335 	free(inst->fmri);
1336 
1337 	free(inst);
1338 }
1339 
1340 /*
1341  * Retrieves the current and next states internal states. Returns 0 on success,
1342  * else returns one of the following on error:
1343  * SCF_ERROR_NO_MEMORY if memory allocation failed.
1344  * SCF_ERROR_CONNECTION_BROKEN if the connection to the repository was broken.
1345  * SCF_ERROR_TYPE_MISMATCH if the property was of an unexpected type.
1346  * SCF_ERROR_NO_RESOURCES if the server doesn't have adequate resources.
1347  * SCF_ERROR_NO_SERVER if the server isn't running.
1348  */
1349 static scf_error_t
1350 retrieve_instance_state(instance_t *inst)
1351 {
1352 	scf_error_t	ret;
1353 
1354 	debug_msg("Entering retrieve_instance_state: instance: %s",
1355 	    inst->fmri);
1356 
1357 	/* retrieve internal states */
1358 	if (((ret = retrieve_rep_vals(inst->cur_istate_rep, inst->fmri,
1359 	    PR_NAME_CUR_INT_STATE)) != 0) ||
1360 	    ((ret = retrieve_rep_vals(inst->next_istate_rep, inst->fmri,
1361 	    PR_NAME_NEXT_INT_STATE)) != 0)) {
1362 		if (ret != SCF_ERROR_NOT_FOUND) {
1363 			error_msg(gettext(
1364 			    "Failed to read state of instance %s: %s"),
1365 			    inst->fmri, scf_strerror(scf_error()));
1366 			return (ret);
1367 		}
1368 
1369 		debug_msg("instance with no previous int state - "
1370 		    "setting state to uninitialized");
1371 
1372 		if ((set_single_rep_val(inst->cur_istate_rep,
1373 		    (int64_t)IIS_UNINITIALIZED) == -1) ||
1374 		    (set_single_rep_val(inst->next_istate_rep,
1375 		    (int64_t)IIS_NONE) == -1)) {
1376 			return (SCF_ERROR_NO_MEMORY);
1377 		}
1378 	}
1379 
1380 	/* update convenience states */
1381 	inst->cur_istate = get_single_rep_val(inst->cur_istate_rep);
1382 	inst->next_istate = get_single_rep_val(inst->next_istate_rep);
1383 	debug_msg("previous states: cur: %d, next: %d", inst->cur_istate,
1384 	    inst->next_istate);
1385 
1386 	return (0);
1387 }
1388 
1389 /*
1390  * Retrieve stored process ids and register each of them so we process their
1391  * termination.
1392  */
1393 static int
1394 retrieve_method_pids(instance_t *inst)
1395 {
1396 	rep_val_t	*rv;
1397 
1398 	debug_msg("Entering remove_method_pids");
1399 
1400 	switch (retrieve_rep_vals(inst->start_pids, inst->fmri,
1401 	    PR_NAME_START_PIDS)) {
1402 	case 0:
1403 		break;
1404 	case SCF_ERROR_NOT_FOUND:
1405 		return (0);
1406 	default:
1407 		error_msg(gettext("Failed to retrieve the start pids of "
1408 		    "instance %s from repository: %s"), inst->fmri,
1409 		    scf_strerror(scf_error()));
1410 		return (-1);
1411 	}
1412 
1413 	rv = uu_list_first(inst->start_pids);
1414 	while (rv != NULL) {
1415 		if (register_method(inst, (pid_t)rv->val, (ctid_t)-1,
1416 		    IM_START) == 0) {
1417 			inst->copies++;
1418 			rv = uu_list_next(inst->start_pids, rv);
1419 		} else if (errno == ENOENT) {
1420 			pid_t pid = (pid_t)rv->val;
1421 
1422 			/*
1423 			 * The process must have already terminated. Remove
1424 			 * it from the list.
1425 			 */
1426 			rv = uu_list_next(inst->start_pids, rv);
1427 			remove_rep_val(inst->start_pids, pid);
1428 		} else {
1429 			error_msg(gettext("Failed to listen for the completion "
1430 			    "of %s method of instance %s"), START_METHOD_NAME,
1431 			    inst->fmri);
1432 			rv = uu_list_next(inst->start_pids, rv);
1433 		}
1434 	}
1435 
1436 	/* synch the repository pid list to remove any terminated pids */
1437 	(void) store_rep_vals(inst->start_pids, inst->fmri, PR_NAME_START_PIDS);
1438 
1439 	return (0);
1440 }
1441 
1442 /*
1443  * Remove the passed instance from inetd control.
1444  */
1445 static void
1446 remove_instance(instance_t *instance)
1447 {
1448 	debug_msg("Entering remove_instance");
1449 
1450 	switch (instance->cur_istate) {
1451 	case IIS_ONLINE:
1452 	case IIS_DEGRADED:
1453 		/* stop listening for network connections */
1454 		destroy_bound_fds(instance);
1455 		break;
1456 	case IIS_OFFLINE_BIND:
1457 		cancel_bind_timer(instance);
1458 		break;
1459 	case IIS_OFFLINE_CONRATE:
1460 		cancel_inst_timer(instance);
1461 		break;
1462 	}
1463 
1464 	/* stop listening for terminated methods */
1465 	unregister_instance_methods(instance);
1466 
1467 	uu_list_remove(instance_list, instance);
1468 	destroy_instance(instance);
1469 }
1470 
1471 /*
1472  * Refresh the configuration of instance 'inst'. This method gets called as
1473  * a result of a refresh event for the instance from the master restarter, so
1474  * we can rely upon the instance's running snapshot having been updated from
1475  * its configuration snapshot.
1476  */
1477 void
1478 refresh_instance(instance_t *inst)
1479 {
1480 	instance_cfg_t	*cfg;
1481 
1482 	debug_msg("Entering refresh_instance: inst: %s", inst->fmri);
1483 
1484 	switch (inst->cur_istate) {
1485 	case IIS_MAINTENANCE:
1486 	case IIS_DISABLED:
1487 	case IIS_UNINITIALIZED:
1488 		/*
1489 		 * Ignore any possible changes, we'll re-read the configuration
1490 		 * automatically when we exit these states.
1491 		 */
1492 		break;
1493 
1494 	case IIS_OFFLINE_COPIES:
1495 	case IIS_OFFLINE_BIND:
1496 	case IIS_OFFLINE:
1497 	case IIS_OFFLINE_CONRATE:
1498 		destroy_instance_cfg(inst->config);
1499 		if ((inst->config = read_instance_cfg(inst->fmri)) == NULL) {
1500 			log_invalid_cfg(inst->fmri);
1501 			if (inst->cur_istate == IIS_OFFLINE_BIND) {
1502 				cancel_bind_timer(inst);
1503 			} else if (inst->cur_istate == IIS_OFFLINE_CONRATE) {
1504 				cancel_inst_timer(inst);
1505 			}
1506 			update_state(inst, IIS_MAINTENANCE, RERR_FAULT);
1507 		} else {
1508 			switch (inst->cur_istate) {
1509 			case IIS_OFFLINE_BIND:
1510 				if (copies_limit_exceeded(inst)) {
1511 					/* Cancel scheduled bind retries. */
1512 					cancel_bind_timer(inst);
1513 
1514 					/*
1515 					 * Take the instance to the copies
1516 					 * offline state, via the offline
1517 					 * state.
1518 					 */
1519 					update_state(inst, IIS_OFFLINE,
1520 					    RERR_RESTART);
1521 					process_offline_inst(inst);
1522 				}
1523 				break;
1524 
1525 			case IIS_OFFLINE:
1526 				process_offline_inst(inst);
1527 				break;
1528 
1529 			case IIS_OFFLINE_CONRATE:
1530 				/*
1531 				 * Since we're already in a DOS state,
1532 				 * don't bother evaluating the copies
1533 				 * limit. This will be evaluated when
1534 				 * we leave this state in
1535 				 * process_offline_inst().
1536 				 */
1537 				break;
1538 
1539 			case IIS_OFFLINE_COPIES:
1540 				/*
1541 				 * Check if the copies limit has been increased
1542 				 * above the current count.
1543 				 */
1544 				if (!copies_limit_exceeded(inst)) {
1545 					update_state(inst, IIS_OFFLINE,
1546 					    RERR_RESTART);
1547 					process_offline_inst(inst);
1548 				}
1549 				break;
1550 
1551 			default:
1552 				assert(0);
1553 			}
1554 		}
1555 		break;
1556 
1557 	case IIS_DEGRADED:
1558 	case IIS_ONLINE:
1559 		if ((cfg = read_instance_cfg(inst->fmri)) != NULL) {
1560 			instance_cfg_t *ocfg = inst->config;
1561 
1562 			/*
1563 			 * Try to avoid the overhead of taking an instance
1564 			 * offline and back on again. We do this by limiting
1565 			 * this behavior to two eventualities:
1566 			 * - there needs to be a re-bind to listen on behalf
1567 			 *   of the instance with its new configuration. This
1568 			 *   could be because for example its service has been
1569 			 *   associated with a different port, or because the
1570 			 *   v6only protocol option has been newly applied to
1571 			 *   the instance.
1572 			 * - one or both of the start or online methods of the
1573 			 *   instance have changed in the new configuration.
1574 			 *   Without taking the instance offline when the
1575 			 *   start method changed the instance may be running
1576 			 *   with unwanted parameters (or event an unwanted
1577 			 *   binary); and without taking the instance offline
1578 			 *   if its online method was to change, some part of
1579 			 *   its running environment may have changed and would
1580 			 *   not be picked up until the instance next goes
1581 			 *   offline for another reason.
1582 			 */
1583 			if ((!bind_config_equal(ocfg->basic, cfg->basic)) ||
1584 			    !method_info_equal(ocfg->methods[IM_ONLINE],
1585 			    cfg->methods[IM_ONLINE]) ||
1586 			    !method_info_equal(ocfg->methods[IM_START],
1587 			    cfg->methods[IM_START])) {
1588 				destroy_bound_fds(inst);
1589 
1590 				assert(inst->new_config == NULL);
1591 				inst->new_config = cfg;
1592 
1593 				(void) run_method(inst, IM_OFFLINE, NULL);
1594 			} else {	/* no bind config / method changes */
1595 
1596 				/*
1597 				 * swap the proto list over from the old
1598 				 * configuration to the new, so we retain
1599 				 * our set of network fds.
1600 				 */
1601 				destroy_proto_list(cfg->basic);
1602 				cfg->basic->proto_list =
1603 				    ocfg->basic->proto_list;
1604 				ocfg->basic->proto_list = NULL;
1605 				destroy_instance_cfg(ocfg);
1606 				inst->config = cfg;
1607 
1608 				/* re-evaluate copies limits based on new cfg */
1609 				if (copies_limit_exceeded(inst)) {
1610 					destroy_bound_fds(inst);
1611 					(void) run_method(inst, IM_OFFLINE,
1612 					    NULL);
1613 				} else {
1614 					/*
1615 					 * Since the instance isn't being
1616 					 * taken offline, where we assume it
1617 					 * would pick-up any configuration
1618 					 * changes automatically when it goes
1619 					 * back online, run its refresh method
1620 					 * to allow it to pick-up any changes
1621 					 * whilst still online.
1622 					 */
1623 					(void) run_method(inst, IM_REFRESH,
1624 					    NULL);
1625 				}
1626 			}
1627 		} else {
1628 			log_invalid_cfg(inst->fmri);
1629 
1630 			destroy_bound_fds(inst);
1631 
1632 			inst->maintenance_req = B_TRUE;
1633 			(void) run_method(inst, IM_OFFLINE, NULL);
1634 		}
1635 		break;
1636 
1637 	default:
1638 		debug_msg("Unhandled current state %d for instance in "
1639 		    "refresh_instance", inst->cur_istate);
1640 		assert(0);
1641 	}
1642 }
1643 
1644 /*
1645  * Called by process_restarter_event() to handle a restarter event for an
1646  * instance.
1647  */
1648 static void
1649 handle_restarter_event(instance_t *instance, restarter_event_type_t event,
1650     boolean_t send_ack)
1651 {
1652 	debug_msg("Entering handle_restarter_event: inst: %s, event: %d, "
1653 	    "curr state: %d", instance->fmri, event, instance->cur_istate);
1654 
1655 	switch (event) {
1656 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1657 		refresh_instance(instance);
1658 		goto done;
1659 	case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1660 		remove_instance(instance);
1661 		goto done;
1662 	case RESTARTER_EVENT_TYPE_STOP:
1663 		switch (instance->cur_istate) {
1664 		case IIS_OFFLINE_CONRATE:
1665 		case IIS_OFFLINE_BIND:
1666 		case IIS_OFFLINE_COPIES:
1667 			/*
1668 			 * inetd must be closing down as we wouldn't get this
1669 			 * event in one of these states from the master
1670 			 * restarter. Take the instance to the offline resting
1671 			 * state.
1672 			 */
1673 			if (instance->cur_istate == IIS_OFFLINE_BIND) {
1674 				cancel_bind_timer(instance);
1675 			} else if (instance->cur_istate ==
1676 			    IIS_OFFLINE_CONRATE) {
1677 				cancel_inst_timer(instance);
1678 			}
1679 			update_state(instance, IIS_OFFLINE, RERR_RESTART);
1680 			goto done;
1681 		}
1682 		break;
1683 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1684 		/*
1685 		 * We've got a restart event, so if the instance is online
1686 		 * in any way initiate taking it offline, and rely upon
1687 		 * our restarter to send us an online event to bring
1688 		 * it back online.
1689 		 */
1690 		switch (instance->cur_istate) {
1691 		case IIS_ONLINE:
1692 		case IIS_DEGRADED:
1693 			destroy_bound_fds(instance);
1694 			(void) run_method(instance, IM_OFFLINE, NULL);
1695 		}
1696 		goto done;
1697 	}
1698 
1699 	switch (instance->cur_istate) {
1700 	case IIS_OFFLINE:
1701 		switch (event) {
1702 		case RESTARTER_EVENT_TYPE_START:
1703 			/*
1704 			 * Dependencies are met, let's take the service online.
1705 			 * Only try and bind for a wait type service if
1706 			 * no process is running on its behalf. Otherwise, just
1707 			 * mark the service online and binding will be attempted
1708 			 * when the process exits.
1709 			 */
1710 			if (!(instance->config->basic->iswait &&
1711 			    (uu_list_first(instance->start_pids) != NULL))) {
1712 				create_bound_fds(instance);
1713 			} else {
1714 				update_state(instance, IIS_ONLINE, RERR_NONE);
1715 			}
1716 			break;
1717 		case RESTARTER_EVENT_TYPE_DISABLE:
1718 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1719 			/*
1720 			 * The instance should be disabled, so run the
1721 			 * instance's disabled method that will do the work
1722 			 * to take it there.
1723 			 */
1724 			(void) run_method(instance, IM_DISABLE, NULL);
1725 			break;
1726 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1727 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1728 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1729 			/*
1730 			 * The master restarter has requested the instance
1731 			 * go to maintenance; since we're already offline
1732 			 * just update the state to the maintenance state.
1733 			 */
1734 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1735 			break;
1736 		}
1737 		break;
1738 
1739 	case IIS_OFFLINE_BIND:
1740 		switch (event) {
1741 		case RESTARTER_EVENT_TYPE_DISABLE:
1742 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1743 			/*
1744 			 * The instance should be disabled. Firstly, as for
1745 			 * the above dependencies unmet comment, cancel
1746 			 * the bind retry timer and update the state to
1747 			 * offline. Then, run the disable method to do the
1748 			 * work to take the instance from offline to
1749 			 * disabled.
1750 			 */
1751 			cancel_bind_timer(instance);
1752 			update_state(instance, IIS_OFFLINE, RERR_RESTART);
1753 			(void) run_method(instance, IM_DISABLE, NULL);
1754 			break;
1755 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1756 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1757 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1758 			/*
1759 			 * The master restarter has requested the instance
1760 			 * be placed in the maintenance state. Cancel the
1761 			 * outstanding retry timer, and since we're already
1762 			 * offline, update the state to maintenance.
1763 			 */
1764 			cancel_bind_timer(instance);
1765 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1766 			break;
1767 		}
1768 		break;
1769 
1770 	case IIS_DEGRADED:
1771 	case IIS_ONLINE:
1772 		switch (event) {
1773 		case RESTARTER_EVENT_TYPE_DISABLE:
1774 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1775 			/*
1776 			 * The instance needs to be disabled. Do the same work
1777 			 * as for the dependencies unmet event below to
1778 			 * take the instance offline.
1779 			 */
1780 			destroy_bound_fds(instance);
1781 			/*
1782 			 * Indicate that the offline method is being run
1783 			 * as part of going to the disabled state, and to
1784 			 * carry on this transition.
1785 			 */
1786 			instance->disable_req = B_TRUE;
1787 			(void) run_method(instance, IM_OFFLINE, NULL);
1788 			break;
1789 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1790 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1791 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1792 			/*
1793 			 * The master restarter has requested the instance be
1794 			 * placed in the maintenance state. This involves
1795 			 * firstly taking the service offline, so do the
1796 			 * same work as for the dependencies unmet event
1797 			 * below. We set the maintenance_req flag to
1798 			 * indicate that when we get to the offline state
1799 			 * we should be placed directly into the maintenance
1800 			 * state.
1801 			 */
1802 			instance->maintenance_req = B_TRUE;
1803 			/* FALLTHROUGH */
1804 		case RESTARTER_EVENT_TYPE_STOP:
1805 			/*
1806 			 * Dependencies have become unmet. Close and
1807 			 * stop listening on the instance's network file
1808 			 * descriptor, and run the offline method to do
1809 			 * any work required to take us to the offline state.
1810 			 */
1811 			destroy_bound_fds(instance);
1812 			(void) run_method(instance, IM_OFFLINE, NULL);
1813 		}
1814 		break;
1815 
1816 	case IIS_UNINITIALIZED:
1817 		if (event == RESTARTER_EVENT_TYPE_DISABLE ||
1818 		    event == RESTARTER_EVENT_TYPE_ADMIN_DISABLE) {
1819 			update_state(instance, IIS_DISABLED, RERR_NONE);
1820 			break;
1821 		} else if (event != RESTARTER_EVENT_TYPE_ENABLE) {
1822 			/*
1823 			 * Ignore other events until we know whether we're
1824 			 * enabled or not.
1825 			 */
1826 			break;
1827 		}
1828 
1829 		/*
1830 		 * We've got an enabled event; make use of the handling in the
1831 		 * disable case.
1832 		 */
1833 		/* FALLTHROUGH */
1834 
1835 	case IIS_DISABLED:
1836 		switch (event) {
1837 		case RESTARTER_EVENT_TYPE_ENABLE:
1838 			/*
1839 			 * The instance needs enabling. Commence reading its
1840 			 * configuration and if successful place the instance
1841 			 * in the offline state and let process_offline_inst()
1842 			 * take it from there.
1843 			 */
1844 			destroy_instance_cfg(instance->config);
1845 			instance->config = read_instance_cfg(instance->fmri);
1846 			if (instance->config != NULL) {
1847 				update_state(instance, IIS_OFFLINE,
1848 				    RERR_RESTART);
1849 				process_offline_inst(instance);
1850 			} else {
1851 				log_invalid_cfg(instance->fmri);
1852 				update_state(instance, IIS_MAINTENANCE,
1853 				    RERR_RESTART);
1854 			}
1855 
1856 			break;
1857 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1858 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1859 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1860 			/*
1861 			 * The master restarter has requested the instance be
1862 			 * placed in the maintenance state, so just update its
1863 			 * state to maintenance.
1864 			 */
1865 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1866 			break;
1867 		}
1868 		break;
1869 
1870 	case IIS_MAINTENANCE:
1871 		switch (event) {
1872 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1873 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1874 			/*
1875 			 * The master restarter has requested that the instance
1876 			 * be taken out of maintenance. Read its configuration,
1877 			 * and if successful place the instance in the offline
1878 			 * state and call process_offline_inst() to take it
1879 			 * from there.
1880 			 */
1881 			destroy_instance_cfg(instance->config);
1882 			instance->config = read_instance_cfg(instance->fmri);
1883 			if (instance->config != NULL) {
1884 				update_state(instance, IIS_OFFLINE,
1885 				    RERR_RESTART);
1886 				process_offline_inst(instance);
1887 			} else {
1888 				boolean_t enabled;
1889 
1890 				/*
1891 				 * The configuration was invalid. If the
1892 				 * service has disabled requested, let's
1893 				 * just place the instance in disabled even
1894 				 * though we haven't been able to run its
1895 				 * disable method, as the slightly incorrect
1896 				 * state is likely to be less of an issue to
1897 				 * an administrator than refusing to move an
1898 				 * instance to disabled. If disable isn't
1899 				 * requested, re-mark the service's state
1900 				 * as maintenance, so the administrator can
1901 				 * see the request was processed.
1902 				 */
1903 				if ((read_enable_merged(instance->fmri,
1904 				    &enabled) == 0) && !enabled) {
1905 					update_state(instance, IIS_DISABLED,
1906 					    RERR_RESTART);
1907 				} else {
1908 					log_invalid_cfg(instance->fmri);
1909 					update_state(instance, IIS_MAINTENANCE,
1910 					    RERR_FAULT);
1911 				}
1912 			}
1913 			break;
1914 		}
1915 		break;
1916 
1917 	case IIS_OFFLINE_CONRATE:
1918 		switch (event) {
1919 		case RESTARTER_EVENT_TYPE_DISABLE:
1920 			/*
1921 			 * The instance wants disabling. Take the instance
1922 			 * offline as for the dependencies unmet event above,
1923 			 * and then from there run the disable method to do
1924 			 * the work to take the instance to the disabled state.
1925 			 */
1926 			cancel_inst_timer(instance);
1927 			update_state(instance, IIS_OFFLINE, RERR_RESTART);
1928 			(void) run_method(instance, IM_DISABLE, NULL);
1929 			break;
1930 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1931 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1932 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1933 			/*
1934 			 * The master restarter has requested the instance
1935 			 * be taken to maintenance. Cancel the timer setup
1936 			 * when we entered this state, and go directly to
1937 			 * maintenance.
1938 			 */
1939 			cancel_inst_timer(instance);
1940 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1941 			break;
1942 		}
1943 		break;
1944 
1945 	case IIS_OFFLINE_COPIES:
1946 		switch (event) {
1947 		case RESTARTER_EVENT_TYPE_DISABLE:
1948 			/*
1949 			 * The instance wants disabling. Update the state
1950 			 * to offline, and run the disable method to do the
1951 			 * work to take it to the disabled state.
1952 			 */
1953 			update_state(instance, IIS_OFFLINE, RERR_RESTART);
1954 			(void) run_method(instance, IM_DISABLE, NULL);
1955 			break;
1956 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1957 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1958 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1959 			/*
1960 			 * The master restarter has requested the instance be
1961 			 * placed in maintenance. Since it's already offline
1962 			 * simply update the state.
1963 			 */
1964 			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
1965 			break;
1966 		}
1967 		break;
1968 
1969 	default:
1970 		debug_msg("handle_restarter_event: instance in an "
1971 		    "unexpected state");
1972 		assert(0);
1973 	}
1974 
1975 done:
1976 	if (send_ack)
1977 		ack_restarter_event(B_TRUE);
1978 }
1979 
1980 /*
1981  * Tries to read and process an event from the event pipe. If there isn't one
1982  * or an error occurred processing the event it returns -1. Else, if the event
1983  * is for an instance we're not already managing we read its state, add it to
1984  * our list to manage, and if appropriate read its configuration. Whether it's
1985  * new to us or not, we then handle the specific event.
1986  * Returns 0 if an event was read and processed successfully, else -1.
1987  */
1988 static int
1989 process_restarter_event(void)
1990 {
1991 	char			*fmri;
1992 	size_t			fmri_size;
1993 	restarter_event_type_t  event_type;
1994 	instance_t		*instance;
1995 	restarter_event_t	*event;
1996 	ssize_t			sz;
1997 
1998 	debug_msg("Entering process_restarter_event");
1999 
2000 	/*
2001 	 * Try to read an event pointer from the event pipe.
2002 	 */
2003 	errno = 0;
2004 	switch (safe_read(rst_event_pipe[PE_CONSUMER], &event,
2005 	    sizeof (event))) {
2006 	case 0:
2007 		break;
2008 	case  1:
2009 		if (errno == EAGAIN)	/* no event to read */
2010 			return (-1);
2011 
2012 		/* other end of pipe closed */
2013 
2014 		/* FALLTHROUGH */
2015 	default:			/* unexpected read error */
2016 		/*
2017 		 * There's something wrong with the event pipe. Let's
2018 		 * shutdown and be restarted.
2019 		 */
2020 		inetd_stop();
2021 		return (-1);
2022 	}
2023 
2024 	/*
2025 	 * Check if we're currently managing the instance which the event
2026 	 * pertains to. If not, read its complete state and add it to our
2027 	 * list to manage.
2028 	 */
2029 
2030 	fmri_size = scf_limit(SCF_LIMIT_MAX_FMRI_LENGTH);
2031 	if ((fmri = malloc(fmri_size)) == NULL) {
2032 		error_msg(strerror(errno));
2033 		goto fail;
2034 	}
2035 	sz = restarter_event_get_instance(event, fmri, fmri_size);
2036 	if (sz >= fmri_size)
2037 		assert(0);
2038 
2039 	for (instance = uu_list_first(instance_list); instance != NULL;
2040 	    instance = uu_list_next(instance_list, instance)) {
2041 		if (strcmp(instance->fmri, fmri) == 0)
2042 			break;
2043 	}
2044 
2045 	if (instance == NULL) {
2046 		int err;
2047 
2048 		debug_msg("New instance to manage: %s", fmri);
2049 
2050 		if (((instance = create_instance(fmri)) == NULL) ||
2051 		    (retrieve_instance_state(instance) != 0) ||
2052 		    (retrieve_method_pids(instance) != 0)) {
2053 			destroy_instance(instance);
2054 			free(fmri);
2055 			goto fail;
2056 		}
2057 
2058 		if (((err = iterate_repository_contracts(instance, 0))
2059 		    != 0) && (err != ENOENT)) {
2060 			error_msg(gettext(
2061 			    "Failed to adopt contracts of instance %s: %s"),
2062 			    instance->fmri, strerror(err));
2063 			destroy_instance(instance);
2064 			free(fmri);
2065 			goto fail;
2066 		}
2067 
2068 		uu_list_node_init(instance, &instance->link, instance_pool);
2069 		(void) uu_list_insert_after(instance_list, NULL, instance);
2070 
2071 		/*
2072 		 * Only read configuration for instances that aren't in any of
2073 		 * the disabled, maintenance or uninitialized states, since
2074 		 * they'll read it on state exit.
2075 		 */
2076 		if ((instance->cur_istate != IIS_DISABLED) &&
2077 		    (instance->cur_istate != IIS_MAINTENANCE) &&
2078 		    (instance->cur_istate != IIS_UNINITIALIZED)) {
2079 			instance->config = read_instance_cfg(instance->fmri);
2080 			if (instance->config == NULL) {
2081 				log_invalid_cfg(instance->fmri);
2082 				update_state(instance, IIS_MAINTENANCE,
2083 				    RERR_FAULT);
2084 			}
2085 		}
2086 	}
2087 
2088 	free(fmri);
2089 
2090 	event_type = restarter_event_get_type(event);
2091 	debug_msg("Event type: %d for instance: %s", event_type,
2092 	    instance->fmri);
2093 
2094 	/*
2095 	 * If the instance is currently running a method, don't process the
2096 	 * event now, but attach it to the instance for processing when
2097 	 * the instance finishes its transition.
2098 	 */
2099 	if (INST_IN_TRANSITION(instance)) {
2100 		debug_msg("storing event %d for instance %s", event_type,
2101 		    instance->fmri);
2102 		instance->pending_rst_event = event_type;
2103 	} else {
2104 		handle_restarter_event(instance, event_type, B_TRUE);
2105 	}
2106 
2107 	return (0);
2108 
2109 fail:
2110 	ack_restarter_event(B_FALSE);
2111 	return (-1);
2112 }
2113 
2114 /*
2115  * Do the state machine processing associated with the termination of instance
2116  * 'inst''s start method.
2117  */
2118 void
2119 process_start_term(instance_t *inst)
2120 {
2121 	basic_cfg_t	*cfg;
2122 
2123 	debug_msg("Entering process_start_term: inst: %s", inst->fmri);
2124 
2125 	inst->copies--;
2126 
2127 	if ((inst->cur_istate == IIS_MAINTENANCE) ||
2128 	    (inst->cur_istate == IIS_DISABLED)) {
2129 		/* do any further processing/checks when we exit these states */
2130 		return;
2131 	}
2132 
2133 	cfg = inst->config->basic;
2134 
2135 	if (cfg->iswait) {
2136 		proto_info_t	*pi;
2137 
2138 		switch (inst->cur_istate) {
2139 		case IIS_ONLINE:
2140 		case IIS_DEGRADED:
2141 		case IIS_IN_REFRESH_METHOD:
2142 			/*
2143 			 * A wait type service's start method has exited.
2144 			 * Check if the method was fired off in this inetd's
2145 			 * lifetime, or a previous one; if the former,
2146 			 * re-commence listening on the service's behalf; if
2147 			 * the latter, mark the service offline and let bind
2148 			 * attempts commence.
2149 			 */
2150 			for (pi = uu_list_first(cfg->proto_list); pi != NULL;
2151 			    pi = uu_list_next(cfg->proto_list, pi)) {
2152 				/*
2153 				 * If a bound fd exists, the method was fired
2154 				 * off during this inetd's lifetime.
2155 				 */
2156 				if (pi->listen_fd != -1)
2157 					break;
2158 			}
2159 			if (pi != NULL) {
2160 				if (poll_bound_fds(inst, B_TRUE) != 0)
2161 					handle_bind_failure(inst);
2162 			} else {
2163 				update_state(inst, IIS_OFFLINE, RERR_RESTART);
2164 				create_bound_fds(inst);
2165 			}
2166 		}
2167 	} else {
2168 		/*
2169 		 * Check if a nowait service should be brought back online
2170 		 * after exceeding its copies limit.
2171 		 */
2172 		if ((inst->cur_istate == IIS_OFFLINE_COPIES) &&
2173 		    !copies_limit_exceeded(inst)) {
2174 			update_state(inst, IIS_OFFLINE, RERR_NONE);
2175 			process_offline_inst(inst);
2176 		}
2177 	}
2178 }
2179 
2180 /*
2181  * If the instance has a pending event process it and initiate the
2182  * acknowledgement.
2183  */
2184 static void
2185 process_pending_rst_event(instance_t *inst)
2186 {
2187 	if (inst->pending_rst_event != RESTARTER_EVENT_TYPE_INVALID) {
2188 		restarter_event_type_t re;
2189 
2190 		debug_msg("Injecting pending event %d for instance %s",
2191 		    inst->pending_rst_event, inst->fmri);
2192 		re = inst->pending_rst_event;
2193 		inst->pending_rst_event = RESTARTER_EVENT_TYPE_INVALID;
2194 		handle_restarter_event(inst, re, B_TRUE);
2195 	}
2196 }
2197 
2198 /*
2199  * Do the state machine processing associated with the termination
2200  * of the specified instance's non-start method with the specified status.
2201  * Once the processing of the termination is done, the function also picks up
2202  * any processing that was blocked on the method running.
2203  */
2204 void
2205 process_non_start_term(instance_t *inst, int status)
2206 {
2207 	boolean_t ran_online_method = B_FALSE;
2208 
2209 	debug_msg("Entering process_non_start_term: inst: %s, method: %s",
2210 	    inst->fmri, methods[states[inst->cur_istate].method_running].name);
2211 
2212 	if (status == IMRET_FAILURE) {
2213 		error_msg(gettext("The %s method of instance %s failed, "
2214 		    "transitioning to maintenance"),
2215 		    methods[states[inst->cur_istate].method_running].name,
2216 		    inst->fmri);
2217 
2218 		if ((inst->cur_istate == IIS_IN_ONLINE_METHOD) ||
2219 		    (inst->cur_istate == IIS_IN_REFRESH_METHOD))
2220 			destroy_bound_fds(inst);
2221 
2222 		update_state(inst, IIS_MAINTENANCE, RERR_FAULT);
2223 
2224 		inst->maintenance_req = B_FALSE;
2225 		inst->conn_rate_exceeded = B_FALSE;
2226 
2227 		if (inst->new_config != NULL) {
2228 			destroy_instance_cfg(inst->new_config);
2229 			inst->new_config = NULL;
2230 		}
2231 
2232 		if (!inetd_stopping)
2233 			process_pending_rst_event(inst);
2234 
2235 		return;
2236 	}
2237 
2238 	/* non-failure method return */
2239 
2240 	if (status != IMRET_SUCCESS) {
2241 		/*
2242 		 * An instance method never returned a supported return code.
2243 		 * We'll assume this means the method succeeded for now whilst
2244 		 * non-GL-cognizant methods are used - eg. pkill.
2245 		 */
2246 		debug_msg("The %s method of instance %s returned "
2247 		    "non-compliant exit code: %d, assuming success",
2248 		    methods[states[inst->cur_istate].method_running].name,
2249 		    inst->fmri, status);
2250 	}
2251 
2252 	/*
2253 	 * Update the state from the in-transition state.
2254 	 */
2255 	switch (inst->cur_istate) {
2256 	case IIS_IN_ONLINE_METHOD:
2257 		ran_online_method = B_TRUE;
2258 		/* FALLTHROUGH */
2259 	case IIS_IN_REFRESH_METHOD:
2260 		/*
2261 		 * If we've exhausted the bind retries, flag that by setting
2262 		 * the instance's state to degraded.
2263 		 */
2264 		if (inst->bind_retries_exceeded) {
2265 			update_state(inst, IIS_DEGRADED, RERR_NONE);
2266 			break;
2267 		}
2268 		/* FALLTHROUGH */
2269 	default:
2270 		update_state(inst,
2271 		    methods[states[inst->cur_istate].method_running].dst_state,
2272 		    RERR_NONE);
2273 	}
2274 
2275 	if (inst->cur_istate == IIS_OFFLINE) {
2276 		if (inst->new_config != NULL) {
2277 			/*
2278 			 * This instance was found during refresh to need
2279 			 * taking offline because its newly read configuration
2280 			 * was sufficiently different. Now we're offline,
2281 			 * activate this new configuration.
2282 			 */
2283 			destroy_instance_cfg(inst->config);
2284 			inst->config = inst->new_config;
2285 			inst->new_config = NULL;
2286 		}
2287 
2288 		/* continue/complete any transitions that are in progress */
2289 		process_offline_inst(inst);
2290 
2291 	} else if (ran_online_method) {
2292 		/*
2293 		 * We've just successfully executed the online method. We have
2294 		 * a set of bound network fds that were created before running
2295 		 * this method, so now we're online start listening for
2296 		 * connections on them.
2297 		 */
2298 		if (poll_bound_fds(inst, B_TRUE) != 0)
2299 			handle_bind_failure(inst);
2300 	}
2301 
2302 	/*
2303 	 * If we're now out of transition (process_offline_inst() could have
2304 	 * fired off another method), carry out any jobs that were blocked by
2305 	 * us being in transition.
2306 	 */
2307 	if (!INST_IN_TRANSITION(inst)) {
2308 		if (inetd_stopping) {
2309 			if (!instance_stopped(inst)) {
2310 				/*
2311 				 * inetd is stopping, and this instance hasn't
2312 				 * been stopped. Inject a stop event.
2313 				 */
2314 				handle_restarter_event(inst,
2315 				    RESTARTER_EVENT_TYPE_STOP, B_FALSE);
2316 			}
2317 		} else {
2318 			process_pending_rst_event(inst);
2319 		}
2320 	}
2321 }
2322 
2323 /*
2324  * Check if configuration file specified is readable. If not return B_FALSE,
2325  * else return B_TRUE.
2326  */
2327 static boolean_t
2328 can_read_file(const char *path)
2329 {
2330 	int	ret;
2331 	int	serrno;
2332 
2333 	debug_msg("Entering can_read_file");
2334 	do {
2335 		ret = access(path, R_OK);
2336 	} while ((ret < 0) && (errno == EINTR));
2337 	if (ret < 0) {
2338 		if (errno != ENOENT) {
2339 			serrno = errno;
2340 			error_msg(gettext("Failed to access configuration "
2341 			    "file %s for performing modification checks: %s"),
2342 			    path, strerror(errno));
2343 			errno = serrno;
2344 		}
2345 		return (B_FALSE);
2346 	}
2347 	return (B_TRUE);
2348 }
2349 
2350 /*
2351  * Check whether the configuration file has changed contents since inetd
2352  * was last started/refreshed, and if so, log a message indicating that
2353  * inetconv needs to be run.
2354  */
2355 static void
2356 check_conf_file(void)
2357 {
2358 	char		*new_hash;
2359 	char		*old_hash = NULL;
2360 	scf_error_t	ret;
2361 	const char	*file;
2362 
2363 	debug_msg("Entering check_conf_file");
2364 
2365 	if (conf_file == NULL) {
2366 		/*
2367 		 * No explicit config file specified, so see if one of the
2368 		 * default two are readable, checking the primary one first
2369 		 * followed by the secondary.
2370 		 */
2371 		if (can_read_file(PRIMARY_DEFAULT_CONF_FILE)) {
2372 			file = PRIMARY_DEFAULT_CONF_FILE;
2373 		} else if ((errno == ENOENT) &&
2374 		    can_read_file(SECONDARY_DEFAULT_CONF_FILE)) {
2375 			file = SECONDARY_DEFAULT_CONF_FILE;
2376 		} else {
2377 			return;
2378 		}
2379 	} else {
2380 		file = conf_file;
2381 		if (!can_read_file(file))
2382 			return;
2383 	}
2384 
2385 	if (calculate_hash(file, &new_hash) == 0) {
2386 		ret = retrieve_inetd_hash(&old_hash);
2387 		if (((ret == SCF_ERROR_NONE) &&
2388 		    (strcmp(old_hash, new_hash) != 0))) {
2389 			/* modified config file */
2390 			warn_msg(gettext(
2391 			    "Configuration file %s has been modified since "
2392 			    "inetconv was last run. \"inetconv -i %s\" must be "
2393 			    "run to apply any changes to the SMF"), file, file);
2394 		} else if ((ret != SCF_ERROR_NOT_FOUND) &&
2395 		    (ret != SCF_ERROR_NONE)) {
2396 			/* No message if hash not yet computed */
2397 			error_msg(gettext("Failed to check whether "
2398 			    "configuration file %s has been modified: %s"),
2399 			    file, scf_strerror(ret));
2400 		}
2401 		free(old_hash);
2402 		free(new_hash);
2403 	} else {
2404 		error_msg(gettext("Failed to check whether configuration file "
2405 		    "%s has been modified: %s"), file, strerror(errno));
2406 	}
2407 }
2408 
2409 /*
2410  * Refresh all inetd's managed instances and check the configuration file
2411  * for any updates since inetconv was last run, logging a message if there
2412  * are. We call the SMF refresh function to refresh each instance so that
2413  * the refresh request goes through the framework, and thus results in the
2414  * running snapshot of each instance being updated from the configuration
2415  * snapshot.
2416  */
2417 static void
2418 inetd_refresh(void)
2419 {
2420 	instance_t	*inst;
2421 
2422 	debug_msg("Entering inetd_refresh");
2423 
2424 	/* call libscf to send refresh requests for all managed instances */
2425 	for (inst = uu_list_first(instance_list); inst != NULL;
2426 	    inst = uu_list_next(instance_list, inst)) {
2427 		if (smf_refresh_instance(inst->fmri) < 0) {
2428 			error_msg(gettext("Failed to refresh instance %s: %s"),
2429 			    inst->fmri, scf_strerror(scf_error()));
2430 		}
2431 	}
2432 
2433 	/*
2434 	 * Log a message if the configuration file has changed since inetconv
2435 	 * was last run.
2436 	 */
2437 	check_conf_file();
2438 }
2439 
2440 /*
2441  * Initiate inetd's shutdown.
2442  */
2443 static void
2444 inetd_stop(void)
2445 {
2446 	instance_t *inst;
2447 
2448 	debug_msg("Entering inetd_stop");
2449 
2450 	/* Block handling signals for stop and refresh */
2451 	(void) sighold(SIGHUP);
2452 	(void) sighold(SIGTERM);
2453 
2454 	/* Indicate inetd is coming down */
2455 	inetd_stopping = B_TRUE;
2456 
2457 	/* Stop polling on restarter events. */
2458 	clear_pollfd(rst_event_pipe[PE_CONSUMER]);
2459 
2460 	/* Stop polling for any more stop/refresh requests. */
2461 	clear_pollfd(uds_fd);
2462 
2463 	/*
2464 	 * Send a stop event to all currently unstopped instances that
2465 	 * aren't in transition. For those that are in transition, the
2466 	 * event will get sent when the transition completes.
2467 	 */
2468 	for (inst = uu_list_first(instance_list); inst != NULL;
2469 	    inst = uu_list_next(instance_list, inst)) {
2470 		if (!instance_stopped(inst) && !INST_IN_TRANSITION(inst))
2471 			handle_restarter_event(inst,
2472 			    RESTARTER_EVENT_TYPE_STOP, B_FALSE);
2473 	}
2474 }
2475 
2476 /*
2477  * Sets up the intra-inetd-process Unix Domain Socket.
2478  * Returns -1 on error, else 0.
2479  */
2480 static int
2481 uds_init(void)
2482 {
2483 	struct sockaddr_un addr;
2484 
2485 	debug_msg("Entering uds_init");
2486 
2487 	if ((uds_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
2488 		error_msg("socket: %s", strerror(errno));
2489 		return (-1);
2490 	}
2491 
2492 	disable_blocking(uds_fd);
2493 
2494 	(void) unlink(INETD_UDS_PATH);  /* clean-up any stale files */
2495 
2496 	(void) memset(&addr, 0, sizeof (addr));
2497 	addr.sun_family = AF_UNIX;
2498 	/* CONSTCOND */
2499 	assert(sizeof (INETD_UDS_PATH) <= sizeof (addr.sun_path));
2500 	(void) strlcpy(addr.sun_path, INETD_UDS_PATH, sizeof (addr.sun_path));
2501 
2502 	if (bind(uds_fd, (struct sockaddr *)(&addr), sizeof (addr)) < 0) {
2503 		error_msg(gettext("Failed to bind socket to %s: %s"),
2504 		    INETD_UDS_PATH, strerror(errno));
2505 		(void) close(uds_fd);
2506 		return (-1);
2507 	}
2508 
2509 	(void) listen(uds_fd, UDS_BACKLOG);
2510 
2511 	if ((set_pollfd(uds_fd, POLLIN)) == -1) {
2512 		(void) close(uds_fd);
2513 		(void) unlink(INETD_UDS_PATH);
2514 		return (-1);
2515 	}
2516 
2517 	return (0);
2518 }
2519 
2520 static void
2521 uds_fini(void)
2522 {
2523 	if (uds_fd != -1)
2524 		(void) close(uds_fd);
2525 	(void) unlink(INETD_UDS_PATH);
2526 }
2527 
2528 /*
2529  * Handle an incoming request on the Unix Domain Socket. Returns -1 if there
2530  * was an error handling the event, else 0.
2531  */
2532 static int
2533 process_uds_event(void)
2534 {
2535 	uds_request_t		req;
2536 	int			fd;
2537 	struct sockaddr_un	addr;
2538 	socklen_t		len = sizeof (addr);
2539 	int			ret;
2540 	uint_t			retries = 0;
2541 
2542 	debug_msg("Entering process_uds_event");
2543 
2544 	do {
2545 		fd = accept(uds_fd, (struct sockaddr *)&addr, &len);
2546 	} while ((fd < 0) && (errno == EINTR));
2547 	if (fd < 0) {
2548 		if (errno != EWOULDBLOCK)
2549 			error_msg("accept failed: %s", strerror(errno));
2550 		return (-1);
2551 	}
2552 
2553 	for (retries = 0; retries < UDS_RECV_RETRIES; retries++) {
2554 		if (((ret = safe_read(fd, &req, sizeof (req))) != 1) ||
2555 		    (errno != EAGAIN))
2556 			break;
2557 
2558 		(void) poll(NULL, 0, 100);	/* 100ms pause */
2559 	}
2560 
2561 	if (ret != 0) {
2562 		error_msg(gettext("Failed read: %s"), strerror(errno));
2563 		(void) close(fd);
2564 		return (-1);
2565 	}
2566 
2567 	switch (req) {
2568 	case UR_REFRESH_INETD:
2569 		/* flag the request for event_loop() to process */
2570 		refresh_inetd_requested = B_TRUE;
2571 		(void) close(fd);
2572 		break;
2573 	case UR_STOP_INETD:
2574 		inetd_stop();
2575 		break;
2576 	default:
2577 		error_msg("unexpected UDS request");
2578 		(void) close(fd);
2579 		return (-1);
2580 	}
2581 
2582 	return (0);
2583 }
2584 
2585 /*
2586  * Perform checks for common exec string errors. We limit the checks to
2587  * whether the file exists, is a regular file, and has at least one execute
2588  * bit set. We leave the core security checks to exec() so as not to duplicate
2589  * and thus incur the associated drawbacks, but hope to catch the common
2590  * errors here.
2591  */
2592 static boolean_t
2593 passes_basic_exec_checks(const char *instance, const char *method,
2594     const char *path)
2595 {
2596 	struct stat	sbuf;
2597 
2598 	debug_msg("Entering passes_basic_exec_checks");
2599 
2600 	/* check the file exists */
2601 	while (stat(path, &sbuf) == -1) {
2602 		if (errno != EINTR) {
2603 			error_msg(gettext(
2604 			    "Can't stat the %s method of instance %s: %s"),
2605 			    method, instance, strerror(errno));
2606 			return (B_FALSE);
2607 		}
2608 	}
2609 
2610 	/*
2611 	 * Check if the file is a regular file and has at least one execute
2612 	 * bit set.
2613 	 */
2614 	if ((sbuf.st_mode & S_IFMT) != S_IFREG) {
2615 		error_msg(gettext(
2616 		    "The %s method of instance %s isn't a regular file"),
2617 		    method, instance);
2618 		return (B_FALSE);
2619 	} else if ((sbuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
2620 		error_msg(gettext("The %s method instance %s doesn't have "
2621 		    "any execute permissions set"), method, instance);
2622 		return (B_FALSE);
2623 	}
2624 
2625 	return (B_TRUE);
2626 }
2627 
2628 static void
2629 exec_method(instance_t *instance, instance_method_t method, method_info_t *mi,
2630     struct method_context *mthd_ctxt, const proto_info_t *pi)
2631 {
2632 	char		**args;
2633 	char 		**env;
2634 	const char	*errf;
2635 	int		serrno;
2636 	basic_cfg_t	*cfg = instance->config->basic;
2637 
2638 	if (method == IM_START) {
2639 		/*
2640 		 * If wrappers checks fail, pretend the method was exec'd and
2641 		 * failed.
2642 		 */
2643 		if (!tcp_wrappers_ok(instance))
2644 			exit(IMRET_FAILURE);
2645 	}
2646 
2647 	/*
2648 	 * Revert the disposition of handled signals and ignored signals to
2649 	 * their defaults, unblocking any blocked ones as a side effect.
2650 	 */
2651 	(void) sigset(SIGHUP, SIG_DFL);
2652 	(void) sigset(SIGTERM, SIG_DFL);
2653 	(void) sigset(SIGINT, SIG_DFL);
2654 
2655 	/*
2656 	 * Setup exec arguments. Do this before the fd setup below, so our
2657 	 * logging related file fd doesn't get taken over before we call
2658 	 * expand_address().
2659 	 */
2660 	if ((method == IM_START) &&
2661 	    (strcmp(mi->exec_args_we.we_wordv[0], "%A") == 0)) {
2662 		args = expand_address(instance, pi);
2663 	} else {
2664 		args = mi->exec_args_we.we_wordv;
2665 	}
2666 
2667 	/* Generate audit trail for start operations */
2668 	if (method == IM_START) {
2669 		adt_event_data_t *ae;
2670 		struct sockaddr_storage ss;
2671 		priv_set_t *privset;
2672 		socklen_t sslen = sizeof (ss);
2673 
2674 		if ((ae = adt_alloc_event(audit_handle, ADT_inetd_connect))
2675 		    == NULL) {
2676 			error_msg(gettext("Unable to allocate audit event for "
2677 			    "the %s method of instance %s"),
2678 			    methods[method].name, instance->fmri);
2679 			exit(IMRET_FAILURE);
2680 		}
2681 
2682 		/*
2683 		 * The inetd_connect audit record consists of:
2684 		 *	Service name
2685 		 *	Execution path
2686 		 *	Remote address and port
2687 		 *	Local port
2688 		 *	Process privileges
2689 		 */
2690 		ae->adt_inetd_connect.service_name = cfg->svc_name;
2691 		ae->adt_inetd_connect.cmd = mi->exec_path;
2692 
2693 		if (instance->remote_addr.ss_family == AF_INET) {
2694 			struct in_addr *in = SS_SINADDR(instance->remote_addr);
2695 			ae->adt_inetd_connect.ip_adr[0] = in->s_addr;
2696 			ae->adt_inetd_connect.ip_type = ADT_IPv4;
2697 		} else {
2698 			uint32_t *addr6;
2699 			int i;
2700 
2701 			ae->adt_inetd_connect.ip_type = ADT_IPv6;
2702 			addr6 = (uint32_t *)SS_SINADDR(instance->remote_addr);
2703 			for (i = 0; i < 4; ++i)
2704 				ae->adt_inetd_connect.ip_adr[i] = addr6[i];
2705 		}
2706 
2707 		ae->adt_inetd_connect.ip_remote_port =
2708 		    ntohs(SS_PORT(instance->remote_addr));
2709 
2710 		if (getsockname(instance->conn_fd, (struct sockaddr *)&ss,
2711 		    &sslen) == 0)
2712 			ae->adt_inetd_connect.ip_local_port =
2713 			    ntohs(SS_PORT(ss));
2714 
2715 		privset = mthd_ctxt->priv_set;
2716 		if (privset == NULL) {
2717 			privset = priv_allocset();
2718 			if (privset != NULL &&
2719 			    getppriv(PRIV_EFFECTIVE, privset) != 0) {
2720 				priv_freeset(privset);
2721 				privset = NULL;
2722 			}
2723 		}
2724 
2725 		ae->adt_inetd_connect.privileges = privset;
2726 
2727 		(void) adt_put_event(ae, ADT_SUCCESS, ADT_SUCCESS);
2728 		adt_free_event(ae);
2729 
2730 		if (privset != NULL && mthd_ctxt->priv_set == NULL)
2731 			priv_freeset(privset);
2732 	}
2733 
2734 	/*
2735 	 * Set method context before the fd setup below so we can output an
2736 	 * error message if it fails.
2737 	 */
2738 	if ((errno = restarter_set_method_context(mthd_ctxt, &errf)) != 0) {
2739 		const char *msg;
2740 
2741 		if (errno == -1) {
2742 			if (strcmp(errf, "core_set_process_path") == 0) {
2743 				msg = gettext("Failed to set the corefile path "
2744 				    "for the %s method of instance %s");
2745 			} else if (strcmp(errf, "setproject") == 0) {
2746 				msg = gettext("Failed to assign a resource "
2747 				    "control for the %s method of instance %s");
2748 			} else if (strcmp(errf, "pool_set_binding") == 0) {
2749 				msg = gettext("Failed to bind the %s method of "
2750 				    "instance %s to a pool due to a system "
2751 				    "error");
2752 			} else {
2753 				assert(0);
2754 				abort();
2755 			}
2756 
2757 			error_msg(msg, methods[method].name, instance->fmri);
2758 
2759 			exit(IMRET_FAILURE);
2760 		}
2761 
2762 		if (errf != NULL && strcmp(errf, "pool_set_binding") == 0) {
2763 			switch (errno) {
2764 			case ENOENT:
2765 				msg = gettext("Failed to find resource pool "
2766 				    "for the %s method of instance %s");
2767 				break;
2768 
2769 			case EBADF:
2770 				msg = gettext("Failed to bind the %s method of "
2771 				    "instance %s to a pool due to invalid "
2772 				    "configuration");
2773 				break;
2774 
2775 			case EINVAL:
2776 				msg = gettext("Failed to bind the %s method of "
2777 				    "instance %s to a pool due to invalid "
2778 				    "pool name");
2779 				break;
2780 
2781 			default:
2782 				assert(0);
2783 				abort();
2784 			}
2785 
2786 			exit(IMRET_FAILURE);
2787 		}
2788 
2789 		if (errf != NULL) {
2790 			error_msg(gettext("Failed to set credentials for the "
2791 			    "%s method of instance %s (%s: %s)"),
2792 			    methods[method].name, instance->fmri, errf,
2793 			    strerror(errno));
2794 			exit(IMRET_FAILURE);
2795 		}
2796 
2797 		switch (errno) {
2798 		case ENOMEM:
2799 			msg = gettext("Failed to set credentials for the %s "
2800 			    "method of instance %s (out of memory)");
2801 			break;
2802 
2803 		case ENOENT:
2804 			msg = gettext("Failed to set credentials for the %s "
2805 			    "method of instance %s (no passwd or shadow "
2806 			    "entry for user)");
2807 			break;
2808 
2809 		default:
2810 			assert(0);
2811 			abort();
2812 		}
2813 
2814 		error_msg(msg, methods[method].name, instance->fmri);
2815 		exit(IMRET_FAILURE);
2816 	}
2817 
2818 	/* let exec() free mthd_ctxt */
2819 
2820 	/* setup standard fds */
2821 	if (method == IM_START) {
2822 		(void) dup2(instance->conn_fd, STDIN_FILENO);
2823 	} else {
2824 		(void) close(STDIN_FILENO);
2825 		(void) open("/dev/null", O_RDONLY);
2826 	}
2827 	(void) dup2(STDIN_FILENO, STDOUT_FILENO);
2828 	(void) dup2(STDIN_FILENO, STDERR_FILENO);
2829 
2830 	closefrom(STDERR_FILENO + 1);
2831 
2832 	method_preexec();
2833 
2834 	env = set_smf_env(mthd_ctxt, instance, methods[method].name);
2835 
2836 	if (env != NULL) {
2837 		do {
2838 			(void) execve(mi->exec_path, args, env);
2839 		} while (errno == EINTR);
2840 	}
2841 
2842 	serrno = errno;
2843 	/* start up logging again to report the error */
2844 	msg_init();
2845 	errno = serrno;
2846 
2847 	error_msg(
2848 	    gettext("Failed to exec %s method of instance %s: %s"),
2849 	    methods[method].name, instance->fmri, strerror(errno));
2850 
2851 	if ((method == IM_START) && (instance->config->basic->iswait)) {
2852 		/*
2853 		 * We couldn't exec the start method for a wait type service.
2854 		 * Eat up data from the endpoint, so that hopefully the
2855 		 * service's fd won't wake poll up on the next time round
2856 		 * event_loop(). This behavior is carried over from the old
2857 		 * inetd, and it seems somewhat arbitrary that it isn't
2858 		 * also done in the case of fork failures; but I guess
2859 		 * it assumes an exec failure is less likely to be the result
2860 		 * of a resource shortage, and is thus not worth retrying.
2861 		 */
2862 		consume_wait_data(instance, 0);
2863 	}
2864 
2865 	exit(IMRET_FAILURE);
2866 }
2867 
2868 static restarter_error_t
2869 get_method_error_success(instance_method_t method)
2870 {
2871 	switch (method) {
2872 	case IM_OFFLINE:
2873 		return (RERR_RESTART);
2874 	case IM_ONLINE:
2875 		return (RERR_RESTART);
2876 	case IM_DISABLE:
2877 		return (RERR_RESTART);
2878 	case IM_REFRESH:
2879 		return (RERR_REFRESH);
2880 	case IM_START:
2881 		return (RERR_RESTART);
2882 	}
2883 	(void) fprintf(stderr, gettext("Internal fatal error in inetd.\n"));
2884 
2885 	abort();
2886 	/* NOTREACHED */
2887 }
2888 
2889 /*
2890  * Runs the specified method of the specified service instance.
2891  * If the method was never specified, we handle it the same as if the
2892  * method was called and returned success, carrying on any transition the
2893  * instance may be in the midst of.
2894  * If the method isn't executable in its specified profile or an error occurs
2895  * forking a process to run the method in the function returns -1.
2896  * If a method binary is successfully executed, the function switches the
2897  * instance's cur state to the method's associated 'run' state and the next
2898  * state to the methods associated next state.
2899  * Returns -1 if there's an error before forking, else 0.
2900  */
2901 int
2902 run_method(instance_t *instance, instance_method_t method,
2903     const proto_info_t *start_info)
2904 {
2905 	pid_t			child_pid;
2906 	method_info_t		*mi;
2907 	struct method_context	*mthd_ctxt = NULL;
2908 	const char		*errstr;
2909 	int			sig;
2910 	int			ret;
2911 	instance_cfg_t		*cfg = instance->config;
2912 	ctid_t			cid;
2913 	boolean_t		trans_failure = B_TRUE;
2914 	int			serrno;
2915 
2916 	debug_msg("Entering run_method, instance: %s, method: %s",
2917 	    instance->fmri, methods[method].name);
2918 
2919 	/*
2920 	 * Don't bother updating the instance's state for the start method
2921 	 * as there isn't a separate start method state.
2922 	 */
2923 	if (method != IM_START)
2924 		update_instance_states(instance, get_method_state(method),
2925 		    methods[method].dst_state,
2926 		    get_method_error_success(method));
2927 
2928 	if ((mi = cfg->methods[method]) == NULL) {
2929 		/*
2930 		 * An unspecified method. Since the absence of this method
2931 		 * must be valid (otherwise it would have been caught
2932 		 * during configuration validation), simply pretend the method
2933 		 * ran and returned success.
2934 		 */
2935 		process_non_start_term(instance, IMRET_SUCCESS);
2936 		return (0);
2937 	}
2938 
2939 	/* Handle special method tokens, not allowed on start */
2940 	if (method != IM_START) {
2941 		if (restarter_is_null_method(mi->exec_path)) {
2942 			/* :true means nothing should be done */
2943 			process_non_start_term(instance, IMRET_SUCCESS);
2944 			return (0);
2945 		}
2946 
2947 		if ((sig = restarter_is_kill_method(mi->exec_path)) >= 0) {
2948 			/* Carry out contract assassination */
2949 			ret = iterate_repository_contracts(instance, sig);
2950 			/* ENOENT means we didn't find any contracts */
2951 			if (ret != 0 && ret != ENOENT) {
2952 				error_msg(gettext("Failed to send signal %d "
2953 				    "to contracts of instance %s: %s"), sig,
2954 				    instance->fmri, strerror(ret));
2955 				goto prefork_failure;
2956 			} else {
2957 				process_non_start_term(instance, IMRET_SUCCESS);
2958 				return (0);
2959 			}
2960 		}
2961 
2962 		if ((sig = restarter_is_kill_proc_method(mi->exec_path)) >= 0) {
2963 			/* Carry out process assassination */
2964 			rep_val_t	*rv;
2965 
2966 			ret = IMRET_SUCCESS;
2967 			for (rv = uu_list_first(instance->start_pids);
2968 			    rv != NULL;
2969 			    rv = uu_list_next(instance->start_pids, rv)) {
2970 				if ((kill((pid_t)rv->val, sig) != 0) &&
2971 				    (errno != ESRCH)) {
2972 					ret = IMRET_FAILURE;
2973 					error_msg(gettext("Unable to signal "
2974 					    "start process of instance %s: %s"),
2975 					    instance->fmri, strerror(errno));
2976 				}
2977 			}
2978 
2979 			process_non_start_term(instance, ret);
2980 			return (0);
2981 		}
2982 	}
2983 
2984 	/*
2985 	 * Get the associated method context before the fork so we can
2986 	 * modify the instances state if things go wrong.
2987 	 */
2988 	if ((mthd_ctxt = read_method_context(instance->fmri,
2989 	    methods[method].name, mi->exec_path, &errstr)) == NULL) {
2990 		error_msg(gettext("Failed to retrieve method context for the "
2991 		    "%s method of instance %s: %s"), methods[method].name,
2992 		    instance->fmri, errstr);
2993 		goto prefork_failure;
2994 	}
2995 
2996 	/*
2997 	 * Perform some basic checks before we fork to limit the possibility
2998 	 * of exec failures, so we can modify the instance state if necessary.
2999 	 */
3000 	if (!passes_basic_exec_checks(instance->fmri, methods[method].name,
3001 	    mi->exec_path)) {
3002 		trans_failure = B_FALSE;
3003 		goto prefork_failure;
3004 	}
3005 
3006 	if (contract_prefork() == -1)
3007 		goto prefork_failure;
3008 	child_pid = fork();
3009 	serrno = errno;
3010 	contract_postfork();
3011 
3012 	switch (child_pid) {
3013 	case -1:
3014 		error_msg(gettext(
3015 		    "Unable to fork %s method of instance %s: %s"),
3016 		    methods[method].name, instance->fmri, strerror(serrno));
3017 		if ((serrno != EAGAIN) && (serrno != ENOMEM))
3018 			trans_failure = B_FALSE;
3019 		goto prefork_failure;
3020 	case 0:				/* child */
3021 		exec_method(instance, method, mi, mthd_ctxt, start_info);
3022 		/* NOTREACHED */
3023 	default:			/* parent */
3024 		restarter_free_method_context(mthd_ctxt);
3025 		mthd_ctxt = NULL;
3026 
3027 		if (get_latest_contract(&cid) < 0)
3028 			cid = -1;
3029 
3030 		/*
3031 		 * Register this method so its termination is noticed and
3032 		 * the state transition this method participates in is
3033 		 * continued.
3034 		 */
3035 		if (register_method(instance, child_pid, cid, method) != 0) {
3036 			/*
3037 			 * Since we will never find out about the termination
3038 			 * of this method, if it's a non-start method treat
3039 			 * is as a failure so we don't block restarter event
3040 			 * processing on it whilst it languishes in a method
3041 			 * running state.
3042 			 */
3043 			error_msg(gettext("Failed to monitor status of "
3044 			    "%s method of instance %s"), methods[method].name,
3045 			    instance->fmri);
3046 			if (method != IM_START)
3047 				process_non_start_term(instance, IMRET_FAILURE);
3048 		}
3049 
3050 		add_method_ids(instance, child_pid, cid, method);
3051 
3052 		/* do tcp tracing for those nowait instances that request it */
3053 		if ((method == IM_START) && cfg->basic->do_tcp_trace &&
3054 		    !cfg->basic->iswait) {
3055 			char buf[INET6_ADDRSTRLEN];
3056 
3057 			syslog(LOG_NOTICE, "%s[%d] from %s %d",
3058 			    cfg->basic->svc_name, child_pid,
3059 			    inet_ntop_native(instance->remote_addr.ss_family,
3060 			    SS_SINADDR(instance->remote_addr), buf,
3061 			    sizeof (buf)),
3062 			    ntohs(SS_PORT(instance->remote_addr)));
3063 		}
3064 	}
3065 
3066 	return (0);
3067 
3068 prefork_failure:
3069 	if (mthd_ctxt != NULL) {
3070 		restarter_free_method_context(mthd_ctxt);
3071 		mthd_ctxt = NULL;
3072 	}
3073 
3074 	if (method == IM_START) {
3075 		/*
3076 		 * Only place a start method in maintenance if we're sure
3077 		 * that the failure was non-transient.
3078 		 */
3079 		if (!trans_failure) {
3080 			destroy_bound_fds(instance);
3081 			update_state(instance, IIS_MAINTENANCE, RERR_FAULT);
3082 		}
3083 	} else {
3084 		/* treat the failure as if the method ran and failed */
3085 		process_non_start_term(instance, IMRET_FAILURE);
3086 	}
3087 
3088 	return (-1);
3089 }
3090 
3091 static int
3092 accept_connection(instance_t *instance, proto_info_t *pi)
3093 {
3094 	int		fd;
3095 	socklen_t	size;
3096 
3097 	debug_msg("Entering accept_connection");
3098 
3099 	if (instance->config->basic->istlx) {
3100 		fd = tlx_accept(instance->fmri, (tlx_info_t *)pi,
3101 		    &(instance->remote_addr));
3102 	} else {
3103 		size = sizeof (instance->remote_addr);
3104 		fd = accept(pi->listen_fd,
3105 		    (struct sockaddr *)&(instance->remote_addr), &size);
3106 		if (fd < 0)
3107 			error_msg("accept: %s", strerror(errno));
3108 	}
3109 
3110 	return (fd);
3111 }
3112 
3113 /*
3114  * Handle an incoming connection request for a nowait service.
3115  * This involves accepting the incoming connection on a new fd. Connection
3116  * rate checks are then performed, transitioning the service to the
3117  * conrate offline state if these fail. Otherwise, the service's start method
3118  * is run (performing TCP wrappers checks if applicable as we do), and on
3119  * success concurrent copies checking is done, transitioning the service to the
3120  * copies offline state if this fails.
3121  */
3122 static void
3123 process_nowait_request(instance_t *instance, proto_info_t *pi)
3124 {
3125 	basic_cfg_t		*cfg = instance->config->basic;
3126 	int			ret;
3127 	adt_event_data_t	*ae;
3128 	char			buf[BUFSIZ];
3129 
3130 	debug_msg("Entering process_nowait_req");
3131 
3132 	/* accept nowait service connections on a new fd */
3133 	if ((instance->conn_fd = accept_connection(instance, pi)) == -1) {
3134 		/*
3135 		 * Failed accept. Return and allow the event loop to initiate
3136 		 * another attempt later if the request is still present.
3137 		 */
3138 		return;
3139 	}
3140 
3141 	/*
3142 	 * Limit connection rate of nowait services. If either conn_rate_max
3143 	 * or conn_rate_offline are <= 0, no connection rate limit checking
3144 	 * is done. If the configured rate is exceeded, the instance is taken
3145 	 * to the connrate_offline state and a timer scheduled to try and
3146 	 * bring the instance back online after the configured offline time.
3147 	 */
3148 	if ((cfg->conn_rate_max > 0) && (cfg->conn_rate_offline > 0)) {
3149 		if (instance->conn_rate_count++ == 0) {
3150 			instance->conn_rate_start = time(NULL);
3151 		} else if (instance->conn_rate_count >
3152 		    cfg->conn_rate_max) {
3153 			time_t now = time(NULL);
3154 
3155 			if ((now - instance->conn_rate_start) > 1) {
3156 				instance->conn_rate_start = now;
3157 				instance->conn_rate_count = 1;
3158 			} else {
3159 				/* Generate audit record */
3160 				if ((ae = adt_alloc_event(audit_handle,
3161 				    ADT_inetd_ratelimit)) == NULL) {
3162 					error_msg(gettext("Unable to allocate "
3163 					    "rate limit audit event"));
3164 				} else {
3165 					adt_inetd_ratelimit_t *rl =
3166 					    &ae->adt_inetd_ratelimit;
3167 					/*
3168 					 * The inetd_ratelimit audit
3169 					 * record consists of:
3170 					 * 	Service name
3171 					 *	Connection rate limit
3172 					 */
3173 					rl->service_name = cfg->svc_name;
3174 					(void) snprintf(buf, sizeof (buf),
3175 					    "limit=%lld", cfg->conn_rate_max);
3176 					rl->limit = buf;
3177 					(void) adt_put_event(ae, ADT_SUCCESS,
3178 					    ADT_SUCCESS);
3179 					adt_free_event(ae);
3180 				}
3181 
3182 				error_msg(gettext(
3183 				    "Instance %s has exceeded its configured "
3184 				    "connection rate, additional connections "
3185 				    "will not be accepted for %d seconds"),
3186 				    instance->fmri, cfg->conn_rate_offline);
3187 
3188 				close_net_fd(instance, instance->conn_fd);
3189 				instance->conn_fd = -1;
3190 
3191 				destroy_bound_fds(instance);
3192 
3193 				instance->conn_rate_count = 0;
3194 
3195 				instance->conn_rate_exceeded = B_TRUE;
3196 				(void) run_method(instance, IM_OFFLINE, NULL);
3197 
3198 				return;
3199 			}
3200 		}
3201 	}
3202 
3203 	ret = run_method(instance, IM_START, pi);
3204 
3205 	close_net_fd(instance, instance->conn_fd);
3206 	instance->conn_fd = -1;
3207 
3208 	if (ret == -1) /* the method wasn't forked  */
3209 		return;
3210 
3211 	instance->copies++;
3212 
3213 	/*
3214 	 * Limit concurrent connections of nowait services.
3215 	 */
3216 	if (copies_limit_exceeded(instance)) {
3217 		/* Generate audit record */
3218 		if ((ae = adt_alloc_event(audit_handle, ADT_inetd_copylimit))
3219 		    == NULL) {
3220 			error_msg(gettext("Unable to allocate copy limit "
3221 			    "audit event"));
3222 		} else {
3223 			/*
3224 			 * The inetd_copylimit audit record consists of:
3225 			 *	Service name
3226 			 * 	Copy limit
3227 			 */
3228 			ae->adt_inetd_copylimit.service_name = cfg->svc_name;
3229 			(void) snprintf(buf, sizeof (buf), "limit=%lld",
3230 			    cfg->max_copies);
3231 			ae->adt_inetd_copylimit.limit = buf;
3232 			(void) adt_put_event(ae, ADT_SUCCESS, ADT_SUCCESS);
3233 			adt_free_event(ae);
3234 		}
3235 
3236 		warn_msg(gettext("Instance %s has reached its maximum "
3237 		    "configured copies, no new connections will be accepted"),
3238 		    instance->fmri);
3239 		destroy_bound_fds(instance);
3240 		(void) run_method(instance, IM_OFFLINE, NULL);
3241 	}
3242 }
3243 
3244 /*
3245  * Handle an incoming request for a wait type service.
3246  * Failure rate checking is done first, taking the service to the maintenance
3247  * state if the checks fail. Following this, the service's start method is run,
3248  * and on success, we stop listening for new requests for this service.
3249  */
3250 static void
3251 process_wait_request(instance_t *instance, const proto_info_t *pi)
3252 {
3253 	basic_cfg_t		*cfg = instance->config->basic;
3254 	int			ret;
3255 	adt_event_data_t	*ae;
3256 	char			buf[BUFSIZ];
3257 
3258 	debug_msg("Entering process_wait_request");
3259 
3260 	instance->conn_fd = pi->listen_fd;
3261 
3262 	/*
3263 	 * Detect broken servers and transition them to maintenance. If a
3264 	 * wait type service exits without accepting the connection or
3265 	 * consuming (reading) the datagram, that service's descriptor will
3266 	 * select readable again, and inetd will fork another instance of
3267 	 * the server. If either wait_fail_cnt or wait_fail_interval are <= 0,
3268 	 * no failure rate detection is done.
3269 	 */
3270 	if ((cfg->wait_fail_cnt > 0) && (cfg->wait_fail_interval > 0)) {
3271 		if (instance->fail_rate_count++ == 0) {
3272 			instance->fail_rate_start = time(NULL);
3273 		} else if (instance->fail_rate_count > cfg->wait_fail_cnt) {
3274 			time_t now = time(NULL);
3275 
3276 			if ((now - instance->fail_rate_start) >
3277 			    cfg->wait_fail_interval) {
3278 				instance->fail_rate_start = now;
3279 				instance->fail_rate_count = 1;
3280 			} else {
3281 				/* Generate audit record */
3282 				if ((ae = adt_alloc_event(audit_handle,
3283 				    ADT_inetd_failrate)) == NULL) {
3284 					error_msg(gettext("Unable to allocate "
3285 					    "failure rate audit event"));
3286 				} else {
3287 					adt_inetd_failrate_t *fr =
3288 					    &ae->adt_inetd_failrate;
3289 					/*
3290 					 * The inetd_failrate audit record
3291 					 * consists of:
3292 					 * 	Service name
3293 					 * 	Failure rate
3294 					 *	Interval
3295 					 * Last two are expressed as k=v pairs
3296 					 * in the values field.
3297 					 */
3298 					fr->service_name = cfg->svc_name;
3299 					(void) snprintf(buf, sizeof (buf),
3300 					    "limit=%lld,interval=%d",
3301 					    cfg->wait_fail_cnt,
3302 					    cfg->wait_fail_interval);
3303 					fr->values = buf;
3304 					(void) adt_put_event(ae, ADT_SUCCESS,
3305 					    ADT_SUCCESS);
3306 					adt_free_event(ae);
3307 				}
3308 
3309 				error_msg(gettext(
3310 				    "Instance %s has exceeded its configured "
3311 				    "failure rate, transitioning to "
3312 				    "maintenance"), instance->fmri);
3313 				instance->fail_rate_count = 0;
3314 
3315 				destroy_bound_fds(instance);
3316 
3317 				instance->maintenance_req = B_TRUE;
3318 				(void) run_method(instance, IM_OFFLINE, NULL);
3319 				return;
3320 			}
3321 		}
3322 	}
3323 
3324 	ret = run_method(instance, IM_START, pi);
3325 
3326 	instance->conn_fd = -1;
3327 
3328 	if (ret == 0) {
3329 		/*
3330 		 * Stop listening for connections now we've fired off the
3331 		 * server for a wait type instance.
3332 		 */
3333 		(void) poll_bound_fds(instance, B_FALSE);
3334 	}
3335 }
3336 
3337 /*
3338  * Process any networks requests for each proto for each instance.
3339  */
3340 void
3341 process_network_events(void)
3342 {
3343 	instance_t	*instance;
3344 
3345 	debug_msg("Entering process_network_events");
3346 
3347 	for (instance = uu_list_first(instance_list); instance != NULL;
3348 	    instance = uu_list_next(instance_list, instance)) {
3349 		basic_cfg_t	*cfg;
3350 		proto_info_t	*pi;
3351 
3352 		/*
3353 		 * Ignore instances in states that definitely don't have any
3354 		 * listening fds.
3355 		 */
3356 		switch (instance->cur_istate) {
3357 		case IIS_ONLINE:
3358 		case IIS_DEGRADED:
3359 		case IIS_IN_REFRESH_METHOD:
3360 			break;
3361 		default:
3362 			continue;
3363 		}
3364 
3365 		cfg = instance->config->basic;
3366 
3367 		for (pi = uu_list_first(cfg->proto_list); pi != NULL;
3368 		    pi = uu_list_next(cfg->proto_list, pi)) {
3369 			if ((pi->listen_fd != -1) &&
3370 			    isset_pollfd(pi->listen_fd)) {
3371 				if (cfg->iswait) {
3372 					process_wait_request(instance, pi);
3373 				} else {
3374 					process_nowait_request(instance, pi);
3375 				}
3376 			}
3377 		}
3378 	}
3379 }
3380 
3381 /* ARGSUSED0 */
3382 static void
3383 sigterm_handler(int sig)
3384 {
3385 	debug_msg("Entering sigterm_handler");
3386 
3387 	got_sigterm = B_TRUE;
3388 }
3389 
3390 /* ARGSUSED0 */
3391 static void
3392 sighup_handler(int sig)
3393 {
3394 	debug_msg("Entering sighup_handler");
3395 
3396 	refresh_inetd_requested = B_TRUE;
3397 }
3398 
3399 /*
3400  * inetd's major work loop. This function sits in poll waiting for events
3401  * to occur, processing them when they do. The possible events are
3402  * master restarter requests, expired timer queue timers, stop/refresh signal
3403  * requests, contract events indicating process termination, stop/refresh
3404  * requests originating from one of the stop/refresh inetd processes and
3405  * network events.
3406  * The loop is exited when a stop request is received and processed, and
3407  * all the instances have reached a suitable 'stopping' state.
3408  */
3409 static void
3410 event_loop(void)
3411 {
3412 	instance_t		*instance;
3413 	int			timeout;
3414 
3415 	debug_msg("Entering event_loop");
3416 
3417 	for (;;) {
3418 		int	pret = -1;
3419 
3420 		timeout = iu_earliest_timer(timer_queue);
3421 
3422 		debug_msg("Doing signal check/poll");
3423 		if (!got_sigterm && !refresh_inetd_requested) {
3424 			pret = poll(poll_fds, num_pollfds, timeout);
3425 			if ((pret == -1) && (errno != EINTR)) {
3426 				error_msg(gettext("poll failure: %s"),
3427 				    strerror(errno));
3428 				continue;
3429 			}
3430 			debug_msg("Exiting poll, returned: %d", pret);
3431 		}
3432 
3433 		if (got_sigterm) {
3434 			msg_fini();
3435 			inetd_stop();
3436 			got_sigterm = B_FALSE;
3437 			goto check_if_stopped;
3438 		}
3439 
3440 		/*
3441 		 * Process any stop/refresh requests from the Unix Domain
3442 		 * Socket.
3443 		 */
3444 		if ((pret != -1) && isset_pollfd(uds_fd)) {
3445 			while (process_uds_event() == 0)
3446 				;
3447 		}
3448 
3449 		/*
3450 		 * Process refresh request. We do this check after the UDS
3451 		 * event check above, as it would be wasted processing if we
3452 		 * started refreshing inetd based on a SIGHUP, and then were
3453 		 * told to shut-down via a UDS event.
3454 		 */
3455 		if (refresh_inetd_requested) {
3456 			refresh_inetd_requested = B_FALSE;
3457 			if (!inetd_stopping)
3458 				inetd_refresh();
3459 		}
3460 
3461 		/*
3462 		 * We were interrupted by a signal. Don't waste any more
3463 		 * time processing a potentially inaccurate poll return.
3464 		 */
3465 		if (pret == -1)
3466 			continue;
3467 
3468 		/*
3469 		 * Process any instance restarter events.
3470 		 */
3471 		if (isset_pollfd(rst_event_pipe[PE_CONSUMER])) {
3472 			while (process_restarter_event() == 0)
3473 				;
3474 		}
3475 
3476 		/*
3477 		 * Process any expired timers (bind retry, con-rate offline,
3478 		 * method timeouts).
3479 		 */
3480 		(void) iu_expire_timers(timer_queue);
3481 
3482 		process_terminated_methods();
3483 
3484 		/*
3485 		 * If inetd is stopping, check whether all our managed
3486 		 * instances have been stopped and we can return.
3487 		 */
3488 		if (inetd_stopping) {
3489 check_if_stopped:
3490 			for (instance = uu_list_first(instance_list);
3491 			    instance != NULL;
3492 			    instance = uu_list_next(instance_list, instance)) {
3493 				if (!instance_stopped(instance)) {
3494 					debug_msg("%s not yet stopped",
3495 					    instance->fmri);
3496 					break;
3497 				}
3498 			}
3499 			/* if all instances are stopped, return */
3500 			if (instance == NULL)
3501 				return;
3502 		}
3503 
3504 		process_network_events();
3505 	}
3506 }
3507 
3508 static void
3509 fini(void)
3510 {
3511 	debug_msg("Entering fini");
3512 
3513 	method_fini();
3514 	uds_fini();
3515 	if (timer_queue != NULL)
3516 		iu_tq_destroy(timer_queue);
3517 
3518 
3519 	/*
3520 	 * We don't bother to undo the restarter interface at all.
3521 	 * Because of quirks in the interface, there is no way to
3522 	 * disconnect from the channel and cause any new events to be
3523 	 * queued.  However, any events which are received and not
3524 	 * acknowledged will be re-sent when inetd restarts as long as inetd
3525 	 * uses the same subscriber ID, which it does.
3526 	 *
3527 	 * By keeping the event pipe open but ignoring it, any events which
3528 	 * occur will cause restarter_event_proxy to hang without breaking
3529 	 * anything.
3530 	 */
3531 
3532 	if (instance_list != NULL) {
3533 		void		*cookie = NULL;
3534 		instance_t	*inst;
3535 
3536 		while ((inst = uu_list_teardown(instance_list, &cookie)) !=
3537 		    NULL)
3538 			destroy_instance(inst);
3539 		uu_list_destroy(instance_list);
3540 	}
3541 	if (instance_pool != NULL)
3542 		uu_list_pool_destroy(instance_pool);
3543 	tlx_fini();
3544 	config_fini();
3545 	repval_fini();
3546 	poll_fini();
3547 
3548 	/* Close audit session */
3549 	(void) adt_end_session(audit_handle);
3550 }
3551 
3552 static int
3553 init(void)
3554 {
3555 	int err;
3556 
3557 	debug_msg("Entering init");
3558 
3559 	if (repval_init() < 0)
3560 		goto failed;
3561 
3562 	if (config_init() < 0)
3563 		goto failed;
3564 
3565 	if (tlx_init() < 0)
3566 		goto failed;
3567 
3568 	/* Setup instance list. */
3569 	if ((instance_pool = uu_list_pool_create("instance_pool",
3570 	    sizeof (instance_t), offsetof(instance_t, link), NULL,
3571 	    UU_LIST_POOL_DEBUG)) == NULL) {
3572 		error_msg("%s: %s",
3573 		    gettext("Failed to create instance pool"),
3574 		    uu_strerror(uu_error()));
3575 		goto failed;
3576 	}
3577 	if ((instance_list = uu_list_create(instance_pool, NULL, 0)) == NULL) {
3578 		error_msg("%s: %s",
3579 		    gettext("Failed to create instance list"),
3580 		    uu_strerror(uu_error()));
3581 		goto failed;
3582 	}
3583 
3584 	/*
3585 	 * Create event pipe to communicate events with the main event
3586 	 * loop and add it to the event loop's fdset.
3587 	 */
3588 	if (pipe(rst_event_pipe) < 0) {
3589 		error_msg("pipe: %s", strerror(errno));
3590 		goto failed;
3591 	}
3592 	/*
3593 	 * We only leave the producer end to block on reads/writes as we
3594 	 * can't afford to block in the main thread, yet need to in
3595 	 * the restarter event thread, so it can sit and wait for an
3596 	 * acknowledgement to be written to the pipe.
3597 	 */
3598 	disable_blocking(rst_event_pipe[PE_CONSUMER]);
3599 	if ((set_pollfd(rst_event_pipe[PE_CONSUMER], POLLIN)) == -1)
3600 		goto failed;
3601 
3602 	/*
3603 	 * Register with master restarter for managed service events. This
3604 	 * will fail, amongst other reasons, if inetd is already running.
3605 	 */
3606 	if ((err = restarter_bind_handle(RESTARTER_EVENT_VERSION,
3607 	    INETD_INSTANCE_FMRI, restarter_event_proxy, 0,
3608 	    &rst_event_handle)) != 0) {
3609 		error_msg(gettext(
3610 		    "Failed to register for restarter events: %s"),
3611 		    strerror(err));
3612 		goto failed;
3613 	}
3614 
3615 	if (contract_init() < 0)
3616 		goto failed;
3617 
3618 	if ((timer_queue = iu_tq_create()) == NULL) {
3619 		error_msg(gettext("Failed to create timer queue."));
3620 		goto failed;
3621 	}
3622 
3623 	if (uds_init() < 0)
3624 		goto failed;
3625 
3626 	if (method_init() < 0)
3627 		goto failed;
3628 
3629 	/* Initialize auditing session */
3630 	if (adt_start_session(&audit_handle, NULL, ADT_USE_PROC_DATA) != 0) {
3631 		error_msg(gettext("Unable to start audit session"));
3632 	}
3633 
3634 	/*
3635 	 * Initialize signal dispositions/masks
3636 	 */
3637 	(void) sigset(SIGHUP, sighup_handler);
3638 	(void) sigset(SIGTERM, sigterm_handler);
3639 	(void) sigignore(SIGINT);
3640 
3641 	return (0);
3642 
3643 failed:
3644 	fini();
3645 	return (-1);
3646 }
3647 
3648 static int
3649 start_method(void)
3650 {
3651 	int	i;
3652 	int	pipe_fds[2];
3653 	int	child;
3654 
3655 	debug_msg("ENTERING START_METHOD:");
3656 
3657 	/* Create pipe for child to notify parent of initialization success. */
3658 	if (pipe(pipe_fds) < 0) {
3659 		debug_msg("pipe: %s", strerror(errno));
3660 		return (SMF_EXIT_ERR_OTHER);
3661 	}
3662 
3663 	if ((child = fork()) == -1) {
3664 		debug_msg("fork: %s", strerror(errno));
3665 		(void) close(pipe_fds[PE_CONSUMER]);
3666 		(void) close(pipe_fds[PE_PRODUCER]);
3667 		return (SMF_EXIT_ERR_OTHER);
3668 	} else if (child > 0) {			/* parent */
3669 
3670 		/* Wait on child to return success of initialization. */
3671 		(void) close(pipe_fds[PE_PRODUCER]);
3672 		if ((safe_read(pipe_fds[PE_CONSUMER], &i, sizeof (i)) != 0) ||
3673 		    (i < 0)) {
3674 			error_msg(gettext(
3675 			    "Initialization failed, unable to start"));
3676 			(void) close(pipe_fds[PE_CONSUMER]);
3677 			/*
3678 			 * Batch all initialization errors as 'other' errors,
3679 			 * resulting in retries being attempted.
3680 			 */
3681 			return (SMF_EXIT_ERR_OTHER);
3682 		} else {
3683 			(void) close(pipe_fds[PE_CONSUMER]);
3684 			return (SMF_EXIT_OK);
3685 		}
3686 	} else {				/* child */
3687 		/*
3688 		 * Perform initialization and return success code down
3689 		 * the pipe.
3690 		 */
3691 		(void) close(pipe_fds[PE_CONSUMER]);
3692 		i = init();
3693 		if ((safe_write(pipe_fds[PE_PRODUCER], &i, sizeof (i)) < 0) ||
3694 		    (i < 0)) {
3695 			error_msg(gettext("pipe write failure: %s"),
3696 			    strerror(errno));
3697 			exit(1);
3698 		}
3699 		(void) close(pipe_fds[PE_PRODUCER]);
3700 
3701 		(void) setsid();
3702 
3703 		/*
3704 		 * Log a message if the configuration file has changed since
3705 		 * inetconv was last run.
3706 		 */
3707 		check_conf_file();
3708 
3709 		event_loop();
3710 
3711 		fini();
3712 		debug_msg("inetd stopped");
3713 		msg_fini();
3714 		exit(0);
3715 	}
3716 	/* NOTREACHED */
3717 }
3718 
3719 /*
3720  * When inetd is run from outside the SMF, this message is output to provide
3721  * the person invoking inetd with further information that will help them
3722  * understand how to start and stop inetd, and to achieve the other
3723  * behaviors achievable with the legacy inetd command line interface, if
3724  * it is possible.
3725  */
3726 static void
3727 legacy_usage(void)
3728 {
3729 	(void) fprintf(stderr,
3730 	    "inetd is now an smf(5) managed service and can no longer be run "
3731 	    "from the\n"
3732 	    "command line. To enable or disable inetd refer to svcadm(1M) on\n"
3733 	    "how to enable \"%s\", the inetd instance.\n"
3734 	    "\n"
3735 	    "The traditional inetd command line option mappings are:\n"
3736 	    "\t-d : there is no supported debug output\n"
3737 	    "\t-s : inetd is only runnable from within the SMF\n"
3738 	    "\t-t : See inetadm(1M) on how to enable TCP tracing\n"
3739 	    "\t-r : See inetadm(1M) on how to set a failure rate\n"
3740 	    "\n"
3741 	    "To specify an alternative configuration file see svccfg(1M)\n"
3742 	    "for how to modify the \"%s/%s\" string type property of\n"
3743 	    "the inetd instance, and modify it according to the syntax:\n"
3744 	    "\"%s [alt_config_file] %%m\".\n"
3745 	    "\n"
3746 	    "For further information on inetd see inetd(1M).\n",
3747 	    INETD_INSTANCE_FMRI, START_METHOD_ARG, SCF_PROPERTY_EXEC,
3748 	    INETD_PATH);
3749 }
3750 
3751 /*
3752  * Usage message printed out for usage errors when running under the SMF.
3753  */
3754 static void
3755 smf_usage(const char *arg0)
3756 {
3757 	error_msg("Usage: %s [alt_conf_file] %s|%s|%s", arg0, START_METHOD_ARG,
3758 	    STOP_METHOD_ARG, REFRESH_METHOD_ARG);
3759 }
3760 
3761 /*
3762  * Returns B_TRUE if we're being run from within the SMF, else B_FALSE.
3763  */
3764 static boolean_t
3765 run_through_smf(void)
3766 {
3767 	char *fmri;
3768 
3769 	/*
3770 	 * check if the instance fmri environment variable has been set by
3771 	 * our restarter.
3772 	 */
3773 	return (((fmri = getenv("SMF_FMRI")) != NULL) &&
3774 	    (strcmp(fmri, INETD_INSTANCE_FMRI) == 0));
3775 }
3776 
3777 int
3778 main(int argc, char *argv[])
3779 {
3780 	char		*method;
3781 	int		ret;
3782 
3783 #if	!defined(TEXT_DOMAIN)
3784 #define	TEXT_DOMAIN "SYS_TEST"
3785 #endif
3786 	(void) textdomain(TEXT_DOMAIN);
3787 	(void) setlocale(LC_ALL, "");
3788 
3789 	if (!run_through_smf()) {
3790 		legacy_usage();
3791 		return (SMF_EXIT_ERR_NOSMF);
3792 	}
3793 
3794 	msg_init();	/* setup logging */
3795 
3796 	(void) enable_extended_FILE_stdio(-1, -1);
3797 
3798 	/* inetd invocation syntax is inetd [alt_conf_file] method_name */
3799 
3800 	switch (argc) {
3801 	case 2:
3802 		method = argv[1];
3803 		break;
3804 	case 3:
3805 		conf_file = argv[1];
3806 		method = argv[2];
3807 		break;
3808 	default:
3809 		smf_usage(argv[0]);
3810 		return (SMF_EXIT_ERR_CONFIG);
3811 
3812 	}
3813 
3814 	if (strcmp(method, START_METHOD_ARG) == 0) {
3815 		ret = start_method();
3816 	} else if (strcmp(method, STOP_METHOD_ARG) == 0) {
3817 		ret = stop_method();
3818 	} else if (strcmp(method, REFRESH_METHOD_ARG) == 0) {
3819 		ret = refresh_method();
3820 	} else {
3821 		smf_usage(argv[0]);
3822 		return (SMF_EXIT_ERR_CONFIG);
3823 	}
3824 
3825 	return (ret);
3826 }
3827