xref: /titanic_52/usr/src/cmd/svc/startd/restarter.c (revision 3dd94f79268fa1debdd48a44e49c9958fcbad2eb)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
521d7f835Sgm149974  * Common Development and Distribution License (the "License").
621d7f835Sgm149974  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
2153f3aea0SRoger A. Faulkner 
227c478bd9Sstevel@tonic-gate /*
238b55d351SSean Wilcox  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * restarter.c - service manipulation
287c478bd9Sstevel@tonic-gate  *
297c478bd9Sstevel@tonic-gate  * This component manages services whose restarter is svc.startd, the standard
307c478bd9Sstevel@tonic-gate  * restarter.  It translates restarter protocol events from the graph engine
317c478bd9Sstevel@tonic-gate  * into actions on processes, as a delegated restarter would do.
327c478bd9Sstevel@tonic-gate  *
337c478bd9Sstevel@tonic-gate  * The master restarter manages a number of always-running threads:
347c478bd9Sstevel@tonic-gate  *   - restarter event thread: events from the graph engine
357c478bd9Sstevel@tonic-gate  *   - timeout thread: thread to fire queued timeouts
367c478bd9Sstevel@tonic-gate  *   - contract thread: thread to handle contract events
377c478bd9Sstevel@tonic-gate  *   - wait thread: thread to handle wait-based services
387c478bd9Sstevel@tonic-gate  *
397c478bd9Sstevel@tonic-gate  * The other threads are created as-needed:
407c478bd9Sstevel@tonic-gate  *   - per-instance method threads
417c478bd9Sstevel@tonic-gate  *   - per-instance event processing threads
427c478bd9Sstevel@tonic-gate  *
437c478bd9Sstevel@tonic-gate  * The interaction of all threads must result in the following conditions
447c478bd9Sstevel@tonic-gate  * being satisfied (on a per-instance basis):
457c478bd9Sstevel@tonic-gate  *   - restarter events must be processed in order
467c478bd9Sstevel@tonic-gate  *   - method execution must be serialized
477c478bd9Sstevel@tonic-gate  *   - instance delete must be held until outstanding methods are complete
487c478bd9Sstevel@tonic-gate  *   - contract events shouldn't be processed while a method is running
497c478bd9Sstevel@tonic-gate  *   - timeouts should fire even when a method is running
507c478bd9Sstevel@tonic-gate  *
517c478bd9Sstevel@tonic-gate  * Service instances are represented by restarter_inst_t's and are kept in the
527c478bd9Sstevel@tonic-gate  * instance_list list.
537c478bd9Sstevel@tonic-gate  *
547c478bd9Sstevel@tonic-gate  * Service States
557c478bd9Sstevel@tonic-gate  *   The current state of a service instance is kept in
567c478bd9Sstevel@tonic-gate  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
577c478bd9Sstevel@tonic-gate  *   some time, then before we effect the transition we set
587c478bd9Sstevel@tonic-gate  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
597c478bd9Sstevel@tonic-gate  *   rotate i_next_state to i_state and set i_next_state to
607c478bd9Sstevel@tonic-gate  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
617c478bd9Sstevel@tonic-gate  *   held.  The exception is when we launch methods, which are done with
627c478bd9Sstevel@tonic-gate  *   a separate thread.  To keep any other threads from grabbing ri_lock before
637c478bd9Sstevel@tonic-gate  *   method_thread() does, we set ri_method_thread to the thread id of the
647c478bd9Sstevel@tonic-gate  *   method thread, and when it is nonzero any thread with a different thread id
657c478bd9Sstevel@tonic-gate  *   waits on ri_method_cv.
667c478bd9Sstevel@tonic-gate  *
677c478bd9Sstevel@tonic-gate  * Method execution is serialized by blocking on ri_method_cv in
687c478bd9Sstevel@tonic-gate  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
697c478bd9Sstevel@tonic-gate  * also prevents the instance structure from being deleted until all
707c478bd9Sstevel@tonic-gate  * outstanding operations such as method_thread() have finished.
717c478bd9Sstevel@tonic-gate  *
727c478bd9Sstevel@tonic-gate  * Lock ordering:
737c478bd9Sstevel@tonic-gate  *
747c478bd9Sstevel@tonic-gate  * dgraph_lock [can be held when taking:]
757c478bd9Sstevel@tonic-gate  *   utmpx_lock
767c478bd9Sstevel@tonic-gate  *   dictionary->dict_lock
777c478bd9Sstevel@tonic-gate  *   st->st_load_lock
787c478bd9Sstevel@tonic-gate  *   wait_info_lock
797c478bd9Sstevel@tonic-gate  *   ru->restarter_update_lock
807c478bd9Sstevel@tonic-gate  *     restarter_queue->rpeq_lock
817c478bd9Sstevel@tonic-gate  *   instance_list.ril_lock
827c478bd9Sstevel@tonic-gate  *     inst->ri_lock
837c478bd9Sstevel@tonic-gate  *   st->st_configd_live_lock
847c478bd9Sstevel@tonic-gate  *
857c478bd9Sstevel@tonic-gate  * instance_list.ril_lock
867c478bd9Sstevel@tonic-gate  *   graph_queue->gpeq_lock
877c478bd9Sstevel@tonic-gate  *   gu->gu_lock
887c478bd9Sstevel@tonic-gate  *   st->st_configd_live_lock
897c478bd9Sstevel@tonic-gate  *   dictionary->dict_lock
907c478bd9Sstevel@tonic-gate  *   inst->ri_lock
917c478bd9Sstevel@tonic-gate  *     graph_queue->gpeq_lock
927c478bd9Sstevel@tonic-gate  *     gu->gu_lock
937c478bd9Sstevel@tonic-gate  *     tu->tu_lock
947c478bd9Sstevel@tonic-gate  *     tq->tq_lock
957c478bd9Sstevel@tonic-gate  *     inst->ri_queue_lock
967c478bd9Sstevel@tonic-gate  *       wait_info_lock
977c478bd9Sstevel@tonic-gate  *       bp->cb_lock
987c478bd9Sstevel@tonic-gate  *     utmpx_lock
997c478bd9Sstevel@tonic-gate  *
1007c478bd9Sstevel@tonic-gate  * single_user_thread_lock
1017c478bd9Sstevel@tonic-gate  *   wait_info_lock
1027c478bd9Sstevel@tonic-gate  *   utmpx_lock
1037c478bd9Sstevel@tonic-gate  *
1047c478bd9Sstevel@tonic-gate  * gu_freeze_lock
1057c478bd9Sstevel@tonic-gate  *
1067c478bd9Sstevel@tonic-gate  * logbuf_mutex nests inside pretty much everything.
1077c478bd9Sstevel@tonic-gate  */
1087c478bd9Sstevel@tonic-gate 
1097c478bd9Sstevel@tonic-gate #include <sys/contract/process.h>
1107c478bd9Sstevel@tonic-gate #include <sys/ctfs.h>
1117c478bd9Sstevel@tonic-gate #include <sys/stat.h>
1127c478bd9Sstevel@tonic-gate #include <sys/time.h>
1137c478bd9Sstevel@tonic-gate #include <sys/types.h>
1147c478bd9Sstevel@tonic-gate #include <sys/uio.h>
1157c478bd9Sstevel@tonic-gate #include <sys/wait.h>
1167c478bd9Sstevel@tonic-gate #include <assert.h>
1177c478bd9Sstevel@tonic-gate #include <errno.h>
1187c478bd9Sstevel@tonic-gate #include <fcntl.h>
1197c478bd9Sstevel@tonic-gate #include <libcontract.h>
1207c478bd9Sstevel@tonic-gate #include <libcontract_priv.h>
1217c478bd9Sstevel@tonic-gate #include <libintl.h>
1227c478bd9Sstevel@tonic-gate #include <librestart.h>
1237c478bd9Sstevel@tonic-gate #include <librestart_priv.h>
1247c478bd9Sstevel@tonic-gate #include <libuutil.h>
1257c478bd9Sstevel@tonic-gate #include <limits.h>
1267c478bd9Sstevel@tonic-gate #include <poll.h>
1277c478bd9Sstevel@tonic-gate #include <port.h>
1287c478bd9Sstevel@tonic-gate #include <pthread.h>
1297c478bd9Sstevel@tonic-gate #include <stdarg.h>
1307c478bd9Sstevel@tonic-gate #include <stdio.h>
1317c478bd9Sstevel@tonic-gate #include <strings.h>
1327c478bd9Sstevel@tonic-gate #include <unistd.h>
1337c478bd9Sstevel@tonic-gate 
1347c478bd9Sstevel@tonic-gate #include "startd.h"
1357c478bd9Sstevel@tonic-gate #include "protocol.h"
1367c478bd9Sstevel@tonic-gate 
1377c478bd9Sstevel@tonic-gate static uu_list_pool_t *restarter_instance_pool;
1387c478bd9Sstevel@tonic-gate static restarter_instance_list_t instance_list;
1397c478bd9Sstevel@tonic-gate 
1407c478bd9Sstevel@tonic-gate static uu_list_pool_t *restarter_queue_pool;
1417c478bd9Sstevel@tonic-gate 
14216ba0facSSean Wilcox /*
14316ba0facSSean Wilcox  * Function used to reset the restart times for an instance, when
14416ba0facSSean Wilcox  * an administrative task comes along and essentially makes the times
14516ba0facSSean Wilcox  * in this array ineffective.
14616ba0facSSean Wilcox  */
14716ba0facSSean Wilcox static void
14816ba0facSSean Wilcox reset_start_times(restarter_inst_t *inst)
14916ba0facSSean Wilcox {
15016ba0facSSean Wilcox 	inst->ri_start_index = 0;
15116ba0facSSean Wilcox 	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
15216ba0facSSean Wilcox }
15316ba0facSSean Wilcox 
1547c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1557c478bd9Sstevel@tonic-gate static int
1567c478bd9Sstevel@tonic-gate restarter_instance_compare(const void *lc_arg, const void *rc_arg,
1577c478bd9Sstevel@tonic-gate     void *private)
1587c478bd9Sstevel@tonic-gate {
1597c478bd9Sstevel@tonic-gate 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
1607c478bd9Sstevel@tonic-gate 	int rc_id = *(int *)rc_arg;
1617c478bd9Sstevel@tonic-gate 
1627c478bd9Sstevel@tonic-gate 	if (lc_id > rc_id)
1637c478bd9Sstevel@tonic-gate 		return (1);
1647c478bd9Sstevel@tonic-gate 	if (lc_id < rc_id)
1657c478bd9Sstevel@tonic-gate 		return (-1);
1667c478bd9Sstevel@tonic-gate 	return (0);
1677c478bd9Sstevel@tonic-gate }
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate static restarter_inst_t *
1707c478bd9Sstevel@tonic-gate inst_lookup_by_name(const char *name)
1717c478bd9Sstevel@tonic-gate {
1727c478bd9Sstevel@tonic-gate 	int id;
1737c478bd9Sstevel@tonic-gate 
1747c478bd9Sstevel@tonic-gate 	id = dict_lookup_byname(name);
1757c478bd9Sstevel@tonic-gate 	if (id == -1)
1767c478bd9Sstevel@tonic-gate 		return (NULL);
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate 	return (inst_lookup_by_id(id));
1797c478bd9Sstevel@tonic-gate }
1807c478bd9Sstevel@tonic-gate 
1817c478bd9Sstevel@tonic-gate restarter_inst_t *
1827c478bd9Sstevel@tonic-gate inst_lookup_by_id(int id)
1837c478bd9Sstevel@tonic-gate {
1847c478bd9Sstevel@tonic-gate 	restarter_inst_t *inst;
1857c478bd9Sstevel@tonic-gate 
1867c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
1877c478bd9Sstevel@tonic-gate 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
1887c478bd9Sstevel@tonic-gate 	if (inst != NULL)
1897c478bd9Sstevel@tonic-gate 		MUTEX_LOCK(&inst->ri_lock);
1907c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
1917c478bd9Sstevel@tonic-gate 
1927c478bd9Sstevel@tonic-gate 	if (inst != NULL) {
1937c478bd9Sstevel@tonic-gate 		while (inst->ri_method_thread != 0 &&
1947c478bd9Sstevel@tonic-gate 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
1957c478bd9Sstevel@tonic-gate 			++inst->ri_method_waiters;
1967c478bd9Sstevel@tonic-gate 			(void) pthread_cond_wait(&inst->ri_method_cv,
1977c478bd9Sstevel@tonic-gate 			    &inst->ri_lock);
1987c478bd9Sstevel@tonic-gate 			assert(inst->ri_method_waiters > 0);
1997c478bd9Sstevel@tonic-gate 			--inst->ri_method_waiters;
2007c478bd9Sstevel@tonic-gate 		}
2017c478bd9Sstevel@tonic-gate 	}
2027c478bd9Sstevel@tonic-gate 
2037c478bd9Sstevel@tonic-gate 	return (inst);
2047c478bd9Sstevel@tonic-gate }
2057c478bd9Sstevel@tonic-gate 
2067c478bd9Sstevel@tonic-gate static restarter_inst_t *
2077c478bd9Sstevel@tonic-gate inst_lookup_queue(const char *name)
2087c478bd9Sstevel@tonic-gate {
2097c478bd9Sstevel@tonic-gate 	int id;
2107c478bd9Sstevel@tonic-gate 	restarter_inst_t *inst;
2117c478bd9Sstevel@tonic-gate 
2127c478bd9Sstevel@tonic-gate 	id = dict_lookup_byname(name);
2137c478bd9Sstevel@tonic-gate 	if (id == -1)
2147c478bd9Sstevel@tonic-gate 		return (NULL);
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
2177c478bd9Sstevel@tonic-gate 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
2187c478bd9Sstevel@tonic-gate 	if (inst != NULL)
2197c478bd9Sstevel@tonic-gate 		MUTEX_LOCK(&inst->ri_queue_lock);
2207c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
2217c478bd9Sstevel@tonic-gate 
2227c478bd9Sstevel@tonic-gate 	return (inst);
2237c478bd9Sstevel@tonic-gate }
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate const char *
2267c478bd9Sstevel@tonic-gate service_style(int flags)
2277c478bd9Sstevel@tonic-gate {
2287c478bd9Sstevel@tonic-gate 	switch (flags & RINST_STYLE_MASK) {
2297c478bd9Sstevel@tonic-gate 	case RINST_CONTRACT:	return ("contract");
2307c478bd9Sstevel@tonic-gate 	case RINST_TRANSIENT:	return ("transient");
2317c478bd9Sstevel@tonic-gate 	case RINST_WAIT:	return ("wait");
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate 	default:
2347c478bd9Sstevel@tonic-gate #ifndef NDEBUG
2357c478bd9Sstevel@tonic-gate 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
2367c478bd9Sstevel@tonic-gate #endif
2377c478bd9Sstevel@tonic-gate 		abort();
2387c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
2397c478bd9Sstevel@tonic-gate 	}
2407c478bd9Sstevel@tonic-gate }
2417c478bd9Sstevel@tonic-gate 
2427c478bd9Sstevel@tonic-gate /*
2437c478bd9Sstevel@tonic-gate  * Fails with ECONNABORTED or ECANCELED.
2447c478bd9Sstevel@tonic-gate  */
2457c478bd9Sstevel@tonic-gate static int
2467c478bd9Sstevel@tonic-gate check_contract(restarter_inst_t *inst, boolean_t primary,
2477c478bd9Sstevel@tonic-gate     scf_instance_t *scf_inst)
2487c478bd9Sstevel@tonic-gate {
2497c478bd9Sstevel@tonic-gate 	ctid_t *ctidp;
2507c478bd9Sstevel@tonic-gate 	int fd, r;
2517c478bd9Sstevel@tonic-gate 
2527c478bd9Sstevel@tonic-gate 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
2537c478bd9Sstevel@tonic-gate 	    &inst->ri_i.i_transient_ctid;
2547c478bd9Sstevel@tonic-gate 
2557c478bd9Sstevel@tonic-gate 	assert(*ctidp >= 1);
2567c478bd9Sstevel@tonic-gate 
2577c478bd9Sstevel@tonic-gate 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
2587c478bd9Sstevel@tonic-gate 	if (fd >= 0) {
2597c478bd9Sstevel@tonic-gate 		r = close(fd);
2607c478bd9Sstevel@tonic-gate 		assert(r == 0);
2617c478bd9Sstevel@tonic-gate 		return (0);
2627c478bd9Sstevel@tonic-gate 	}
2637c478bd9Sstevel@tonic-gate 
2647c478bd9Sstevel@tonic-gate 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
2657c478bd9Sstevel@tonic-gate 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
2667c478bd9Sstevel@tonic-gate 	switch (r) {
2677c478bd9Sstevel@tonic-gate 	case 0:
2687c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
2697c478bd9Sstevel@tonic-gate 	case ECANCELED:
2707c478bd9Sstevel@tonic-gate 		*ctidp = 0;
2717c478bd9Sstevel@tonic-gate 		return (r);
2727c478bd9Sstevel@tonic-gate 
2737c478bd9Sstevel@tonic-gate 	case ENOMEM:
2747c478bd9Sstevel@tonic-gate 		uu_die("Out of memory\n");
2757c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
2767c478bd9Sstevel@tonic-gate 
2777c478bd9Sstevel@tonic-gate 	case EPERM:
2787c478bd9Sstevel@tonic-gate 		uu_die("Insufficient privilege.\n");
2797c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
2807c478bd9Sstevel@tonic-gate 
2817c478bd9Sstevel@tonic-gate 	case EACCES:
2827c478bd9Sstevel@tonic-gate 		uu_die("Repository backend access denied.\n");
2837c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
2847c478bd9Sstevel@tonic-gate 
2857c478bd9Sstevel@tonic-gate 	case EROFS:
2867c478bd9Sstevel@tonic-gate 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
2877c478bd9Sstevel@tonic-gate 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
2887c478bd9Sstevel@tonic-gate 		return (0);
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate 	case EINVAL:
2917c478bd9Sstevel@tonic-gate 	case EBADF:
2927c478bd9Sstevel@tonic-gate 	default:
2937c478bd9Sstevel@tonic-gate 		assert(0);
2947c478bd9Sstevel@tonic-gate 		abort();
2957c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
2967c478bd9Sstevel@tonic-gate 	}
2977c478bd9Sstevel@tonic-gate }
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
3007c478bd9Sstevel@tonic-gate 
/*
 * int restarter_insert_inst(scf_handle_t *, const char *)
 *   If the inst is already in the restarter list, return 0 without touching
 *   it.  If the inst is not in the restarter list, initialize a
 *   restarter_inst_t, initialize its states, insert it into the list, and
 *   return 0.
 *
 *   Fails with
 *     ENOENT - name is not in the repository
 */
3107c478bd9Sstevel@tonic-gate static int
3117c478bd9Sstevel@tonic-gate restarter_insert_inst(scf_handle_t *h, const char *name)
3127c478bd9Sstevel@tonic-gate {
3137c478bd9Sstevel@tonic-gate 	int id, r;
3147c478bd9Sstevel@tonic-gate 	restarter_inst_t *inst;
3157c478bd9Sstevel@tonic-gate 	uu_list_index_t idx;
3167c478bd9Sstevel@tonic-gate 	scf_service_t *scf_svc;
3177c478bd9Sstevel@tonic-gate 	scf_instance_t *scf_inst;
31892175b8eSrm88369 	scf_snapshot_t *snap = NULL;
3197c478bd9Sstevel@tonic-gate 	scf_propertygroup_t *pg;
3207c478bd9Sstevel@tonic-gate 	char *svc_name, *inst_name;
3217c478bd9Sstevel@tonic-gate 	char logfilebuf[PATH_MAX];
3227c478bd9Sstevel@tonic-gate 	char *c;
3237c478bd9Sstevel@tonic-gate 	boolean_t do_commit_states;
3247c478bd9Sstevel@tonic-gate 	restarter_instance_state_t state, next_state;
3257c478bd9Sstevel@tonic-gate 	protocol_states_t *ps;
3267c478bd9Sstevel@tonic-gate 	pid_t start_pid;
327f6e214c7SGavin Maltby 	restarter_str_t reason = restarter_str_insert_in_graph;
3287c478bd9Sstevel@tonic-gate 
3297c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
3307c478bd9Sstevel@tonic-gate 
3317c478bd9Sstevel@tonic-gate 	/*
3327c478bd9Sstevel@tonic-gate 	 * We don't use inst_lookup_by_name() here because we want the lookup
3337c478bd9Sstevel@tonic-gate 	 * & insert to be atomic.
3347c478bd9Sstevel@tonic-gate 	 */
3357c478bd9Sstevel@tonic-gate 	id = dict_lookup_byname(name);
3367c478bd9Sstevel@tonic-gate 	if (id != -1) {
3377c478bd9Sstevel@tonic-gate 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
3387c478bd9Sstevel@tonic-gate 		    &idx);
3397c478bd9Sstevel@tonic-gate 		if (inst != NULL) {
3407c478bd9Sstevel@tonic-gate 			MUTEX_UNLOCK(&instance_list.ril_lock);
3417c478bd9Sstevel@tonic-gate 			return (0);
3427c478bd9Sstevel@tonic-gate 		}
3437c478bd9Sstevel@tonic-gate 	}
3447c478bd9Sstevel@tonic-gate 
3457c478bd9Sstevel@tonic-gate 	/* Allocate an instance */
3467c478bd9Sstevel@tonic-gate 	inst = startd_zalloc(sizeof (restarter_inst_t));
3477c478bd9Sstevel@tonic-gate 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
3487c478bd9Sstevel@tonic-gate 	inst->ri_utmpx_prefix[0] = '\0';
3497c478bd9Sstevel@tonic-gate 
3507c478bd9Sstevel@tonic-gate 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
3517c478bd9Sstevel@tonic-gate 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
3527c478bd9Sstevel@tonic-gate 
3537c478bd9Sstevel@tonic-gate 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
3547c478bd9Sstevel@tonic-gate 
3557c478bd9Sstevel@tonic-gate 	/*
3567c478bd9Sstevel@tonic-gate 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
3577c478bd9Sstevel@tonic-gate 	 * just in case.
3587c478bd9Sstevel@tonic-gate 	 */
3597c478bd9Sstevel@tonic-gate 	inst->ri_id = (id != -1 ? id : dict_insert(name));
3607c478bd9Sstevel@tonic-gate 
3617c478bd9Sstevel@tonic-gate 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
3627c478bd9Sstevel@tonic-gate 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
3637c478bd9Sstevel@tonic-gate 
3647c478bd9Sstevel@tonic-gate 	scf_svc = safe_scf_service_create(h);
3657c478bd9Sstevel@tonic-gate 	scf_inst = safe_scf_instance_create(h);
3667c478bd9Sstevel@tonic-gate 	pg = safe_scf_pg_create(h);
3677c478bd9Sstevel@tonic-gate 	svc_name = startd_alloc(max_scf_name_size);
3687c478bd9Sstevel@tonic-gate 	inst_name = startd_alloc(max_scf_name_size);
3697c478bd9Sstevel@tonic-gate 
3707c478bd9Sstevel@tonic-gate rep_retry:
37192175b8eSrm88369 	if (snap != NULL)
37292175b8eSrm88369 		scf_snapshot_destroy(snap);
37392175b8eSrm88369 	if (inst->ri_logstem != NULL)
37492175b8eSrm88369 		startd_free(inst->ri_logstem, PATH_MAX);
37592175b8eSrm88369 	if (inst->ri_common_name != NULL)
376*3dd94f79SBryan Cantrill 		startd_free(inst->ri_common_name,
377*3dd94f79SBryan Cantrill 		    strlen(inst->ri_common_name) + 1);
37892175b8eSrm88369 	if (inst->ri_C_common_name != NULL)
379*3dd94f79SBryan Cantrill 		startd_free(inst->ri_C_common_name,
380*3dd94f79SBryan Cantrill 		    strlen(inst->ri_C_common_name) + 1);
38192175b8eSrm88369 	snap = NULL;
38292175b8eSrm88369 	inst->ri_logstem = NULL;
38392175b8eSrm88369 	inst->ri_common_name = NULL;
38492175b8eSrm88369 	inst->ri_C_common_name = NULL;
38592175b8eSrm88369 
3867c478bd9Sstevel@tonic-gate 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
3877c478bd9Sstevel@tonic-gate 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
3887c478bd9Sstevel@tonic-gate 		switch (scf_error()) {
3897c478bd9Sstevel@tonic-gate 		case SCF_ERROR_CONNECTION_BROKEN:
3907c478bd9Sstevel@tonic-gate 			libscf_handle_rebind(h);
3917c478bd9Sstevel@tonic-gate 			goto rep_retry;
3927c478bd9Sstevel@tonic-gate 
3937c478bd9Sstevel@tonic-gate 		case SCF_ERROR_NOT_FOUND:
39492175b8eSrm88369 			goto deleted;
3957c478bd9Sstevel@tonic-gate 		}
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate 		uu_die("Can't decode FMRI %s: %s\n", name,
3987c478bd9Sstevel@tonic-gate 		    scf_strerror(scf_error()));
3997c478bd9Sstevel@tonic-gate 	}
4007c478bd9Sstevel@tonic-gate 
4017c478bd9Sstevel@tonic-gate 	/*
4027c478bd9Sstevel@tonic-gate 	 * If there's no running snapshot, then we execute using the editing
4037c478bd9Sstevel@tonic-gate 	 * snapshot.  Pending snapshots will be taken later.
4047c478bd9Sstevel@tonic-gate 	 */
4057c478bd9Sstevel@tonic-gate 	snap = libscf_get_running_snapshot(scf_inst);
4067c478bd9Sstevel@tonic-gate 
4077c478bd9Sstevel@tonic-gate 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
4087c478bd9Sstevel@tonic-gate 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
4097c478bd9Sstevel@tonic-gate 	    0)) {
4107c478bd9Sstevel@tonic-gate 		switch (scf_error()) {
4117c478bd9Sstevel@tonic-gate 		case SCF_ERROR_NOT_SET:
4127c478bd9Sstevel@tonic-gate 			break;
4137c478bd9Sstevel@tonic-gate 
4147c478bd9Sstevel@tonic-gate 		case SCF_ERROR_CONNECTION_BROKEN:
4157c478bd9Sstevel@tonic-gate 			libscf_handle_rebind(h);
4167c478bd9Sstevel@tonic-gate 			goto rep_retry;
4177c478bd9Sstevel@tonic-gate 
4187c478bd9Sstevel@tonic-gate 		default:
4197c478bd9Sstevel@tonic-gate 			assert(0);
4207c478bd9Sstevel@tonic-gate 			abort();
4217c478bd9Sstevel@tonic-gate 		}
4227c478bd9Sstevel@tonic-gate 
4237c478bd9Sstevel@tonic-gate 		goto deleted;
4247c478bd9Sstevel@tonic-gate 	}
4257c478bd9Sstevel@tonic-gate 
4261855af6bSlianep 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
4271855af6bSlianep 	for (c = logfilebuf; *c != '\0'; c++)
4281855af6bSlianep 		if (*c == '/')
4291855af6bSlianep 			*c = '-';
4301855af6bSlianep 
4311855af6bSlianep 	inst->ri_logstem = startd_alloc(PATH_MAX);
4321855af6bSlianep 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
4331855af6bSlianep 	    LOG_SUFFIX);
4341855af6bSlianep 
4357c478bd9Sstevel@tonic-gate 	/*
4367c478bd9Sstevel@tonic-gate 	 * If the restarter group is missing, use uninit/none.  Otherwise,
4377c478bd9Sstevel@tonic-gate 	 * we're probably being restarted & don't want to mess up the states
4387c478bd9Sstevel@tonic-gate 	 * that are there.
4397c478bd9Sstevel@tonic-gate 	 */
4407c478bd9Sstevel@tonic-gate 	state = RESTARTER_STATE_UNINIT;
4417c478bd9Sstevel@tonic-gate 	next_state = RESTARTER_STATE_NONE;
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
4447c478bd9Sstevel@tonic-gate 	if (r != 0) {
4457c478bd9Sstevel@tonic-gate 		switch (scf_error()) {
4467c478bd9Sstevel@tonic-gate 		case SCF_ERROR_CONNECTION_BROKEN:
4477c478bd9Sstevel@tonic-gate 			libscf_handle_rebind(h);
4487c478bd9Sstevel@tonic-gate 			goto rep_retry;
4497c478bd9Sstevel@tonic-gate 
4507c478bd9Sstevel@tonic-gate 		case SCF_ERROR_NOT_SET:
4517c478bd9Sstevel@tonic-gate 			goto deleted;
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate 		case SCF_ERROR_NOT_FOUND:
4547c478bd9Sstevel@tonic-gate 			/*
4557c478bd9Sstevel@tonic-gate 			 * This shouldn't happen since the graph engine should
4567c478bd9Sstevel@tonic-gate 			 * have initialized the state to uninitialized/none if
4577c478bd9Sstevel@tonic-gate 			 * there was no restarter pg.  In case somebody
4587c478bd9Sstevel@tonic-gate 			 * deleted it, though....
4597c478bd9Sstevel@tonic-gate 			 */
4607c478bd9Sstevel@tonic-gate 			do_commit_states = B_TRUE;
4617c478bd9Sstevel@tonic-gate 			break;
4627c478bd9Sstevel@tonic-gate 
4637c478bd9Sstevel@tonic-gate 		default:
4647c478bd9Sstevel@tonic-gate 			assert(0);
4657c478bd9Sstevel@tonic-gate 			abort();
4667c478bd9Sstevel@tonic-gate 		}
4677c478bd9Sstevel@tonic-gate 	} else {
4687c478bd9Sstevel@tonic-gate 		r = libscf_read_states(pg, &state, &next_state);
4697c478bd9Sstevel@tonic-gate 		if (r != 0) {
4707c478bd9Sstevel@tonic-gate 			do_commit_states = B_TRUE;
4717c478bd9Sstevel@tonic-gate 		} else {
4727c478bd9Sstevel@tonic-gate 			if (next_state != RESTARTER_STATE_NONE) {
4737c478bd9Sstevel@tonic-gate 				/*
4747c478bd9Sstevel@tonic-gate 				 * Force next_state to _NONE since we
4757c478bd9Sstevel@tonic-gate 				 * don't look for method processes.
4767c478bd9Sstevel@tonic-gate 				 */
4777c478bd9Sstevel@tonic-gate 				next_state = RESTARTER_STATE_NONE;
4787c478bd9Sstevel@tonic-gate 				do_commit_states = B_TRUE;
4797c478bd9Sstevel@tonic-gate 			} else {
4807c478bd9Sstevel@tonic-gate 				/*
481f6e214c7SGavin Maltby 				 * The reason for transition will depend on
482f6e214c7SGavin Maltby 				 * state.
483f6e214c7SGavin Maltby 				 */
484f6e214c7SGavin Maltby 				if (st->st_initial == 0)
485f6e214c7SGavin Maltby 					reason = restarter_str_startd_restart;
486f6e214c7SGavin Maltby 				else if (state == RESTARTER_STATE_MAINT)
487f6e214c7SGavin Maltby 					reason = restarter_str_bad_repo_state;
488f6e214c7SGavin Maltby 				/*
4897c478bd9Sstevel@tonic-gate 				 * Inform the restarter of our state without
4907c478bd9Sstevel@tonic-gate 				 * changing the STIME in the repository.
4917c478bd9Sstevel@tonic-gate 				 */
4927c478bd9Sstevel@tonic-gate 				ps = startd_alloc(sizeof (*ps));
4937c478bd9Sstevel@tonic-gate 				inst->ri_i.i_state = ps->ps_state = state;
4947c478bd9Sstevel@tonic-gate 				inst->ri_i.i_next_state = ps->ps_state_next =
4957c478bd9Sstevel@tonic-gate 				    next_state;
496f6e214c7SGavin Maltby 				ps->ps_reason = reason;
4977c478bd9Sstevel@tonic-gate 
4987c478bd9Sstevel@tonic-gate 				graph_protocol_send_event(inst->ri_i.i_fmri,
4997c478bd9Sstevel@tonic-gate 				    GRAPH_UPDATE_STATE_CHANGE, ps);
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 				do_commit_states = B_FALSE;
5027c478bd9Sstevel@tonic-gate 			}
5037c478bd9Sstevel@tonic-gate 		}
5047c478bd9Sstevel@tonic-gate 	}
5057c478bd9Sstevel@tonic-gate 
5067c478bd9Sstevel@tonic-gate 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
5077c478bd9Sstevel@tonic-gate 	    &inst->ri_utmpx_prefix)) {
5087c478bd9Sstevel@tonic-gate 	case 0:
5097c478bd9Sstevel@tonic-gate 		break;
5107c478bd9Sstevel@tonic-gate 
5117c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
5127c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
5137c478bd9Sstevel@tonic-gate 		goto rep_retry;
5147c478bd9Sstevel@tonic-gate 
5157c478bd9Sstevel@tonic-gate 	case ECANCELED:
5167c478bd9Sstevel@tonic-gate 		goto deleted;
5177c478bd9Sstevel@tonic-gate 
5187c478bd9Sstevel@tonic-gate 	case ENOENT:
5197c478bd9Sstevel@tonic-gate 		/*
5207c478bd9Sstevel@tonic-gate 		 * This is odd, because the graph engine should have required
5217c478bd9Sstevel@tonic-gate 		 * the general property group.  So we'll just use default
5227c478bd9Sstevel@tonic-gate 		 * flags in anticipation of the graph engine sending us
5237c478bd9Sstevel@tonic-gate 		 * REMOVE_INSTANCE when it finds out that the general property
5247c478bd9Sstevel@tonic-gate 		 * group has been deleted.
5257c478bd9Sstevel@tonic-gate 		 */
5267c478bd9Sstevel@tonic-gate 		inst->ri_flags = RINST_CONTRACT;
5277c478bd9Sstevel@tonic-gate 		break;
5287c478bd9Sstevel@tonic-gate 
5297c478bd9Sstevel@tonic-gate 	default:
5307c478bd9Sstevel@tonic-gate 		assert(0);
5317c478bd9Sstevel@tonic-gate 		abort();
5327c478bd9Sstevel@tonic-gate 	}
5337c478bd9Sstevel@tonic-gate 
534*3dd94f79SBryan Cantrill 	r = libscf_get_template_values(scf_inst, snap,
535*3dd94f79SBryan Cantrill 	    &inst->ri_common_name, &inst->ri_C_common_name);
536*3dd94f79SBryan Cantrill 
537*3dd94f79SBryan Cantrill 	/*
538*3dd94f79SBryan Cantrill 	 * Copy our names to smaller buffers to reduce our memory footprint.
539*3dd94f79SBryan Cantrill 	 */
540*3dd94f79SBryan Cantrill 	if (inst->ri_common_name != NULL) {
541*3dd94f79SBryan Cantrill 		char *tmp = safe_strdup(inst->ri_common_name);
542*3dd94f79SBryan Cantrill 		startd_free(inst->ri_common_name, max_scf_value_size);
543*3dd94f79SBryan Cantrill 		inst->ri_common_name = tmp;
544*3dd94f79SBryan Cantrill 	}
545*3dd94f79SBryan Cantrill 
546*3dd94f79SBryan Cantrill 	if (inst->ri_C_common_name != NULL) {
547*3dd94f79SBryan Cantrill 		char *tmp = safe_strdup(inst->ri_C_common_name);
548*3dd94f79SBryan Cantrill 		startd_free(inst->ri_C_common_name, max_scf_value_size);
549*3dd94f79SBryan Cantrill 		inst->ri_C_common_name = tmp;
550*3dd94f79SBryan Cantrill 	}
551*3dd94f79SBryan Cantrill 
552*3dd94f79SBryan Cantrill 	switch (r) {
5537c478bd9Sstevel@tonic-gate 	case 0:
5547c478bd9Sstevel@tonic-gate 		break;
5557c478bd9Sstevel@tonic-gate 
5567c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
5577c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
5587c478bd9Sstevel@tonic-gate 		goto rep_retry;
5597c478bd9Sstevel@tonic-gate 
5607c478bd9Sstevel@tonic-gate 	case ECANCELED:
5617c478bd9Sstevel@tonic-gate 		goto deleted;
5627c478bd9Sstevel@tonic-gate 
5637c478bd9Sstevel@tonic-gate 	case ECHILD:
5647c478bd9Sstevel@tonic-gate 	case ENOENT:
5657c478bd9Sstevel@tonic-gate 		break;
5667c478bd9Sstevel@tonic-gate 
5677c478bd9Sstevel@tonic-gate 	default:
5687c478bd9Sstevel@tonic-gate 		assert(0);
5697c478bd9Sstevel@tonic-gate 		abort();
5707c478bd9Sstevel@tonic-gate 	}
5717c478bd9Sstevel@tonic-gate 
5727c478bd9Sstevel@tonic-gate 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
5737c478bd9Sstevel@tonic-gate 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
5747c478bd9Sstevel@tonic-gate 	    &start_pid)) {
5757c478bd9Sstevel@tonic-gate 	case 0:
5767c478bd9Sstevel@tonic-gate 		break;
5777c478bd9Sstevel@tonic-gate 
5787c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
5797c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
5807c478bd9Sstevel@tonic-gate 		goto rep_retry;
5817c478bd9Sstevel@tonic-gate 
5827c478bd9Sstevel@tonic-gate 	case ECANCELED:
5837c478bd9Sstevel@tonic-gate 		goto deleted;
5847c478bd9Sstevel@tonic-gate 
5857c478bd9Sstevel@tonic-gate 	default:
5867c478bd9Sstevel@tonic-gate 		assert(0);
5877c478bd9Sstevel@tonic-gate 		abort();
5887c478bd9Sstevel@tonic-gate 	}
5897c478bd9Sstevel@tonic-gate 
5907c478bd9Sstevel@tonic-gate 	if (inst->ri_i.i_primary_ctid >= 1) {
5917c478bd9Sstevel@tonic-gate 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate 		switch (check_contract(inst, B_TRUE, scf_inst)) {
5947c478bd9Sstevel@tonic-gate 		case 0:
5957c478bd9Sstevel@tonic-gate 			break;
5967c478bd9Sstevel@tonic-gate 
5977c478bd9Sstevel@tonic-gate 		case ECONNABORTED:
5987c478bd9Sstevel@tonic-gate 			libscf_handle_rebind(h);
5997c478bd9Sstevel@tonic-gate 			goto rep_retry;
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate 		case ECANCELED:
6027c478bd9Sstevel@tonic-gate 			goto deleted;
6037c478bd9Sstevel@tonic-gate 
6047c478bd9Sstevel@tonic-gate 		default:
6057c478bd9Sstevel@tonic-gate 			assert(0);
6067c478bd9Sstevel@tonic-gate 			abort();
6077c478bd9Sstevel@tonic-gate 		}
6087c478bd9Sstevel@tonic-gate 	}
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate 	if (inst->ri_i.i_transient_ctid >= 1) {
6117c478bd9Sstevel@tonic-gate 		switch (check_contract(inst, B_FALSE, scf_inst)) {
6127c478bd9Sstevel@tonic-gate 		case 0:
6137c478bd9Sstevel@tonic-gate 			break;
6147c478bd9Sstevel@tonic-gate 
6157c478bd9Sstevel@tonic-gate 		case ECONNABORTED:
6167c478bd9Sstevel@tonic-gate 			libscf_handle_rebind(h);
6177c478bd9Sstevel@tonic-gate 			goto rep_retry;
6187c478bd9Sstevel@tonic-gate 
6197c478bd9Sstevel@tonic-gate 		case ECANCELED:
6207c478bd9Sstevel@tonic-gate 			goto deleted;
6217c478bd9Sstevel@tonic-gate 
6227c478bd9Sstevel@tonic-gate 		default:
6237c478bd9Sstevel@tonic-gate 			assert(0);
6247c478bd9Sstevel@tonic-gate 			abort();
6257c478bd9Sstevel@tonic-gate 		}
6267c478bd9Sstevel@tonic-gate 	}
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate 	/* No more failures we live through, so add it to the list. */
6297c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
6307c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
6317c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&inst->ri_lock);
6327c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&inst->ri_queue_lock);
6337c478bd9Sstevel@tonic-gate 
6347c478bd9Sstevel@tonic-gate 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
6357c478bd9Sstevel@tonic-gate 
6367c478bd9Sstevel@tonic-gate 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
6377c478bd9Sstevel@tonic-gate 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
6387c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
6397c478bd9Sstevel@tonic-gate 
6407c478bd9Sstevel@tonic-gate 	if (start_pid != -1 &&
6417c478bd9Sstevel@tonic-gate 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
6427c478bd9Sstevel@tonic-gate 		int ret;
6437c478bd9Sstevel@tonic-gate 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
6447c478bd9Sstevel@tonic-gate 		if (ret == -1) {
6457c478bd9Sstevel@tonic-gate 			/*
6467c478bd9Sstevel@tonic-gate 			 * Implication:  if we can't reregister the
6477c478bd9Sstevel@tonic-gate 			 * instance, we will start another one.  Two
6487c478bd9Sstevel@tonic-gate 			 * instances may or may not result in a resource
6497c478bd9Sstevel@tonic-gate 			 * conflict.
6507c478bd9Sstevel@tonic-gate 			 */
6517c478bd9Sstevel@tonic-gate 			log_error(LOG_WARNING,
6527c478bd9Sstevel@tonic-gate 			    "%s: couldn't reregister %ld for wait\n",
6537c478bd9Sstevel@tonic-gate 			    inst->ri_i.i_fmri, start_pid);
6547c478bd9Sstevel@tonic-gate 		} else if (ret == 1) {
6557c478bd9Sstevel@tonic-gate 			/*
6567c478bd9Sstevel@tonic-gate 			 * Leading PID has exited.
6577c478bd9Sstevel@tonic-gate 			 */
6587c478bd9Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_EXIT);
6597c478bd9Sstevel@tonic-gate 		}
6607c478bd9Sstevel@tonic-gate 	}
6617c478bd9Sstevel@tonic-gate 
6627c478bd9Sstevel@tonic-gate 
6637c478bd9Sstevel@tonic-gate 	scf_pg_destroy(pg);
6647c478bd9Sstevel@tonic-gate 
6657c478bd9Sstevel@tonic-gate 	if (do_commit_states)
6667c478bd9Sstevel@tonic-gate 		(void) restarter_instance_update_states(h, inst, state,
667f6e214c7SGavin Maltby 		    next_state, RERR_NONE, reason);
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
6707c478bd9Sstevel@tonic-gate 	    service_style(inst->ri_flags));
6717c478bd9Sstevel@tonic-gate 
6727c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&inst->ri_queue_lock);
6737c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&inst->ri_lock);
6747c478bd9Sstevel@tonic-gate 
6757c478bd9Sstevel@tonic-gate 	startd_free(svc_name, max_scf_name_size);
6767c478bd9Sstevel@tonic-gate 	startd_free(inst_name, max_scf_name_size);
6777c478bd9Sstevel@tonic-gate 	scf_snapshot_destroy(snap);
6787c478bd9Sstevel@tonic-gate 	scf_instance_destroy(scf_inst);
6797c478bd9Sstevel@tonic-gate 	scf_service_destroy(scf_svc);
6807c478bd9Sstevel@tonic-gate 
6817c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
6827c478bd9Sstevel@tonic-gate 	    name);
6837c478bd9Sstevel@tonic-gate 
6847c478bd9Sstevel@tonic-gate 	return (0);
68592175b8eSrm88369 
68692175b8eSrm88369 deleted:
68792175b8eSrm88369 	MUTEX_UNLOCK(&instance_list.ril_lock);
68892175b8eSrm88369 	startd_free(inst_name, max_scf_name_size);
68992175b8eSrm88369 	startd_free(svc_name, max_scf_name_size);
69092175b8eSrm88369 	if (snap != NULL)
69192175b8eSrm88369 		scf_snapshot_destroy(snap);
69292175b8eSrm88369 	scf_pg_destroy(pg);
69392175b8eSrm88369 	scf_instance_destroy(scf_inst);
69492175b8eSrm88369 	scf_service_destroy(scf_svc);
69592175b8eSrm88369 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
69692175b8eSrm88369 	uu_list_destroy(inst->ri_queue);
69792175b8eSrm88369 	if (inst->ri_logstem != NULL)
69892175b8eSrm88369 		startd_free(inst->ri_logstem, PATH_MAX);
69992175b8eSrm88369 	if (inst->ri_common_name != NULL)
700*3dd94f79SBryan Cantrill 		startd_free(inst->ri_common_name,
701*3dd94f79SBryan Cantrill 		    strlen(inst->ri_common_name) + 1);
70292175b8eSrm88369 	if (inst->ri_C_common_name != NULL)
703*3dd94f79SBryan Cantrill 		startd_free(inst->ri_C_common_name,
704*3dd94f79SBryan Cantrill 		    strlen(inst->ri_C_common_name) + 1);
70592175b8eSrm88369 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
70692175b8eSrm88369 	startd_free(inst, sizeof (restarter_inst_t));
70792175b8eSrm88369 	return (ENOENT);
7087c478bd9Sstevel@tonic-gate }
7097c478bd9Sstevel@tonic-gate 
7107c478bd9Sstevel@tonic-gate static void
7117c478bd9Sstevel@tonic-gate restarter_delete_inst(restarter_inst_t *ri)
7127c478bd9Sstevel@tonic-gate {
7137c478bd9Sstevel@tonic-gate 	int id;
7147c478bd9Sstevel@tonic-gate 	restarter_inst_t *rip;
7157c478bd9Sstevel@tonic-gate 	void *cookie = NULL;
7167c478bd9Sstevel@tonic-gate 	restarter_instance_qentry_t *e;
7177c478bd9Sstevel@tonic-gate 
71853f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&ri->ri_lock));
7197c478bd9Sstevel@tonic-gate 
7207c478bd9Sstevel@tonic-gate 	/*
7217c478bd9Sstevel@tonic-gate 	 * Must drop the instance lock so we can pick up the instance_list
7227c478bd9Sstevel@tonic-gate 	 * lock & remove the instance.
7237c478bd9Sstevel@tonic-gate 	 */
7247c478bd9Sstevel@tonic-gate 	id = ri->ri_id;
7257c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&ri->ri_lock);
7267c478bd9Sstevel@tonic-gate 
7277c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
7287c478bd9Sstevel@tonic-gate 
7297c478bd9Sstevel@tonic-gate 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
7307c478bd9Sstevel@tonic-gate 	if (rip == NULL) {
7317c478bd9Sstevel@tonic-gate 		MUTEX_UNLOCK(&instance_list.ril_lock);
7327c478bd9Sstevel@tonic-gate 		return;
7337c478bd9Sstevel@tonic-gate 	}
7347c478bd9Sstevel@tonic-gate 
7357c478bd9Sstevel@tonic-gate 	assert(ri == rip);
7367c478bd9Sstevel@tonic-gate 
7377c478bd9Sstevel@tonic-gate 	uu_list_remove(instance_list.ril_instance_list, ri);
7387c478bd9Sstevel@tonic-gate 
7397c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
7407c478bd9Sstevel@tonic-gate 	    ri->ri_i.i_fmri);
7417c478bd9Sstevel@tonic-gate 
7427c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
7437c478bd9Sstevel@tonic-gate 
7447c478bd9Sstevel@tonic-gate 	/*
7457c478bd9Sstevel@tonic-gate 	 * We can lock the instance without holding the instance_list lock
7467c478bd9Sstevel@tonic-gate 	 * since we removed the instance from the list.
7477c478bd9Sstevel@tonic-gate 	 */
7487c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&ri->ri_lock);
7497c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&ri->ri_queue_lock);
7507c478bd9Sstevel@tonic-gate 
7517c478bd9Sstevel@tonic-gate 	if (ri->ri_i.i_primary_ctid >= 1)
7527c478bd9Sstevel@tonic-gate 		contract_hash_remove(ri->ri_i.i_primary_ctid);
7537c478bd9Sstevel@tonic-gate 
7547c478bd9Sstevel@tonic-gate 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
7557c478bd9Sstevel@tonic-gate 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
7567c478bd9Sstevel@tonic-gate 
7577c478bd9Sstevel@tonic-gate 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
7587c478bd9Sstevel@tonic-gate 		startd_free(e, sizeof (*e));
7597c478bd9Sstevel@tonic-gate 	uu_list_destroy(ri->ri_queue);
7607c478bd9Sstevel@tonic-gate 
7617c478bd9Sstevel@tonic-gate 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
7621855af6bSlianep 	startd_free(ri->ri_logstem, PATH_MAX);
7633ba60fe0Srm88369 	if (ri->ri_common_name != NULL)
764*3dd94f79SBryan Cantrill 		startd_free(ri->ri_common_name,
765*3dd94f79SBryan Cantrill 		    strlen(ri->ri_common_name) + 1);
7663ba60fe0Srm88369 	if (ri->ri_C_common_name != NULL)
767*3dd94f79SBryan Cantrill 		startd_free(ri->ri_C_common_name,
768*3dd94f79SBryan Cantrill 		    strlen(ri->ri_C_common_name) + 1);
7697c478bd9Sstevel@tonic-gate 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
7707c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_destroy(&ri->ri_lock);
7717c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
7727c478bd9Sstevel@tonic-gate 	startd_free(ri, sizeof (restarter_inst_t));
7737c478bd9Sstevel@tonic-gate }
7747c478bd9Sstevel@tonic-gate 
7757c478bd9Sstevel@tonic-gate /*
7767c478bd9Sstevel@tonic-gate  * instance_is_wait_style()
7777c478bd9Sstevel@tonic-gate  *
7787c478bd9Sstevel@tonic-gate  *   Returns 1 if the given instance is a "wait-style" service instance.
7797c478bd9Sstevel@tonic-gate  */
7807c478bd9Sstevel@tonic-gate int
7817c478bd9Sstevel@tonic-gate instance_is_wait_style(restarter_inst_t *inst)
7827c478bd9Sstevel@tonic-gate {
78353f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
7847c478bd9Sstevel@tonic-gate 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
7857c478bd9Sstevel@tonic-gate }
7867c478bd9Sstevel@tonic-gate 
7877c478bd9Sstevel@tonic-gate /*
7887c478bd9Sstevel@tonic-gate  * instance_is_transient_style()
7897c478bd9Sstevel@tonic-gate  *
7907c478bd9Sstevel@tonic-gate  *   Returns 1 if the given instance is a transient service instance.
7917c478bd9Sstevel@tonic-gate  */
7927c478bd9Sstevel@tonic-gate int
7937c478bd9Sstevel@tonic-gate instance_is_transient_style(restarter_inst_t *inst)
7947c478bd9Sstevel@tonic-gate {
79553f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
7967c478bd9Sstevel@tonic-gate 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
7977c478bd9Sstevel@tonic-gate }
7987c478bd9Sstevel@tonic-gate 
7997c478bd9Sstevel@tonic-gate /*
8007c478bd9Sstevel@tonic-gate  * instance_in_transition()
8017c478bd9Sstevel@tonic-gate  * Returns 1 if instance is in transition, 0 if not
8027c478bd9Sstevel@tonic-gate  */
8037c478bd9Sstevel@tonic-gate int
8047c478bd9Sstevel@tonic-gate instance_in_transition(restarter_inst_t *inst)
8057c478bd9Sstevel@tonic-gate {
80653f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
8077c478bd9Sstevel@tonic-gate 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
8087c478bd9Sstevel@tonic-gate 		return (0);
8097c478bd9Sstevel@tonic-gate 	return (1);
8107c478bd9Sstevel@tonic-gate }
8117c478bd9Sstevel@tonic-gate 
8127c478bd9Sstevel@tonic-gate /*
81321d7f835Sgm149974  * returns 1 if instance is already started, 0 if not
81421d7f835Sgm149974  */
81521d7f835Sgm149974 static int
81621d7f835Sgm149974 instance_started(restarter_inst_t *inst)
81721d7f835Sgm149974 {
81821d7f835Sgm149974 	int ret;
81921d7f835Sgm149974 
82053f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
82121d7f835Sgm149974 
82221d7f835Sgm149974 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
82321d7f835Sgm149974 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
82421d7f835Sgm149974 		ret = 1;
82521d7f835Sgm149974 	else
82621d7f835Sgm149974 		ret = 0;
82721d7f835Sgm149974 
82821d7f835Sgm149974 	return (ret);
82921d7f835Sgm149974 }
83021d7f835Sgm149974 
83121d7f835Sgm149974 /*
8327c478bd9Sstevel@tonic-gate  * Returns
8337c478bd9Sstevel@tonic-gate  *   0 - success
8347c478bd9Sstevel@tonic-gate  *   ECONNRESET - success, but h was rebound
8357c478bd9Sstevel@tonic-gate  */
8367c478bd9Sstevel@tonic-gate int
8377c478bd9Sstevel@tonic-gate restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
8387c478bd9Sstevel@tonic-gate     restarter_instance_state_t new_state,
839f6e214c7SGavin Maltby     restarter_instance_state_t new_state_next, restarter_error_t err,
840f6e214c7SGavin Maltby     restarter_str_t reason)
8417c478bd9Sstevel@tonic-gate {
8427c478bd9Sstevel@tonic-gate 	protocol_states_t *states;
8437c478bd9Sstevel@tonic-gate 	int e;
8447c478bd9Sstevel@tonic-gate 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
8457c478bd9Sstevel@tonic-gate 	boolean_t rebound = B_FALSE;
84621d7f835Sgm149974 	int prev_state_online;
84721d7f835Sgm149974 	int state_online;
8487c478bd9Sstevel@tonic-gate 
84953f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&ri->ri_lock));
8507c478bd9Sstevel@tonic-gate 
85121d7f835Sgm149974 	prev_state_online = instance_started(ri);
85221d7f835Sgm149974 
8537c478bd9Sstevel@tonic-gate retry:
8547c478bd9Sstevel@tonic-gate 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
855f6e214c7SGavin Maltby 	    restarter_get_str_short(reason));
8567c478bd9Sstevel@tonic-gate 	switch (e) {
8577c478bd9Sstevel@tonic-gate 	case 0:
8587c478bd9Sstevel@tonic-gate 		break;
8597c478bd9Sstevel@tonic-gate 
8607c478bd9Sstevel@tonic-gate 	case ENOMEM:
8617c478bd9Sstevel@tonic-gate 		++retry_count;
8627c478bd9Sstevel@tonic-gate 		if (retry_count < ALLOC_RETRY) {
8637c478bd9Sstevel@tonic-gate 			(void) poll(NULL, 0, msecs);
8647c478bd9Sstevel@tonic-gate 			msecs *= ALLOC_DELAY_MULT;
8657c478bd9Sstevel@tonic-gate 			goto retry;
8667c478bd9Sstevel@tonic-gate 		}
8677c478bd9Sstevel@tonic-gate 
8687c478bd9Sstevel@tonic-gate 		/* Like startd_alloc(). */
8697c478bd9Sstevel@tonic-gate 		uu_die("Insufficient memory.\n");
8707c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
8717c478bd9Sstevel@tonic-gate 
8727c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
8737c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
8747c478bd9Sstevel@tonic-gate 		rebound = B_TRUE;
8757c478bd9Sstevel@tonic-gate 		goto retry;
8767c478bd9Sstevel@tonic-gate 
8777c478bd9Sstevel@tonic-gate 	case EPERM:
8787c478bd9Sstevel@tonic-gate 	case EACCES:
8797c478bd9Sstevel@tonic-gate 	case EROFS:
8807c478bd9Sstevel@tonic-gate 		log_error(LOG_NOTICE, "Could not commit state change for %s "
8817c478bd9Sstevel@tonic-gate 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
8827c478bd9Sstevel@tonic-gate 		/* FALLTHROUGH */
8837c478bd9Sstevel@tonic-gate 
8847c478bd9Sstevel@tonic-gate 	case ENOENT:
8857c478bd9Sstevel@tonic-gate 		ri->ri_i.i_state = new_state;
8867c478bd9Sstevel@tonic-gate 		ri->ri_i.i_next_state = new_state_next;
8877c478bd9Sstevel@tonic-gate 		break;
8887c478bd9Sstevel@tonic-gate 
8897c478bd9Sstevel@tonic-gate 	case EINVAL:
8907c478bd9Sstevel@tonic-gate 	default:
8917c478bd9Sstevel@tonic-gate 		bad_error("_restarter_commit_states", e);
8927c478bd9Sstevel@tonic-gate 	}
8937c478bd9Sstevel@tonic-gate 
8947c478bd9Sstevel@tonic-gate 	states = startd_alloc(sizeof (protocol_states_t));
8957c478bd9Sstevel@tonic-gate 	states->ps_state = new_state;
8967c478bd9Sstevel@tonic-gate 	states->ps_state_next = new_state_next;
8977c478bd9Sstevel@tonic-gate 	states->ps_err = err;
898f6e214c7SGavin Maltby 	states->ps_reason = reason;
8997c478bd9Sstevel@tonic-gate 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
9007c478bd9Sstevel@tonic-gate 	    (void *)states);
9017c478bd9Sstevel@tonic-gate 
90221d7f835Sgm149974 	state_online = instance_started(ri);
90321d7f835Sgm149974 
90421d7f835Sgm149974 	if (prev_state_online && !state_online)
90521d7f835Sgm149974 		ri->ri_post_offline_hook();
90621d7f835Sgm149974 	else if (!prev_state_online && state_online)
9077c478bd9Sstevel@tonic-gate 		ri->ri_post_online_hook();
9087c478bd9Sstevel@tonic-gate 
9097c478bd9Sstevel@tonic-gate 	return (rebound ? ECONNRESET : 0);
9107c478bd9Sstevel@tonic-gate }
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate void
9137c478bd9Sstevel@tonic-gate restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
9147c478bd9Sstevel@tonic-gate {
9157c478bd9Sstevel@tonic-gate 	restarter_inst_t *inst;
9167c478bd9Sstevel@tonic-gate 
9177c478bd9Sstevel@tonic-gate 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
9187c478bd9Sstevel@tonic-gate 
9197c478bd9Sstevel@tonic-gate 	inst = inst_lookup_by_name(fmri);
9207c478bd9Sstevel@tonic-gate 	if (inst == NULL)
9217c478bd9Sstevel@tonic-gate 		return;
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate 	inst->ri_flags |= flag;
9247c478bd9Sstevel@tonic-gate 
9257c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&inst->ri_lock);
9267c478bd9Sstevel@tonic-gate }
9277c478bd9Sstevel@tonic-gate 
9287c478bd9Sstevel@tonic-gate static void
9297c478bd9Sstevel@tonic-gate restarter_take_pending_snapshots(scf_handle_t *h)
9307c478bd9Sstevel@tonic-gate {
9317c478bd9Sstevel@tonic-gate 	restarter_inst_t *inst;
9327c478bd9Sstevel@tonic-gate 	int r;
9337c478bd9Sstevel@tonic-gate 
9347c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
9357c478bd9Sstevel@tonic-gate 
9367c478bd9Sstevel@tonic-gate 	for (inst = uu_list_first(instance_list.ril_instance_list);
9377c478bd9Sstevel@tonic-gate 	    inst != NULL;
9387c478bd9Sstevel@tonic-gate 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
9397c478bd9Sstevel@tonic-gate 		const char *fmri;
9407c478bd9Sstevel@tonic-gate 		scf_instance_t *sinst = NULL;
9417c478bd9Sstevel@tonic-gate 
9427c478bd9Sstevel@tonic-gate 		MUTEX_LOCK(&inst->ri_lock);
9437c478bd9Sstevel@tonic-gate 
9447c478bd9Sstevel@tonic-gate 		/*
9457c478bd9Sstevel@tonic-gate 		 * This is where we'd check inst->ri_method_thread and if it
9467c478bd9Sstevel@tonic-gate 		 * were nonzero we'd wait in anticipation of another thread
9477c478bd9Sstevel@tonic-gate 		 * executing a method for inst.  Doing so with the instance_list
9487c478bd9Sstevel@tonic-gate 		 * locked, though, leads to deadlock.  Since taking a snapshot
9497c478bd9Sstevel@tonic-gate 		 * during that window won't hurt anything, we'll just continue.
9507c478bd9Sstevel@tonic-gate 		 */
9517c478bd9Sstevel@tonic-gate 
9527c478bd9Sstevel@tonic-gate 		fmri = inst->ri_i.i_fmri;
9537c478bd9Sstevel@tonic-gate 
9547c478bd9Sstevel@tonic-gate 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
9557c478bd9Sstevel@tonic-gate 			scf_snapshot_t *rsnap;
9567c478bd9Sstevel@tonic-gate 
9577c478bd9Sstevel@tonic-gate 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
9587c478bd9Sstevel@tonic-gate 
9597c478bd9Sstevel@tonic-gate 			rsnap = libscf_get_or_make_running_snapshot(sinst,
9607c478bd9Sstevel@tonic-gate 			    fmri, B_FALSE);
9617c478bd9Sstevel@tonic-gate 
9627c478bd9Sstevel@tonic-gate 			scf_instance_destroy(sinst);
9637c478bd9Sstevel@tonic-gate 
9647c478bd9Sstevel@tonic-gate 			if (rsnap != NULL)
9657c478bd9Sstevel@tonic-gate 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
9667c478bd9Sstevel@tonic-gate 
9677c478bd9Sstevel@tonic-gate 			scf_snapshot_destroy(rsnap);
9687c478bd9Sstevel@tonic-gate 		}
9697c478bd9Sstevel@tonic-gate 
9707c478bd9Sstevel@tonic-gate 		if (inst->ri_flags & RINST_RETAKE_START) {
9717c478bd9Sstevel@tonic-gate 			switch (r = libscf_snapshots_poststart(h, fmri,
9727c478bd9Sstevel@tonic-gate 			    B_FALSE)) {
9737c478bd9Sstevel@tonic-gate 			case 0:
9747c478bd9Sstevel@tonic-gate 			case ENOENT:
9757c478bd9Sstevel@tonic-gate 				inst->ri_flags &= ~RINST_RETAKE_START;
9767c478bd9Sstevel@tonic-gate 				break;
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 			case ECONNABORTED:
9797c478bd9Sstevel@tonic-gate 				break;
9807c478bd9Sstevel@tonic-gate 
9817c478bd9Sstevel@tonic-gate 			case EACCES:
9827c478bd9Sstevel@tonic-gate 			default:
9837c478bd9Sstevel@tonic-gate 				bad_error("libscf_snapshots_poststart", r);
9847c478bd9Sstevel@tonic-gate 			}
9857c478bd9Sstevel@tonic-gate 		}
9867c478bd9Sstevel@tonic-gate 
9877c478bd9Sstevel@tonic-gate 		MUTEX_UNLOCK(&inst->ri_lock);
9887c478bd9Sstevel@tonic-gate 	}
9897c478bd9Sstevel@tonic-gate 
9907c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
9917c478bd9Sstevel@tonic-gate }
9927c478bd9Sstevel@tonic-gate 
9937c478bd9Sstevel@tonic-gate /* ARGSUSED */
9947c478bd9Sstevel@tonic-gate void *
9957c478bd9Sstevel@tonic-gate restarter_post_fsminimal_thread(void *unused)
9967c478bd9Sstevel@tonic-gate {
9977c478bd9Sstevel@tonic-gate 	scf_handle_t *h;
9987c478bd9Sstevel@tonic-gate 	int r;
9997c478bd9Sstevel@tonic-gate 
10007c478bd9Sstevel@tonic-gate 	h = libscf_handle_create_bound_loop();
10017c478bd9Sstevel@tonic-gate 
10027c478bd9Sstevel@tonic-gate 	for (;;) {
10037c478bd9Sstevel@tonic-gate 		r = libscf_create_self(h);
10047c478bd9Sstevel@tonic-gate 		if (r == 0)
10057c478bd9Sstevel@tonic-gate 			break;
10067c478bd9Sstevel@tonic-gate 
10077c478bd9Sstevel@tonic-gate 		assert(r == ECONNABORTED);
10087c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
10097c478bd9Sstevel@tonic-gate 	}
10107c478bd9Sstevel@tonic-gate 
10117c478bd9Sstevel@tonic-gate 	restarter_take_pending_snapshots(h);
10127c478bd9Sstevel@tonic-gate 
10137c478bd9Sstevel@tonic-gate 	(void) scf_handle_unbind(h);
10147c478bd9Sstevel@tonic-gate 	scf_handle_destroy(h);
10157c478bd9Sstevel@tonic-gate 
10167c478bd9Sstevel@tonic-gate 	return (NULL);
10177c478bd9Sstevel@tonic-gate }
10187c478bd9Sstevel@tonic-gate 
10197c478bd9Sstevel@tonic-gate /*
10207c478bd9Sstevel@tonic-gate  * int stop_instance()
10217c478bd9Sstevel@tonic-gate  *
10227c478bd9Sstevel@tonic-gate  *   Stop the instance identified by the instance given as the second argument,
10237c478bd9Sstevel@tonic-gate  *   for the cause stated.
10247c478bd9Sstevel@tonic-gate  *
10257c478bd9Sstevel@tonic-gate  *   Returns
10267c478bd9Sstevel@tonic-gate  *     0 - success
10277c478bd9Sstevel@tonic-gate  *     -1 - inst is in transition
10287c478bd9Sstevel@tonic-gate  */
10297c478bd9Sstevel@tonic-gate static int
10307c478bd9Sstevel@tonic-gate stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
10317c478bd9Sstevel@tonic-gate     stop_cause_t cause)
10327c478bd9Sstevel@tonic-gate {
10337c478bd9Sstevel@tonic-gate 	fork_info_t *info;
10347c478bd9Sstevel@tonic-gate 	const char *cp;
10357c478bd9Sstevel@tonic-gate 	int err;
10367c478bd9Sstevel@tonic-gate 	restarter_error_t re;
1037f6e214c7SGavin Maltby 	restarter_str_t	reason;
10387c478bd9Sstevel@tonic-gate 
103953f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
10407c478bd9Sstevel@tonic-gate 	assert(inst->ri_method_thread == 0);
10417c478bd9Sstevel@tonic-gate 
10427c478bd9Sstevel@tonic-gate 	switch (cause) {
10437c478bd9Sstevel@tonic-gate 	case RSTOP_EXIT:
10447c478bd9Sstevel@tonic-gate 		re = RERR_RESTART;
1045f6e214c7SGavin Maltby 		reason = restarter_str_ct_ev_exit;
10467c478bd9Sstevel@tonic-gate 		cp = "all processes in service exited";
10477c478bd9Sstevel@tonic-gate 		break;
10487c478bd9Sstevel@tonic-gate 	case RSTOP_CORE:
10497c478bd9Sstevel@tonic-gate 		re = RERR_FAULT;
1050f6e214c7SGavin Maltby 		reason = restarter_str_ct_ev_core;
10517c478bd9Sstevel@tonic-gate 		cp = "process dumped core";
10527c478bd9Sstevel@tonic-gate 		break;
10537c478bd9Sstevel@tonic-gate 	case RSTOP_SIGNAL:
10547c478bd9Sstevel@tonic-gate 		re = RERR_FAULT;
1055f6e214c7SGavin Maltby 		reason = restarter_str_ct_ev_signal;
10567c478bd9Sstevel@tonic-gate 		cp = "process received fatal signal from outside the service";
10577c478bd9Sstevel@tonic-gate 		break;
10587c478bd9Sstevel@tonic-gate 	case RSTOP_HWERR:
10597c478bd9Sstevel@tonic-gate 		re = RERR_FAULT;
1060f6e214c7SGavin Maltby 		reason = restarter_str_ct_ev_hwerr;
10617c478bd9Sstevel@tonic-gate 		cp = "process killed due to uncorrectable hardware error";
10627c478bd9Sstevel@tonic-gate 		break;
10637c478bd9Sstevel@tonic-gate 	case RSTOP_DEPENDENCY:
10647c478bd9Sstevel@tonic-gate 		re = RERR_RESTART;
1065f6e214c7SGavin Maltby 		reason = restarter_str_dependency_activity;
10667c478bd9Sstevel@tonic-gate 		cp = "dependency activity requires stop";
10677c478bd9Sstevel@tonic-gate 		break;
10687c478bd9Sstevel@tonic-gate 	case RSTOP_DISABLE:
10697c478bd9Sstevel@tonic-gate 		re = RERR_RESTART;
1070f6e214c7SGavin Maltby 		reason = restarter_str_disable_request;
10717c478bd9Sstevel@tonic-gate 		cp = "service disabled";
10727c478bd9Sstevel@tonic-gate 		break;
10737c478bd9Sstevel@tonic-gate 	case RSTOP_RESTART:
10747c478bd9Sstevel@tonic-gate 		re = RERR_RESTART;
1075f6e214c7SGavin Maltby 		reason = restarter_str_restart_request;
10767c478bd9Sstevel@tonic-gate 		cp = "service restarting";
10777c478bd9Sstevel@tonic-gate 		break;
10787c478bd9Sstevel@tonic-gate 	default:
10797c478bd9Sstevel@tonic-gate #ifndef NDEBUG
10807c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
10817c478bd9Sstevel@tonic-gate 		    cause, __FILE__, __LINE__);
10827c478bd9Sstevel@tonic-gate #endif
10837c478bd9Sstevel@tonic-gate 		abort();
10847c478bd9Sstevel@tonic-gate 	}
10857c478bd9Sstevel@tonic-gate 
10867c478bd9Sstevel@tonic-gate 	/* Services in the disabled and maintenance state are ignored */
10877c478bd9Sstevel@tonic-gate 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
10887c478bd9Sstevel@tonic-gate 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
10897c478bd9Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
10907c478bd9Sstevel@tonic-gate 		    "%s: stop_instance -> is maint/disabled\n",
10917c478bd9Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
10927c478bd9Sstevel@tonic-gate 		return (0);
10937c478bd9Sstevel@tonic-gate 	}
10947c478bd9Sstevel@tonic-gate 
10957c478bd9Sstevel@tonic-gate 	/* Already stopped instances are left alone */
10967c478bd9Sstevel@tonic-gate 	if (instance_started(inst) == 0) {
10977c478bd9Sstevel@tonic-gate 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
10987c478bd9Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
10997c478bd9Sstevel@tonic-gate 		return (0);
11007c478bd9Sstevel@tonic-gate 	}
11017c478bd9Sstevel@tonic-gate 
11027c478bd9Sstevel@tonic-gate 	if (instance_in_transition(inst)) {
11037c478bd9Sstevel@tonic-gate 		/* requeue event by returning -1 */
11047c478bd9Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
11057c478bd9Sstevel@tonic-gate 		    "Restarter: Not stopping %s, in transition.\n",
11067c478bd9Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
11077c478bd9Sstevel@tonic-gate 		return (-1);
11087c478bd9Sstevel@tonic-gate 	}
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
11117c478bd9Sstevel@tonic-gate 
11127c478bd9Sstevel@tonic-gate 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
11137c478bd9Sstevel@tonic-gate 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
11147c478bd9Sstevel@tonic-gate 
11157c478bd9Sstevel@tonic-gate 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
11167c478bd9Sstevel@tonic-gate 		/*
11177c478bd9Sstevel@tonic-gate 		 * No need to stop instance, as child has exited; remove
11187c478bd9Sstevel@tonic-gate 		 * contract and move the instance to the offline state.
11197c478bd9Sstevel@tonic-gate 		 */
11207c478bd9Sstevel@tonic-gate 		switch (err = restarter_instance_update_states(local_handle,
11217c478bd9Sstevel@tonic-gate 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1122f6e214c7SGavin Maltby 		    reason)) {
11237c478bd9Sstevel@tonic-gate 		case 0:
11247c478bd9Sstevel@tonic-gate 		case ECONNRESET:
11257c478bd9Sstevel@tonic-gate 			break;
11267c478bd9Sstevel@tonic-gate 
11277c478bd9Sstevel@tonic-gate 		default:
11287c478bd9Sstevel@tonic-gate 			bad_error("restarter_instance_update_states", err);
11297c478bd9Sstevel@tonic-gate 		}
11307c478bd9Sstevel@tonic-gate 
11317c478bd9Sstevel@tonic-gate 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1132c238c833SSean Wilcox 		reset_start_times(inst);
11337c478bd9Sstevel@tonic-gate 
11347c478bd9Sstevel@tonic-gate 		if (inst->ri_i.i_primary_ctid != 0) {
11357c478bd9Sstevel@tonic-gate 			inst->ri_m_inst =
11367c478bd9Sstevel@tonic-gate 			    safe_scf_instance_create(local_handle);
11377c478bd9Sstevel@tonic-gate 			inst->ri_mi_deleted = B_FALSE;
11387c478bd9Sstevel@tonic-gate 
11397c478bd9Sstevel@tonic-gate 			libscf_reget_instance(inst);
11407c478bd9Sstevel@tonic-gate 			method_remove_contract(inst, B_TRUE, B_TRUE);
11417c478bd9Sstevel@tonic-gate 
11427c478bd9Sstevel@tonic-gate 			scf_instance_destroy(inst->ri_m_inst);
11437c478bd9Sstevel@tonic-gate 			inst->ri_m_inst = NULL;
11447c478bd9Sstevel@tonic-gate 		}
11457c478bd9Sstevel@tonic-gate 
11467c478bd9Sstevel@tonic-gate 		switch (err = restarter_instance_update_states(local_handle,
11477c478bd9Sstevel@tonic-gate 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1148f6e214c7SGavin Maltby 		    reason)) {
11497c478bd9Sstevel@tonic-gate 		case 0:
11507c478bd9Sstevel@tonic-gate 		case ECONNRESET:
11517c478bd9Sstevel@tonic-gate 			break;
11527c478bd9Sstevel@tonic-gate 
11537c478bd9Sstevel@tonic-gate 		default:
11547c478bd9Sstevel@tonic-gate 			bad_error("restarter_instance_update_states", err);
11557c478bd9Sstevel@tonic-gate 		}
11567c478bd9Sstevel@tonic-gate 
11577c478bd9Sstevel@tonic-gate 		return (0);
115863c7f71bSrm88369 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
115963c7f71bSrm88369 		/*
116063c7f71bSrm88369 		 * Stopping a wait service through means other than the pid
116163c7f71bSrm88369 		 * exiting should keep wait_thread() from restarting the
116263c7f71bSrm88369 		 * service, by removing it from the wait list.
116363c7f71bSrm88369 		 * We cannot remove it right now otherwise the process will
116463c7f71bSrm88369 		 * end up <defunct> so mark it to be ignored.
116563c7f71bSrm88369 		 */
116663c7f71bSrm88369 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
11677c478bd9Sstevel@tonic-gate 	}
11687c478bd9Sstevel@tonic-gate 
11697c478bd9Sstevel@tonic-gate 	switch (err = restarter_instance_update_states(local_handle, inst,
11707c478bd9Sstevel@tonic-gate 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1171f6e214c7SGavin Maltby 	    RESTARTER_STATE_DISABLED, RERR_NONE, reason)) {
11727c478bd9Sstevel@tonic-gate 	case 0:
11737c478bd9Sstevel@tonic-gate 	case ECONNRESET:
11747c478bd9Sstevel@tonic-gate 		break;
11757c478bd9Sstevel@tonic-gate 
11767c478bd9Sstevel@tonic-gate 	default:
11777c478bd9Sstevel@tonic-gate 		bad_error("restarter_instance_update_states", err);
11787c478bd9Sstevel@tonic-gate 	}
11797c478bd9Sstevel@tonic-gate 
11807c478bd9Sstevel@tonic-gate 	info = startd_zalloc(sizeof (fork_info_t));
11817c478bd9Sstevel@tonic-gate 
11827c478bd9Sstevel@tonic-gate 	info->sf_id = inst->ri_id;
11837c478bd9Sstevel@tonic-gate 	info->sf_method_type = METHOD_STOP;
11847c478bd9Sstevel@tonic-gate 	info->sf_event_type = re;
1185f6e214c7SGavin Maltby 	info->sf_reason = reason;
11867c478bd9Sstevel@tonic-gate 	inst->ri_method_thread = startd_thread_create(method_thread, info);
11877c478bd9Sstevel@tonic-gate 
11887c478bd9Sstevel@tonic-gate 	return (0);
11897c478bd9Sstevel@tonic-gate }
11907c478bd9Sstevel@tonic-gate 
11917c478bd9Sstevel@tonic-gate /*
11927c478bd9Sstevel@tonic-gate  * Returns
11937c478bd9Sstevel@tonic-gate  *   ENOENT - fmri is not in instance_list
11947c478bd9Sstevel@tonic-gate  *   0 - success
11957c478bd9Sstevel@tonic-gate  *   ECONNRESET - success, though handle was rebound
11967c478bd9Sstevel@tonic-gate  *   -1 - instance is in transition
11977c478bd9Sstevel@tonic-gate  */
11987c478bd9Sstevel@tonic-gate int
11997c478bd9Sstevel@tonic-gate stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
12007c478bd9Sstevel@tonic-gate {
12017c478bd9Sstevel@tonic-gate 	restarter_inst_t *rip;
12027c478bd9Sstevel@tonic-gate 	int r;
12037c478bd9Sstevel@tonic-gate 
12047c478bd9Sstevel@tonic-gate 	rip = inst_lookup_by_name(fmri);
12057c478bd9Sstevel@tonic-gate 	if (rip == NULL)
12067c478bd9Sstevel@tonic-gate 		return (ENOENT);
12077c478bd9Sstevel@tonic-gate 
12087c478bd9Sstevel@tonic-gate 	r = stop_instance(h, rip, flags);
12097c478bd9Sstevel@tonic-gate 
12107c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&rip->ri_lock);
12117c478bd9Sstevel@tonic-gate 
12127c478bd9Sstevel@tonic-gate 	return (r);
12137c478bd9Sstevel@tonic-gate }
12147c478bd9Sstevel@tonic-gate 
12157c478bd9Sstevel@tonic-gate static void
12167c478bd9Sstevel@tonic-gate unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
12177c478bd9Sstevel@tonic-gate     unmaint_cause_t cause)
12187c478bd9Sstevel@tonic-gate {
12197c478bd9Sstevel@tonic-gate 	ctid_t ctid;
12207c478bd9Sstevel@tonic-gate 	scf_instance_t *inst;
12217c478bd9Sstevel@tonic-gate 	int r;
12227c478bd9Sstevel@tonic-gate 	uint_t tries = 0, msecs = ALLOC_DELAY;
12237c478bd9Sstevel@tonic-gate 	const char *cp;
1224f6e214c7SGavin Maltby 	restarter_str_t	reason;
12257c478bd9Sstevel@tonic-gate 
122653f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&rip->ri_lock));
12277c478bd9Sstevel@tonic-gate 
12287c478bd9Sstevel@tonic-gate 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
12297c478bd9Sstevel@tonic-gate 		log_error(LOG_DEBUG, "Restarter: "
12307c478bd9Sstevel@tonic-gate 		    "Ignoring maintenance off command because %s is not in the "
12317c478bd9Sstevel@tonic-gate 		    "maintenance state.\n", rip->ri_i.i_fmri);
12327c478bd9Sstevel@tonic-gate 		return;
12337c478bd9Sstevel@tonic-gate 	}
12347c478bd9Sstevel@tonic-gate 
12357c478bd9Sstevel@tonic-gate 	switch (cause) {
12367c478bd9Sstevel@tonic-gate 	case RUNMAINT_CLEAR:
12377c478bd9Sstevel@tonic-gate 		cp = "clear requested";
1238f6e214c7SGavin Maltby 		reason = restarter_str_clear_request;
12397c478bd9Sstevel@tonic-gate 		break;
12407c478bd9Sstevel@tonic-gate 	case RUNMAINT_DISABLE:
12417c478bd9Sstevel@tonic-gate 		cp = "disable requested";
1242f6e214c7SGavin Maltby 		reason = restarter_str_disable_request;
12437c478bd9Sstevel@tonic-gate 		break;
12447c478bd9Sstevel@tonic-gate 	default:
12457c478bd9Sstevel@tonic-gate #ifndef NDEBUG
12467c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
12477c478bd9Sstevel@tonic-gate 		    cause, __FILE__, __LINE__);
12487c478bd9Sstevel@tonic-gate #endif
12497c478bd9Sstevel@tonic-gate 		abort();
12507c478bd9Sstevel@tonic-gate 	}
12517c478bd9Sstevel@tonic-gate 
12527c478bd9Sstevel@tonic-gate 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
12537c478bd9Sstevel@tonic-gate 	    cp);
12547c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
12557c478bd9Sstevel@tonic-gate 	    "%s.\n", rip->ri_i.i_fmri, cp);
12567c478bd9Sstevel@tonic-gate 
12577c478bd9Sstevel@tonic-gate 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1258f6e214c7SGavin Maltby 	    RESTARTER_STATE_NONE, RERR_RESTART, reason);
12597c478bd9Sstevel@tonic-gate 
12607c478bd9Sstevel@tonic-gate 	/*
12617c478bd9Sstevel@tonic-gate 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
12627c478bd9Sstevel@tonic-gate 	 * a primary contract.
12637c478bd9Sstevel@tonic-gate 	 */
12647c478bd9Sstevel@tonic-gate 	if (rip->ri_i.i_primary_ctid == 0)
12657c478bd9Sstevel@tonic-gate 		return;
12667c478bd9Sstevel@tonic-gate 
12677c478bd9Sstevel@tonic-gate 	ctid = rip->ri_i.i_primary_ctid;
12687c478bd9Sstevel@tonic-gate 	contract_abandon(ctid);
12697c478bd9Sstevel@tonic-gate 	rip->ri_i.i_primary_ctid = 0;
12707c478bd9Sstevel@tonic-gate 
12717c478bd9Sstevel@tonic-gate rep_retry:
12727c478bd9Sstevel@tonic-gate 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
12737c478bd9Sstevel@tonic-gate 	case 0:
12747c478bd9Sstevel@tonic-gate 		break;
12757c478bd9Sstevel@tonic-gate 
12767c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
12777c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
12787c478bd9Sstevel@tonic-gate 		goto rep_retry;
12797c478bd9Sstevel@tonic-gate 
12807c478bd9Sstevel@tonic-gate 	case ENOENT:
12817c478bd9Sstevel@tonic-gate 		/* Must have been deleted. */
12827c478bd9Sstevel@tonic-gate 		return;
12837c478bd9Sstevel@tonic-gate 
12847c478bd9Sstevel@tonic-gate 	case EINVAL:
12857c478bd9Sstevel@tonic-gate 	case ENOTSUP:
12867c478bd9Sstevel@tonic-gate 	default:
12877c478bd9Sstevel@tonic-gate 		bad_error("libscf_handle_rebind", r);
12887c478bd9Sstevel@tonic-gate 	}
12897c478bd9Sstevel@tonic-gate 
12907c478bd9Sstevel@tonic-gate again:
12917c478bd9Sstevel@tonic-gate 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
12927c478bd9Sstevel@tonic-gate 	switch (r) {
12937c478bd9Sstevel@tonic-gate 	case 0:
12947c478bd9Sstevel@tonic-gate 		break;
12957c478bd9Sstevel@tonic-gate 
12967c478bd9Sstevel@tonic-gate 	case ENOMEM:
12977c478bd9Sstevel@tonic-gate 		++tries;
12987c478bd9Sstevel@tonic-gate 		if (tries < ALLOC_RETRY) {
12997c478bd9Sstevel@tonic-gate 			(void) poll(NULL, 0, msecs);
13007c478bd9Sstevel@tonic-gate 			msecs *= ALLOC_DELAY_MULT;
13017c478bd9Sstevel@tonic-gate 			goto again;
13027c478bd9Sstevel@tonic-gate 		}
13037c478bd9Sstevel@tonic-gate 
13047c478bd9Sstevel@tonic-gate 		uu_die("Insufficient memory.\n");
13057c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
13067c478bd9Sstevel@tonic-gate 
13077c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
13087c478bd9Sstevel@tonic-gate 		scf_instance_destroy(inst);
13097c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
13107c478bd9Sstevel@tonic-gate 		goto rep_retry;
13117c478bd9Sstevel@tonic-gate 
13127c478bd9Sstevel@tonic-gate 	case ECANCELED:
13137c478bd9Sstevel@tonic-gate 		break;
13147c478bd9Sstevel@tonic-gate 
13157c478bd9Sstevel@tonic-gate 	case EPERM:
13167c478bd9Sstevel@tonic-gate 	case EACCES:
13177c478bd9Sstevel@tonic-gate 	case EROFS:
13187c478bd9Sstevel@tonic-gate 		log_error(LOG_INFO,
13197c478bd9Sstevel@tonic-gate 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
13207c478bd9Sstevel@tonic-gate 		    rip->ri_i.i_fmri, strerror(r));
13217c478bd9Sstevel@tonic-gate 		break;
13227c478bd9Sstevel@tonic-gate 
13237c478bd9Sstevel@tonic-gate 	case EINVAL:
13247c478bd9Sstevel@tonic-gate 	case EBADF:
13257c478bd9Sstevel@tonic-gate 	default:
13267c478bd9Sstevel@tonic-gate 		bad_error("restarter_remove_contract", r);
13277c478bd9Sstevel@tonic-gate 	}
13287c478bd9Sstevel@tonic-gate 
13297c478bd9Sstevel@tonic-gate 	scf_instance_destroy(inst);
13307c478bd9Sstevel@tonic-gate }
13317c478bd9Sstevel@tonic-gate 
13327c478bd9Sstevel@tonic-gate /*
13337c478bd9Sstevel@tonic-gate  * enable_inst()
13347c478bd9Sstevel@tonic-gate  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
13357c478bd9Sstevel@tonic-gate  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
13367c478bd9Sstevel@tonic-gate  *   disabled, move it to offline.  If the event is _DISABLE or
13377c478bd9Sstevel@tonic-gate  *   _ADMIN_DISABLE, make sure inst will move to disabled.
13387c478bd9Sstevel@tonic-gate  *
13397c478bd9Sstevel@tonic-gate  *   Returns
13407c478bd9Sstevel@tonic-gate  *     0 - success
13417c478bd9Sstevel@tonic-gate  *     ECONNRESET - h was rebound
13427c478bd9Sstevel@tonic-gate  */
13437c478bd9Sstevel@tonic-gate static int
1344f6e214c7SGavin Maltby enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1345f6e214c7SGavin Maltby     restarter_instance_qentry_t *riq)
13467c478bd9Sstevel@tonic-gate {
13477c478bd9Sstevel@tonic-gate 	restarter_instance_state_t state;
1348f6e214c7SGavin Maltby 	restarter_event_type_t e = riq->riq_type;
1349f6e214c7SGavin Maltby 	restarter_str_t reason = restarter_str_per_configuration;
13507c478bd9Sstevel@tonic-gate 	int r;
13517c478bd9Sstevel@tonic-gate 
135253f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
13537c478bd9Sstevel@tonic-gate 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
13547c478bd9Sstevel@tonic-gate 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
13557c478bd9Sstevel@tonic-gate 	    e == RESTARTER_EVENT_TYPE_ENABLE);
13567c478bd9Sstevel@tonic-gate 	assert(instance_in_transition(inst) == 0);
13577c478bd9Sstevel@tonic-gate 
13587c478bd9Sstevel@tonic-gate 	state = inst->ri_i.i_state;
13597c478bd9Sstevel@tonic-gate 
13607c478bd9Sstevel@tonic-gate 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
13617c478bd9Sstevel@tonic-gate 		inst->ri_i.i_enabled = 1;
13627c478bd9Sstevel@tonic-gate 
13637c478bd9Sstevel@tonic-gate 		if (state == RESTARTER_STATE_UNINIT ||
13647c478bd9Sstevel@tonic-gate 		    state == RESTARTER_STATE_DISABLED) {
13657c478bd9Sstevel@tonic-gate 			/*
13667c478bd9Sstevel@tonic-gate 			 * B_FALSE: Don't log an error if the log_instance()
13677c478bd9Sstevel@tonic-gate 			 * fails because it will fail on the miniroot before
13687c478bd9Sstevel@tonic-gate 			 * install-discovery runs.
13697c478bd9Sstevel@tonic-gate 			 */
13707c478bd9Sstevel@tonic-gate 			log_instance(inst, B_FALSE, "Enabled.");
13717c478bd9Sstevel@tonic-gate 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
13727c478bd9Sstevel@tonic-gate 			    inst->ri_i.i_fmri);
1373f6e214c7SGavin Maltby 
1374f6e214c7SGavin Maltby 			/*
1375f6e214c7SGavin Maltby 			 * If we are coming from DISABLED, it was obviously an
1376f6e214c7SGavin Maltby 			 * enable request. If we are coming from UNINIT, it may
1377f6e214c7SGavin Maltby 			 * have been a sevice in MAINT that was cleared.
1378f6e214c7SGavin Maltby 			 */
1379f6e214c7SGavin Maltby 			if (riq->riq_reason == restarter_str_clear_request)
1380f6e214c7SGavin Maltby 				reason = restarter_str_clear_request;
1381f6e214c7SGavin Maltby 			else if (state == RESTARTER_STATE_DISABLED)
1382f6e214c7SGavin Maltby 				reason = restarter_str_enable_request;
13837c478bd9Sstevel@tonic-gate 			(void) restarter_instance_update_states(h, inst,
13847c478bd9Sstevel@tonic-gate 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1385f6e214c7SGavin Maltby 			    RERR_NONE, reason);
13867c478bd9Sstevel@tonic-gate 		} else {
13877c478bd9Sstevel@tonic-gate 			log_framework(LOG_DEBUG, "Restarter: "
13887c478bd9Sstevel@tonic-gate 			    "Not changing state of %s for enable command.\n",
13897c478bd9Sstevel@tonic-gate 			    inst->ri_i.i_fmri);
13907c478bd9Sstevel@tonic-gate 		}
13917c478bd9Sstevel@tonic-gate 	} else {
13927c478bd9Sstevel@tonic-gate 		inst->ri_i.i_enabled = 0;
13937c478bd9Sstevel@tonic-gate 
13947c478bd9Sstevel@tonic-gate 		switch (state) {
13957c478bd9Sstevel@tonic-gate 		case RESTARTER_STATE_ONLINE:
13967c478bd9Sstevel@tonic-gate 		case RESTARTER_STATE_DEGRADED:
13977c478bd9Sstevel@tonic-gate 			r = stop_instance(h, inst, RSTOP_DISABLE);
13987c478bd9Sstevel@tonic-gate 			return (r == ECONNRESET ? 0 : r);
13997c478bd9Sstevel@tonic-gate 
14007c478bd9Sstevel@tonic-gate 		case RESTARTER_STATE_OFFLINE:
14017c478bd9Sstevel@tonic-gate 		case RESTARTER_STATE_UNINIT:
14027c478bd9Sstevel@tonic-gate 			if (inst->ri_i.i_primary_ctid != 0) {
14037c478bd9Sstevel@tonic-gate 				inst->ri_m_inst = safe_scf_instance_create(h);
14047c478bd9Sstevel@tonic-gate 				inst->ri_mi_deleted = B_FALSE;
14057c478bd9Sstevel@tonic-gate 
14067c478bd9Sstevel@tonic-gate 				libscf_reget_instance(inst);
14077c478bd9Sstevel@tonic-gate 				method_remove_contract(inst, B_TRUE, B_TRUE);
14087c478bd9Sstevel@tonic-gate 
14097c478bd9Sstevel@tonic-gate 				scf_instance_destroy(inst->ri_m_inst);
14107c478bd9Sstevel@tonic-gate 			}
14117c478bd9Sstevel@tonic-gate 			/* B_FALSE: See log_instance(..., "Enabled."); above */
14127c478bd9Sstevel@tonic-gate 			log_instance(inst, B_FALSE, "Disabled.");
14137c478bd9Sstevel@tonic-gate 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
14147c478bd9Sstevel@tonic-gate 			    inst->ri_i.i_fmri);
1415f6e214c7SGavin Maltby 
1416f6e214c7SGavin Maltby 			/*
1417f6e214c7SGavin Maltby 			 * If we are coming from OFFLINE, it was obviously a
1418f6e214c7SGavin Maltby 			 * disable request. But if we are coming from
1419f6e214c7SGavin Maltby 			 * UNINIT, it may have been a disable request for a
1420f6e214c7SGavin Maltby 			 * service in MAINT.
1421f6e214c7SGavin Maltby 			 */
1422f6e214c7SGavin Maltby 			if (riq->riq_reason == restarter_str_disable_request ||
1423f6e214c7SGavin Maltby 			    state == RESTARTER_STATE_OFFLINE)
1424f6e214c7SGavin Maltby 				reason = restarter_str_disable_request;
14257c478bd9Sstevel@tonic-gate 			(void) restarter_instance_update_states(h, inst,
14267c478bd9Sstevel@tonic-gate 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1427f6e214c7SGavin Maltby 			    RERR_RESTART, reason);
14287c478bd9Sstevel@tonic-gate 			return (0);
14297c478bd9Sstevel@tonic-gate 
14307c478bd9Sstevel@tonic-gate 		case RESTARTER_STATE_DISABLED:
14317c478bd9Sstevel@tonic-gate 			break;
14327c478bd9Sstevel@tonic-gate 
14337c478bd9Sstevel@tonic-gate 		case RESTARTER_STATE_MAINT:
14347c478bd9Sstevel@tonic-gate 			/*
14357c478bd9Sstevel@tonic-gate 			 * We only want to pull the instance out of maintenance
14367c478bd9Sstevel@tonic-gate 			 * if the disable is on adminstrative request.  The
14377c478bd9Sstevel@tonic-gate 			 * graph engine sends _DISABLE events whenever a
14387c478bd9Sstevel@tonic-gate 			 * service isn't in the disabled state, and we don't
14397c478bd9Sstevel@tonic-gate 			 * want to pull the service out of maintenance if,
14407c478bd9Sstevel@tonic-gate 			 * for example, it is there due to a dependency cycle.
14417c478bd9Sstevel@tonic-gate 			 */
14427c478bd9Sstevel@tonic-gate 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
14437c478bd9Sstevel@tonic-gate 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
14447c478bd9Sstevel@tonic-gate 			break;
14457c478bd9Sstevel@tonic-gate 
14467c478bd9Sstevel@tonic-gate 		default:
14477c478bd9Sstevel@tonic-gate #ifndef NDEBUG
14487c478bd9Sstevel@tonic-gate 			(void) fprintf(stderr, "Restarter instance %s has "
14497c478bd9Sstevel@tonic-gate 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
14507c478bd9Sstevel@tonic-gate #endif
14517c478bd9Sstevel@tonic-gate 			abort();
14527c478bd9Sstevel@tonic-gate 		}
14537c478bd9Sstevel@tonic-gate 	}
14547c478bd9Sstevel@tonic-gate 
14557c478bd9Sstevel@tonic-gate 	return (0);
14567c478bd9Sstevel@tonic-gate }
14577c478bd9Sstevel@tonic-gate 
14587c478bd9Sstevel@tonic-gate static void
1459f6e214c7SGavin Maltby start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1460f6e214c7SGavin Maltby     int32_t reason)
14617c478bd9Sstevel@tonic-gate {
14627c478bd9Sstevel@tonic-gate 	fork_info_t *info;
1463f6e214c7SGavin Maltby 	restarter_str_t	new_reason;
14647c478bd9Sstevel@tonic-gate 
146553f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
14667c478bd9Sstevel@tonic-gate 	assert(instance_in_transition(inst) == 0);
14677c478bd9Sstevel@tonic-gate 	assert(inst->ri_method_thread == 0);
14687c478bd9Sstevel@tonic-gate 
14697c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
14707c478bd9Sstevel@tonic-gate 	    inst->ri_i.i_fmri);
14717c478bd9Sstevel@tonic-gate 
1472f6e214c7SGavin Maltby 	/*
1473f6e214c7SGavin Maltby 	 * We want to keep the original reason for restarts and clear actions
1474f6e214c7SGavin Maltby 	 */
1475f6e214c7SGavin Maltby 	switch (reason) {
1476f6e214c7SGavin Maltby 	case restarter_str_restart_request:
1477f6e214c7SGavin Maltby 	case restarter_str_clear_request:
1478f6e214c7SGavin Maltby 		new_reason = reason;
1479f6e214c7SGavin Maltby 		break;
1480f6e214c7SGavin Maltby 	default:
1481f6e214c7SGavin Maltby 		new_reason = restarter_str_dependencies_satisfied;
1482f6e214c7SGavin Maltby 	}
1483f6e214c7SGavin Maltby 
14847c478bd9Sstevel@tonic-gate 	/* Services in the disabled and maintenance state are ignored */
14857c478bd9Sstevel@tonic-gate 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
14867c478bd9Sstevel@tonic-gate 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
14877c478bd9Sstevel@tonic-gate 	    inst->ri_i.i_enabled == 0) {
14887c478bd9Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
14897c478bd9Sstevel@tonic-gate 		    "%s: start_instance -> is maint/disabled\n",
14907c478bd9Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
14917c478bd9Sstevel@tonic-gate 		return;
14927c478bd9Sstevel@tonic-gate 	}
14937c478bd9Sstevel@tonic-gate 
14947c478bd9Sstevel@tonic-gate 	/* Already started instances are left alone */
14957c478bd9Sstevel@tonic-gate 	if (instance_started(inst) == 1) {
14967c478bd9Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
14977c478bd9Sstevel@tonic-gate 		    "%s: start_instance -> is already started\n",
14987c478bd9Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
14997c478bd9Sstevel@tonic-gate 		return;
15007c478bd9Sstevel@tonic-gate 	}
15017c478bd9Sstevel@tonic-gate 
15027c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
15037c478bd9Sstevel@tonic-gate 
15047c478bd9Sstevel@tonic-gate 	(void) restarter_instance_update_states(local_handle, inst,
1505f6e214c7SGavin Maltby 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
15067c478bd9Sstevel@tonic-gate 
15077c478bd9Sstevel@tonic-gate 	info = startd_zalloc(sizeof (fork_info_t));
15087c478bd9Sstevel@tonic-gate 
15097c478bd9Sstevel@tonic-gate 	info->sf_id = inst->ri_id;
15107c478bd9Sstevel@tonic-gate 	info->sf_method_type = METHOD_START;
15117c478bd9Sstevel@tonic-gate 	info->sf_event_type = RERR_NONE;
1512f6e214c7SGavin Maltby 	info->sf_reason = new_reason;
15137c478bd9Sstevel@tonic-gate 	inst->ri_method_thread = startd_thread_create(method_thread, info);
15147c478bd9Sstevel@tonic-gate }
15157c478bd9Sstevel@tonic-gate 
1516eb1a3463STruong Nguyen static int
1517eb1a3463STruong Nguyen event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1518eb1a3463STruong Nguyen {
1519eb1a3463STruong Nguyen 	scf_instance_t *inst;
1520eb1a3463STruong Nguyen 	int ret = 0;
1521eb1a3463STruong Nguyen 
1522eb1a3463STruong Nguyen 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1523eb1a3463STruong Nguyen 		return (-1);
1524eb1a3463STruong Nguyen 
1525eb1a3463STruong Nguyen 	ret = restarter_inst_ractions_from_tty(inst);
1526eb1a3463STruong Nguyen 
1527eb1a3463STruong Nguyen 	scf_instance_destroy(inst);
1528eb1a3463STruong Nguyen 	return (ret);
1529eb1a3463STruong Nguyen }
1530eb1a3463STruong Nguyen 
15317c478bd9Sstevel@tonic-gate static void
15327c478bd9Sstevel@tonic-gate maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1533f6e214c7SGavin Maltby     restarter_str_t reason)
15347c478bd9Sstevel@tonic-gate {
15357c478bd9Sstevel@tonic-gate 	fork_info_t *info;
1536eb1a3463STruong Nguyen 	scf_instance_t *scf_inst = NULL;
15377c478bd9Sstevel@tonic-gate 
153853f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&rip->ri_lock));
1539f6e214c7SGavin Maltby 	assert(reason != restarter_str_none);
15407c478bd9Sstevel@tonic-gate 	assert(rip->ri_method_thread == 0);
15417c478bd9Sstevel@tonic-gate 
1542f6e214c7SGavin Maltby 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1543f6e214c7SGavin Maltby 	    restarter_get_str_short(reason));
15447c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1545f6e214c7SGavin Maltby 	    rip->ri_i.i_fmri, restarter_get_str_short(reason));
15467c478bd9Sstevel@tonic-gate 
15477c478bd9Sstevel@tonic-gate 	/* Services in the maintenance state are ignored */
15487c478bd9Sstevel@tonic-gate 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
15497c478bd9Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
15507c478bd9Sstevel@tonic-gate 		    "%s: maintain_instance -> is already in maintenance\n",
15517c478bd9Sstevel@tonic-gate 		    rip->ri_i.i_fmri);
15527c478bd9Sstevel@tonic-gate 		return;
15537c478bd9Sstevel@tonic-gate 	}
15547c478bd9Sstevel@tonic-gate 
1555eb1a3463STruong Nguyen 	/*
1556f6e214c7SGavin Maltby 	 * If reason state is restarter_str_service_request and
1557eb1a3463STruong Nguyen 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1558eb1a3463STruong Nguyen 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1559eb1a3463STruong Nguyen 	 */
1560f6e214c7SGavin Maltby 	if (reason == restarter_str_service_request &&
1561f6e214c7SGavin Maltby 	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1562eb1a3463STruong Nguyen 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1563eb1a3463STruong Nguyen 			if (restarter_inst_set_aux_fmri(scf_inst))
1564eb1a3463STruong Nguyen 				log_framework(LOG_DEBUG, "%s: "
1565eb1a3463STruong Nguyen 				    "restarter_inst_set_aux_fmri failed: ",
1566eb1a3463STruong Nguyen 				    rip->ri_i.i_fmri);
1567eb1a3463STruong Nguyen 		} else {
1568eb1a3463STruong Nguyen 			log_framework(LOG_DEBUG, "%s: "
1569eb1a3463STruong Nguyen 			    "restarter_inst_validate_ractions_aux_fmri "
1570eb1a3463STruong Nguyen 			    "failed: ", rip->ri_i.i_fmri);
1571eb1a3463STruong Nguyen 
1572eb1a3463STruong Nguyen 			if (restarter_inst_reset_aux_fmri(scf_inst))
1573eb1a3463STruong Nguyen 				log_framework(LOG_DEBUG, "%s: "
1574eb1a3463STruong Nguyen 				    "restarter_inst_reset_aux_fmri failed: ",
1575eb1a3463STruong Nguyen 				    rip->ri_i.i_fmri);
1576eb1a3463STruong Nguyen 		}
1577eb1a3463STruong Nguyen 		scf_instance_destroy(scf_inst);
1578eb1a3463STruong Nguyen 	}
1579eb1a3463STruong Nguyen 
15807c478bd9Sstevel@tonic-gate 	if (immediate || !instance_started(rip)) {
15817c478bd9Sstevel@tonic-gate 		if (rip->ri_i.i_primary_ctid != 0) {
15827c478bd9Sstevel@tonic-gate 			rip->ri_m_inst = safe_scf_instance_create(h);
15837c478bd9Sstevel@tonic-gate 			rip->ri_mi_deleted = B_FALSE;
15847c478bd9Sstevel@tonic-gate 
15857c478bd9Sstevel@tonic-gate 			libscf_reget_instance(rip);
15867c478bd9Sstevel@tonic-gate 			method_remove_contract(rip, B_TRUE, B_TRUE);
15877c478bd9Sstevel@tonic-gate 
15887c478bd9Sstevel@tonic-gate 			scf_instance_destroy(rip->ri_m_inst);
15897c478bd9Sstevel@tonic-gate 		}
15907c478bd9Sstevel@tonic-gate 
15917c478bd9Sstevel@tonic-gate 		(void) restarter_instance_update_states(h, rip,
15927c478bd9Sstevel@tonic-gate 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1593f6e214c7SGavin Maltby 		    reason);
15947c478bd9Sstevel@tonic-gate 		return;
15957c478bd9Sstevel@tonic-gate 	}
15967c478bd9Sstevel@tonic-gate 
15977c478bd9Sstevel@tonic-gate 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1598f6e214c7SGavin Maltby 	    RESTARTER_STATE_MAINT, RERR_NONE, reason);
15997c478bd9Sstevel@tonic-gate 
160099b44c3bSlianep 	log_transition(rip, MAINT_REQUESTED);
160199b44c3bSlianep 
16027c478bd9Sstevel@tonic-gate 	info = startd_zalloc(sizeof (*info));
16037c478bd9Sstevel@tonic-gate 	info->sf_id = rip->ri_id;
16047c478bd9Sstevel@tonic-gate 	info->sf_method_type = METHOD_STOP;
16057c478bd9Sstevel@tonic-gate 	info->sf_event_type = RERR_RESTART;
1606f6e214c7SGavin Maltby 	info->sf_reason = reason;
16077c478bd9Sstevel@tonic-gate 	rip->ri_method_thread = startd_thread_create(method_thread, info);
16087c478bd9Sstevel@tonic-gate }
16097c478bd9Sstevel@tonic-gate 
16107c478bd9Sstevel@tonic-gate static void
16117c478bd9Sstevel@tonic-gate refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
16127c478bd9Sstevel@tonic-gate {
16137c478bd9Sstevel@tonic-gate 	scf_instance_t *inst;
16147c478bd9Sstevel@tonic-gate 	scf_snapshot_t *snap;
16157c478bd9Sstevel@tonic-gate 	fork_info_t *info;
16167c478bd9Sstevel@tonic-gate 	int r;
16177c478bd9Sstevel@tonic-gate 
161853f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&rip->ri_lock));
16197c478bd9Sstevel@tonic-gate 
16207c478bd9Sstevel@tonic-gate 	log_instance(rip, B_TRUE, "Rereading configuration.");
16217c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
16227c478bd9Sstevel@tonic-gate 	    rip->ri_i.i_fmri);
16237c478bd9Sstevel@tonic-gate 
16247c478bd9Sstevel@tonic-gate rep_retry:
16257c478bd9Sstevel@tonic-gate 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
16267c478bd9Sstevel@tonic-gate 	switch (r) {
16277c478bd9Sstevel@tonic-gate 	case 0:
16287c478bd9Sstevel@tonic-gate 		break;
16297c478bd9Sstevel@tonic-gate 
16307c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
16317c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
16327c478bd9Sstevel@tonic-gate 		goto rep_retry;
16337c478bd9Sstevel@tonic-gate 
16347c478bd9Sstevel@tonic-gate 	case ENOENT:
16357c478bd9Sstevel@tonic-gate 		/* Must have been deleted. */
16367c478bd9Sstevel@tonic-gate 		return;
16377c478bd9Sstevel@tonic-gate 
16387c478bd9Sstevel@tonic-gate 	case EINVAL:
16397c478bd9Sstevel@tonic-gate 	case ENOTSUP:
16407c478bd9Sstevel@tonic-gate 	default:
16417c478bd9Sstevel@tonic-gate 		bad_error("libscf_fmri_get_instance", r);
16427c478bd9Sstevel@tonic-gate 	}
16437c478bd9Sstevel@tonic-gate 
16447c478bd9Sstevel@tonic-gate 	snap = libscf_get_running_snapshot(inst);
16457c478bd9Sstevel@tonic-gate 
16467c478bd9Sstevel@tonic-gate 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
16477c478bd9Sstevel@tonic-gate 	    &rip->ri_utmpx_prefix);
16487c478bd9Sstevel@tonic-gate 	switch (r) {
16497c478bd9Sstevel@tonic-gate 	case 0:
16507c478bd9Sstevel@tonic-gate 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
16517c478bd9Sstevel@tonic-gate 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
16527c478bd9Sstevel@tonic-gate 		break;
16537c478bd9Sstevel@tonic-gate 
16547c478bd9Sstevel@tonic-gate 	case ECONNABORTED:
16557c478bd9Sstevel@tonic-gate 		scf_instance_destroy(inst);
16567c478bd9Sstevel@tonic-gate 		scf_snapshot_destroy(snap);
16577c478bd9Sstevel@tonic-gate 		libscf_handle_rebind(h);
16587c478bd9Sstevel@tonic-gate 		goto rep_retry;
16597c478bd9Sstevel@tonic-gate 
16607c478bd9Sstevel@tonic-gate 	case ECANCELED:
16617c478bd9Sstevel@tonic-gate 	case ENOENT:
16627c478bd9Sstevel@tonic-gate 		/* Succeed in anticipation of REMOVE_INSTANCE. */
16637c478bd9Sstevel@tonic-gate 		break;
16647c478bd9Sstevel@tonic-gate 
16657c478bd9Sstevel@tonic-gate 	default:
16667c478bd9Sstevel@tonic-gate 		bad_error("libscf_get_startd_properties", r);
16677c478bd9Sstevel@tonic-gate 	}
16687c478bd9Sstevel@tonic-gate 
16697c478bd9Sstevel@tonic-gate 	if (instance_started(rip)) {
16707c478bd9Sstevel@tonic-gate 		/* Refresh does not change the state. */
16717c478bd9Sstevel@tonic-gate 		(void) restarter_instance_update_states(h, rip,
1672f6e214c7SGavin Maltby 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1673f6e214c7SGavin Maltby 		    restarter_str_refresh);
16747c478bd9Sstevel@tonic-gate 
16757c478bd9Sstevel@tonic-gate 		info = startd_zalloc(sizeof (*info));
16767c478bd9Sstevel@tonic-gate 		info->sf_id = rip->ri_id;
16777c478bd9Sstevel@tonic-gate 		info->sf_method_type = METHOD_REFRESH;
16787c478bd9Sstevel@tonic-gate 		info->sf_event_type = RERR_REFRESH;
1679f6e214c7SGavin Maltby 		info->sf_reason = NULL;
16807c478bd9Sstevel@tonic-gate 
16817c478bd9Sstevel@tonic-gate 		assert(rip->ri_method_thread == 0);
16827c478bd9Sstevel@tonic-gate 		rip->ri_method_thread =
16837c478bd9Sstevel@tonic-gate 		    startd_thread_create(method_thread, info);
16847c478bd9Sstevel@tonic-gate 	}
16857c478bd9Sstevel@tonic-gate 
16867c478bd9Sstevel@tonic-gate 	scf_snapshot_destroy(snap);
16877c478bd9Sstevel@tonic-gate 	scf_instance_destroy(inst);
16887c478bd9Sstevel@tonic-gate }
16897c478bd9Sstevel@tonic-gate 
/*
 * Printable names for restarter events.
 * NOTE(review): presumably indexed by restarter_event_type_t value, so the
 * order here would have to track that enum -- confirm against its
 * definition before adding or moving entries.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE",
	"STOP_RESET"
};
16967c478bd9Sstevel@tonic-gate 
16977c478bd9Sstevel@tonic-gate /*
16987c478bd9Sstevel@tonic-gate  * void *restarter_process_events()
16997c478bd9Sstevel@tonic-gate  *
17007c478bd9Sstevel@tonic-gate  *   Called in a separate thread to process the events on an instance's
17017c478bd9Sstevel@tonic-gate  *   queue.  Empties the queue completely, and tries to keep the thread
17027c478bd9Sstevel@tonic-gate  *   around for a little while after the queue is empty to save on
17037c478bd9Sstevel@tonic-gate  *   startup costs.
17047c478bd9Sstevel@tonic-gate  */
17057c478bd9Sstevel@tonic-gate static void *
17067c478bd9Sstevel@tonic-gate restarter_process_events(void *arg)
17077c478bd9Sstevel@tonic-gate {
17087c478bd9Sstevel@tonic-gate 	scf_handle_t *h;
17097c478bd9Sstevel@tonic-gate 	restarter_instance_qentry_t *event;
17107c478bd9Sstevel@tonic-gate 	restarter_inst_t *rip;
17117c478bd9Sstevel@tonic-gate 	char *fmri = (char *)arg;
17127c478bd9Sstevel@tonic-gate 	struct timespec to;
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate 	assert(fmri != NULL);
17157c478bd9Sstevel@tonic-gate 
17167c478bd9Sstevel@tonic-gate 	h = libscf_handle_create_bound_loop();
17177c478bd9Sstevel@tonic-gate 
17187c478bd9Sstevel@tonic-gate 	/* grab the queue lock */
17197c478bd9Sstevel@tonic-gate 	rip = inst_lookup_queue(fmri);
17207c478bd9Sstevel@tonic-gate 	if (rip == NULL)
17217c478bd9Sstevel@tonic-gate 		goto out;
17227c478bd9Sstevel@tonic-gate 
17237c478bd9Sstevel@tonic-gate again:
17247c478bd9Sstevel@tonic-gate 
17257c478bd9Sstevel@tonic-gate 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
17267c478bd9Sstevel@tonic-gate 		restarter_inst_t *inst;
17277c478bd9Sstevel@tonic-gate 
17287c478bd9Sstevel@tonic-gate 		/* drop the queue lock */
17297c478bd9Sstevel@tonic-gate 		MUTEX_UNLOCK(&rip->ri_queue_lock);
17307c478bd9Sstevel@tonic-gate 
17317c478bd9Sstevel@tonic-gate 		/*
17327c478bd9Sstevel@tonic-gate 		 * Grab the inst lock -- this waits until any outstanding
17337c478bd9Sstevel@tonic-gate 		 * method finishes running.
17347c478bd9Sstevel@tonic-gate 		 */
17357c478bd9Sstevel@tonic-gate 		inst = inst_lookup_by_name(fmri);
17367c478bd9Sstevel@tonic-gate 		if (inst == NULL) {
17377c478bd9Sstevel@tonic-gate 			/* Getting deleted in the middle isn't an error. */
17387c478bd9Sstevel@tonic-gate 			goto cont;
17397c478bd9Sstevel@tonic-gate 		}
17407c478bd9Sstevel@tonic-gate 
17417c478bd9Sstevel@tonic-gate 		assert(instance_in_transition(inst) == 0);
17427c478bd9Sstevel@tonic-gate 
17437c478bd9Sstevel@tonic-gate 		/* process the event */
17447c478bd9Sstevel@tonic-gate 		switch (event->riq_type) {
17457c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ENABLE:
17467c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_DISABLE:
1747f6e214c7SGavin Maltby 			(void) enable_inst(h, inst, event);
17487c478bd9Sstevel@tonic-gate 			break;
17497c478bd9Sstevel@tonic-gate 
175016ba0facSSean Wilcox 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1751f6e214c7SGavin Maltby 			if (enable_inst(h, inst, event) == 0)
175216ba0facSSean Wilcox 				reset_start_times(inst);
175316ba0facSSean Wilcox 			break;
175416ba0facSSean Wilcox 
17557c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
17567c478bd9Sstevel@tonic-gate 			restarter_delete_inst(inst);
17577c478bd9Sstevel@tonic-gate 			inst = NULL;
17587c478bd9Sstevel@tonic-gate 			goto cont;
17597c478bd9Sstevel@tonic-gate 
176016ba0facSSean Wilcox 		case RESTARTER_EVENT_TYPE_STOP_RESET:
176116ba0facSSean Wilcox 			reset_start_times(inst);
176216ba0facSSean Wilcox 			/* FALLTHROUGH */
17637c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_STOP:
17647c478bd9Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
17657c478bd9Sstevel@tonic-gate 			break;
17667c478bd9Sstevel@tonic-gate 
17677c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_START:
1768f6e214c7SGavin Maltby 			start_instance(h, inst, event->riq_reason);
17697c478bd9Sstevel@tonic-gate 			break;
17707c478bd9Sstevel@tonic-gate 
17717c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1772f6e214c7SGavin Maltby 			maintain_instance(h, inst, 0,
1773f6e214c7SGavin Maltby 			    restarter_str_dependency_cycle);
17747c478bd9Sstevel@tonic-gate 			break;
17757c478bd9Sstevel@tonic-gate 
17767c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1777f6e214c7SGavin Maltby 			maintain_instance(h, inst, 0,
1778f6e214c7SGavin Maltby 			    restarter_str_invalid_dependency);
17797c478bd9Sstevel@tonic-gate 			break;
17807c478bd9Sstevel@tonic-gate 
17817c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1782eb1a3463STruong Nguyen 			if (event_from_tty(h, inst) == 0)
1783eb1a3463STruong Nguyen 				maintain_instance(h, inst, 0,
1784f6e214c7SGavin Maltby 				    restarter_str_service_request);
1785eb1a3463STruong Nguyen 			else
1786eb1a3463STruong Nguyen 				maintain_instance(h, inst, 0,
1787f6e214c7SGavin Maltby 				    restarter_str_administrative_request);
17887c478bd9Sstevel@tonic-gate 			break;
17897c478bd9Sstevel@tonic-gate 
17907c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1791eb1a3463STruong Nguyen 			if (event_from_tty(h, inst) == 0)
1792eb1a3463STruong Nguyen 				maintain_instance(h, inst, 1,
1793f6e214c7SGavin Maltby 				    restarter_str_service_request);
1794eb1a3463STruong Nguyen 			else
1795eb1a3463STruong Nguyen 				maintain_instance(h, inst, 1,
1796f6e214c7SGavin Maltby 				    restarter_str_administrative_request);
17977c478bd9Sstevel@tonic-gate 			break;
17987c478bd9Sstevel@tonic-gate 
17997c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
18007c478bd9Sstevel@tonic-gate 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
18018b55d351SSean Wilcox 			reset_start_times(inst);
18027c478bd9Sstevel@tonic-gate 			break;
18037c478bd9Sstevel@tonic-gate 
18047c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
18057c478bd9Sstevel@tonic-gate 			refresh_instance(h, inst);
18067c478bd9Sstevel@tonic-gate 			break;
18077c478bd9Sstevel@tonic-gate 
18087c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
18097c478bd9Sstevel@tonic-gate 			log_framework(LOG_WARNING, "Restarter: "
18107c478bd9Sstevel@tonic-gate 			    "%s command (for %s) unimplemented.\n",
18117c478bd9Sstevel@tonic-gate 			    event_names[event->riq_type], inst->ri_i.i_fmri);
18127c478bd9Sstevel@tonic-gate 			break;
18137c478bd9Sstevel@tonic-gate 
18147c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
18157c478bd9Sstevel@tonic-gate 			if (!instance_started(inst)) {
18167c478bd9Sstevel@tonic-gate 				log_framework(LOG_DEBUG, "Restarter: "
18177c478bd9Sstevel@tonic-gate 				    "Not restarting %s; not running.\n",
18187c478bd9Sstevel@tonic-gate 				    inst->ri_i.i_fmri);
18197c478bd9Sstevel@tonic-gate 			} else {
18207c478bd9Sstevel@tonic-gate 				/*
18217c478bd9Sstevel@tonic-gate 				 * Stop the instance.  If it can be restarted,
18227c478bd9Sstevel@tonic-gate 				 * the graph engine will send a new event.
18237c478bd9Sstevel@tonic-gate 				 */
182416ba0facSSean Wilcox 				if (stop_instance(h, inst, RSTOP_RESTART) == 0)
182516ba0facSSean Wilcox 					reset_start_times(inst);
18267c478bd9Sstevel@tonic-gate 			}
18277c478bd9Sstevel@tonic-gate 			break;
18287c478bd9Sstevel@tonic-gate 
18297c478bd9Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
18307c478bd9Sstevel@tonic-gate 		default:
18317c478bd9Sstevel@tonic-gate #ifndef NDEBUG
18327c478bd9Sstevel@tonic-gate 			uu_warn("%s:%d: Bad restarter event %d.  "
18337c478bd9Sstevel@tonic-gate 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
18347c478bd9Sstevel@tonic-gate #endif
18357c478bd9Sstevel@tonic-gate 			abort();
18367c478bd9Sstevel@tonic-gate 		}
18377c478bd9Sstevel@tonic-gate 
18387c478bd9Sstevel@tonic-gate 		assert(inst != NULL);
18397c478bd9Sstevel@tonic-gate 		MUTEX_UNLOCK(&inst->ri_lock);
18407c478bd9Sstevel@tonic-gate 
18417c478bd9Sstevel@tonic-gate cont:
18427c478bd9Sstevel@tonic-gate 		/* grab the queue lock */
18437c478bd9Sstevel@tonic-gate 		rip = inst_lookup_queue(fmri);
18447c478bd9Sstevel@tonic-gate 		if (rip == NULL)
18457c478bd9Sstevel@tonic-gate 			goto out;
18467c478bd9Sstevel@tonic-gate 
18477c478bd9Sstevel@tonic-gate 		/* delete the event */
18487c478bd9Sstevel@tonic-gate 		uu_list_remove(rip->ri_queue, event);
18497c478bd9Sstevel@tonic-gate 		startd_free(event, sizeof (restarter_instance_qentry_t));
18507c478bd9Sstevel@tonic-gate 	}
18517c478bd9Sstevel@tonic-gate 
18527c478bd9Sstevel@tonic-gate 	assert(rip != NULL);
18537c478bd9Sstevel@tonic-gate 
18547c478bd9Sstevel@tonic-gate 	/*
18557c478bd9Sstevel@tonic-gate 	 * Try to preserve the thread for a little while for future use.
18567c478bd9Sstevel@tonic-gate 	 */
18577c478bd9Sstevel@tonic-gate 	to.tv_sec = 3;
18587c478bd9Sstevel@tonic-gate 	to.tv_nsec = 0;
18597c478bd9Sstevel@tonic-gate 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
18607c478bd9Sstevel@tonic-gate 	    &rip->ri_queue_lock, &to);
18617c478bd9Sstevel@tonic-gate 
18627c478bd9Sstevel@tonic-gate 	if (uu_list_first(rip->ri_queue) != NULL)
18637c478bd9Sstevel@tonic-gate 		goto again;
18647c478bd9Sstevel@tonic-gate 
18657c478bd9Sstevel@tonic-gate 	rip->ri_queue_thread = 0;
18667c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1867*3dd94f79SBryan Cantrill 
18687c478bd9Sstevel@tonic-gate out:
18697c478bd9Sstevel@tonic-gate 	(void) scf_handle_unbind(h);
18707c478bd9Sstevel@tonic-gate 	scf_handle_destroy(h);
18717c478bd9Sstevel@tonic-gate 	free(fmri);
18727c478bd9Sstevel@tonic-gate 	return (NULL);
18737c478bd9Sstevel@tonic-gate }
18747c478bd9Sstevel@tonic-gate 
18757c478bd9Sstevel@tonic-gate static int
18767c478bd9Sstevel@tonic-gate is_admin_event(restarter_event_type_t t) {
18777c478bd9Sstevel@tonic-gate 
18787c478bd9Sstevel@tonic-gate 	switch (t) {
18797c478bd9Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
18807c478bd9Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
18817c478bd9Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
18827c478bd9Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
18837c478bd9Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
18847c478bd9Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
18857c478bd9Sstevel@tonic-gate 		return (1);
18867c478bd9Sstevel@tonic-gate 	default:
18877c478bd9Sstevel@tonic-gate 		return (0);
18887c478bd9Sstevel@tonic-gate 	}
18897c478bd9Sstevel@tonic-gate }
18907c478bd9Sstevel@tonic-gate 
18917c478bd9Sstevel@tonic-gate static void
18927c478bd9Sstevel@tonic-gate restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
18937c478bd9Sstevel@tonic-gate {
18947c478bd9Sstevel@tonic-gate 	restarter_instance_qentry_t *qe;
18957c478bd9Sstevel@tonic-gate 	int r;
18967c478bd9Sstevel@tonic-gate 
189753f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&ri->ri_queue_lock));
189853f3aea0SRoger A. Faulkner 	assert(!MUTEX_HELD(&ri->ri_lock));
18997c478bd9Sstevel@tonic-gate 
19007c478bd9Sstevel@tonic-gate 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
19017c478bd9Sstevel@tonic-gate 	qe->riq_type = e->rpe_type;
1902f6e214c7SGavin Maltby 	qe->riq_reason = e->rpe_reason;
19037c478bd9Sstevel@tonic-gate 
19047c478bd9Sstevel@tonic-gate 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
19057c478bd9Sstevel@tonic-gate 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
19067c478bd9Sstevel@tonic-gate 	assert(r == 0);
19077c478bd9Sstevel@tonic-gate }
19087c478bd9Sstevel@tonic-gate 
19097c478bd9Sstevel@tonic-gate /*
19107c478bd9Sstevel@tonic-gate  * void *restarter_event_thread()
19117c478bd9Sstevel@tonic-gate  *
19127c478bd9Sstevel@tonic-gate  *  Handle incoming graph events by placing them on a per-instance
19137c478bd9Sstevel@tonic-gate  *  queue.  We can't lock the main part of the instance structure, so
19147c478bd9Sstevel@tonic-gate  *  just modify the seprarately locked event queue portion.
19157c478bd9Sstevel@tonic-gate  */
19167c478bd9Sstevel@tonic-gate /*ARGSUSED*/
19177c478bd9Sstevel@tonic-gate static void *
19187c478bd9Sstevel@tonic-gate restarter_event_thread(void *unused)
19197c478bd9Sstevel@tonic-gate {
19207c478bd9Sstevel@tonic-gate 	scf_handle_t *h;
19217c478bd9Sstevel@tonic-gate 
19227c478bd9Sstevel@tonic-gate 	/*
19237c478bd9Sstevel@tonic-gate 	 * This is a new thread, and thus, gets its own handle
19247c478bd9Sstevel@tonic-gate 	 * to the repository.
19257c478bd9Sstevel@tonic-gate 	 */
19267c478bd9Sstevel@tonic-gate 	h = libscf_handle_create_bound_loop();
19277c478bd9Sstevel@tonic-gate 
19287c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&ru->restarter_update_lock);
19297c478bd9Sstevel@tonic-gate 
19307c478bd9Sstevel@tonic-gate 	/*CONSTCOND*/
19317c478bd9Sstevel@tonic-gate 	while (1) {
19327c478bd9Sstevel@tonic-gate 		restarter_protocol_event_t *e;
19337c478bd9Sstevel@tonic-gate 
19347c478bd9Sstevel@tonic-gate 		while (ru->restarter_update_wakeup == 0)
19357c478bd9Sstevel@tonic-gate 			(void) pthread_cond_wait(&ru->restarter_update_cv,
19367c478bd9Sstevel@tonic-gate 			    &ru->restarter_update_lock);
19377c478bd9Sstevel@tonic-gate 
19387c478bd9Sstevel@tonic-gate 		ru->restarter_update_wakeup = 0;
19397c478bd9Sstevel@tonic-gate 
19407c478bd9Sstevel@tonic-gate 		while ((e = restarter_event_dequeue()) != NULL) {
19417c478bd9Sstevel@tonic-gate 			restarter_inst_t *rip;
19427c478bd9Sstevel@tonic-gate 			char *fmri;
19437c478bd9Sstevel@tonic-gate 
19447c478bd9Sstevel@tonic-gate 			MUTEX_UNLOCK(&ru->restarter_update_lock);
19457c478bd9Sstevel@tonic-gate 
19467c478bd9Sstevel@tonic-gate 			/*
19477c478bd9Sstevel@tonic-gate 			 * ADD_INSTANCE is special: there's likely no
19487c478bd9Sstevel@tonic-gate 			 * instance structure yet, so we need to handle the
19497c478bd9Sstevel@tonic-gate 			 * addition synchronously.
19507c478bd9Sstevel@tonic-gate 			 */
19517c478bd9Sstevel@tonic-gate 			switch (e->rpe_type) {
19527c478bd9Sstevel@tonic-gate 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
19537c478bd9Sstevel@tonic-gate 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
19547c478bd9Sstevel@tonic-gate 					log_error(LOG_INFO, "Restarter: "
19557c478bd9Sstevel@tonic-gate 					    "Could not add %s.\n", e->rpe_inst);
19567c478bd9Sstevel@tonic-gate 
19577c478bd9Sstevel@tonic-gate 				MUTEX_LOCK(&st->st_load_lock);
19587c478bd9Sstevel@tonic-gate 				if (--st->st_load_instances == 0)
19597c478bd9Sstevel@tonic-gate 					(void) pthread_cond_broadcast(
19607c478bd9Sstevel@tonic-gate 					    &st->st_load_cv);
19617c478bd9Sstevel@tonic-gate 				MUTEX_UNLOCK(&st->st_load_lock);
19627c478bd9Sstevel@tonic-gate 
19637c478bd9Sstevel@tonic-gate 				goto nolookup;
19647c478bd9Sstevel@tonic-gate 			}
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate 			/*
19677c478bd9Sstevel@tonic-gate 			 * Lookup the instance, locking only the event queue.
19687c478bd9Sstevel@tonic-gate 			 * Can't grab ri_lock here because it might be held
19697c478bd9Sstevel@tonic-gate 			 * by a long-running method.
19707c478bd9Sstevel@tonic-gate 			 */
19717c478bd9Sstevel@tonic-gate 			rip = inst_lookup_queue(e->rpe_inst);
19727c478bd9Sstevel@tonic-gate 			if (rip == NULL) {
19737c478bd9Sstevel@tonic-gate 				log_error(LOG_INFO, "Restarter: "
19747c478bd9Sstevel@tonic-gate 				    "Ignoring %s command for unknown service "
19757c478bd9Sstevel@tonic-gate 				    "%s.\n", event_names[e->rpe_type],
19767c478bd9Sstevel@tonic-gate 				    e->rpe_inst);
19777c478bd9Sstevel@tonic-gate 				goto nolookup;
19787c478bd9Sstevel@tonic-gate 			}
19797c478bd9Sstevel@tonic-gate 
19807c478bd9Sstevel@tonic-gate 			/* Keep ADMIN events from filling up the queue. */
19817c478bd9Sstevel@tonic-gate 			if (is_admin_event(e->rpe_type) &&
19827c478bd9Sstevel@tonic-gate 			    uu_list_numnodes(rip->ri_queue) >
19837c478bd9Sstevel@tonic-gate 			    RINST_QUEUE_THRESHOLD) {
19847c478bd9Sstevel@tonic-gate 				MUTEX_UNLOCK(&rip->ri_queue_lock);
19857c478bd9Sstevel@tonic-gate 				log_instance(rip, B_TRUE, "Instance event "
19867c478bd9Sstevel@tonic-gate 				    "queue overflow.  Dropping administrative "
19877c478bd9Sstevel@tonic-gate 				    "request.");
19887c478bd9Sstevel@tonic-gate 				log_framework(LOG_DEBUG, "%s: Instance event "
19897c478bd9Sstevel@tonic-gate 				    "queue overflow.  Dropping administrative "
19907c478bd9Sstevel@tonic-gate 				    "request.\n", rip->ri_i.i_fmri);
19917c478bd9Sstevel@tonic-gate 				goto nolookup;
19927c478bd9Sstevel@tonic-gate 			}
19937c478bd9Sstevel@tonic-gate 
19947c478bd9Sstevel@tonic-gate 			/* Now add the event to the instance queue. */
19957c478bd9Sstevel@tonic-gate 			restarter_queue_event(rip, e);
19967c478bd9Sstevel@tonic-gate 
19977c478bd9Sstevel@tonic-gate 			if (rip->ri_queue_thread == 0) {
19987c478bd9Sstevel@tonic-gate 				/*
19997c478bd9Sstevel@tonic-gate 				 * Start a thread if one isn't already
20007c478bd9Sstevel@tonic-gate 				 * running.
20017c478bd9Sstevel@tonic-gate 				 */
20027c478bd9Sstevel@tonic-gate 				fmri = safe_strdup(e->rpe_inst);
20037c478bd9Sstevel@tonic-gate 				rip->ri_queue_thread =  startd_thread_create(
20047c478bd9Sstevel@tonic-gate 				    restarter_process_events, (void *)fmri);
20057c478bd9Sstevel@tonic-gate 			} else {
20067c478bd9Sstevel@tonic-gate 				/*
20077c478bd9Sstevel@tonic-gate 				 * Signal the existing thread that there's
20087c478bd9Sstevel@tonic-gate 				 * a new event.
20097c478bd9Sstevel@tonic-gate 				 */
20107c478bd9Sstevel@tonic-gate 				(void) pthread_cond_broadcast(
20117c478bd9Sstevel@tonic-gate 				    &rip->ri_queue_cv);
20127c478bd9Sstevel@tonic-gate 			}
20137c478bd9Sstevel@tonic-gate 
20147c478bd9Sstevel@tonic-gate 			MUTEX_UNLOCK(&rip->ri_queue_lock);
20157c478bd9Sstevel@tonic-gate nolookup:
20167c478bd9Sstevel@tonic-gate 			restarter_event_release(e);
20177c478bd9Sstevel@tonic-gate 
20187c478bd9Sstevel@tonic-gate 			MUTEX_LOCK(&ru->restarter_update_lock);
20197c478bd9Sstevel@tonic-gate 		}
20207c478bd9Sstevel@tonic-gate 	}
20217c478bd9Sstevel@tonic-gate 
20227c478bd9Sstevel@tonic-gate 	/*
20237c478bd9Sstevel@tonic-gate 	 * Unreachable for now -- there's currently no graceful cleanup
20247c478bd9Sstevel@tonic-gate 	 * called on exit().
20257c478bd9Sstevel@tonic-gate 	 */
20267c478bd9Sstevel@tonic-gate 	(void) scf_handle_unbind(h);
20277c478bd9Sstevel@tonic-gate 	scf_handle_destroy(h);
20287c478bd9Sstevel@tonic-gate 	return (NULL);
20297c478bd9Sstevel@tonic-gate }
20307c478bd9Sstevel@tonic-gate 
20317c478bd9Sstevel@tonic-gate static restarter_inst_t *
20327c478bd9Sstevel@tonic-gate contract_to_inst(ctid_t ctid)
20337c478bd9Sstevel@tonic-gate {
20347c478bd9Sstevel@tonic-gate 	restarter_inst_t *inst;
20357c478bd9Sstevel@tonic-gate 	int id;
20367c478bd9Sstevel@tonic-gate 
20377c478bd9Sstevel@tonic-gate 	id = lookup_inst_by_contract(ctid);
20387c478bd9Sstevel@tonic-gate 	if (id == -1)
20397c478bd9Sstevel@tonic-gate 		return (NULL);
20407c478bd9Sstevel@tonic-gate 
20417c478bd9Sstevel@tonic-gate 	inst = inst_lookup_by_id(id);
20427c478bd9Sstevel@tonic-gate 	if (inst != NULL) {
20437c478bd9Sstevel@tonic-gate 		/*
20447c478bd9Sstevel@tonic-gate 		 * Since ri_lock isn't held by the contract id lookup, this
20457c478bd9Sstevel@tonic-gate 		 * instance may have been restarted and now be in a new
20467c478bd9Sstevel@tonic-gate 		 * contract, making the old contract no longer valid for this
20477c478bd9Sstevel@tonic-gate 		 * instance.
20487c478bd9Sstevel@tonic-gate 		 */
20497c478bd9Sstevel@tonic-gate 		if (ctid != inst->ri_i.i_primary_ctid) {
20507c478bd9Sstevel@tonic-gate 			MUTEX_UNLOCK(&inst->ri_lock);
20517c478bd9Sstevel@tonic-gate 			inst = NULL;
20527c478bd9Sstevel@tonic-gate 		}
20537c478bd9Sstevel@tonic-gate 	}
20547c478bd9Sstevel@tonic-gate 	return (inst);
20557c478bd9Sstevel@tonic-gate }
20567c478bd9Sstevel@tonic-gate 
20577c478bd9Sstevel@tonic-gate /*
20587c478bd9Sstevel@tonic-gate  * void contract_action()
20597c478bd9Sstevel@tonic-gate  *   Take action on contract events.
20607c478bd9Sstevel@tonic-gate  */
20617c478bd9Sstevel@tonic-gate static void
20627c478bd9Sstevel@tonic-gate contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
20637c478bd9Sstevel@tonic-gate     uint32_t type)
20647c478bd9Sstevel@tonic-gate {
20657c478bd9Sstevel@tonic-gate 	const char *fmri = inst->ri_i.i_fmri;
20667c478bd9Sstevel@tonic-gate 
206753f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
20687c478bd9Sstevel@tonic-gate 
20697c478bd9Sstevel@tonic-gate 	/*
20707c478bd9Sstevel@tonic-gate 	 * If startd has stopped this contract, there is no need to
20717c478bd9Sstevel@tonic-gate 	 * stop it again.
20727c478bd9Sstevel@tonic-gate 	 */
20737c478bd9Sstevel@tonic-gate 	if (inst->ri_i.i_primary_ctid > 0 &&
20747c478bd9Sstevel@tonic-gate 	    inst->ri_i.i_primary_ctid_stopped)
20757c478bd9Sstevel@tonic-gate 		return;
20767c478bd9Sstevel@tonic-gate 
20777c478bd9Sstevel@tonic-gate 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
20787c478bd9Sstevel@tonic-gate 	    | CT_PR_EV_HWERR)) == 0) {
20797c478bd9Sstevel@tonic-gate 		/*
20807c478bd9Sstevel@tonic-gate 		 * There shouldn't be other events, since that's not how we set
20817c478bd9Sstevel@tonic-gate 		 * the terms. Thus, just log an error and drive on.
20827c478bd9Sstevel@tonic-gate 		 */
20837c478bd9Sstevel@tonic-gate 		log_framework(LOG_NOTICE,
20847c478bd9Sstevel@tonic-gate 		    "%s: contract %ld received unexpected critical event "
20857c478bd9Sstevel@tonic-gate 		    "(%d)\n", fmri, id, type);
20867c478bd9Sstevel@tonic-gate 		return;
20877c478bd9Sstevel@tonic-gate 	}
20887c478bd9Sstevel@tonic-gate 
20897c478bd9Sstevel@tonic-gate 	assert(instance_in_transition(inst) == 0);
20907c478bd9Sstevel@tonic-gate 
20917c478bd9Sstevel@tonic-gate 	if (instance_is_wait_style(inst)) {
20927c478bd9Sstevel@tonic-gate 		/*
20937c478bd9Sstevel@tonic-gate 		 * We ignore all events; if they impact the
20947c478bd9Sstevel@tonic-gate 		 * process we're monitoring, then the
20957c478bd9Sstevel@tonic-gate 		 * wait_thread will stop the instance.
20967c478bd9Sstevel@tonic-gate 		 */
20977c478bd9Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
20987c478bd9Sstevel@tonic-gate 		    "%s: ignoring contract event on wait-style service\n",
20997c478bd9Sstevel@tonic-gate 		    fmri);
21007c478bd9Sstevel@tonic-gate 	} else {
21017c478bd9Sstevel@tonic-gate 		/*
21027c478bd9Sstevel@tonic-gate 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
21037c478bd9Sstevel@tonic-gate 		 */
21047c478bd9Sstevel@tonic-gate 		switch (type) {
21057c478bd9Sstevel@tonic-gate 		case CT_PR_EV_EMPTY:
21067c478bd9Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_EXIT);
21077c478bd9Sstevel@tonic-gate 			break;
21087c478bd9Sstevel@tonic-gate 		case CT_PR_EV_CORE:
21097c478bd9Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_CORE);
21107c478bd9Sstevel@tonic-gate 			break;
21117c478bd9Sstevel@tonic-gate 		case CT_PR_EV_SIGNAL:
21127c478bd9Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
21137c478bd9Sstevel@tonic-gate 			break;
21147c478bd9Sstevel@tonic-gate 		case CT_PR_EV_HWERR:
21157c478bd9Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_HWERR);
21167c478bd9Sstevel@tonic-gate 			break;
21177c478bd9Sstevel@tonic-gate 		}
21187c478bd9Sstevel@tonic-gate 	}
21197c478bd9Sstevel@tonic-gate }
21207c478bd9Sstevel@tonic-gate 
21217c478bd9Sstevel@tonic-gate /*
21227c478bd9Sstevel@tonic-gate  * void *restarter_contract_event_thread(void *)
21237c478bd9Sstevel@tonic-gate  *   Listens to the process contract bundle for critical events, taking action
21247c478bd9Sstevel@tonic-gate  *   on events from contracts we know we are responsible for.
21257c478bd9Sstevel@tonic-gate  */
21267c478bd9Sstevel@tonic-gate /*ARGSUSED*/
21277c478bd9Sstevel@tonic-gate static void *
21287c478bd9Sstevel@tonic-gate restarter_contracts_event_thread(void *unused)
21297c478bd9Sstevel@tonic-gate {
21307c478bd9Sstevel@tonic-gate 	int fd, err;
21317c478bd9Sstevel@tonic-gate 	scf_handle_t *local_handle;
21327c478bd9Sstevel@tonic-gate 
21337c478bd9Sstevel@tonic-gate 	/*
21347c478bd9Sstevel@tonic-gate 	 * Await graph load completion.  That is, stop here, until we've scanned
21357c478bd9Sstevel@tonic-gate 	 * the repository for contract - instance associations.
21367c478bd9Sstevel@tonic-gate 	 */
21377c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&st->st_load_lock);
21387c478bd9Sstevel@tonic-gate 	while (!(st->st_load_complete && st->st_load_instances == 0))
21397c478bd9Sstevel@tonic-gate 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
21407c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&st->st_load_lock);
21417c478bd9Sstevel@tonic-gate 
21427c478bd9Sstevel@tonic-gate 	/*
21437c478bd9Sstevel@tonic-gate 	 * This is a new thread, and thus, gets its own handle
21447c478bd9Sstevel@tonic-gate 	 * to the repository.
21457c478bd9Sstevel@tonic-gate 	 */
21467c478bd9Sstevel@tonic-gate 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
21477c478bd9Sstevel@tonic-gate 		uu_die("Unable to bind a new repository handle: %s\n",
21487c478bd9Sstevel@tonic-gate 		    scf_strerror(scf_error()));
21497c478bd9Sstevel@tonic-gate 
21507c478bd9Sstevel@tonic-gate 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
21517c478bd9Sstevel@tonic-gate 	if (fd == -1)
21527c478bd9Sstevel@tonic-gate 		uu_die("process bundle open failed");
21537c478bd9Sstevel@tonic-gate 
21547c478bd9Sstevel@tonic-gate 	/*
21557c478bd9Sstevel@tonic-gate 	 * Make sure we get all events (including those generated by configd
21567c478bd9Sstevel@tonic-gate 	 * before this thread was started).
21577c478bd9Sstevel@tonic-gate 	 */
21587c478bd9Sstevel@tonic-gate 	err = ct_event_reset(fd);
21597c478bd9Sstevel@tonic-gate 	assert(err == 0);
21607c478bd9Sstevel@tonic-gate 
21617c478bd9Sstevel@tonic-gate 	for (;;) {
21627c478bd9Sstevel@tonic-gate 		int efd, sfd;
21637c478bd9Sstevel@tonic-gate 		ct_evthdl_t ev;
21647c478bd9Sstevel@tonic-gate 		uint32_t type;
21657c478bd9Sstevel@tonic-gate 		ctevid_t evid;
21667c478bd9Sstevel@tonic-gate 		ct_stathdl_t status;
21677c478bd9Sstevel@tonic-gate 		ctid_t ctid;
21687c478bd9Sstevel@tonic-gate 		restarter_inst_t *inst;
21697c478bd9Sstevel@tonic-gate 		uint64_t cookie;
21707c478bd9Sstevel@tonic-gate 
21717c478bd9Sstevel@tonic-gate 		if (err = ct_event_read_critical(fd, &ev)) {
21727c478bd9Sstevel@tonic-gate 			log_error(LOG_WARNING,
21737c478bd9Sstevel@tonic-gate 			    "Error reading next contract event: %s",
21747c478bd9Sstevel@tonic-gate 			    strerror(err));
21757c478bd9Sstevel@tonic-gate 			continue;
21767c478bd9Sstevel@tonic-gate 		}
21777c478bd9Sstevel@tonic-gate 
21787c478bd9Sstevel@tonic-gate 		evid = ct_event_get_evid(ev);
21797c478bd9Sstevel@tonic-gate 		ctid = ct_event_get_ctid(ev);
21807c478bd9Sstevel@tonic-gate 		type = ct_event_get_type(ev);
21817c478bd9Sstevel@tonic-gate 
21827c478bd9Sstevel@tonic-gate 		/* Fetch cookie. */
21837c478bd9Sstevel@tonic-gate 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
21847c478bd9Sstevel@tonic-gate 		    < 0) {
21857c478bd9Sstevel@tonic-gate 			ct_event_free(ev);
21867c478bd9Sstevel@tonic-gate 			continue;
21877c478bd9Sstevel@tonic-gate 		}
21887c478bd9Sstevel@tonic-gate 
21897c478bd9Sstevel@tonic-gate 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
21907c478bd9Sstevel@tonic-gate 			log_framework(LOG_WARNING, "Could not get status for "
21917c478bd9Sstevel@tonic-gate 			    "contract %ld: %s\n", ctid, strerror(err));
21927c478bd9Sstevel@tonic-gate 
21937c478bd9Sstevel@tonic-gate 			startd_close(sfd);
21947c478bd9Sstevel@tonic-gate 			ct_event_free(ev);
21957c478bd9Sstevel@tonic-gate 			continue;
21967c478bd9Sstevel@tonic-gate 		}
21977c478bd9Sstevel@tonic-gate 
21987c478bd9Sstevel@tonic-gate 		cookie = ct_status_get_cookie(status);
21997c478bd9Sstevel@tonic-gate 
2200dfe57350Sjeanm 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2201dfe57350Sjeanm 		    "cookie %lld\n", type, ctid, cookie);
2202dfe57350Sjeanm 
22037c478bd9Sstevel@tonic-gate 		ct_status_free(status);
22047c478bd9Sstevel@tonic-gate 
22057c478bd9Sstevel@tonic-gate 		startd_close(sfd);
22067c478bd9Sstevel@tonic-gate 
22077c478bd9Sstevel@tonic-gate 		/*
22087c478bd9Sstevel@tonic-gate 		 * svc.configd(1M) restart handling performed by the
22097c478bd9Sstevel@tonic-gate 		 * fork_configd_thread.  We don't acknowledge, as that thread
22107c478bd9Sstevel@tonic-gate 		 * will do so.
22117c478bd9Sstevel@tonic-gate 		 */
22127c478bd9Sstevel@tonic-gate 		if (cookie == CONFIGD_COOKIE) {
22137c478bd9Sstevel@tonic-gate 			ct_event_free(ev);
22147c478bd9Sstevel@tonic-gate 			continue;
22157c478bd9Sstevel@tonic-gate 		}
22167c478bd9Sstevel@tonic-gate 
2217dfe57350Sjeanm 		inst = NULL;
2218dfe57350Sjeanm 		if (storing_contract != 0 &&
2219dfe57350Sjeanm 		    (inst = contract_to_inst(ctid)) == NULL) {
2220dfe57350Sjeanm 			/*
2221dfe57350Sjeanm 			 * This can happen for two reasons:
2222dfe57350Sjeanm 			 * - method_run() has not yet stored the
2223dfe57350Sjeanm 			 *    the contract into the internal hash table.
2224dfe57350Sjeanm 			 * - we receive an EMPTY event for an abandoned
2225dfe57350Sjeanm 			 *    contract.
2226dfe57350Sjeanm 			 * If there is any contract in the process of
2227dfe57350Sjeanm 			 * being stored into the hash table then re-read
2228dfe57350Sjeanm 			 * the event later.
2229dfe57350Sjeanm 			 */
2230dfe57350Sjeanm 			log_framework(LOG_DEBUG,
2231dfe57350Sjeanm 			    "Reset event %d for unknown "
2232dfe57350Sjeanm 			    "contract id %ld\n", type, ctid);
2233dfe57350Sjeanm 
2234dfe57350Sjeanm 			/* don't go too fast */
2235dfe57350Sjeanm 			(void) poll(NULL, 0, 100);
2236dfe57350Sjeanm 
2237dfe57350Sjeanm 			(void) ct_event_reset(fd);
2238dfe57350Sjeanm 			ct_event_free(ev);
2239dfe57350Sjeanm 			continue;
2240dfe57350Sjeanm 		}
2241dfe57350Sjeanm 
2242dfe57350Sjeanm 		/*
2243dfe57350Sjeanm 		 * Do not call contract_to_inst() again if first
2244dfe57350Sjeanm 		 * call succeeded.
2245dfe57350Sjeanm 		 */
2246dfe57350Sjeanm 		if (inst == NULL)
22477c478bd9Sstevel@tonic-gate 			inst = contract_to_inst(ctid);
22487c478bd9Sstevel@tonic-gate 		if (inst == NULL) {
22497c478bd9Sstevel@tonic-gate 			/*
22507c478bd9Sstevel@tonic-gate 			 * This can happen if we receive an EMPTY
22517c478bd9Sstevel@tonic-gate 			 * event for an abandoned contract.
22527c478bd9Sstevel@tonic-gate 			 */
22537c478bd9Sstevel@tonic-gate 			log_framework(LOG_DEBUG,
22547c478bd9Sstevel@tonic-gate 			    "Received event %d for unknown contract id "
22557c478bd9Sstevel@tonic-gate 			    "%ld\n", type, ctid);
22567c478bd9Sstevel@tonic-gate 		} else {
22577c478bd9Sstevel@tonic-gate 			log_framework(LOG_DEBUG,
22587c478bd9Sstevel@tonic-gate 			    "Received event %d for contract id "
22597c478bd9Sstevel@tonic-gate 			    "%ld (%s)\n", type, ctid,
22607c478bd9Sstevel@tonic-gate 			    inst->ri_i.i_fmri);
22617c478bd9Sstevel@tonic-gate 
22627c478bd9Sstevel@tonic-gate 			contract_action(local_handle, inst, ctid, type);
22637c478bd9Sstevel@tonic-gate 
22647c478bd9Sstevel@tonic-gate 			MUTEX_UNLOCK(&inst->ri_lock);
22657c478bd9Sstevel@tonic-gate 		}
22667c478bd9Sstevel@tonic-gate 
22677c478bd9Sstevel@tonic-gate 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
22687c478bd9Sstevel@tonic-gate 		    O_WRONLY);
22697c478bd9Sstevel@tonic-gate 		if (efd != -1) {
22707c478bd9Sstevel@tonic-gate 			(void) ct_ctl_ack(efd, evid);
22717c478bd9Sstevel@tonic-gate 			startd_close(efd);
22727c478bd9Sstevel@tonic-gate 		}
22737c478bd9Sstevel@tonic-gate 
22747c478bd9Sstevel@tonic-gate 		ct_event_free(ev);
22757c478bd9Sstevel@tonic-gate 
22767c478bd9Sstevel@tonic-gate 	}
22777c478bd9Sstevel@tonic-gate 
22787c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
22797c478bd9Sstevel@tonic-gate 	return (NULL);
22807c478bd9Sstevel@tonic-gate }
22817c478bd9Sstevel@tonic-gate 
22827c478bd9Sstevel@tonic-gate /*
22837c478bd9Sstevel@tonic-gate  * Timeout queue, processed by restarter_timeouts_event_thread().
22847c478bd9Sstevel@tonic-gate  */
22857c478bd9Sstevel@tonic-gate timeout_queue_t *timeouts;
22867c478bd9Sstevel@tonic-gate static uu_list_pool_t *timeout_pool;
22877c478bd9Sstevel@tonic-gate 
22887c478bd9Sstevel@tonic-gate typedef struct timeout_update {
22897c478bd9Sstevel@tonic-gate 	pthread_mutex_t		tu_lock;
22907c478bd9Sstevel@tonic-gate 	pthread_cond_t		tu_cv;
22917c478bd9Sstevel@tonic-gate 	int			tu_wakeup;
22927c478bd9Sstevel@tonic-gate } timeout_update_t;
22937c478bd9Sstevel@tonic-gate 
22947c478bd9Sstevel@tonic-gate timeout_update_t *tu;
22957c478bd9Sstevel@tonic-gate 
22967c478bd9Sstevel@tonic-gate static const char *timeout_ovr_svcs[] = {
22977c478bd9Sstevel@tonic-gate 	"svc:/system/manifest-import:default",
22987c478bd9Sstevel@tonic-gate 	"svc:/network/initial:default",
22997c478bd9Sstevel@tonic-gate 	"svc:/network/service:default",
23007c478bd9Sstevel@tonic-gate 	"svc:/system/rmtmpfiles:default",
23017c478bd9Sstevel@tonic-gate 	"svc:/network/loopback:default",
23027c478bd9Sstevel@tonic-gate 	"svc:/network/physical:default",
23037c478bd9Sstevel@tonic-gate 	"svc:/system/device/local:default",
23047c478bd9Sstevel@tonic-gate 	"svc:/system/metainit:default",
23057c478bd9Sstevel@tonic-gate 	"svc:/system/filesystem/usr:default",
23067c478bd9Sstevel@tonic-gate 	"svc:/system/filesystem/minimal:default",
23077c478bd9Sstevel@tonic-gate 	"svc:/system/filesystem/local:default",
23087c478bd9Sstevel@tonic-gate 	NULL
23097c478bd9Sstevel@tonic-gate };
23107c478bd9Sstevel@tonic-gate 
23117c478bd9Sstevel@tonic-gate int
23127c478bd9Sstevel@tonic-gate is_timeout_ovr(restarter_inst_t *inst)
23137c478bd9Sstevel@tonic-gate {
23147c478bd9Sstevel@tonic-gate 	int i;
23157c478bd9Sstevel@tonic-gate 
23167c478bd9Sstevel@tonic-gate 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
23177c478bd9Sstevel@tonic-gate 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
23187c478bd9Sstevel@tonic-gate 			log_instance(inst, B_TRUE, "Timeout override by "
23195ca87c7fSlianep 			    "svc.startd.  Using infinite timeout.");
23207c478bd9Sstevel@tonic-gate 			return (1);
23217c478bd9Sstevel@tonic-gate 		}
23227c478bd9Sstevel@tonic-gate 	}
23237c478bd9Sstevel@tonic-gate 
23247c478bd9Sstevel@tonic-gate 	return (0);
23257c478bd9Sstevel@tonic-gate }
23267c478bd9Sstevel@tonic-gate 
23277c478bd9Sstevel@tonic-gate /*ARGSUSED*/
23287c478bd9Sstevel@tonic-gate static int
23297c478bd9Sstevel@tonic-gate timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
23307c478bd9Sstevel@tonic-gate {
23317c478bd9Sstevel@tonic-gate 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
23327c478bd9Sstevel@tonic-gate 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
23337c478bd9Sstevel@tonic-gate 
23347c478bd9Sstevel@tonic-gate 	if (t1 > t2)
23357c478bd9Sstevel@tonic-gate 		return (1);
23367c478bd9Sstevel@tonic-gate 	else if (t1 < t2)
23377c478bd9Sstevel@tonic-gate 		return (-1);
23387c478bd9Sstevel@tonic-gate 	return (0);
23397c478bd9Sstevel@tonic-gate }
23407c478bd9Sstevel@tonic-gate 
23417c478bd9Sstevel@tonic-gate void
23427c478bd9Sstevel@tonic-gate timeout_init()
23437c478bd9Sstevel@tonic-gate {
23447c478bd9Sstevel@tonic-gate 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
23457c478bd9Sstevel@tonic-gate 
23467c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
23477c478bd9Sstevel@tonic-gate 
23487c478bd9Sstevel@tonic-gate 	timeout_pool = startd_list_pool_create("timeouts",
23497c478bd9Sstevel@tonic-gate 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
23507c478bd9Sstevel@tonic-gate 	    timeout_compare, UU_LIST_POOL_DEBUG);
23517c478bd9Sstevel@tonic-gate 	assert(timeout_pool != NULL);
23527c478bd9Sstevel@tonic-gate 
23537c478bd9Sstevel@tonic-gate 	timeouts->tq_list = startd_list_create(timeout_pool,
23547c478bd9Sstevel@tonic-gate 	    timeouts, UU_LIST_SORTED);
23557c478bd9Sstevel@tonic-gate 	assert(timeouts->tq_list != NULL);
23567c478bd9Sstevel@tonic-gate 
23577c478bd9Sstevel@tonic-gate 	tu = startd_zalloc(sizeof (timeout_update_t));
23587c478bd9Sstevel@tonic-gate 	(void) pthread_cond_init(&tu->tu_cv, NULL);
23597c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
23607c478bd9Sstevel@tonic-gate }
23617c478bd9Sstevel@tonic-gate 
23627c478bd9Sstevel@tonic-gate void
23637c478bd9Sstevel@tonic-gate timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
23647c478bd9Sstevel@tonic-gate {
23657c478bd9Sstevel@tonic-gate 	hrtime_t now, timeout;
23667c478bd9Sstevel@tonic-gate 	timeout_entry_t *entry;
23677c478bd9Sstevel@tonic-gate 	uu_list_index_t idx;
23687c478bd9Sstevel@tonic-gate 
236953f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
23707c478bd9Sstevel@tonic-gate 
23717c478bd9Sstevel@tonic-gate 	now = gethrtime();
23727c478bd9Sstevel@tonic-gate 
23737c478bd9Sstevel@tonic-gate 	/*
23747c478bd9Sstevel@tonic-gate 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
23757c478bd9Sstevel@tonic-gate 	 * just return.
23767c478bd9Sstevel@tonic-gate 	 */
23777c478bd9Sstevel@tonic-gate 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
23787c478bd9Sstevel@tonic-gate 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
23797c478bd9Sstevel@tonic-gate 		    "treating as infinite.");
23807c478bd9Sstevel@tonic-gate 		return;
23817c478bd9Sstevel@tonic-gate 	}
23827c478bd9Sstevel@tonic-gate 
23837c478bd9Sstevel@tonic-gate 	/* hrtime is in nanoseconds. Convert timeout_sec. */
23847c478bd9Sstevel@tonic-gate 	timeout = now + (timeout_sec * 1000000000LL);
23857c478bd9Sstevel@tonic-gate 
23867c478bd9Sstevel@tonic-gate 	entry = startd_alloc(sizeof (timeout_entry_t));
23877c478bd9Sstevel@tonic-gate 	entry->te_timeout = timeout;
23887c478bd9Sstevel@tonic-gate 	entry->te_ctid = cid;
23897c478bd9Sstevel@tonic-gate 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
23907c478bd9Sstevel@tonic-gate 	entry->te_logstem = safe_strdup(inst->ri_logstem);
23917c478bd9Sstevel@tonic-gate 	entry->te_fired = 0;
23927c478bd9Sstevel@tonic-gate 	/* Insert the calculated timeout time onto the queue. */
23937c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&timeouts->tq_lock);
23947c478bd9Sstevel@tonic-gate 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
23957c478bd9Sstevel@tonic-gate 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
23967c478bd9Sstevel@tonic-gate 	uu_list_insert(timeouts->tq_list, entry, idx);
23977c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&timeouts->tq_lock);
23987c478bd9Sstevel@tonic-gate 
23997c478bd9Sstevel@tonic-gate 	assert(inst->ri_timeout == NULL);
24007c478bd9Sstevel@tonic-gate 	inst->ri_timeout = entry;
24017c478bd9Sstevel@tonic-gate 
24027c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&tu->tu_lock);
24037c478bd9Sstevel@tonic-gate 	tu->tu_wakeup = 1;
24047c478bd9Sstevel@tonic-gate 	(void) pthread_cond_broadcast(&tu->tu_cv);
24057c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&tu->tu_lock);
24067c478bd9Sstevel@tonic-gate }
24077c478bd9Sstevel@tonic-gate 
24087c478bd9Sstevel@tonic-gate 
24097c478bd9Sstevel@tonic-gate void
24107c478bd9Sstevel@tonic-gate timeout_remove(restarter_inst_t *inst, ctid_t cid)
24117c478bd9Sstevel@tonic-gate {
241253f3aea0SRoger A. Faulkner 	assert(MUTEX_HELD(&inst->ri_lock));
24137c478bd9Sstevel@tonic-gate 
24147c478bd9Sstevel@tonic-gate 	if (inst->ri_timeout == NULL)
24157c478bd9Sstevel@tonic-gate 		return;
24167c478bd9Sstevel@tonic-gate 
24177c478bd9Sstevel@tonic-gate 	assert(inst->ri_timeout->te_ctid == cid);
24187c478bd9Sstevel@tonic-gate 
24197c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&timeouts->tq_lock);
24207c478bd9Sstevel@tonic-gate 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
24217c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&timeouts->tq_lock);
24227c478bd9Sstevel@tonic-gate 
24237c478bd9Sstevel@tonic-gate 	free(inst->ri_timeout->te_fmri);
24247c478bd9Sstevel@tonic-gate 	free(inst->ri_timeout->te_logstem);
24257c478bd9Sstevel@tonic-gate 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
24267c478bd9Sstevel@tonic-gate 	inst->ri_timeout = NULL;
24277c478bd9Sstevel@tonic-gate }
24287c478bd9Sstevel@tonic-gate 
24297c478bd9Sstevel@tonic-gate static int
24307c478bd9Sstevel@tonic-gate timeout_now()
24317c478bd9Sstevel@tonic-gate {
24327c478bd9Sstevel@tonic-gate 	timeout_entry_t *e;
24337c478bd9Sstevel@tonic-gate 	hrtime_t now;
24347c478bd9Sstevel@tonic-gate 	int ret;
24357c478bd9Sstevel@tonic-gate 
24367c478bd9Sstevel@tonic-gate 	now = gethrtime();
24377c478bd9Sstevel@tonic-gate 
24387c478bd9Sstevel@tonic-gate 	/*
24397c478bd9Sstevel@tonic-gate 	 * Walk through the (sorted) timeouts list.  While the timeout
24407c478bd9Sstevel@tonic-gate 	 * at the head of the list is <= the current time, kill the
24417c478bd9Sstevel@tonic-gate 	 * method.
24427c478bd9Sstevel@tonic-gate 	 */
24437c478bd9Sstevel@tonic-gate 	MUTEX_LOCK(&timeouts->tq_lock);
24447c478bd9Sstevel@tonic-gate 
24457c478bd9Sstevel@tonic-gate 	for (e = uu_list_first(timeouts->tq_list);
24467c478bd9Sstevel@tonic-gate 	    e != NULL && e->te_timeout <= now;
24477c478bd9Sstevel@tonic-gate 	    e = uu_list_next(timeouts->tq_list, e)) {
24487c478bd9Sstevel@tonic-gate 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
24497c478bd9Sstevel@tonic-gate 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
24507c478bd9Sstevel@tonic-gate 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
24515ca87c7fSlianep 		    "Method or service exit timed out.  Killing contract %ld.",
24527c478bd9Sstevel@tonic-gate 		    e->te_ctid);
24537c478bd9Sstevel@tonic-gate 		e->te_fired = 1;
24547c478bd9Sstevel@tonic-gate 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
24557c478bd9Sstevel@tonic-gate 	}
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate 	if (uu_list_numnodes(timeouts->tq_list) > 0)
24587c478bd9Sstevel@tonic-gate 		ret = 0;
24597c478bd9Sstevel@tonic-gate 	else
24607c478bd9Sstevel@tonic-gate 		ret = -1;
24617c478bd9Sstevel@tonic-gate 
24627c478bd9Sstevel@tonic-gate 	MUTEX_UNLOCK(&timeouts->tq_lock);
24637c478bd9Sstevel@tonic-gate 
24647c478bd9Sstevel@tonic-gate 	return (ret);
24657c478bd9Sstevel@tonic-gate }
24667c478bd9Sstevel@tonic-gate 
24677c478bd9Sstevel@tonic-gate /*
24687c478bd9Sstevel@tonic-gate  * void *restarter_timeouts_event_thread(void *)
24697c478bd9Sstevel@tonic-gate  *   Responsible for monitoring the method timeouts.  This thread must
24707c478bd9Sstevel@tonic-gate  *   be started before any methods are called.
24717c478bd9Sstevel@tonic-gate  */
24727c478bd9Sstevel@tonic-gate /*ARGSUSED*/
24737c478bd9Sstevel@tonic-gate static void *
24747c478bd9Sstevel@tonic-gate restarter_timeouts_event_thread(void *unused)
24757c478bd9Sstevel@tonic-gate {
24767c478bd9Sstevel@tonic-gate 	/*
24777c478bd9Sstevel@tonic-gate 	 * Timeouts are entered on a priority queue, which is processed by
24787c478bd9Sstevel@tonic-gate 	 * this thread.  As timeouts are specified in seconds, we'll do
24797c478bd9Sstevel@tonic-gate 	 * the necessary processing every second, as long as the queue
24807c478bd9Sstevel@tonic-gate 	 * is not empty.
24817c478bd9Sstevel@tonic-gate 	 */
24827c478bd9Sstevel@tonic-gate 
24837c478bd9Sstevel@tonic-gate 	/*CONSTCOND*/
24847c478bd9Sstevel@tonic-gate 	while (1) {
24857c478bd9Sstevel@tonic-gate 		/*
24867c478bd9Sstevel@tonic-gate 		 * As long as the timeout list isn't empty, process it
24877c478bd9Sstevel@tonic-gate 		 * every second.
24887c478bd9Sstevel@tonic-gate 		 */
24897c478bd9Sstevel@tonic-gate 		if (timeout_now() == 0) {
24907c478bd9Sstevel@tonic-gate 			(void) sleep(1);
24917c478bd9Sstevel@tonic-gate 			continue;
24927c478bd9Sstevel@tonic-gate 		}
24937c478bd9Sstevel@tonic-gate 
24947c478bd9Sstevel@tonic-gate 		/* The list is empty, wait until we have more timeouts. */
24957c478bd9Sstevel@tonic-gate 		MUTEX_LOCK(&tu->tu_lock);
24967c478bd9Sstevel@tonic-gate 
24977c478bd9Sstevel@tonic-gate 		while (tu->tu_wakeup == 0)
24987c478bd9Sstevel@tonic-gate 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
24997c478bd9Sstevel@tonic-gate 
25007c478bd9Sstevel@tonic-gate 		tu->tu_wakeup = 0;
25017c478bd9Sstevel@tonic-gate 		MUTEX_UNLOCK(&tu->tu_lock);
25027c478bd9Sstevel@tonic-gate 	}
25037c478bd9Sstevel@tonic-gate 
25047c478bd9Sstevel@tonic-gate 	return (NULL);
25057c478bd9Sstevel@tonic-gate }
25067c478bd9Sstevel@tonic-gate 
25077c478bd9Sstevel@tonic-gate void
25087c478bd9Sstevel@tonic-gate restarter_start()
25097c478bd9Sstevel@tonic-gate {
25107c478bd9Sstevel@tonic-gate 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
25117c478bd9Sstevel@tonic-gate 	(void) startd_thread_create(restarter_event_thread, NULL);
25127c478bd9Sstevel@tonic-gate 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
25137c478bd9Sstevel@tonic-gate 	(void) startd_thread_create(wait_thread, NULL);
25147c478bd9Sstevel@tonic-gate }
25157c478bd9Sstevel@tonic-gate 
25167c478bd9Sstevel@tonic-gate 
25177c478bd9Sstevel@tonic-gate void
25187c478bd9Sstevel@tonic-gate restarter_init()
25197c478bd9Sstevel@tonic-gate {
25207c478bd9Sstevel@tonic-gate 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
25217c478bd9Sstevel@tonic-gate 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
25227c478bd9Sstevel@tonic-gate 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
25237c478bd9Sstevel@tonic-gate 	(void) memset(&instance_list, 0, sizeof (instance_list));
25247c478bd9Sstevel@tonic-gate 
25257c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
25267c478bd9Sstevel@tonic-gate 	instance_list.ril_instance_list = startd_list_create(
25277c478bd9Sstevel@tonic-gate 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
25287c478bd9Sstevel@tonic-gate 
25297c478bd9Sstevel@tonic-gate 	restarter_queue_pool = startd_list_pool_create(
25307c478bd9Sstevel@tonic-gate 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
25317c478bd9Sstevel@tonic-gate 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
25327c478bd9Sstevel@tonic-gate 	    UU_LIST_POOL_DEBUG);
25337c478bd9Sstevel@tonic-gate 
25347c478bd9Sstevel@tonic-gate 	contract_list_pool = startd_list_pool_create(
25357c478bd9Sstevel@tonic-gate 	    "contract_list", sizeof (contract_entry_t),
25367c478bd9Sstevel@tonic-gate 	    offsetof(contract_entry_t,  ce_link), NULL,
25377c478bd9Sstevel@tonic-gate 	    UU_LIST_POOL_DEBUG);
25387c478bd9Sstevel@tonic-gate 	contract_hash_init();
25397c478bd9Sstevel@tonic-gate 
25407c478bd9Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "Initialized restarter\n");
25417c478bd9Sstevel@tonic-gate }
2542