xref: /titanic_51/usr/src/cmd/svc/startd/restarter.c (revision 31fd60d36d9ae794bbedd5e834b8be6d412a853f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * restarter.c - service manipulation
30  *
31  * This component manages services whose restarter is svc.startd, the standard
32  * restarter.  It translates restarter protocol events from the graph engine
33  * into actions on processes, as a delegated restarter would do.
34  *
35  * The master restarter manages a number of always-running threads:
36  *   - restarter event thread: events from the graph engine
37  *   - timeout thread: thread to fire queued timeouts
38  *   - contract thread: thread to handle contract events
39  *   - wait thread: thread to handle wait-based services
40  *
41  * The other threads are created as-needed:
42  *   - per-instance method threads
43  *   - per-instance event processing threads
44  *
45  * The interaction of all threads must result in the following conditions
46  * being satisfied (on a per-instance basis):
47  *   - restarter events must be processed in order
48  *   - method execution must be serialized
49  *   - instance delete must be held until outstanding methods are complete
50  *   - contract events shouldn't be processed while a method is running
51  *   - timeouts should fire even when a method is running
52  *
53  * Service instances are represented by restarter_inst_t's and are kept in the
54  * instance_list list.
55  *
56  * Service States
57  *   The current state of a service instance is kept in
58  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
59  *   some time, then before we effect the transition we set
60  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
61  *   rotate i_next_state to i_state and set i_next_state to
62  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
63  *   held.  The exception is when we launch methods, which are done with
64  *   a separate thread.  To keep any other threads from grabbing ri_lock before
65  *   method_thread() does, we set ri_method_thread to the thread id of the
66  *   method thread, and when it is nonzero any thread with a different thread id
67  *   waits on ri_method_cv.
68  *
69  * Method execution is serialized by blocking on ri_method_cv in
70  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
71  * also prevents the instance structure from being deleted until all
72  * outstanding operations such as method_thread() have finished.
73  *
74  * Lock ordering:
75  *
76  * dgraph_lock [can be held when taking:]
77  *   utmpx_lock
78  *   dictionary->dict_lock
79  *   st->st_load_lock
80  *   wait_info_lock
81  *   ru->restarter_update_lock
82  *     restarter_queue->rpeq_lock
83  *   instance_list.ril_lock
84  *     inst->ri_lock
85  *   st->st_configd_live_lock
86  *
87  * instance_list.ril_lock
88  *   graph_queue->gpeq_lock
89  *   gu->gu_lock
90  *   st->st_configd_live_lock
91  *   dictionary->dict_lock
92  *   inst->ri_lock
93  *     graph_queue->gpeq_lock
94  *     gu->gu_lock
95  *     tu->tu_lock
96  *     tq->tq_lock
97  *     inst->ri_queue_lock
98  *       wait_info_lock
99  *       bp->cb_lock
100  *     utmpx_lock
101  *
102  * single_user_thread_lock
103  *   wait_info_lock
104  *   utmpx_lock
105  *
106  * gu_freeze_lock
107  *
108  * logbuf_mutex nests inside pretty much everything.
109  */
110 
111 #include <sys/contract/process.h>
112 #include <sys/ctfs.h>
113 #include <sys/stat.h>
114 #include <sys/time.h>
115 #include <sys/types.h>
116 #include <sys/uio.h>
117 #include <sys/wait.h>
118 #include <assert.h>
119 #include <errno.h>
120 #include <fcntl.h>
121 #include <libcontract.h>
122 #include <libcontract_priv.h>
123 #include <libintl.h>
124 #include <librestart.h>
125 #include <librestart_priv.h>
126 #include <libuutil.h>
127 #include <limits.h>
128 #include <poll.h>
129 #include <port.h>
130 #include <pthread.h>
131 #include <stdarg.h>
132 #include <stdio.h>
133 #include <strings.h>
134 #include <unistd.h>
135 
136 #include "startd.h"
137 #include "protocol.h"
138 
/* Pool handed to uu_list_node_init() for each instance's ri_link node. */
139 static uu_list_pool_t *restarter_instance_pool;
/* The restarter_inst_t's live on ril_instance_list; ril_lock is taken around list access. */
140 static restarter_instance_list_t instance_list;
141 
/* Pool from which each instance's ri_queue list is created. */
142 static uu_list_pool_t *restarter_queue_pool;
143 
144 /*ARGSUSED*/
145 static int
146 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
147     void *private)
148 {
149 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
150 	int rc_id = *(int *)rc_arg;
151 
152 	if (lc_id > rc_id)
153 		return (1);
154 	if (lc_id < rc_id)
155 		return (-1);
156 	return (0);
157 }
158 
159 static restarter_inst_t *
160 inst_lookup_by_name(const char *name)
161 {
162 	int id;
163 
164 	id = dict_lookup_byname(name);
165 	if (id == -1)
166 		return (NULL);
167 
168 	return (inst_lookup_by_id(id));
169 }
170 
171 restarter_inst_t *
172 inst_lookup_by_id(int id)
173 {
174 	restarter_inst_t *inst;
175 
176 	MUTEX_LOCK(&instance_list.ril_lock);
177 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
178 	if (inst != NULL)
179 		MUTEX_LOCK(&inst->ri_lock);
180 	MUTEX_UNLOCK(&instance_list.ril_lock);
181 
182 	if (inst != NULL) {
183 		while (inst->ri_method_thread != 0 &&
184 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
185 			++inst->ri_method_waiters;
186 			(void) pthread_cond_wait(&inst->ri_method_cv,
187 			    &inst->ri_lock);
188 			assert(inst->ri_method_waiters > 0);
189 			--inst->ri_method_waiters;
190 		}
191 	}
192 
193 	return (inst);
194 }
195 
196 static restarter_inst_t *
197 inst_lookup_queue(const char *name)
198 {
199 	int id;
200 	restarter_inst_t *inst;
201 
202 	id = dict_lookup_byname(name);
203 	if (id == -1)
204 		return (NULL);
205 
206 	MUTEX_LOCK(&instance_list.ril_lock);
207 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
208 	if (inst != NULL)
209 		MUTEX_LOCK(&inst->ri_queue_lock);
210 	MUTEX_UNLOCK(&instance_list.ril_lock);
211 
212 	return (inst);
213 }
214 
215 const char *
216 service_style(int flags)
217 {
218 	switch (flags & RINST_STYLE_MASK) {
219 	case RINST_CONTRACT:	return ("contract");
220 	case RINST_TRANSIENT:	return ("transient");
221 	case RINST_WAIT:	return ("wait");
222 
223 	default:
224 #ifndef NDEBUG
225 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
226 #endif
227 		abort();
228 		/* NOTREACHED */
229 	}
230 }
231 
232 /*
233  * Fails with ECONNABORTED or ECANCELED.
234  */
235 static int
236 check_contract(restarter_inst_t *inst, boolean_t primary,
237     scf_instance_t *scf_inst)
238 {
239 	ctid_t *ctidp;
240 	int fd, r;
241 
242 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
243 	    &inst->ri_i.i_transient_ctid;
244 
245 	assert(*ctidp >= 1);
246 
247 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
248 	if (fd >= 0) {
249 		r = close(fd);
250 		assert(r == 0);
251 		return (0);
252 	}
253 
254 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
255 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
256 	switch (r) {
257 	case 0:
258 	case ECONNABORTED:
259 	case ECANCELED:
260 		*ctidp = 0;
261 		return (r);
262 
263 	case ENOMEM:
264 		uu_die("Out of memory\n");
265 		/* NOTREACHED */
266 
267 	case EPERM:
268 		uu_die("Insufficient privilege.\n");
269 		/* NOTREACHED */
270 
271 	case EACCES:
272 		uu_die("Repository backend access denied.\n");
273 		/* NOTREACHED */
274 
275 	case EROFS:
276 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
277 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
278 		return (0);
279 
280 	case EINVAL:
281 	case EBADF:
282 	default:
283 		assert(0);
284 		abort();
285 		/* NOTREACHED */
286 	}
287 }
288 
289 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
290 
291 /*
292  * int restarter_insert_inst(scf_handle_t *, char *)
293  *   If the inst is already in the restarter list, return its id.  If the inst
294  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
295  *   states, insert it into the list, and return 0.
296  *
297  *   Fails with
298  *     ENOENT - name is not in the repository
299  */
300 static int
301 restarter_insert_inst(scf_handle_t *h, const char *name)
302 {
303 	int id, r;
304 	restarter_inst_t *inst;
305 	uu_list_index_t idx;
306 	scf_service_t *scf_svc;
307 	scf_instance_t *scf_inst;
308 	scf_snapshot_t *snap = NULL;
309 	scf_propertygroup_t *pg;
310 	char *svc_name, *inst_name;
311 	char logfilebuf[PATH_MAX];
312 	char *c;
313 	boolean_t do_commit_states;
314 	restarter_instance_state_t state, next_state;
315 	protocol_states_t *ps;
316 	pid_t start_pid;
317 
318 	MUTEX_LOCK(&instance_list.ril_lock);
319 
320 	/*
321 	 * We don't use inst_lookup_by_name() here because we want the lookup
322 	 * & insert to be atomic.
323 	 */
324 	id = dict_lookup_byname(name);
325 	if (id != -1) {
326 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
327 		    &idx);
328 		if (inst != NULL) {
329 			MUTEX_UNLOCK(&instance_list.ril_lock);
330 			return (0);
331 		}
332 	}
333 
334 	/* Allocate an instance */
335 	inst = startd_zalloc(sizeof (restarter_inst_t));
336 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
337 	inst->ri_utmpx_prefix[0] = '\0';
338 
339 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
340 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
341 
342 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
343 
344 	/*
345 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
346 	 * just in case.
347 	 */
348 	inst->ri_id = (id != -1 ? id : dict_insert(name));
349 
350 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
351 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
352 
353 	scf_svc = safe_scf_service_create(h);
354 	scf_inst = safe_scf_instance_create(h);
355 	pg = safe_scf_pg_create(h);
356 	svc_name = startd_alloc(max_scf_name_size);
357 	inst_name = startd_alloc(max_scf_name_size);
358 
359 rep_retry:
360 	if (snap != NULL)
361 		scf_snapshot_destroy(snap);
362 	if (inst->ri_logstem != NULL)
363 		startd_free(inst->ri_logstem, PATH_MAX);
364 	if (inst->ri_common_name != NULL)
365 		startd_free(inst->ri_common_name, max_scf_value_size);
366 	if (inst->ri_C_common_name != NULL)
367 		startd_free(inst->ri_C_common_name, max_scf_value_size);
368 	snap = NULL;
369 	inst->ri_logstem = NULL;
370 	inst->ri_common_name = NULL;
371 	inst->ri_C_common_name = NULL;
372 
373 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
374 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
375 		switch (scf_error()) {
376 		case SCF_ERROR_CONNECTION_BROKEN:
377 			libscf_handle_rebind(h);
378 			goto rep_retry;
379 
380 		case SCF_ERROR_NOT_FOUND:
381 			goto deleted;
382 		}
383 
384 		uu_die("Can't decode FMRI %s: %s\n", name,
385 		    scf_strerror(scf_error()));
386 	}
387 
388 	/*
389 	 * If there's no running snapshot, then we execute using the editing
390 	 * snapshot.  Pending snapshots will be taken later.
391 	 */
392 	snap = libscf_get_running_snapshot(scf_inst);
393 
394 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
395 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
396 	    0)) {
397 		switch (scf_error()) {
398 		case SCF_ERROR_NOT_SET:
399 			break;
400 
401 		case SCF_ERROR_CONNECTION_BROKEN:
402 			libscf_handle_rebind(h);
403 			goto rep_retry;
404 
405 		default:
406 			assert(0);
407 			abort();
408 		}
409 
410 		goto deleted;
411 	}
412 
413 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
414 	for (c = logfilebuf; *c != '\0'; c++)
415 		if (*c == '/')
416 			*c = '-';
417 
418 	inst->ri_logstem = startd_alloc(PATH_MAX);
419 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
420 	    LOG_SUFFIX);
421 
422 	/*
423 	 * If the restarter group is missing, use uninit/none.  Otherwise,
424 	 * we're probably being restarted & don't want to mess up the states
425 	 * that are there.
426 	 */
427 	state = RESTARTER_STATE_UNINIT;
428 	next_state = RESTARTER_STATE_NONE;
429 
430 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
431 	if (r != 0) {
432 		switch (scf_error()) {
433 		case SCF_ERROR_CONNECTION_BROKEN:
434 			libscf_handle_rebind(h);
435 			goto rep_retry;
436 
437 		case SCF_ERROR_NOT_SET:
438 			goto deleted;
439 
440 		case SCF_ERROR_NOT_FOUND:
441 			/*
442 			 * This shouldn't happen since the graph engine should
443 			 * have initialized the state to uninitialized/none if
444 			 * there was no restarter pg.  In case somebody
445 			 * deleted it, though....
446 			 */
447 			do_commit_states = B_TRUE;
448 			break;
449 
450 		default:
451 			assert(0);
452 			abort();
453 		}
454 	} else {
455 		r = libscf_read_states(pg, &state, &next_state);
456 		if (r != 0) {
457 			do_commit_states = B_TRUE;
458 		} else {
459 			if (next_state != RESTARTER_STATE_NONE) {
460 				/*
461 				 * Force next_state to _NONE since we
462 				 * don't look for method processes.
463 				 */
464 				next_state = RESTARTER_STATE_NONE;
465 				do_commit_states = B_TRUE;
466 			} else {
467 				/*
468 				 * Inform the restarter of our state without
469 				 * changing the STIME in the repository.
470 				 */
471 				ps = startd_alloc(sizeof (*ps));
472 				inst->ri_i.i_state = ps->ps_state = state;
473 				inst->ri_i.i_next_state = ps->ps_state_next =
474 				    next_state;
475 
476 				graph_protocol_send_event(inst->ri_i.i_fmri,
477 				    GRAPH_UPDATE_STATE_CHANGE, ps);
478 
479 				do_commit_states = B_FALSE;
480 			}
481 		}
482 	}
483 
484 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
485 	    &inst->ri_utmpx_prefix)) {
486 	case 0:
487 		break;
488 
489 	case ECONNABORTED:
490 		libscf_handle_rebind(h);
491 		goto rep_retry;
492 
493 	case ECANCELED:
494 		goto deleted;
495 
496 	case ENOENT:
497 		/*
498 		 * This is odd, because the graph engine should have required
499 		 * the general property group.  So we'll just use default
500 		 * flags in anticipation of the graph engine sending us
501 		 * REMOVE_INSTANCE when it finds out that the general property
502 		 * group has been deleted.
503 		 */
504 		inst->ri_flags = RINST_CONTRACT;
505 		break;
506 
507 	default:
508 		assert(0);
509 		abort();
510 	}
511 
512 	switch (libscf_get_template_values(scf_inst, snap,
513 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
514 	case 0:
515 		break;
516 
517 	case ECONNABORTED:
518 		libscf_handle_rebind(h);
519 		goto rep_retry;
520 
521 	case ECANCELED:
522 		goto deleted;
523 
524 	case ECHILD:
525 	case ENOENT:
526 		break;
527 
528 	default:
529 		assert(0);
530 		abort();
531 	}
532 
533 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
534 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
535 	    &start_pid)) {
536 	case 0:
537 		break;
538 
539 	case ECONNABORTED:
540 		libscf_handle_rebind(h);
541 		goto rep_retry;
542 
543 	case ECANCELED:
544 		goto deleted;
545 
546 	default:
547 		assert(0);
548 		abort();
549 	}
550 
551 	if (inst->ri_i.i_primary_ctid >= 1) {
552 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
553 
554 		switch (check_contract(inst, B_TRUE, scf_inst)) {
555 		case 0:
556 			break;
557 
558 		case ECONNABORTED:
559 			libscf_handle_rebind(h);
560 			goto rep_retry;
561 
562 		case ECANCELED:
563 			goto deleted;
564 
565 		default:
566 			assert(0);
567 			abort();
568 		}
569 	}
570 
571 	if (inst->ri_i.i_transient_ctid >= 1) {
572 		switch (check_contract(inst, B_FALSE, scf_inst)) {
573 		case 0:
574 			break;
575 
576 		case ECONNABORTED:
577 			libscf_handle_rebind(h);
578 			goto rep_retry;
579 
580 		case ECANCELED:
581 			goto deleted;
582 
583 		default:
584 			assert(0);
585 			abort();
586 		}
587 	}
588 
589 	/* No more failures we live through, so add it to the list. */
590 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
591 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
592 	MUTEX_LOCK(&inst->ri_lock);
593 	MUTEX_LOCK(&inst->ri_queue_lock);
594 
595 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
596 
597 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
598 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
599 	MUTEX_UNLOCK(&instance_list.ril_lock);
600 
601 	if (start_pid != -1 &&
602 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
603 		int ret;
604 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
605 		if (ret == -1) {
606 			/*
607 			 * Implication:  if we can't reregister the
608 			 * instance, we will start another one.  Two
609 			 * instances may or may not result in a resource
610 			 * conflict.
611 			 */
612 			log_error(LOG_WARNING,
613 			    "%s: couldn't reregister %ld for wait\n",
614 			    inst->ri_i.i_fmri, start_pid);
615 		} else if (ret == 1) {
616 			/*
617 			 * Leading PID has exited.
618 			 */
619 			(void) stop_instance(h, inst, RSTOP_EXIT);
620 		}
621 	}
622 
623 
624 	scf_pg_destroy(pg);
625 
626 	if (do_commit_states)
627 		(void) restarter_instance_update_states(h, inst, state,
628 		    next_state, RERR_NONE, NULL);
629 
630 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
631 	    service_style(inst->ri_flags));
632 
633 	MUTEX_UNLOCK(&inst->ri_queue_lock);
634 	MUTEX_UNLOCK(&inst->ri_lock);
635 
636 	startd_free(svc_name, max_scf_name_size);
637 	startd_free(inst_name, max_scf_name_size);
638 	scf_snapshot_destroy(snap);
639 	scf_instance_destroy(scf_inst);
640 	scf_service_destroy(scf_svc);
641 
642 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
643 	    name);
644 
645 	return (0);
646 
647 deleted:
648 	MUTEX_UNLOCK(&instance_list.ril_lock);
649 	startd_free(inst_name, max_scf_name_size);
650 	startd_free(svc_name, max_scf_name_size);
651 	if (snap != NULL)
652 		scf_snapshot_destroy(snap);
653 	scf_pg_destroy(pg);
654 	scf_instance_destroy(scf_inst);
655 	scf_service_destroy(scf_svc);
656 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
657 	uu_list_destroy(inst->ri_queue);
658 	if (inst->ri_logstem != NULL)
659 		startd_free(inst->ri_logstem, PATH_MAX);
660 	if (inst->ri_common_name != NULL)
661 		startd_free(inst->ri_common_name, max_scf_value_size);
662 	if (inst->ri_C_common_name != NULL)
663 		startd_free(inst->ri_C_common_name, max_scf_value_size);
664 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
665 	startd_free(inst, sizeof (restarter_inst_t));
666 	return (ENOENT);
667 }
668 
669 static void
670 restarter_delete_inst(restarter_inst_t *ri)
671 {
672 	int id;
673 	restarter_inst_t *rip;
674 	void *cookie = NULL;
675 	restarter_instance_qentry_t *e;
676 
677 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
678 
679 	/*
680 	 * Must drop the instance lock so we can pick up the instance_list
681 	 * lock & remove the instance.
682 	 */
683 	id = ri->ri_id;
684 	MUTEX_UNLOCK(&ri->ri_lock);
685 
686 	MUTEX_LOCK(&instance_list.ril_lock);
687 
688 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
689 	if (rip == NULL) {
690 		MUTEX_UNLOCK(&instance_list.ril_lock);
691 		return;
692 	}
693 
694 	assert(ri == rip);
695 
696 	uu_list_remove(instance_list.ril_instance_list, ri);
697 
698 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
699 	    ri->ri_i.i_fmri);
700 
701 	MUTEX_UNLOCK(&instance_list.ril_lock);
702 
703 	/*
704 	 * We can lock the instance without holding the instance_list lock
705 	 * since we removed the instance from the list.
706 	 */
707 	MUTEX_LOCK(&ri->ri_lock);
708 	MUTEX_LOCK(&ri->ri_queue_lock);
709 
710 	if (ri->ri_i.i_primary_ctid >= 1)
711 		contract_hash_remove(ri->ri_i.i_primary_ctid);
712 
713 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
714 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
715 
716 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
717 		startd_free(e, sizeof (*e));
718 	uu_list_destroy(ri->ri_queue);
719 
720 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
721 	startd_free(ri->ri_logstem, PATH_MAX);
722 	if (ri->ri_common_name != NULL)
723 		startd_free(ri->ri_common_name, max_scf_value_size);
724 	if (ri->ri_C_common_name != NULL)
725 		startd_free(ri->ri_C_common_name, max_scf_value_size);
726 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
727 	(void) pthread_mutex_destroy(&ri->ri_lock);
728 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
729 	startd_free(ri, sizeof (restarter_inst_t));
730 }
731 
732 /*
733  * instance_is_wait_style()
734  *
735  *   Returns 1 if the given instance is a "wait-style" service instance.
736  */
737 int
738 instance_is_wait_style(restarter_inst_t *inst)
739 {
740 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
741 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
742 }
743 
744 /*
745  * instance_is_transient_style()
746  *
747  *   Returns 1 if the given instance is a transient service instance.
748  */
749 int
750 instance_is_transient_style(restarter_inst_t *inst)
751 {
752 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
753 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
754 }
755 
756 /*
757  * instance_in_transition()
758  * Returns 1 if instance is in transition, 0 if not
759  */
760 int
761 instance_in_transition(restarter_inst_t *inst)
762 {
763 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
764 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
765 		return (0);
766 	return (1);
767 }
768 
769 /*
770  * returns 1 if instance is already started, 0 if not
771  */
772 static int
773 instance_started(restarter_inst_t *inst)
774 {
775 	int ret;
776 
777 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
778 
779 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
780 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
781 		ret = 1;
782 	else
783 		ret = 0;
784 
785 	return (ret);
786 }
787 
788 /*
789  * Returns
790  *   0 - success
791  *   ECONNRESET - success, but h was rebound
792  */
793 int
794 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
795     restarter_instance_state_t new_state,
796     restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
797 {
798 	protocol_states_t *states;
799 	int e;
800 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
801 	boolean_t rebound = B_FALSE;
802 	int prev_state_online;
803 	int state_online;
804 
805 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
806 
807 	prev_state_online = instance_started(ri);
808 
809 retry:
810 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
811 	    aux);
812 	switch (e) {
813 	case 0:
814 		break;
815 
816 	case ENOMEM:
817 		++retry_count;
818 		if (retry_count < ALLOC_RETRY) {
819 			(void) poll(NULL, 0, msecs);
820 			msecs *= ALLOC_DELAY_MULT;
821 			goto retry;
822 		}
823 
824 		/* Like startd_alloc(). */
825 		uu_die("Insufficient memory.\n");
826 		/* NOTREACHED */
827 
828 	case ECONNABORTED:
829 		libscf_handle_rebind(h);
830 		rebound = B_TRUE;
831 		goto retry;
832 
833 	case EPERM:
834 	case EACCES:
835 	case EROFS:
836 		log_error(LOG_NOTICE, "Could not commit state change for %s "
837 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
838 		/* FALLTHROUGH */
839 
840 	case ENOENT:
841 		ri->ri_i.i_state = new_state;
842 		ri->ri_i.i_next_state = new_state_next;
843 		break;
844 
845 	case EINVAL:
846 	default:
847 		bad_error("_restarter_commit_states", e);
848 	}
849 
850 	states = startd_alloc(sizeof (protocol_states_t));
851 	states->ps_state = new_state;
852 	states->ps_state_next = new_state_next;
853 	states->ps_err = err;
854 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
855 	    (void *)states);
856 
857 	state_online = instance_started(ri);
858 
859 	if (prev_state_online && !state_online)
860 		ri->ri_post_offline_hook();
861 	else if (!prev_state_online && state_online)
862 		ri->ri_post_online_hook();
863 
864 	return (rebound ? ECONNRESET : 0);
865 }
866 
867 void
868 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
869 {
870 	restarter_inst_t *inst;
871 
872 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
873 
874 	inst = inst_lookup_by_name(fmri);
875 	if (inst == NULL)
876 		return;
877 
878 	inst->ri_flags |= flag;
879 
880 	MUTEX_UNLOCK(&inst->ri_lock);
881 }
882 
883 static void
884 restarter_take_pending_snapshots(scf_handle_t *h)
885 {
886 	restarter_inst_t *inst;
887 	int r;
888 
889 	MUTEX_LOCK(&instance_list.ril_lock);
890 
891 	for (inst = uu_list_first(instance_list.ril_instance_list);
892 	    inst != NULL;
893 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
894 		const char *fmri;
895 		scf_instance_t *sinst = NULL;
896 
897 		MUTEX_LOCK(&inst->ri_lock);
898 
899 		/*
900 		 * This is where we'd check inst->ri_method_thread and if it
901 		 * were nonzero we'd wait in anticipation of another thread
902 		 * executing a method for inst.  Doing so with the instance_list
903 		 * locked, though, leads to deadlock.  Since taking a snapshot
904 		 * during that window won't hurt anything, we'll just continue.
905 		 */
906 
907 		fmri = inst->ri_i.i_fmri;
908 
909 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
910 			scf_snapshot_t *rsnap;
911 
912 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
913 
914 			rsnap = libscf_get_or_make_running_snapshot(sinst,
915 			    fmri, B_FALSE);
916 
917 			scf_instance_destroy(sinst);
918 
919 			if (rsnap != NULL)
920 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
921 
922 			scf_snapshot_destroy(rsnap);
923 		}
924 
925 		if (inst->ri_flags & RINST_RETAKE_START) {
926 			switch (r = libscf_snapshots_poststart(h, fmri,
927 			    B_FALSE)) {
928 			case 0:
929 			case ENOENT:
930 				inst->ri_flags &= ~RINST_RETAKE_START;
931 				break;
932 
933 			case ECONNABORTED:
934 				break;
935 
936 			case EACCES:
937 			default:
938 				bad_error("libscf_snapshots_poststart", r);
939 			}
940 		}
941 
942 		MUTEX_UNLOCK(&inst->ri_lock);
943 	}
944 
945 	MUTEX_UNLOCK(&instance_list.ril_lock);
946 }
947 
948 /* ARGSUSED */
949 void *
950 restarter_post_fsminimal_thread(void *unused)
951 {
952 	scf_handle_t *h;
953 	int r;
954 
955 	h = libscf_handle_create_bound_loop();
956 
957 	for (;;) {
958 		r = libscf_create_self(h);
959 		if (r == 0)
960 			break;
961 
962 		assert(r == ECONNABORTED);
963 		libscf_handle_rebind(h);
964 	}
965 
966 	restarter_take_pending_snapshots(h);
967 
968 	(void) scf_handle_unbind(h);
969 	scf_handle_destroy(h);
970 
971 	return (NULL);
972 }
973 
974 /*
975  * int stop_instance()
976  *
977  *   Stop the instance identified by the instance given as the second argument,
978  *   for the cause stated.
979  *
980  *   Returns
981  *     0 - success
982  *     -1 - inst is in transition
983  */
984 static int
985 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
986     stop_cause_t cause)
987 {
988 	fork_info_t *info;
989 	const char *cp;
990 	int err;
991 	restarter_error_t re;
992 
993 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
994 	assert(inst->ri_method_thread == 0);
995 
996 	switch (cause) {
997 	case RSTOP_EXIT:
998 		re = RERR_RESTART;
999 		cp = "all processes in service exited";
1000 		break;
1001 	case RSTOP_CORE:
1002 		re = RERR_FAULT;
1003 		cp = "process dumped core";
1004 		break;
1005 	case RSTOP_SIGNAL:
1006 		re = RERR_FAULT;
1007 		cp = "process received fatal signal from outside the service";
1008 		break;
1009 	case RSTOP_HWERR:
1010 		re = RERR_FAULT;
1011 		cp = "process killed due to uncorrectable hardware error";
1012 		break;
1013 	case RSTOP_DEPENDENCY:
1014 		re = RERR_RESTART;
1015 		cp = "dependency activity requires stop";
1016 		break;
1017 	case RSTOP_DISABLE:
1018 		re = RERR_RESTART;
1019 		cp = "service disabled";
1020 		break;
1021 	case RSTOP_RESTART:
1022 		re = RERR_RESTART;
1023 		cp = "service restarting";
1024 		break;
1025 	default:
1026 #ifndef NDEBUG
1027 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1028 		    cause, __FILE__, __LINE__);
1029 #endif
1030 		abort();
1031 	}
1032 
1033 	/* Services in the disabled and maintenance state are ignored */
1034 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1035 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1036 		log_framework(LOG_DEBUG,
1037 		    "%s: stop_instance -> is maint/disabled\n",
1038 		    inst->ri_i.i_fmri);
1039 		return (0);
1040 	}
1041 
1042 	/* Already stopped instances are left alone */
1043 	if (instance_started(inst) == 0) {
1044 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1045 		    inst->ri_i.i_fmri);
1046 		return (0);
1047 	}
1048 
1049 	if (instance_in_transition(inst)) {
1050 		/* requeue event by returning -1 */
1051 		log_framework(LOG_DEBUG,
1052 		    "Restarter: Not stopping %s, in transition.\n",
1053 		    inst->ri_i.i_fmri);
1054 		return (-1);
1055 	}
1056 
1057 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1058 
1059 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1060 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1061 
1062 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1063 		/*
1064 		 * No need to stop instance, as child has exited; remove
1065 		 * contract and move the instance to the offline state.
1066 		 */
1067 		switch (err = restarter_instance_update_states(local_handle,
1068 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1069 		    NULL)) {
1070 		case 0:
1071 		case ECONNRESET:
1072 			break;
1073 
1074 		default:
1075 			bad_error("restarter_instance_update_states", err);
1076 		}
1077 
1078 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1079 
1080 		if (inst->ri_i.i_primary_ctid != 0) {
1081 			inst->ri_m_inst =
1082 			    safe_scf_instance_create(local_handle);
1083 			inst->ri_mi_deleted = B_FALSE;
1084 
1085 			libscf_reget_instance(inst);
1086 			method_remove_contract(inst, B_TRUE, B_TRUE);
1087 
1088 			scf_instance_destroy(inst->ri_m_inst);
1089 			inst->ri_m_inst = NULL;
1090 		}
1091 
1092 		switch (err = restarter_instance_update_states(local_handle,
1093 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1094 		    NULL)) {
1095 		case 0:
1096 		case ECONNRESET:
1097 			break;
1098 
1099 		default:
1100 			bad_error("restarter_instance_update_states", err);
1101 		}
1102 
1103 		return (0);
1104 	}
1105 
1106 	switch (err = restarter_instance_update_states(local_handle, inst,
1107 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1108 	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
1109 	case 0:
1110 	case ECONNRESET:
1111 		break;
1112 
1113 	default:
1114 		bad_error("restarter_instance_update_states", err);
1115 	}
1116 
1117 	info = startd_zalloc(sizeof (fork_info_t));
1118 
1119 	info->sf_id = inst->ri_id;
1120 	info->sf_method_type = METHOD_STOP;
1121 	info->sf_event_type = re;
1122 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1123 
1124 	return (0);
1125 }
1126 
1127 /*
1128  * Returns
1129  *   ENOENT - fmri is not in instance_list
1130  *   0 - success
1131  *   ECONNRESET - success, though handle was rebound
1132  *   -1 - instance is in transition
1133  */
1134 int
1135 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1136 {
1137 	restarter_inst_t *rip;
1138 	int r;
1139 
1140 	rip = inst_lookup_by_name(fmri);
1141 	if (rip == NULL)
1142 		return (ENOENT);
1143 
1144 	r = stop_instance(h, rip, flags);
1145 
1146 	MUTEX_UNLOCK(&rip->ri_lock);
1147 
1148 	return (r);
1149 }
1150 
1151 static void
1152 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1153     unmaint_cause_t cause)
1154 {
1155 	ctid_t ctid;
1156 	scf_instance_t *inst;
1157 	int r;
1158 	uint_t tries = 0, msecs = ALLOC_DELAY;
1159 	const char *cp;
1160 
1161 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1162 
1163 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1164 		log_error(LOG_DEBUG, "Restarter: "
1165 		    "Ignoring maintenance off command because %s is not in the "
1166 		    "maintenance state.\n", rip->ri_i.i_fmri);
1167 		return;
1168 	}
1169 
1170 	switch (cause) {
1171 	case RUNMAINT_CLEAR:
1172 		cp = "clear requested";
1173 		break;
1174 	case RUNMAINT_DISABLE:
1175 		cp = "disable requested";
1176 		break;
1177 	default:
1178 #ifndef NDEBUG
1179 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1180 		    cause, __FILE__, __LINE__);
1181 #endif
1182 		abort();
1183 	}
1184 
1185 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1186 	    cp);
1187 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1188 	    "%s.\n", rip->ri_i.i_fmri, cp);
1189 
1190 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1191 	    RESTARTER_STATE_NONE, RERR_RESTART, NULL);
1192 
1193 	/*
1194 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1195 	 * a primary contract.
1196 	 */
1197 	if (rip->ri_i.i_primary_ctid == 0)
1198 		return;
1199 
1200 	ctid = rip->ri_i.i_primary_ctid;
1201 	contract_abandon(ctid);
1202 	rip->ri_i.i_primary_ctid = 0;
1203 
1204 rep_retry:
1205 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1206 	case 0:
1207 		break;
1208 
1209 	case ECONNABORTED:
1210 		libscf_handle_rebind(h);
1211 		goto rep_retry;
1212 
1213 	case ENOENT:
1214 		/* Must have been deleted. */
1215 		return;
1216 
1217 	case EINVAL:
1218 	case ENOTSUP:
1219 	default:
1220 		bad_error("libscf_handle_rebind", r);
1221 	}
1222 
1223 again:
1224 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1225 	switch (r) {
1226 	case 0:
1227 		break;
1228 
1229 	case ENOMEM:
1230 		++tries;
1231 		if (tries < ALLOC_RETRY) {
1232 			(void) poll(NULL, 0, msecs);
1233 			msecs *= ALLOC_DELAY_MULT;
1234 			goto again;
1235 		}
1236 
1237 		uu_die("Insufficient memory.\n");
1238 		/* NOTREACHED */
1239 
1240 	case ECONNABORTED:
1241 		scf_instance_destroy(inst);
1242 		libscf_handle_rebind(h);
1243 		goto rep_retry;
1244 
1245 	case ECANCELED:
1246 		break;
1247 
1248 	case EPERM:
1249 	case EACCES:
1250 	case EROFS:
1251 		log_error(LOG_INFO,
1252 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1253 		    rip->ri_i.i_fmri, strerror(r));
1254 		break;
1255 
1256 	case EINVAL:
1257 	case EBADF:
1258 	default:
1259 		bad_error("restarter_remove_contract", r);
1260 	}
1261 
1262 	scf_instance_destroy(inst);
1263 }
1264 
1265 /*
1266  * enable_inst()
1267  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1268  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1269  *   disabled, move it to offline.  If the event is _DISABLE or
1270  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1271  *
1272  *   Returns
1273  *     0 - success
1274  *     ECONNRESET - h was rebound
1275  */
1276 static int
1277 enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
1278 {
1279 	restarter_instance_state_t state;
1280 	int r;
1281 
1282 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1283 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1284 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1285 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1286 	assert(instance_in_transition(inst) == 0);
1287 
1288 	state = inst->ri_i.i_state;
1289 
1290 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1291 		inst->ri_i.i_enabled = 1;
1292 
1293 		if (state == RESTARTER_STATE_UNINIT ||
1294 		    state == RESTARTER_STATE_DISABLED) {
1295 			/*
1296 			 * B_FALSE: Don't log an error if the log_instance()
1297 			 * fails because it will fail on the miniroot before
1298 			 * install-discovery runs.
1299 			 */
1300 			log_instance(inst, B_FALSE, "Enabled.");
1301 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1302 			    inst->ri_i.i_fmri);
1303 			(void) restarter_instance_update_states(h, inst,
1304 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1305 			    RERR_NONE, NULL);
1306 		} else {
1307 			log_framework(LOG_DEBUG, "Restarter: "
1308 			    "Not changing state of %s for enable command.\n",
1309 			    inst->ri_i.i_fmri);
1310 		}
1311 	} else {
1312 		inst->ri_i.i_enabled = 0;
1313 
1314 		switch (state) {
1315 		case RESTARTER_STATE_ONLINE:
1316 		case RESTARTER_STATE_DEGRADED:
1317 			r = stop_instance(h, inst, RSTOP_DISABLE);
1318 			return (r == ECONNRESET ? 0 : r);
1319 
1320 		case RESTARTER_STATE_OFFLINE:
1321 		case RESTARTER_STATE_UNINIT:
1322 			if (inst->ri_i.i_primary_ctid != 0) {
1323 				inst->ri_m_inst = safe_scf_instance_create(h);
1324 				inst->ri_mi_deleted = B_FALSE;
1325 
1326 				libscf_reget_instance(inst);
1327 				method_remove_contract(inst, B_TRUE, B_TRUE);
1328 
1329 				scf_instance_destroy(inst->ri_m_inst);
1330 			}
1331 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1332 			log_instance(inst, B_FALSE, "Disabled.");
1333 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1334 			    inst->ri_i.i_fmri);
1335 			(void) restarter_instance_update_states(h, inst,
1336 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1337 			    RERR_RESTART, NULL);
1338 			return (0);
1339 
1340 		case RESTARTER_STATE_DISABLED:
1341 			break;
1342 
1343 		case RESTARTER_STATE_MAINT:
1344 			/*
1345 			 * We only want to pull the instance out of maintenance
1346 			 * if the disable is on adminstrative request.  The
1347 			 * graph engine sends _DISABLE events whenever a
1348 			 * service isn't in the disabled state, and we don't
1349 			 * want to pull the service out of maintenance if,
1350 			 * for example, it is there due to a dependency cycle.
1351 			 */
1352 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1353 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1354 			break;
1355 
1356 		default:
1357 #ifndef NDEBUG
1358 			(void) fprintf(stderr, "Restarter instance %s has "
1359 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1360 #endif
1361 			abort();
1362 		}
1363 	}
1364 
1365 	return (0);
1366 }
1367 
1368 static void
1369 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
1370 {
1371 	fork_info_t *info;
1372 
1373 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1374 	assert(instance_in_transition(inst) == 0);
1375 	assert(inst->ri_method_thread == 0);
1376 
1377 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1378 	    inst->ri_i.i_fmri);
1379 
1380 	/* Services in the disabled and maintenance state are ignored */
1381 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1382 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1383 	    inst->ri_i.i_enabled == 0) {
1384 		log_framework(LOG_DEBUG,
1385 		    "%s: start_instance -> is maint/disabled\n",
1386 		    inst->ri_i.i_fmri);
1387 		return;
1388 	}
1389 
1390 	/* Already started instances are left alone */
1391 	if (instance_started(inst) == 1) {
1392 		log_framework(LOG_DEBUG,
1393 		    "%s: start_instance -> is already started\n",
1394 		    inst->ri_i.i_fmri);
1395 		return;
1396 	}
1397 
1398 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1399 
1400 	(void) restarter_instance_update_states(local_handle, inst,
1401 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, NULL);
1402 
1403 	info = startd_zalloc(sizeof (fork_info_t));
1404 
1405 	info->sf_id = inst->ri_id;
1406 	info->sf_method_type = METHOD_START;
1407 	info->sf_event_type = RERR_NONE;
1408 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1409 }
1410 
1411 static void
1412 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1413     const char *aux)
1414 {
1415 	fork_info_t *info;
1416 
1417 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1418 	assert(aux != NULL);
1419 	assert(rip->ri_method_thread == 0);
1420 
1421 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
1422 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1423 	    rip->ri_i.i_fmri, aux);
1424 
1425 	/* Services in the maintenance state are ignored */
1426 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1427 		log_framework(LOG_DEBUG,
1428 		    "%s: maintain_instance -> is already in maintenance\n",
1429 		    rip->ri_i.i_fmri);
1430 		return;
1431 	}
1432 
1433 	if (immediate || !instance_started(rip)) {
1434 		if (rip->ri_i.i_primary_ctid != 0) {
1435 			rip->ri_m_inst = safe_scf_instance_create(h);
1436 			rip->ri_mi_deleted = B_FALSE;
1437 
1438 			libscf_reget_instance(rip);
1439 			method_remove_contract(rip, B_TRUE, B_TRUE);
1440 
1441 			scf_instance_destroy(rip->ri_m_inst);
1442 		}
1443 
1444 		(void) restarter_instance_update_states(h, rip,
1445 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1446 		    (char *)aux);
1447 		return;
1448 	}
1449 
1450 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1451 	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
1452 
1453 	info = startd_zalloc(sizeof (*info));
1454 	info->sf_id = rip->ri_id;
1455 	info->sf_method_type = METHOD_STOP;
1456 	info->sf_event_type = RERR_RESTART;
1457 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1458 }
1459 
1460 static void
1461 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1462 {
1463 	scf_instance_t *inst;
1464 	scf_snapshot_t *snap;
1465 	fork_info_t *info;
1466 	int r;
1467 
1468 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1469 
1470 	log_instance(rip, B_TRUE, "Rereading configuration.");
1471 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1472 	    rip->ri_i.i_fmri);
1473 
1474 rep_retry:
1475 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1476 	switch (r) {
1477 	case 0:
1478 		break;
1479 
1480 	case ECONNABORTED:
1481 		libscf_handle_rebind(h);
1482 		goto rep_retry;
1483 
1484 	case ENOENT:
1485 		/* Must have been deleted. */
1486 		return;
1487 
1488 	case EINVAL:
1489 	case ENOTSUP:
1490 	default:
1491 		bad_error("libscf_fmri_get_instance", r);
1492 	}
1493 
1494 	snap = libscf_get_running_snapshot(inst);
1495 
1496 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1497 	    &rip->ri_utmpx_prefix);
1498 	switch (r) {
1499 	case 0:
1500 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1501 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1502 		break;
1503 
1504 	case ECONNABORTED:
1505 		scf_instance_destroy(inst);
1506 		scf_snapshot_destroy(snap);
1507 		libscf_handle_rebind(h);
1508 		goto rep_retry;
1509 
1510 	case ECANCELED:
1511 	case ENOENT:
1512 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1513 		break;
1514 
1515 	default:
1516 		bad_error("libscf_get_startd_properties", r);
1517 	}
1518 
1519 	if (instance_started(rip)) {
1520 		/* Refresh does not change the state. */
1521 		(void) restarter_instance_update_states(h, rip,
1522 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
1523 
1524 		info = startd_zalloc(sizeof (*info));
1525 		info->sf_id = rip->ri_id;
1526 		info->sf_method_type = METHOD_REFRESH;
1527 		info->sf_event_type = RERR_REFRESH;
1528 
1529 		assert(rip->ri_method_thread == 0);
1530 		rip->ri_method_thread =
1531 		    startd_thread_create(method_thread, info);
1532 	}
1533 
1534 	scf_snapshot_destroy(snap);
1535 	scf_instance_destroy(inst);
1536 }
1537 
/*
 * Printable names for restarter events, indexed by the event type value
 * (see the uses with rpe_type and riq_type in this file).
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE"
};
1544 
1545 /*
1546  * void *restarter_process_events()
1547  *
1548  *   Called in a separate thread to process the events on an instance's
1549  *   queue.  Empties the queue completely, and tries to keep the thread
1550  *   around for a little while after the queue is empty to save on
1551  *   startup costs.
1552  */
1553 static void *
1554 restarter_process_events(void *arg)
1555 {
1556 	scf_handle_t *h;
1557 	restarter_instance_qentry_t *event;
1558 	restarter_inst_t *rip;
1559 	char *fmri = (char *)arg;
1560 	struct timespec to;
1561 
1562 	assert(fmri != NULL);
1563 
1564 	h = libscf_handle_create_bound_loop();
1565 
1566 	/* grab the queue lock */
1567 	rip = inst_lookup_queue(fmri);
1568 	if (rip == NULL)
1569 		goto out;
1570 
1571 again:
1572 
1573 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1574 		restarter_inst_t *inst;
1575 
1576 		/* drop the queue lock */
1577 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1578 
1579 		/*
1580 		 * Grab the inst lock -- this waits until any outstanding
1581 		 * method finishes running.
1582 		 */
1583 		inst = inst_lookup_by_name(fmri);
1584 		if (inst == NULL) {
1585 			/* Getting deleted in the middle isn't an error. */
1586 			goto cont;
1587 		}
1588 
1589 		assert(instance_in_transition(inst) == 0);
1590 
1591 		/* process the event */
1592 		switch (event->riq_type) {
1593 		case RESTARTER_EVENT_TYPE_ENABLE:
1594 		case RESTARTER_EVENT_TYPE_DISABLE:
1595 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1596 			(void) enable_inst(h, inst, event->riq_type);
1597 			break;
1598 
1599 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1600 			restarter_delete_inst(inst);
1601 			inst = NULL;
1602 			goto cont;
1603 
1604 		case RESTARTER_EVENT_TYPE_STOP:
1605 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1606 			break;
1607 
1608 		case RESTARTER_EVENT_TYPE_START:
1609 			start_instance(h, inst);
1610 			break;
1611 
1612 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1613 			maintain_instance(h, inst, 0, "dependency_cycle");
1614 			break;
1615 
1616 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1617 			maintain_instance(h, inst, 0, "invalid_dependency");
1618 			break;
1619 
1620 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1621 			maintain_instance(h, inst, 0, "administrative_request");
1622 			break;
1623 
1624 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1625 			maintain_instance(h, inst, 1, "administrative_request");
1626 			break;
1627 
1628 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1629 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1630 			break;
1631 
1632 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1633 			refresh_instance(h, inst);
1634 			break;
1635 
1636 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1637 			log_framework(LOG_WARNING, "Restarter: "
1638 			    "%s command (for %s) unimplemented.\n",
1639 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1640 			break;
1641 
1642 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1643 			if (!instance_started(inst)) {
1644 				log_framework(LOG_DEBUG, "Restarter: "
1645 				    "Not restarting %s; not running.\n",
1646 				    inst->ri_i.i_fmri);
1647 			} else {
1648 				/*
1649 				 * Stop the instance.  If it can be restarted,
1650 				 * the graph engine will send a new event.
1651 				 */
1652 				(void) stop_instance(h, inst, RSTOP_RESTART);
1653 			}
1654 			break;
1655 
1656 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1657 		default:
1658 #ifndef NDEBUG
1659 			uu_warn("%s:%d: Bad restarter event %d.  "
1660 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1661 #endif
1662 			abort();
1663 		}
1664 
1665 		assert(inst != NULL);
1666 		MUTEX_UNLOCK(&inst->ri_lock);
1667 
1668 cont:
1669 		/* grab the queue lock */
1670 		rip = inst_lookup_queue(fmri);
1671 		if (rip == NULL)
1672 			goto out;
1673 
1674 		/* delete the event */
1675 		uu_list_remove(rip->ri_queue, event);
1676 		startd_free(event, sizeof (restarter_instance_qentry_t));
1677 	}
1678 
1679 	assert(rip != NULL);
1680 
1681 	/*
1682 	 * Try to preserve the thread for a little while for future use.
1683 	 */
1684 	to.tv_sec = 3;
1685 	to.tv_nsec = 0;
1686 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1687 	    &rip->ri_queue_lock, &to);
1688 
1689 	if (uu_list_first(rip->ri_queue) != NULL)
1690 		goto again;
1691 
1692 	rip->ri_queue_thread = 0;
1693 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1694 out:
1695 	(void) scf_handle_unbind(h);
1696 	scf_handle_destroy(h);
1697 	free(fmri);
1698 	return (NULL);
1699 }
1700 
1701 static int
1702 is_admin_event(restarter_event_type_t t) {
1703 
1704 	switch (t) {
1705 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1706 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1707 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1708 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1709 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1710 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1711 		return (1);
1712 	default:
1713 		return (0);
1714 	}
1715 }
1716 
1717 static void
1718 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1719 {
1720 	restarter_instance_qentry_t *qe;
1721 	int r;
1722 
1723 	assert(PTHREAD_MUTEX_HELD(&ri->ri_queue_lock));
1724 	assert(!PTHREAD_MUTEX_HELD(&ri->ri_lock));
1725 
1726 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1727 	qe->riq_type = e->rpe_type;
1728 
1729 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1730 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1731 	assert(r == 0);
1732 }
1733 
1734 /*
1735  * void *restarter_event_thread()
1736  *
1737  *  Handle incoming graph events by placing them on a per-instance
1738  *  queue.  We can't lock the main part of the instance structure, so
1739  *  just modify the seprarately locked event queue portion.
1740  */
1741 /*ARGSUSED*/
1742 static void *
1743 restarter_event_thread(void *unused)
1744 {
1745 	scf_handle_t *h;
1746 
1747 	/*
1748 	 * This is a new thread, and thus, gets its own handle
1749 	 * to the repository.
1750 	 */
1751 	h = libscf_handle_create_bound_loop();
1752 
1753 	MUTEX_LOCK(&ru->restarter_update_lock);
1754 
1755 	/*CONSTCOND*/
1756 	while (1) {
1757 		restarter_protocol_event_t *e;
1758 
1759 		while (ru->restarter_update_wakeup == 0)
1760 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1761 			    &ru->restarter_update_lock);
1762 
1763 		ru->restarter_update_wakeup = 0;
1764 
1765 		while ((e = restarter_event_dequeue()) != NULL) {
1766 			restarter_inst_t *rip;
1767 			char *fmri;
1768 
1769 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1770 
1771 			/*
1772 			 * ADD_INSTANCE is special: there's likely no
1773 			 * instance structure yet, so we need to handle the
1774 			 * addition synchronously.
1775 			 */
1776 			switch (e->rpe_type) {
1777 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1778 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1779 					log_error(LOG_INFO, "Restarter: "
1780 					    "Could not add %s.\n", e->rpe_inst);
1781 
1782 				MUTEX_LOCK(&st->st_load_lock);
1783 				if (--st->st_load_instances == 0)
1784 					(void) pthread_cond_broadcast(
1785 					    &st->st_load_cv);
1786 				MUTEX_UNLOCK(&st->st_load_lock);
1787 
1788 				goto nolookup;
1789 			}
1790 
1791 			/*
1792 			 * Lookup the instance, locking only the event queue.
1793 			 * Can't grab ri_lock here because it might be held
1794 			 * by a long-running method.
1795 			 */
1796 			rip = inst_lookup_queue(e->rpe_inst);
1797 			if (rip == NULL) {
1798 				log_error(LOG_INFO, "Restarter: "
1799 				    "Ignoring %s command for unknown service "
1800 				    "%s.\n", event_names[e->rpe_type],
1801 				    e->rpe_inst);
1802 				goto nolookup;
1803 			}
1804 
1805 			/* Keep ADMIN events from filling up the queue. */
1806 			if (is_admin_event(e->rpe_type) &&
1807 			    uu_list_numnodes(rip->ri_queue) >
1808 			    RINST_QUEUE_THRESHOLD) {
1809 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1810 				log_instance(rip, B_TRUE, "Instance event "
1811 				    "queue overflow.  Dropping administrative "
1812 				    "request.");
1813 				log_framework(LOG_DEBUG, "%s: Instance event "
1814 				    "queue overflow.  Dropping administrative "
1815 				    "request.\n", rip->ri_i.i_fmri);
1816 				goto nolookup;
1817 			}
1818 
1819 			/* Now add the event to the instance queue. */
1820 			restarter_queue_event(rip, e);
1821 
1822 			if (rip->ri_queue_thread == 0) {
1823 				/*
1824 				 * Start a thread if one isn't already
1825 				 * running.
1826 				 */
1827 				fmri = safe_strdup(e->rpe_inst);
1828 				rip->ri_queue_thread =  startd_thread_create(
1829 				    restarter_process_events, (void *)fmri);
1830 			} else {
1831 				/*
1832 				 * Signal the existing thread that there's
1833 				 * a new event.
1834 				 */
1835 				(void) pthread_cond_broadcast(
1836 				    &rip->ri_queue_cv);
1837 			}
1838 
1839 			MUTEX_UNLOCK(&rip->ri_queue_lock);
1840 nolookup:
1841 			restarter_event_release(e);
1842 
1843 			MUTEX_LOCK(&ru->restarter_update_lock);
1844 		}
1845 	}
1846 
1847 	/*
1848 	 * Unreachable for now -- there's currently no graceful cleanup
1849 	 * called on exit().
1850 	 */
1851 	(void) scf_handle_unbind(h);
1852 	scf_handle_destroy(h);
1853 	return (NULL);
1854 }
1855 
1856 static restarter_inst_t *
1857 contract_to_inst(ctid_t ctid)
1858 {
1859 	restarter_inst_t *inst;
1860 	int id;
1861 
1862 	id = lookup_inst_by_contract(ctid);
1863 	if (id == -1)
1864 		return (NULL);
1865 
1866 	inst = inst_lookup_by_id(id);
1867 	if (inst != NULL) {
1868 		/*
1869 		 * Since ri_lock isn't held by the contract id lookup, this
1870 		 * instance may have been restarted and now be in a new
1871 		 * contract, making the old contract no longer valid for this
1872 		 * instance.
1873 		 */
1874 		if (ctid != inst->ri_i.i_primary_ctid) {
1875 			MUTEX_UNLOCK(&inst->ri_lock);
1876 			inst = NULL;
1877 		}
1878 	}
1879 	return (inst);
1880 }
1881 
1882 /*
1883  * void contract_action()
1884  *   Take action on contract events.
1885  */
1886 static void
1887 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
1888     uint32_t type)
1889 {
1890 	const char *fmri = inst->ri_i.i_fmri;
1891 
1892 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1893 
1894 	/*
1895 	 * If startd has stopped this contract, there is no need to
1896 	 * stop it again.
1897 	 */
1898 	if (inst->ri_i.i_primary_ctid > 0 &&
1899 	    inst->ri_i.i_primary_ctid_stopped)
1900 		return;
1901 
1902 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
1903 	    | CT_PR_EV_HWERR)) == 0) {
1904 		/*
1905 		 * There shouldn't be other events, since that's not how we set
1906 		 * the terms. Thus, just log an error and drive on.
1907 		 */
1908 		log_framework(LOG_NOTICE,
1909 		    "%s: contract %ld received unexpected critical event "
1910 		    "(%d)\n", fmri, id, type);
1911 		    return;
1912 	}
1913 
1914 	assert(instance_in_transition(inst) == 0);
1915 
1916 	if (instance_is_wait_style(inst)) {
1917 		/*
1918 		 * We ignore all events; if they impact the
1919 		 * process we're monitoring, then the
1920 		 * wait_thread will stop the instance.
1921 		 */
1922 		log_framework(LOG_DEBUG,
1923 		    "%s: ignoring contract event on wait-style service\n",
1924 		    fmri);
1925 	} else {
1926 		/*
1927 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
1928 		 */
1929 		switch (type) {
1930 		case CT_PR_EV_EMPTY:
1931 			(void) stop_instance(h, inst, RSTOP_EXIT);
1932 			break;
1933 		case CT_PR_EV_CORE:
1934 			(void) stop_instance(h, inst, RSTOP_CORE);
1935 			break;
1936 		case CT_PR_EV_SIGNAL:
1937 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
1938 			break;
1939 		case CT_PR_EV_HWERR:
1940 			(void) stop_instance(h, inst, RSTOP_HWERR);
1941 			break;
1942 		}
1943 	}
1944 }
1945 
1946 /*
1947  * void *restarter_contract_event_thread(void *)
1948  *   Listens to the process contract bundle for critical events, taking action
1949  *   on events from contracts we know we are responsible for.
1950  */
1951 /*ARGSUSED*/
1952 static void *
1953 restarter_contracts_event_thread(void *unused)
1954 {
1955 	int fd, err;
1956 	scf_handle_t *local_handle;
1957 
1958 	/*
1959 	 * Await graph load completion.  That is, stop here, until we've scanned
1960 	 * the repository for contract - instance associations.
1961 	 */
1962 	MUTEX_LOCK(&st->st_load_lock);
1963 	while (!(st->st_load_complete && st->st_load_instances == 0))
1964 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
1965 	MUTEX_UNLOCK(&st->st_load_lock);
1966 
1967 	/*
1968 	 * This is a new thread, and thus, gets its own handle
1969 	 * to the repository.
1970 	 */
1971 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
1972 		uu_die("Unable to bind a new repository handle: %s\n",
1973 		    scf_strerror(scf_error()));
1974 
1975 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
1976 	if (fd == -1)
1977 		uu_die("process bundle open failed");
1978 
1979 	/*
1980 	 * Make sure we get all events (including those generated by configd
1981 	 * before this thread was started).
1982 	 */
1983 	err = ct_event_reset(fd);
1984 	assert(err == 0);
1985 
1986 	for (;;) {
1987 		int efd, sfd;
1988 		ct_evthdl_t ev;
1989 		uint32_t type;
1990 		ctevid_t evid;
1991 		ct_stathdl_t status;
1992 		ctid_t ctid;
1993 		restarter_inst_t *inst;
1994 		uint64_t cookie;
1995 
1996 		if (err = ct_event_read_critical(fd, &ev)) {
1997 			log_error(LOG_WARNING,
1998 			    "Error reading next contract event: %s",
1999 			    strerror(err));
2000 			continue;
2001 		}
2002 
2003 		evid = ct_event_get_evid(ev);
2004 		ctid = ct_event_get_ctid(ev);
2005 		type = ct_event_get_type(ev);
2006 
2007 		/* Fetch cookie. */
2008 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2009 		    < 0) {
2010 			ct_event_free(ev);
2011 			continue;
2012 		}
2013 
2014 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2015 			log_framework(LOG_WARNING, "Could not get status for "
2016 			    "contract %ld: %s\n", ctid, strerror(err));
2017 
2018 			startd_close(sfd);
2019 			ct_event_free(ev);
2020 			continue;
2021 		}
2022 
2023 		cookie = ct_status_get_cookie(status);
2024 
2025 		ct_status_free(status);
2026 
2027 		startd_close(sfd);
2028 
2029 		/*
2030 		 * svc.configd(1M) restart handling performed by the
2031 		 * fork_configd_thread.  We don't acknowledge, as that thread
2032 		 * will do so.
2033 		 */
2034 		if (cookie == CONFIGD_COOKIE) {
2035 			ct_event_free(ev);
2036 			continue;
2037 		}
2038 
2039 		inst = contract_to_inst(ctid);
2040 		if (inst == NULL) {
2041 			/*
2042 			 * This can happen if we receive an EMPTY
2043 			 * event for an abandoned contract.
2044 			 */
2045 			log_framework(LOG_DEBUG,
2046 			    "Received event %d for unknown contract id "
2047 			    "%ld\n", type, ctid);
2048 		} else {
2049 			log_framework(LOG_DEBUG,
2050 			    "Received event %d for contract id "
2051 			    "%ld (%s)\n", type, ctid,
2052 			    inst->ri_i.i_fmri);
2053 
2054 			contract_action(local_handle, inst, ctid, type);
2055 
2056 			MUTEX_UNLOCK(&inst->ri_lock);
2057 		}
2058 
2059 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2060 		    O_WRONLY);
2061 		if (efd != -1) {
2062 			(void) ct_ctl_ack(efd, evid);
2063 			startd_close(efd);
2064 		}
2065 
2066 		ct_event_free(ev);
2067 
2068 	}
2069 
2070 	/*NOTREACHED*/
2071 	return (NULL);
2072 }
2073 
2074 /*
2075  * Timeout queue, processed by restarter_timeouts_event_thread().
2076  */
2077 timeout_queue_t *timeouts;
2078 static uu_list_pool_t *timeout_pool;
2079 
2080 typedef struct timeout_update {
2081 	pthread_mutex_t		tu_lock;
2082 	pthread_cond_t		tu_cv;
2083 	int			tu_wakeup;
2084 } timeout_update_t;
2085 
2086 timeout_update_t *tu;
2087 
2088 static const char *timeout_ovr_svcs[] = {
2089 	"svc:/system/manifest-import:default",
2090 	"svc:/network/initial:default",
2091 	"svc:/network/service:default",
2092 	"svc:/system/rmtmpfiles:default",
2093 	"svc:/network/loopback:default",
2094 	"svc:/network/physical:default",
2095 	"svc:/system/device/local:default",
2096 	"svc:/system/metainit:default",
2097 	"svc:/system/filesystem/usr:default",
2098 	"svc:/system/filesystem/minimal:default",
2099 	"svc:/system/filesystem/local:default",
2100 	NULL
2101 };
2102 
2103 int
2104 is_timeout_ovr(restarter_inst_t *inst)
2105 {
2106 	int i;
2107 
2108 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2109 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2110 			log_instance(inst, B_TRUE, "Timeout override by "
2111 			    "svc.startd.  Using infinite timeout");
2112 			return (1);
2113 		}
2114 	}
2115 
2116 	return (0);
2117 }
2118 
2119 /*ARGSUSED*/
2120 static int
2121 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2122 {
2123 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2124 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2125 
2126 	if (t1 > t2)
2127 		return (1);
2128 	else if (t1 < t2)
2129 		return (-1);
2130 	return (0);
2131 }
2132 
2133 void
2134 timeout_init()
2135 {
2136 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2137 
2138 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2139 
2140 	timeout_pool = startd_list_pool_create("timeouts",
2141 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2142 	    timeout_compare, UU_LIST_POOL_DEBUG);
2143 	assert(timeout_pool != NULL);
2144 
2145 	timeouts->tq_list = startd_list_create(timeout_pool,
2146 	    timeouts, UU_LIST_SORTED);
2147 	assert(timeouts->tq_list != NULL);
2148 
2149 	tu = startd_zalloc(sizeof (timeout_update_t));
2150 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2151 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2152 }
2153 
2154 void
2155 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2156 {
2157 	hrtime_t now, timeout;
2158 	timeout_entry_t *entry;
2159 	uu_list_index_t idx;
2160 
2161 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2162 
2163 	now = gethrtime();
2164 
2165 	/*
2166 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2167 	 * just return.
2168 	 */
2169 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2170 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2171 		    "treating as infinite.");
2172 		return;
2173 	}
2174 
2175 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2176 	timeout = now + (timeout_sec * 1000000000LL);
2177 
2178 	entry = startd_alloc(sizeof (timeout_entry_t));
2179 	entry->te_timeout = timeout;
2180 	entry->te_ctid = cid;
2181 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2182 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2183 	entry->te_fired = 0;
2184 	/* Insert the calculated timeout time onto the queue. */
2185 	MUTEX_LOCK(&timeouts->tq_lock);
2186 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2187 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2188 	uu_list_insert(timeouts->tq_list, entry, idx);
2189 	MUTEX_UNLOCK(&timeouts->tq_lock);
2190 
2191 	assert(inst->ri_timeout == NULL);
2192 	inst->ri_timeout = entry;
2193 
2194 	MUTEX_LOCK(&tu->tu_lock);
2195 	tu->tu_wakeup = 1;
2196 	(void) pthread_cond_broadcast(&tu->tu_cv);
2197 	MUTEX_UNLOCK(&tu->tu_lock);
2198 }
2199 
2200 
2201 void
2202 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2203 {
2204 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2205 
2206 	if (inst->ri_timeout == NULL)
2207 		return;
2208 
2209 	assert(inst->ri_timeout->te_ctid == cid);
2210 
2211 	MUTEX_LOCK(&timeouts->tq_lock);
2212 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2213 	MUTEX_UNLOCK(&timeouts->tq_lock);
2214 
2215 	free(inst->ri_timeout->te_fmri);
2216 	free(inst->ri_timeout->te_logstem);
2217 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2218 	inst->ri_timeout = NULL;
2219 }
2220 
2221 static int
2222 timeout_now()
2223 {
2224 	timeout_entry_t *e;
2225 	hrtime_t now;
2226 	int ret;
2227 
2228 	now = gethrtime();
2229 
2230 	/*
2231 	 * Walk through the (sorted) timeouts list.  While the timeout
2232 	 * at the head of the list is <= the current time, kill the
2233 	 * method.
2234 	 */
2235 	MUTEX_LOCK(&timeouts->tq_lock);
2236 
2237 	for (e = uu_list_first(timeouts->tq_list);
2238 	    e != NULL && e->te_timeout <= now;
2239 	    e = uu_list_next(timeouts->tq_list, e)) {
2240 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2241 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2242 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2243 		    "Method or service exit timed out.  Killing contract %ld",
2244 		    e->te_ctid);
2245 		e->te_fired = 1;
2246 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2247 	}
2248 
2249 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2250 		ret = 0;
2251 	else
2252 		ret = -1;
2253 
2254 	MUTEX_UNLOCK(&timeouts->tq_lock);
2255 
2256 	return (ret);
2257 }
2258 
2259 /*
2260  * void *restarter_timeouts_event_thread(void *)
2261  *   Responsible for monitoring the method timeouts.  This thread must
2262  *   be started before any methods are called.
2263  */
2264 /*ARGSUSED*/
2265 static void *
2266 restarter_timeouts_event_thread(void *unused)
2267 {
2268 	/*
2269 	 * Timeouts are entered on a priority queue, which is processed by
2270 	 * this thread.  As timeouts are specified in seconds, we'll do
2271 	 * the necessary processing every second, as long as the queue
2272 	 * is not empty.
2273 	 */
2274 
2275 	/*CONSTCOND*/
2276 	while (1) {
2277 		/*
2278 		 * As long as the timeout list isn't empty, process it
2279 		 * every second.
2280 		 */
2281 		if (timeout_now() == 0) {
2282 			(void) sleep(1);
2283 			continue;
2284 		}
2285 
2286 		/* The list is empty, wait until we have more timeouts. */
2287 		MUTEX_LOCK(&tu->tu_lock);
2288 
2289 		while (tu->tu_wakeup == 0)
2290 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2291 
2292 		tu->tu_wakeup = 0;
2293 		MUTEX_UNLOCK(&tu->tu_lock);
2294 	}
2295 
2296 	return (NULL);
2297 }
2298 
2299 void
2300 restarter_start()
2301 {
2302 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2303 	(void) startd_thread_create(restarter_event_thread, NULL);
2304 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2305 	(void) startd_thread_create(wait_thread, NULL);
2306 }
2307 
2308 
2309 void
2310 restarter_init()
2311 {
2312 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2313 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2314 		ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2315 	(void) memset(&instance_list, 0, sizeof (instance_list));
2316 
2317 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2318 	instance_list.ril_instance_list = startd_list_create(
2319 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2320 
2321 	restarter_queue_pool = startd_list_pool_create(
2322 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2323 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2324 	    UU_LIST_POOL_DEBUG);
2325 
2326 	contract_list_pool = startd_list_pool_create(
2327 	    "contract_list", sizeof (contract_entry_t),
2328 	    offsetof(contract_entry_t,  ce_link), NULL,
2329 	    UU_LIST_POOL_DEBUG);
2330 	contract_hash_init();
2331 
2332 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2333 }
2334