xref: /titanic_41/usr/src/cmd/svc/startd/restarter.c (revision 9f1fc992b281e57216b036e784b762829b875b4b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * restarter.c - service manipulation
31  *
32  * This component manages services whose restarter is svc.startd, the standard
33  * restarter.  It translates restarter protocol events from the graph engine
34  * into actions on processes, as a delegated restarter would do.
35  *
36  * The master restarter manages a number of always-running threads:
37  *   - restarter event thread: events from the graph engine
38  *   - timeout thread: thread to fire queued timeouts
39  *   - contract thread: thread to handle contract events
40  *   - wait thread: thread to handle wait-based services
41  *
42  * The other threads are created as-needed:
43  *   - per-instance method threads
44  *   - per-instance event processing threads
45  *
46  * The interaction of all threads must result in the following conditions
47  * being satisfied (on a per-instance basis):
48  *   - restarter events must be processed in order
49  *   - method execution must be serialized
50  *   - instance delete must be held until outstanding methods are complete
51  *   - contract events shouldn't be processed while a method is running
52  *   - timeouts should fire even when a method is running
53  *
54  * Service instances are represented by restarter_inst_t's and are kept in the
55  * instance_list list.
56  *
57  * Service States
58  *   The current state of a service instance is kept in
59  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
60  *   some time, then before we effect the transition we set
61  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
62  *   rotate i_next_state to i_state and set i_next_state to
63  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
64  *   held.  The exception is when we launch methods, which are done with
65  *   a separate thread.  To keep any other threads from grabbing ri_lock before
66  *   method_thread() does, we set ri_method_thread to the thread id of the
67  *   method thread, and when it is nonzero any thread with a different thread id
68  *   waits on ri_method_cv.
69  *
70  * Method execution is serialized by blocking on ri_method_cv in
71  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
72  * also prevents the instance structure from being deleted until all
73  * outstanding operations such as method_thread() have finished.
74  *
75  * Lock ordering:
76  *
77  * dgraph_lock [can be held when taking:]
78  *   utmpx_lock
79  *   dictionary->dict_lock
80  *   st->st_load_lock
81  *   wait_info_lock
82  *   ru->restarter_update_lock
83  *     restarter_queue->rpeq_lock
84  *   instance_list.ril_lock
85  *     inst->ri_lock
86  *   st->st_configd_live_lock
87  *
88  * instance_list.ril_lock
89  *   graph_queue->gpeq_lock
90  *   gu->gu_lock
91  *   st->st_configd_live_lock
92  *   dictionary->dict_lock
93  *   inst->ri_lock
94  *     graph_queue->gpeq_lock
95  *     gu->gu_lock
96  *     tu->tu_lock
97  *     tq->tq_lock
98  *     inst->ri_queue_lock
99  *       wait_info_lock
100  *       bp->cb_lock
101  *     utmpx_lock
102  *
103  * single_user_thread_lock
104  *   wait_info_lock
105  *   utmpx_lock
106  *
107  * gu_freeze_lock
108  *
109  * logbuf_mutex nests inside pretty much everything.
110  */
111 
112 #include <sys/contract/process.h>
113 #include <sys/ctfs.h>
114 #include <sys/stat.h>
115 #include <sys/time.h>
116 #include <sys/types.h>
117 #include <sys/uio.h>
118 #include <sys/wait.h>
119 #include <assert.h>
120 #include <errno.h>
121 #include <fcntl.h>
122 #include <libcontract.h>
123 #include <libcontract_priv.h>
124 #include <libintl.h>
125 #include <librestart.h>
126 #include <librestart_priv.h>
127 #include <libuutil.h>
128 #include <limits.h>
129 #include <poll.h>
130 #include <port.h>
131 #include <pthread.h>
132 #include <stdarg.h>
133 #include <stdio.h>
134 #include <strings.h>
135 #include <unistd.h>
136 
137 #include "startd.h"
138 #include "protocol.h"
139 
140 static uu_list_pool_t *restarter_instance_pool;
141 static restarter_instance_list_t instance_list;
142 
143 static uu_list_pool_t *restarter_queue_pool;
144 
145 /*ARGSUSED*/
146 static int
147 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
148     void *private)
149 {
150 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
151 	int rc_id = *(int *)rc_arg;
152 
153 	if (lc_id > rc_id)
154 		return (1);
155 	if (lc_id < rc_id)
156 		return (-1);
157 	return (0);
158 }
159 
160 static restarter_inst_t *
161 inst_lookup_by_name(const char *name)
162 {
163 	int id;
164 
165 	id = dict_lookup_byname(name);
166 	if (id == -1)
167 		return (NULL);
168 
169 	return (inst_lookup_by_id(id));
170 }
171 
172 restarter_inst_t *
173 inst_lookup_by_id(int id)
174 {
175 	restarter_inst_t *inst;
176 
177 	MUTEX_LOCK(&instance_list.ril_lock);
178 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
179 	if (inst != NULL)
180 		MUTEX_LOCK(&inst->ri_lock);
181 	MUTEX_UNLOCK(&instance_list.ril_lock);
182 
183 	if (inst != NULL) {
184 		while (inst->ri_method_thread != 0 &&
185 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
186 			++inst->ri_method_waiters;
187 			(void) pthread_cond_wait(&inst->ri_method_cv,
188 			    &inst->ri_lock);
189 			assert(inst->ri_method_waiters > 0);
190 			--inst->ri_method_waiters;
191 		}
192 	}
193 
194 	return (inst);
195 }
196 
197 static restarter_inst_t *
198 inst_lookup_queue(const char *name)
199 {
200 	int id;
201 	restarter_inst_t *inst;
202 
203 	id = dict_lookup_byname(name);
204 	if (id == -1)
205 		return (NULL);
206 
207 	MUTEX_LOCK(&instance_list.ril_lock);
208 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
209 	if (inst != NULL)
210 		MUTEX_LOCK(&inst->ri_queue_lock);
211 	MUTEX_UNLOCK(&instance_list.ril_lock);
212 
213 	return (inst);
214 }
215 
216 const char *
217 service_style(int flags)
218 {
219 	switch (flags & RINST_STYLE_MASK) {
220 	case RINST_CONTRACT:	return ("contract");
221 	case RINST_TRANSIENT:	return ("transient");
222 	case RINST_WAIT:	return ("wait");
223 
224 	default:
225 #ifndef NDEBUG
226 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
227 #endif
228 		abort();
229 		/* NOTREACHED */
230 	}
231 }
232 
233 /*
234  * Fails with ECONNABORTED or ECANCELED.
235  */
236 static int
237 check_contract(restarter_inst_t *inst, boolean_t primary,
238     scf_instance_t *scf_inst)
239 {
240 	ctid_t *ctidp;
241 	int fd, r;
242 
243 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
244 	    &inst->ri_i.i_transient_ctid;
245 
246 	assert(*ctidp >= 1);
247 
248 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
249 	if (fd >= 0) {
250 		r = close(fd);
251 		assert(r == 0);
252 		return (0);
253 	}
254 
255 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
256 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
257 	switch (r) {
258 	case 0:
259 	case ECONNABORTED:
260 	case ECANCELED:
261 		*ctidp = 0;
262 		return (r);
263 
264 	case ENOMEM:
265 		uu_die("Out of memory\n");
266 		/* NOTREACHED */
267 
268 	case EPERM:
269 		uu_die("Insufficient privilege.\n");
270 		/* NOTREACHED */
271 
272 	case EACCES:
273 		uu_die("Repository backend access denied.\n");
274 		/* NOTREACHED */
275 
276 	case EROFS:
277 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
278 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
279 		return (0);
280 
281 	case EINVAL:
282 	case EBADF:
283 	default:
284 		assert(0);
285 		abort();
286 		/* NOTREACHED */
287 	}
288 }
289 
290 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
291 
292 /*
293  * int restarter_insert_inst(scf_handle_t *, char *)
294  *   If the inst is already in the restarter list, return its id.  If the inst
295  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
296  *   states, insert it into the list, and return 0.
297  *
298  *   Fails with
299  *     ENOENT - name is not in the repository
300  */
301 static int
302 restarter_insert_inst(scf_handle_t *h, const char *name)
303 {
304 	int id, r;
305 	restarter_inst_t *inst;
306 	uu_list_index_t idx;
307 	scf_service_t *scf_svc;
308 	scf_instance_t *scf_inst;
309 	scf_snapshot_t *snap = NULL;
310 	scf_propertygroup_t *pg;
311 	char *svc_name, *inst_name;
312 	char logfilebuf[PATH_MAX];
313 	char *c;
314 	boolean_t do_commit_states;
315 	restarter_instance_state_t state, next_state;
316 	protocol_states_t *ps;
317 	pid_t start_pid;
318 
319 	MUTEX_LOCK(&instance_list.ril_lock);
320 
321 	/*
322 	 * We don't use inst_lookup_by_name() here because we want the lookup
323 	 * & insert to be atomic.
324 	 */
325 	id = dict_lookup_byname(name);
326 	if (id != -1) {
327 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
328 		    &idx);
329 		if (inst != NULL) {
330 			MUTEX_UNLOCK(&instance_list.ril_lock);
331 			return (0);
332 		}
333 	}
334 
335 	/* Allocate an instance */
336 	inst = startd_zalloc(sizeof (restarter_inst_t));
337 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
338 	inst->ri_utmpx_prefix[0] = '\0';
339 
340 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
341 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
342 
343 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
344 
345 	/*
346 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
347 	 * just in case.
348 	 */
349 	inst->ri_id = (id != -1 ? id : dict_insert(name));
350 
351 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
352 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
353 
354 	scf_svc = safe_scf_service_create(h);
355 	scf_inst = safe_scf_instance_create(h);
356 	pg = safe_scf_pg_create(h);
357 	svc_name = startd_alloc(max_scf_name_size);
358 	inst_name = startd_alloc(max_scf_name_size);
359 
360 rep_retry:
361 	if (snap != NULL)
362 		scf_snapshot_destroy(snap);
363 	if (inst->ri_logstem != NULL)
364 		startd_free(inst->ri_logstem, PATH_MAX);
365 	if (inst->ri_common_name != NULL)
366 		startd_free(inst->ri_common_name, max_scf_value_size);
367 	if (inst->ri_C_common_name != NULL)
368 		startd_free(inst->ri_C_common_name, max_scf_value_size);
369 	snap = NULL;
370 	inst->ri_logstem = NULL;
371 	inst->ri_common_name = NULL;
372 	inst->ri_C_common_name = NULL;
373 
374 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
375 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
376 		switch (scf_error()) {
377 		case SCF_ERROR_CONNECTION_BROKEN:
378 			libscf_handle_rebind(h);
379 			goto rep_retry;
380 
381 		case SCF_ERROR_NOT_FOUND:
382 			goto deleted;
383 		}
384 
385 		uu_die("Can't decode FMRI %s: %s\n", name,
386 		    scf_strerror(scf_error()));
387 	}
388 
389 	/*
390 	 * If there's no running snapshot, then we execute using the editing
391 	 * snapshot.  Pending snapshots will be taken later.
392 	 */
393 	snap = libscf_get_running_snapshot(scf_inst);
394 
395 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
396 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
397 	    0)) {
398 		switch (scf_error()) {
399 		case SCF_ERROR_NOT_SET:
400 			break;
401 
402 		case SCF_ERROR_CONNECTION_BROKEN:
403 			libscf_handle_rebind(h);
404 			goto rep_retry;
405 
406 		default:
407 			assert(0);
408 			abort();
409 		}
410 
411 		goto deleted;
412 	}
413 
414 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
415 	for (c = logfilebuf; *c != '\0'; c++)
416 		if (*c == '/')
417 			*c = '-';
418 
419 	inst->ri_logstem = startd_alloc(PATH_MAX);
420 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
421 	    LOG_SUFFIX);
422 
423 	/*
424 	 * If the restarter group is missing, use uninit/none.  Otherwise,
425 	 * we're probably being restarted & don't want to mess up the states
426 	 * that are there.
427 	 */
428 	state = RESTARTER_STATE_UNINIT;
429 	next_state = RESTARTER_STATE_NONE;
430 
431 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
432 	if (r != 0) {
433 		switch (scf_error()) {
434 		case SCF_ERROR_CONNECTION_BROKEN:
435 			libscf_handle_rebind(h);
436 			goto rep_retry;
437 
438 		case SCF_ERROR_NOT_SET:
439 			goto deleted;
440 
441 		case SCF_ERROR_NOT_FOUND:
442 			/*
443 			 * This shouldn't happen since the graph engine should
444 			 * have initialized the state to uninitialized/none if
445 			 * there was no restarter pg.  In case somebody
446 			 * deleted it, though....
447 			 */
448 			do_commit_states = B_TRUE;
449 			break;
450 
451 		default:
452 			assert(0);
453 			abort();
454 		}
455 	} else {
456 		r = libscf_read_states(pg, &state, &next_state);
457 		if (r != 0) {
458 			do_commit_states = B_TRUE;
459 		} else {
460 			if (next_state != RESTARTER_STATE_NONE) {
461 				/*
462 				 * Force next_state to _NONE since we
463 				 * don't look for method processes.
464 				 */
465 				next_state = RESTARTER_STATE_NONE;
466 				do_commit_states = B_TRUE;
467 			} else {
468 				/*
469 				 * Inform the restarter of our state without
470 				 * changing the STIME in the repository.
471 				 */
472 				ps = startd_alloc(sizeof (*ps));
473 				inst->ri_i.i_state = ps->ps_state = state;
474 				inst->ri_i.i_next_state = ps->ps_state_next =
475 				    next_state;
476 
477 				graph_protocol_send_event(inst->ri_i.i_fmri,
478 				    GRAPH_UPDATE_STATE_CHANGE, ps);
479 
480 				do_commit_states = B_FALSE;
481 			}
482 		}
483 	}
484 
485 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
486 	    &inst->ri_utmpx_prefix)) {
487 	case 0:
488 		break;
489 
490 	case ECONNABORTED:
491 		libscf_handle_rebind(h);
492 		goto rep_retry;
493 
494 	case ECANCELED:
495 		goto deleted;
496 
497 	case ENOENT:
498 		/*
499 		 * This is odd, because the graph engine should have required
500 		 * the general property group.  So we'll just use default
501 		 * flags in anticipation of the graph engine sending us
502 		 * REMOVE_INSTANCE when it finds out that the general property
503 		 * group has been deleted.
504 		 */
505 		inst->ri_flags = RINST_CONTRACT;
506 		break;
507 
508 	default:
509 		assert(0);
510 		abort();
511 	}
512 
513 	switch (libscf_get_template_values(scf_inst, snap,
514 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
515 	case 0:
516 		break;
517 
518 	case ECONNABORTED:
519 		libscf_handle_rebind(h);
520 		goto rep_retry;
521 
522 	case ECANCELED:
523 		goto deleted;
524 
525 	case ECHILD:
526 	case ENOENT:
527 		break;
528 
529 	default:
530 		assert(0);
531 		abort();
532 	}
533 
534 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
535 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
536 	    &start_pid)) {
537 	case 0:
538 		break;
539 
540 	case ECONNABORTED:
541 		libscf_handle_rebind(h);
542 		goto rep_retry;
543 
544 	case ECANCELED:
545 		goto deleted;
546 
547 	default:
548 		assert(0);
549 		abort();
550 	}
551 
552 	if (inst->ri_i.i_primary_ctid >= 1) {
553 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
554 
555 		switch (check_contract(inst, B_TRUE, scf_inst)) {
556 		case 0:
557 			break;
558 
559 		case ECONNABORTED:
560 			libscf_handle_rebind(h);
561 			goto rep_retry;
562 
563 		case ECANCELED:
564 			goto deleted;
565 
566 		default:
567 			assert(0);
568 			abort();
569 		}
570 	}
571 
572 	if (inst->ri_i.i_transient_ctid >= 1) {
573 		switch (check_contract(inst, B_FALSE, scf_inst)) {
574 		case 0:
575 			break;
576 
577 		case ECONNABORTED:
578 			libscf_handle_rebind(h);
579 			goto rep_retry;
580 
581 		case ECANCELED:
582 			goto deleted;
583 
584 		default:
585 			assert(0);
586 			abort();
587 		}
588 	}
589 
590 	/* No more failures we live through, so add it to the list. */
591 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
592 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
593 	MUTEX_LOCK(&inst->ri_lock);
594 	MUTEX_LOCK(&inst->ri_queue_lock);
595 
596 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
597 
598 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
599 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
600 	MUTEX_UNLOCK(&instance_list.ril_lock);
601 
602 	if (start_pid != -1 &&
603 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
604 		int ret;
605 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
606 		if (ret == -1) {
607 			/*
608 			 * Implication:  if we can't reregister the
609 			 * instance, we will start another one.  Two
610 			 * instances may or may not result in a resource
611 			 * conflict.
612 			 */
613 			log_error(LOG_WARNING,
614 			    "%s: couldn't reregister %ld for wait\n",
615 			    inst->ri_i.i_fmri, start_pid);
616 		} else if (ret == 1) {
617 			/*
618 			 * Leading PID has exited.
619 			 */
620 			(void) stop_instance(h, inst, RSTOP_EXIT);
621 		}
622 	}
623 
624 
625 	scf_pg_destroy(pg);
626 
627 	if (do_commit_states)
628 		(void) restarter_instance_update_states(h, inst, state,
629 		    next_state, RERR_NONE, NULL);
630 
631 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
632 	    service_style(inst->ri_flags));
633 
634 	MUTEX_UNLOCK(&inst->ri_queue_lock);
635 	MUTEX_UNLOCK(&inst->ri_lock);
636 
637 	startd_free(svc_name, max_scf_name_size);
638 	startd_free(inst_name, max_scf_name_size);
639 	scf_snapshot_destroy(snap);
640 	scf_instance_destroy(scf_inst);
641 	scf_service_destroy(scf_svc);
642 
643 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
644 	    name);
645 
646 	return (0);
647 
648 deleted:
649 	MUTEX_UNLOCK(&instance_list.ril_lock);
650 	startd_free(inst_name, max_scf_name_size);
651 	startd_free(svc_name, max_scf_name_size);
652 	if (snap != NULL)
653 		scf_snapshot_destroy(snap);
654 	scf_pg_destroy(pg);
655 	scf_instance_destroy(scf_inst);
656 	scf_service_destroy(scf_svc);
657 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
658 	uu_list_destroy(inst->ri_queue);
659 	if (inst->ri_logstem != NULL)
660 		startd_free(inst->ri_logstem, PATH_MAX);
661 	if (inst->ri_common_name != NULL)
662 		startd_free(inst->ri_common_name, max_scf_value_size);
663 	if (inst->ri_C_common_name != NULL)
664 		startd_free(inst->ri_C_common_name, max_scf_value_size);
665 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
666 	startd_free(inst, sizeof (restarter_inst_t));
667 	return (ENOENT);
668 }
669 
670 static void
671 restarter_delete_inst(restarter_inst_t *ri)
672 {
673 	int id;
674 	restarter_inst_t *rip;
675 	void *cookie = NULL;
676 	restarter_instance_qentry_t *e;
677 
678 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
679 
680 	/*
681 	 * Must drop the instance lock so we can pick up the instance_list
682 	 * lock & remove the instance.
683 	 */
684 	id = ri->ri_id;
685 	MUTEX_UNLOCK(&ri->ri_lock);
686 
687 	MUTEX_LOCK(&instance_list.ril_lock);
688 
689 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
690 	if (rip == NULL) {
691 		MUTEX_UNLOCK(&instance_list.ril_lock);
692 		return;
693 	}
694 
695 	assert(ri == rip);
696 
697 	uu_list_remove(instance_list.ril_instance_list, ri);
698 
699 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
700 	    ri->ri_i.i_fmri);
701 
702 	MUTEX_UNLOCK(&instance_list.ril_lock);
703 
704 	/*
705 	 * We can lock the instance without holding the instance_list lock
706 	 * since we removed the instance from the list.
707 	 */
708 	MUTEX_LOCK(&ri->ri_lock);
709 	MUTEX_LOCK(&ri->ri_queue_lock);
710 
711 	if (ri->ri_i.i_primary_ctid >= 1)
712 		contract_hash_remove(ri->ri_i.i_primary_ctid);
713 
714 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
715 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
716 
717 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
718 		startd_free(e, sizeof (*e));
719 	uu_list_destroy(ri->ri_queue);
720 
721 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
722 	startd_free(ri->ri_logstem, PATH_MAX);
723 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
724 	(void) pthread_mutex_destroy(&ri->ri_lock);
725 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
726 	startd_free(ri, sizeof (restarter_inst_t));
727 }
728 
729 /*
730  * instance_is_wait_style()
731  *
732  *   Returns 1 if the given instance is a "wait-style" service instance.
733  */
734 int
735 instance_is_wait_style(restarter_inst_t *inst)
736 {
737 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
738 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
739 }
740 
741 /*
742  * instance_is_transient_style()
743  *
744  *   Returns 1 if the given instance is a transient service instance.
745  */
746 int
747 instance_is_transient_style(restarter_inst_t *inst)
748 {
749 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
750 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
751 }
752 
753 /*
754  * instance_in_transition()
755  * Returns 1 if instance is in transition, 0 if not
756  */
757 int
758 instance_in_transition(restarter_inst_t *inst)
759 {
760 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
761 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
762 		return (0);
763 	return (1);
764 }
765 
766 /*
767  * Returns
768  *   0 - success
769  *   ECONNRESET - success, but h was rebound
770  */
771 int
772 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
773     restarter_instance_state_t new_state,
774     restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
775 {
776 	protocol_states_t *states;
777 	int e;
778 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
779 	boolean_t rebound = B_FALSE;
780 
781 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
782 
783 retry:
784 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
785 	    aux);
786 	switch (e) {
787 	case 0:
788 		break;
789 
790 	case ENOMEM:
791 		++retry_count;
792 		if (retry_count < ALLOC_RETRY) {
793 			(void) poll(NULL, 0, msecs);
794 			msecs *= ALLOC_DELAY_MULT;
795 			goto retry;
796 		}
797 
798 		/* Like startd_alloc(). */
799 		uu_die("Insufficient memory.\n");
800 		/* NOTREACHED */
801 
802 	case ECONNABORTED:
803 		libscf_handle_rebind(h);
804 		rebound = B_TRUE;
805 		goto retry;
806 
807 	case EPERM:
808 	case EACCES:
809 	case EROFS:
810 		log_error(LOG_NOTICE, "Could not commit state change for %s "
811 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
812 		/* FALLTHROUGH */
813 
814 	case ENOENT:
815 		ri->ri_i.i_state = new_state;
816 		ri->ri_i.i_next_state = new_state_next;
817 		break;
818 
819 	case EINVAL:
820 	default:
821 		bad_error("_restarter_commit_states", e);
822 	}
823 
824 	states = startd_alloc(sizeof (protocol_states_t));
825 	states->ps_state = new_state;
826 	states->ps_state_next = new_state_next;
827 	states->ps_err = err;
828 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
829 	    (void *)states);
830 
831 	if (new_state == RESTARTER_STATE_ONLINE)
832 		ri->ri_post_online_hook();
833 
834 	return (rebound ? ECONNRESET : 0);
835 }
836 
837 void
838 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
839 {
840 	restarter_inst_t *inst;
841 
842 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
843 
844 	inst = inst_lookup_by_name(fmri);
845 	if (inst == NULL)
846 		return;
847 
848 	inst->ri_flags |= flag;
849 
850 	MUTEX_UNLOCK(&inst->ri_lock);
851 }
852 
853 static void
854 restarter_take_pending_snapshots(scf_handle_t *h)
855 {
856 	restarter_inst_t *inst;
857 	int r;
858 
859 	MUTEX_LOCK(&instance_list.ril_lock);
860 
861 	for (inst = uu_list_first(instance_list.ril_instance_list);
862 	    inst != NULL;
863 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
864 		const char *fmri;
865 		scf_instance_t *sinst = NULL;
866 
867 		MUTEX_LOCK(&inst->ri_lock);
868 
869 		/*
870 		 * This is where we'd check inst->ri_method_thread and if it
871 		 * were nonzero we'd wait in anticipation of another thread
872 		 * executing a method for inst.  Doing so with the instance_list
873 		 * locked, though, leads to deadlock.  Since taking a snapshot
874 		 * during that window won't hurt anything, we'll just continue.
875 		 */
876 
877 		fmri = inst->ri_i.i_fmri;
878 
879 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
880 			scf_snapshot_t *rsnap;
881 
882 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
883 
884 			rsnap = libscf_get_or_make_running_snapshot(sinst,
885 			    fmri, B_FALSE);
886 
887 			scf_instance_destroy(sinst);
888 
889 			if (rsnap != NULL)
890 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
891 
892 			scf_snapshot_destroy(rsnap);
893 		}
894 
895 		if (inst->ri_flags & RINST_RETAKE_START) {
896 			switch (r = libscf_snapshots_poststart(h, fmri,
897 			    B_FALSE)) {
898 			case 0:
899 			case ENOENT:
900 				inst->ri_flags &= ~RINST_RETAKE_START;
901 				break;
902 
903 			case ECONNABORTED:
904 				break;
905 
906 			case EACCES:
907 			default:
908 				bad_error("libscf_snapshots_poststart", r);
909 			}
910 		}
911 
912 		MUTEX_UNLOCK(&inst->ri_lock);
913 	}
914 
915 	MUTEX_UNLOCK(&instance_list.ril_lock);
916 }
917 
918 /* ARGSUSED */
919 void *
920 restarter_post_fsminimal_thread(void *unused)
921 {
922 	scf_handle_t *h;
923 	int r;
924 
925 	h = libscf_handle_create_bound_loop();
926 
927 	for (;;) {
928 		r = libscf_create_self(h);
929 		if (r == 0)
930 			break;
931 
932 		assert(r == ECONNABORTED);
933 		libscf_handle_rebind(h);
934 	}
935 
936 	restarter_take_pending_snapshots(h);
937 
938 	(void) scf_handle_unbind(h);
939 	scf_handle_destroy(h);
940 
941 	return (NULL);
942 }
943 
944 /*
945  * returns 1 if instance is already started, 0 if not
946  */
947 static int
948 instance_started(restarter_inst_t *inst)
949 {
950 	int ret;
951 
952 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
953 
954 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
955 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
956 		ret = 1;
957 	else
958 		ret = 0;
959 
960 	return (ret);
961 }
962 
963 /*
964  * int stop_instance()
965  *
966  *   Stop the instance identified by the instance given as the second argument,
967  *   for the cause stated.
968  *
969  *   Returns
970  *     0 - success
971  *     -1 - inst is in transition
972  */
973 static int
974 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
975     stop_cause_t cause)
976 {
977 	fork_info_t *info;
978 	const char *cp;
979 	int err;
980 	restarter_error_t re;
981 
982 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
983 	assert(inst->ri_method_thread == 0);
984 
985 	switch (cause) {
986 	case RSTOP_EXIT:
987 		re = RERR_RESTART;
988 		cp = "all processes in service exited";
989 		break;
990 	case RSTOP_CORE:
991 		re = RERR_FAULT;
992 		cp = "process dumped core";
993 		break;
994 	case RSTOP_SIGNAL:
995 		re = RERR_FAULT;
996 		cp = "process received fatal signal from outside the service";
997 		break;
998 	case RSTOP_HWERR:
999 		re = RERR_FAULT;
1000 		cp = "process killed due to uncorrectable hardware error";
1001 		break;
1002 	case RSTOP_DEPENDENCY:
1003 		re = RERR_RESTART;
1004 		cp = "dependency activity requires stop";
1005 		break;
1006 	case RSTOP_DISABLE:
1007 		re = RERR_RESTART;
1008 		cp = "service disabled";
1009 		break;
1010 	case RSTOP_RESTART:
1011 		re = RERR_RESTART;
1012 		cp = "service restarting";
1013 		break;
1014 	default:
1015 #ifndef NDEBUG
1016 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1017 		    cause, __FILE__, __LINE__);
1018 #endif
1019 		abort();
1020 	}
1021 
1022 	/* Services in the disabled and maintenance state are ignored */
1023 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1024 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1025 		log_framework(LOG_DEBUG,
1026 		    "%s: stop_instance -> is maint/disabled\n",
1027 		    inst->ri_i.i_fmri);
1028 		return (0);
1029 	}
1030 
1031 	/* Already stopped instances are left alone */
1032 	if (instance_started(inst) == 0) {
1033 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1034 		    inst->ri_i.i_fmri);
1035 		return (0);
1036 	}
1037 
1038 	if (instance_in_transition(inst)) {
1039 		/* requeue event by returning -1 */
1040 		log_framework(LOG_DEBUG,
1041 		    "Restarter: Not stopping %s, in transition.\n",
1042 		    inst->ri_i.i_fmri);
1043 		return (-1);
1044 	}
1045 
1046 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1047 
1048 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1049 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1050 
1051 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1052 		/*
1053 		 * No need to stop instance, as child has exited; remove
1054 		 * contract and move the instance to the offline state.
1055 		 */
1056 		switch (err = restarter_instance_update_states(local_handle,
1057 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1058 		    NULL)) {
1059 		case 0:
1060 		case ECONNRESET:
1061 			break;
1062 
1063 		default:
1064 			bad_error("restarter_instance_update_states", err);
1065 		}
1066 
1067 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1068 
1069 		if (inst->ri_i.i_primary_ctid != 0) {
1070 			inst->ri_m_inst =
1071 			    safe_scf_instance_create(local_handle);
1072 			inst->ri_mi_deleted = B_FALSE;
1073 
1074 			libscf_reget_instance(inst);
1075 			method_remove_contract(inst, B_TRUE, B_TRUE);
1076 
1077 			scf_instance_destroy(inst->ri_m_inst);
1078 			inst->ri_m_inst = NULL;
1079 		}
1080 
1081 		switch (err = restarter_instance_update_states(local_handle,
1082 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1083 		    NULL)) {
1084 		case 0:
1085 		case ECONNRESET:
1086 			break;
1087 
1088 		default:
1089 			bad_error("restarter_instance_update_states", err);
1090 		}
1091 
1092 		return (0);
1093 	}
1094 
1095 	switch (err = restarter_instance_update_states(local_handle, inst,
1096 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1097 	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
1098 	case 0:
1099 	case ECONNRESET:
1100 		break;
1101 
1102 	default:
1103 		bad_error("restarter_instance_update_states", err);
1104 	}
1105 
1106 	info = startd_zalloc(sizeof (fork_info_t));
1107 
1108 	info->sf_id = inst->ri_id;
1109 	info->sf_method_type = METHOD_STOP;
1110 	info->sf_event_type = re;
1111 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1112 
1113 	return (0);
1114 }
1115 
1116 /*
1117  * Returns
1118  *   ENOENT - fmri is not in instance_list
1119  *   0 - success
1120  *   ECONNRESET - success, though handle was rebound
1121  *   -1 - instance is in transition
1122  */
1123 int
1124 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1125 {
1126 	restarter_inst_t *rip;
1127 	int r;
1128 
1129 	rip = inst_lookup_by_name(fmri);
1130 	if (rip == NULL)
1131 		return (ENOENT);
1132 
1133 	r = stop_instance(h, rip, flags);
1134 
1135 	MUTEX_UNLOCK(&rip->ri_lock);
1136 
1137 	return (r);
1138 }
1139 
1140 static void
1141 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1142     unmaint_cause_t cause)
1143 {
1144 	ctid_t ctid;
1145 	scf_instance_t *inst;
1146 	int r;
1147 	uint_t tries = 0, msecs = ALLOC_DELAY;
1148 	const char *cp;
1149 
1150 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1151 
1152 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1153 		log_error(LOG_DEBUG, "Restarter: "
1154 		    "Ignoring maintenance off command because %s is not in the "
1155 		    "maintenance state.\n", rip->ri_i.i_fmri);
1156 		return;
1157 	}
1158 
1159 	switch (cause) {
1160 	case RUNMAINT_CLEAR:
1161 		cp = "clear requested";
1162 		break;
1163 	case RUNMAINT_DISABLE:
1164 		cp = "disable requested";
1165 		break;
1166 	default:
1167 #ifndef NDEBUG
1168 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1169 		    cause, __FILE__, __LINE__);
1170 #endif
1171 		abort();
1172 	}
1173 
1174 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1175 	    cp);
1176 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1177 	    "%s.\n", rip->ri_i.i_fmri, cp);
1178 
1179 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1180 	    RESTARTER_STATE_NONE, RERR_RESTART, NULL);
1181 
1182 	/*
1183 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1184 	 * a primary contract.
1185 	 */
1186 	if (rip->ri_i.i_primary_ctid == 0)
1187 		return;
1188 
1189 	ctid = rip->ri_i.i_primary_ctid;
1190 	contract_abandon(ctid);
1191 	rip->ri_i.i_primary_ctid = 0;
1192 
1193 rep_retry:
1194 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1195 	case 0:
1196 		break;
1197 
1198 	case ECONNABORTED:
1199 		libscf_handle_rebind(h);
1200 		goto rep_retry;
1201 
1202 	case ENOENT:
1203 		/* Must have been deleted. */
1204 		return;
1205 
1206 	case EINVAL:
1207 	case ENOTSUP:
1208 	default:
1209 		bad_error("libscf_handle_rebind", r);
1210 	}
1211 
1212 again:
1213 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1214 	switch (r) {
1215 	case 0:
1216 		break;
1217 
1218 	case ENOMEM:
1219 		++tries;
1220 		if (tries < ALLOC_RETRY) {
1221 			(void) poll(NULL, 0, msecs);
1222 			msecs *= ALLOC_DELAY_MULT;
1223 			goto again;
1224 		}
1225 
1226 		uu_die("Insufficient memory.\n");
1227 		/* NOTREACHED */
1228 
1229 	case ECONNABORTED:
1230 		scf_instance_destroy(inst);
1231 		libscf_handle_rebind(h);
1232 		goto rep_retry;
1233 
1234 	case ECANCELED:
1235 		break;
1236 
1237 	case EPERM:
1238 	case EACCES:
1239 	case EROFS:
1240 		log_error(LOG_INFO,
1241 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1242 		    rip->ri_i.i_fmri, strerror(r));
1243 		break;
1244 
1245 	case EINVAL:
1246 	case EBADF:
1247 	default:
1248 		bad_error("restarter_remove_contract", r);
1249 	}
1250 
1251 	scf_instance_destroy(inst);
1252 }
1253 
1254 /*
1255  * enable_inst()
1256  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1257  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1258  *   disabled, move it to offline.  If the event is _DISABLE or
1259  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1260  *
1261  *   Returns
1262  *     0 - success
1263  *     ECONNRESET - h was rebound
1264  */
1265 static int
1266 enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
1267 {
1268 	restarter_instance_state_t state;
1269 	int r;
1270 
1271 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1272 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1273 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1274 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1275 	assert(instance_in_transition(inst) == 0);
1276 
1277 	state = inst->ri_i.i_state;
1278 
1279 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1280 		inst->ri_i.i_enabled = 1;
1281 
1282 		if (state == RESTARTER_STATE_UNINIT ||
1283 		    state == RESTARTER_STATE_DISABLED) {
1284 			/*
1285 			 * B_FALSE: Don't log an error if the log_instance()
1286 			 * fails because it will fail on the miniroot before
1287 			 * install-discovery runs.
1288 			 */
1289 			log_instance(inst, B_FALSE, "Enabled.");
1290 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1291 			    inst->ri_i.i_fmri);
1292 			(void) restarter_instance_update_states(h, inst,
1293 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1294 			    RERR_NONE, NULL);
1295 		} else {
1296 			log_framework(LOG_DEBUG, "Restarter: "
1297 			    "Not changing state of %s for enable command.\n",
1298 			    inst->ri_i.i_fmri);
1299 		}
1300 	} else {
1301 		inst->ri_i.i_enabled = 0;
1302 
1303 		switch (state) {
1304 		case RESTARTER_STATE_ONLINE:
1305 		case RESTARTER_STATE_DEGRADED:
1306 			r = stop_instance(h, inst, RSTOP_DISABLE);
1307 			return (r == ECONNRESET ? 0 : r);
1308 
1309 		case RESTARTER_STATE_OFFLINE:
1310 		case RESTARTER_STATE_UNINIT:
1311 			if (inst->ri_i.i_primary_ctid != 0) {
1312 				inst->ri_m_inst = safe_scf_instance_create(h);
1313 				inst->ri_mi_deleted = B_FALSE;
1314 
1315 				libscf_reget_instance(inst);
1316 				method_remove_contract(inst, B_TRUE, B_TRUE);
1317 
1318 				scf_instance_destroy(inst->ri_m_inst);
1319 			}
1320 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1321 			log_instance(inst, B_FALSE, "Disabled.");
1322 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1323 			    inst->ri_i.i_fmri);
1324 			(void) restarter_instance_update_states(h, inst,
1325 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1326 			    RERR_RESTART, NULL);
1327 			return (0);
1328 
1329 		case RESTARTER_STATE_DISABLED:
1330 			break;
1331 
1332 		case RESTARTER_STATE_MAINT:
1333 			/*
1334 			 * We only want to pull the instance out of maintenance
1335 			 * if the disable is on adminstrative request.  The
1336 			 * graph engine sends _DISABLE events whenever a
1337 			 * service isn't in the disabled state, and we don't
1338 			 * want to pull the service out of maintenance if,
1339 			 * for example, it is there due to a dependency cycle.
1340 			 */
1341 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1342 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1343 			break;
1344 
1345 		default:
1346 #ifndef NDEBUG
1347 			(void) fprintf(stderr, "Restarter instance %s has "
1348 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1349 #endif
1350 			abort();
1351 		}
1352 	}
1353 
1354 	return (0);
1355 }
1356 
1357 static void
1358 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
1359 {
1360 	fork_info_t *info;
1361 
1362 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1363 	assert(instance_in_transition(inst) == 0);
1364 	assert(inst->ri_method_thread == 0);
1365 
1366 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1367 	    inst->ri_i.i_fmri);
1368 
1369 	/* Services in the disabled and maintenance state are ignored */
1370 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1371 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1372 	    inst->ri_i.i_enabled == 0) {
1373 		log_framework(LOG_DEBUG,
1374 		    "%s: start_instance -> is maint/disabled\n",
1375 		    inst->ri_i.i_fmri);
1376 		return;
1377 	}
1378 
1379 	/* Already started instances are left alone */
1380 	if (instance_started(inst) == 1) {
1381 		log_framework(LOG_DEBUG,
1382 		    "%s: start_instance -> is already started\n",
1383 		    inst->ri_i.i_fmri);
1384 		return;
1385 	}
1386 
1387 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1388 
1389 	(void) restarter_instance_update_states(local_handle, inst,
1390 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, NULL);
1391 
1392 	info = startd_zalloc(sizeof (fork_info_t));
1393 
1394 	info->sf_id = inst->ri_id;
1395 	info->sf_method_type = METHOD_START;
1396 	info->sf_event_type = RERR_NONE;
1397 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1398 }
1399 
1400 static void
1401 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1402     const char *aux)
1403 {
1404 	fork_info_t *info;
1405 
1406 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1407 	assert(aux != NULL);
1408 	assert(rip->ri_method_thread == 0);
1409 
1410 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
1411 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1412 	    rip->ri_i.i_fmri, aux);
1413 
1414 	/* Services in the maintenance state are ignored */
1415 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1416 		log_framework(LOG_DEBUG,
1417 		    "%s: maintain_instance -> is already in maintenance\n",
1418 		    rip->ri_i.i_fmri);
1419 		return;
1420 	}
1421 
1422 	if (immediate || !instance_started(rip)) {
1423 		if (rip->ri_i.i_primary_ctid != 0) {
1424 			rip->ri_m_inst = safe_scf_instance_create(h);
1425 			rip->ri_mi_deleted = B_FALSE;
1426 
1427 			libscf_reget_instance(rip);
1428 			method_remove_contract(rip, B_TRUE, B_TRUE);
1429 
1430 			scf_instance_destroy(rip->ri_m_inst);
1431 		}
1432 
1433 		(void) restarter_instance_update_states(h, rip,
1434 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1435 		    (char *)aux);
1436 		return;
1437 	}
1438 
1439 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1440 	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
1441 
1442 	info = startd_zalloc(sizeof (*info));
1443 	info->sf_id = rip->ri_id;
1444 	info->sf_method_type = METHOD_STOP;
1445 	info->sf_event_type = RERR_RESTART;
1446 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1447 }
1448 
1449 static void
1450 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1451 {
1452 	scf_instance_t *inst;
1453 	scf_snapshot_t *snap;
1454 	fork_info_t *info;
1455 	int r;
1456 
1457 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1458 
1459 	log_instance(rip, B_TRUE, "Rereading configuration.");
1460 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1461 	    rip->ri_i.i_fmri);
1462 
1463 rep_retry:
1464 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1465 	switch (r) {
1466 	case 0:
1467 		break;
1468 
1469 	case ECONNABORTED:
1470 		libscf_handle_rebind(h);
1471 		goto rep_retry;
1472 
1473 	case ENOENT:
1474 		/* Must have been deleted. */
1475 		return;
1476 
1477 	case EINVAL:
1478 	case ENOTSUP:
1479 	default:
1480 		bad_error("libscf_fmri_get_instance", r);
1481 	}
1482 
1483 	snap = libscf_get_running_snapshot(inst);
1484 
1485 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1486 	    &rip->ri_utmpx_prefix);
1487 	switch (r) {
1488 	case 0:
1489 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1490 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1491 		break;
1492 
1493 	case ECONNABORTED:
1494 		scf_instance_destroy(inst);
1495 		scf_snapshot_destroy(snap);
1496 		libscf_handle_rebind(h);
1497 		goto rep_retry;
1498 
1499 	case ECANCELED:
1500 	case ENOENT:
1501 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1502 		break;
1503 
1504 	default:
1505 		bad_error("libscf_get_startd_properties", r);
1506 	}
1507 
1508 	if (instance_started(rip)) {
1509 		/* Refresh does not change the state. */
1510 		(void) restarter_instance_update_states(h, rip,
1511 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
1512 
1513 		info = startd_zalloc(sizeof (*info));
1514 		info->sf_id = rip->ri_id;
1515 		info->sf_method_type = METHOD_REFRESH;
1516 		info->sf_event_type = RERR_REFRESH;
1517 
1518 		assert(rip->ri_method_thread == 0);
1519 		rip->ri_method_thread =
1520 		    startd_thread_create(method_thread, info);
1521 	}
1522 
1523 	scf_snapshot_destroy(snap);
1524 	scf_instance_destroy(inst);
1525 }
1526 
/*
 * Printable names for restarter events.  The table is indexed directly by
 * event type when building log messages in this file
 * (event_names[event->riq_type], event_names[e->rpe_type]).
 * NOTE(review): the entry order presumably mirrors the numeric values of
 * the restarter event type enumeration -- confirm against its definition
 * before adding or reordering entries.
 */
const char *event_names[] = { "INVALID", "ADD_INSTANCE", "REMOVE_INSTANCE",
	"ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
	"ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY", "ADMIN_DISABLE"
};
1533 
/*
 * void *restarter_process_events()
 *
 *   Called in a separate thread to process the events on an instance's
 *   queue.  Empties the queue completely, and tries to keep the thread
 *   around for a little while after the queue is empty to save on
 *   startup costs.
 *
 *   arg is the FMRI string of the instance whose queue is serviced; this
 *   thread releases it with free() before exiting.  The thread uses its
 *   own repository handle.  Always returns NULL.
 */
static void *
restarter_process_events(void *arg)
{
	scf_handle_t *h;
	restarter_instance_qentry_t *event;
	restarter_inst_t *rip;
	char *fmri = (char *)arg;
	struct timespec to;

	assert(fmri != NULL);

	h = libscf_handle_create_bound_loop();

	/* grab the queue lock */
	rip = inst_lookup_queue(fmri);
	if (rip == NULL)
		goto out;

again:

	/*
	 * The event stays at the head of the queue while it is processed;
	 * it is removed and freed only at the bottom of the loop, once the
	 * queue lock has been retaken.
	 */
	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
		restarter_inst_t *inst;

		/* drop the queue lock */
		MUTEX_UNLOCK(&rip->ri_queue_lock);

		/*
		 * Grab the inst lock -- this waits until any outstanding
		 * method finishes running.
		 */
		inst = inst_lookup_by_name(fmri);
		if (inst == NULL) {
			/* Getting deleted in the middle isn't an error. */
			goto cont;
		}

		assert(instance_in_transition(inst) == 0);

		/* process the event */
		switch (event->riq_type) {
		case RESTARTER_EVENT_TYPE_ENABLE:
		case RESTARTER_EVENT_TYPE_DISABLE:
		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
			(void) enable_inst(h, inst, event->riq_type);
			break;

		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
			/*
			 * Skips the ri_lock unlock below.
			 * NOTE(review): presumably restarter_delete_inst()
			 * disposes of the instance and its lock -- confirm.
			 */
			restarter_delete_inst(inst);
			inst = NULL;
			goto cont;

		case RESTARTER_EVENT_TYPE_STOP:
			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
			break;

		case RESTARTER_EVENT_TYPE_START:
			start_instance(h, inst);
			break;

		/* The third argument of maintain_instance() is "immediate". */
		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
			maintain_instance(h, inst, 0, "dependency_cycle");
			break;

		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			maintain_instance(h, inst, 0, "invalid_dependency");
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
			maintain_instance(h, inst, 0, "administrative_request");
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
			maintain_instance(h, inst, 1, "administrative_request");
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
			refresh_instance(h, inst);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
			log_framework(LOG_WARNING, "Restarter: "
			    "%s command (for %s) unimplemented.\n",
			    event_names[event->riq_type], inst->ri_i.i_fmri);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
			if (!instance_started(inst)) {
				log_framework(LOG_DEBUG, "Restarter: "
				    "Not restarting %s; not running.\n",
				    inst->ri_i.i_fmri);
			} else {
				/*
				 * Stop the instance.  If it can be restarted,
				 * the graph engine will send a new event.
				 */
				(void) stop_instance(h, inst, RSTOP_RESTART);
			}
			break;

		/*
		 * ADD_INSTANCE is handled synchronously by
		 * restarter_event_thread() and must never be queued.
		 */
		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
		default:
#ifndef NDEBUG
			uu_warn("%s:%d: Bad restarter event %d.  "
			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
#endif
			abort();
		}

		assert(inst != NULL);
		MUTEX_UNLOCK(&inst->ri_lock);

cont:
		/* grab the queue lock */
		rip = inst_lookup_queue(fmri);
		/*
		 * NOTE(review): when the instance is gone, event is not
		 * freed on this path; presumably queued entries are released
		 * along with the instance -- confirm.
		 */
		if (rip == NULL)
			goto out;

		/* delete the event */
		uu_list_remove(rip->ri_queue, event);
		startd_free(event, sizeof (restarter_instance_qentry_t));
	}

	assert(rip != NULL);

	/*
	 * Try to preserve the thread for a little while for future use.
	 */
	to.tv_sec = 3;
	to.tv_nsec = 0;
	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
	    &rip->ri_queue_lock, &to);

	/* Something was queued while we waited: go back to work. */
	if (uu_list_first(rip->ri_queue) != NULL)
		goto again;

	/*
	 * Record that no queue thread exists, so restarter_event_thread()
	 * creates a new one for the next event.
	 */
	rip->ri_queue_thread = 0;
	MUTEX_UNLOCK(&rip->ri_queue_lock);
out:
	(void) scf_handle_unbind(h);
	scf_handle_destroy(h);
	free(fmri);
	return (NULL);
}
1689 
1690 static int
1691 is_admin_event(restarter_event_type_t t) {
1692 
1693 	switch (t) {
1694 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1695 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1696 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1697 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1698 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1699 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1700 		return (1);
1701 	default:
1702 		return (0);
1703 	}
1704 }
1705 
1706 static void
1707 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1708 {
1709 	restarter_instance_qentry_t *qe;
1710 	int r;
1711 
1712 	assert(PTHREAD_MUTEX_HELD(&ri->ri_queue_lock));
1713 	assert(!PTHREAD_MUTEX_HELD(&ri->ri_lock));
1714 
1715 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1716 	qe->riq_type = e->rpe_type;
1717 
1718 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1719 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1720 	assert(r == 0);
1721 }
1722 
/*
 * void *restarter_event_thread()
 *
 *  Handle incoming graph events by placing them on a per-instance
 *  queue.  We can't lock the main part of the instance structure, so
 *  just modify the separately locked event queue portion.
 *
 *  The thread sleeps on ru->restarter_update_cv until
 *  ru->restarter_update_wakeup is set, then drains everything returned
 *  by restarter_event_dequeue().  ru->restarter_update_lock is dropped
 *  while an individual event is handled and retaken before the next
 *  dequeue.  The loop never terminates.
 */
/*ARGSUSED*/
static void *
restarter_event_thread(void *unused)
{
	scf_handle_t *h;

	/*
	 * This is a new thread, and thus, gets its own handle
	 * to the repository.
	 */
	h = libscf_handle_create_bound_loop();

	MUTEX_LOCK(&ru->restarter_update_lock);

	/*CONSTCOND*/
	while (1) {
		restarter_protocol_event_t *e;

		/* Sleep until woken; the flag guards against spurious wakeups. */
		while (ru->restarter_update_wakeup == 0)
			(void) pthread_cond_wait(&ru->restarter_update_cv,
			    &ru->restarter_update_lock);

		ru->restarter_update_wakeup = 0;

		while ((e = restarter_event_dequeue()) != NULL) {
			restarter_inst_t *rip;
			char *fmri;

			MUTEX_UNLOCK(&ru->restarter_update_lock);

			/*
			 * ADD_INSTANCE is special: there's likely no
			 * instance structure yet, so we need to handle the
			 * addition synchronously.
			 */
			switch (e->rpe_type) {
			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
				if (restarter_insert_inst(h, e->rpe_inst) != 0)
					log_error(LOG_INFO, "Restarter: "
					    "Could not add %s.\n", e->rpe_inst);

				/*
				 * One fewer instance left to load; wake the
				 * st_load_cv waiters when the count hits zero.
				 */
				MUTEX_LOCK(&st->st_load_lock);
				if (--st->st_load_instances == 0)
					(void) pthread_cond_broadcast(
					    &st->st_load_cv);
				MUTEX_UNLOCK(&st->st_load_lock);

				goto nolookup;
			}

			/*
			 * Lookup the instance, locking only the event queue.
			 * Can't grab ri_lock here because it might be held
			 * by a long-running method.
			 */
			rip = inst_lookup_queue(e->rpe_inst);
			if (rip == NULL) {
				log_error(LOG_INFO, "Restarter: "
				    "Ignoring %s command for unknown service "
				    "%s.\n", event_names[e->rpe_type],
				    e->rpe_inst);
				goto nolookup;
			}

			/* Keep ADMIN events from filling up the queue. */
			if (is_admin_event(e->rpe_type) &&
			    uu_list_numnodes(rip->ri_queue) >
			    RINST_QUEUE_THRESHOLD) {
				/* The queue lock is dropped before logging. */
				MUTEX_UNLOCK(&rip->ri_queue_lock);
				log_instance(rip, B_TRUE, "Instance event "
				    "queue overflow.  Dropping administrative "
				    "request.");
				log_framework(LOG_DEBUG, "%s: Instance event "
				    "queue overflow.  Dropping administrative "
				    "request.\n", rip->ri_i.i_fmri);
				goto nolookup;
			}

			/* Now add the event to the instance queue. */
			restarter_queue_event(rip, e);

			if (rip->ri_queue_thread == 0) {
				/*
				 * Start a thread if one isn't already
				 * running.  The new thread takes ownership
				 * of the duplicated FMRI string.
				 */
				fmri = safe_strdup(e->rpe_inst);
				rip->ri_queue_thread =  startd_thread_create(
				    restarter_process_events, (void *)fmri);
			} else {
				/*
				 * Signal the existing thread that there's
				 * a new event.
				 */
				(void) pthread_cond_broadcast(
				    &rip->ri_queue_cv);
			}

			MUTEX_UNLOCK(&rip->ri_queue_lock);
nolookup:
			/* Every dequeued event is released, handled or not. */
			restarter_event_release(e);

			MUTEX_LOCK(&ru->restarter_update_lock);
		}
	}

	/*
	 * Unreachable for now -- there's currently no graceful cleanup
	 * called on exit().
	 */
	(void) scf_handle_unbind(h);
	scf_handle_destroy(h);
	return (NULL);
}
1844 
1845 static restarter_inst_t *
1846 contract_to_inst(ctid_t ctid)
1847 {
1848 	restarter_inst_t *inst;
1849 	int id;
1850 
1851 	id = lookup_inst_by_contract(ctid);
1852 	if (id == -1)
1853 		return (NULL);
1854 
1855 	inst = inst_lookup_by_id(id);
1856 	if (inst != NULL) {
1857 		/*
1858 		 * Since ri_lock isn't held by the contract id lookup, this
1859 		 * instance may have been restarted and now be in a new
1860 		 * contract, making the old contract no longer valid for this
1861 		 * instance.
1862 		 */
1863 		if (ctid != inst->ri_i.i_primary_ctid) {
1864 			MUTEX_UNLOCK(&inst->ri_lock);
1865 			inst = NULL;
1866 		}
1867 	}
1868 	return (inst);
1869 }
1870 
1871 /*
1872  * void contract_action()
1873  *   Take action on contract events.
1874  */
1875 static void
1876 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
1877     uint32_t type)
1878 {
1879 	const char *fmri = inst->ri_i.i_fmri;
1880 
1881 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1882 
1883 	/*
1884 	 * If startd has stopped this contract, there is no need to
1885 	 * stop it again.
1886 	 */
1887 	if (inst->ri_i.i_primary_ctid > 0 &&
1888 	    inst->ri_i.i_primary_ctid_stopped)
1889 		return;
1890 
1891 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
1892 	    | CT_PR_EV_HWERR)) == 0) {
1893 		/*
1894 		 * There shouldn't be other events, since that's not how we set
1895 		 * the terms. Thus, just log an error and drive on.
1896 		 */
1897 		log_framework(LOG_NOTICE,
1898 		    "%s: contract %ld received unexpected critical event "
1899 		    "(%d)\n", fmri, id, type);
1900 		    return;
1901 	}
1902 
1903 	assert(instance_in_transition(inst) == 0);
1904 
1905 	if (instance_is_wait_style(inst)) {
1906 		/*
1907 		 * We ignore all events; if they impact the
1908 		 * process we're monitoring, then the
1909 		 * wait_thread will stop the instance.
1910 		 */
1911 		log_framework(LOG_DEBUG,
1912 		    "%s: ignoring contract event on wait-style service\n",
1913 		    fmri);
1914 	} else {
1915 		/*
1916 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
1917 		 */
1918 		switch (type) {
1919 		case CT_PR_EV_EMPTY:
1920 			(void) stop_instance(h, inst, RSTOP_EXIT);
1921 			break;
1922 		case CT_PR_EV_CORE:
1923 			(void) stop_instance(h, inst, RSTOP_CORE);
1924 			break;
1925 		case CT_PR_EV_SIGNAL:
1926 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
1927 			break;
1928 		case CT_PR_EV_HWERR:
1929 			(void) stop_instance(h, inst, RSTOP_HWERR);
1930 			break;
1931 		}
1932 	}
1933 }
1934 
1935 /*
1936  * void *restarter_contract_event_thread(void *)
1937  *   Listens to the process contract bundle for critical events, taking action
1938  *   on events from contracts we know we are responsible for.
1939  */
1940 /*ARGSUSED*/
1941 static void *
1942 restarter_contracts_event_thread(void *unused)
1943 {
1944 	int fd, err;
1945 	scf_handle_t *local_handle;
1946 
1947 	/*
1948 	 * Await graph load completion.  That is, stop here, until we've scanned
1949 	 * the repository for contract - instance associations.
1950 	 */
1951 	MUTEX_LOCK(&st->st_load_lock);
1952 	while (!(st->st_load_complete && st->st_load_instances == 0))
1953 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
1954 	MUTEX_UNLOCK(&st->st_load_lock);
1955 
1956 	/*
1957 	 * This is a new thread, and thus, gets its own handle
1958 	 * to the repository.
1959 	 */
1960 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
1961 		uu_die("Unable to bind a new repository handle: %s\n",
1962 		    scf_strerror(scf_error()));
1963 
1964 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
1965 	if (fd == -1)
1966 		uu_die("process bundle open failed");
1967 
1968 	/*
1969 	 * Make sure we get all events (including those generated by configd
1970 	 * before this thread was started).
1971 	 */
1972 	err = ct_event_reset(fd);
1973 	assert(err == 0);
1974 
1975 	for (;;) {
1976 		int efd, sfd;
1977 		ct_evthdl_t ev;
1978 		uint32_t type;
1979 		ctevid_t evid;
1980 		ct_stathdl_t status;
1981 		ctid_t ctid;
1982 		restarter_inst_t *inst;
1983 		uint64_t cookie;
1984 
1985 		if (err = ct_event_read_critical(fd, &ev)) {
1986 			log_error(LOG_WARNING,
1987 			    "Error reading next contract event: %s",
1988 			    strerror(err));
1989 			continue;
1990 		}
1991 
1992 		evid = ct_event_get_evid(ev);
1993 		ctid = ct_event_get_ctid(ev);
1994 		type = ct_event_get_type(ev);
1995 
1996 		/* Fetch cookie. */
1997 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
1998 		    < 0) {
1999 			ct_event_free(ev);
2000 			continue;
2001 		}
2002 
2003 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2004 			log_framework(LOG_WARNING, "Could not get status for "
2005 			    "contract %ld: %s\n", ctid, strerror(err));
2006 
2007 			startd_close(sfd);
2008 			ct_event_free(ev);
2009 			continue;
2010 		}
2011 
2012 		cookie = ct_status_get_cookie(status);
2013 
2014 		ct_status_free(status);
2015 
2016 		startd_close(sfd);
2017 
2018 		/*
2019 		 * svc.configd(1M) restart handling performed by the
2020 		 * fork_configd_thread.  We don't acknowledge, as that thread
2021 		 * will do so.
2022 		 */
2023 		if (cookie == CONFIGD_COOKIE) {
2024 			ct_event_free(ev);
2025 			continue;
2026 		}
2027 
2028 		inst = contract_to_inst(ctid);
2029 		if (inst == NULL) {
2030 			/*
2031 			 * This can happen if we receive an EMPTY
2032 			 * event for an abandoned contract.
2033 			 */
2034 			log_framework(LOG_DEBUG,
2035 			    "Received event %d for unknown contract id "
2036 			    "%ld\n", type, ctid);
2037 		} else {
2038 			log_framework(LOG_DEBUG,
2039 			    "Received event %d for contract id "
2040 			    "%ld (%s)\n", type, ctid,
2041 			    inst->ri_i.i_fmri);
2042 
2043 			contract_action(local_handle, inst, ctid, type);
2044 
2045 			MUTEX_UNLOCK(&inst->ri_lock);
2046 		}
2047 
2048 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2049 		    O_WRONLY);
2050 		if (efd != -1) {
2051 			(void) ct_ctl_ack(efd, evid);
2052 			startd_close(efd);
2053 		}
2054 
2055 		ct_event_free(ev);
2056 
2057 	}
2058 
2059 	/*NOTREACHED*/
2060 	return (NULL);
2061 }
2062 
/*
 * Timeout queue, processed by restarter_timeouts_event_thread().
 * Allocated by timeout_init(); tq_list is kept sorted by expiry time and
 * is protected by tq_lock.
 */
timeout_queue_t *timeouts;
/* List pool backing timeouts->tq_list; created by timeout_init(). */
static uu_list_pool_t *timeout_pool;

/*
 * Wakeup channel for the timeout machinery: timeout_insert() sets
 * tu_wakeup and broadcasts tu_cv, both under tu_lock, after queueing a
 * new entry.
 */
typedef struct timeout_update {
	pthread_mutex_t		tu_lock;
	pthread_cond_t		tu_cv;
	int			tu_wakeup;
} timeout_update_t;

/* The single timeout_update_t instance, allocated by timeout_init(). */
timeout_update_t *tu;
2076 
/*
 * FMRIs for which is_timeout_ovr() reports a timeout override ("Using
 * infinite timeout").  The list is NULL-terminated; is_timeout_ovr()
 * depends on the terminator to stop scanning.
 */
static const char *timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2091 
2092 int
2093 is_timeout_ovr(restarter_inst_t *inst)
2094 {
2095 	int i;
2096 
2097 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2098 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2099 			log_instance(inst, B_TRUE, "Timeout override by "
2100 			    "svc.startd.  Using infinite timeout");
2101 			return (1);
2102 		}
2103 	}
2104 
2105 	return (0);
2106 }
2107 
2108 /*ARGSUSED*/
2109 static int
2110 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2111 {
2112 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2113 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2114 
2115 	if (t1 > t2)
2116 		return (1);
2117 	else if (t1 < t2)
2118 		return (-1);
2119 	return (0);
2120 }
2121 
2122 void
2123 timeout_init()
2124 {
2125 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2126 
2127 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2128 
2129 	timeout_pool = startd_list_pool_create("timeouts",
2130 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2131 	    timeout_compare, UU_LIST_POOL_DEBUG);
2132 	assert(timeout_pool != NULL);
2133 
2134 	timeouts->tq_list = startd_list_create(timeout_pool,
2135 	    timeouts, UU_LIST_SORTED);
2136 	assert(timeouts->tq_list != NULL);
2137 
2138 	tu = startd_zalloc(sizeof (timeout_update_t));
2139 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2140 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2141 }
2142 
2143 void
2144 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2145 {
2146 	hrtime_t now, timeout;
2147 	timeout_entry_t *entry;
2148 	uu_list_index_t idx;
2149 
2150 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2151 
2152 	now = gethrtime();
2153 
2154 	/*
2155 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2156 	 * just return.
2157 	 */
2158 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2159 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2160 		    "treating as infinite.");
2161 		return;
2162 	}
2163 
2164 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2165 	timeout = now + (timeout_sec * 1000000000LL);
2166 
2167 	entry = startd_alloc(sizeof (timeout_entry_t));
2168 	entry->te_timeout = timeout;
2169 	entry->te_ctid = cid;
2170 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2171 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2172 	entry->te_fired = 0;
2173 	/* Insert the calculated timeout time onto the queue. */
2174 	MUTEX_LOCK(&timeouts->tq_lock);
2175 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2176 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2177 	uu_list_insert(timeouts->tq_list, entry, idx);
2178 	MUTEX_UNLOCK(&timeouts->tq_lock);
2179 
2180 	assert(inst->ri_timeout == NULL);
2181 	inst->ri_timeout = entry;
2182 
2183 	MUTEX_LOCK(&tu->tu_lock);
2184 	tu->tu_wakeup = 1;
2185 	(void) pthread_cond_broadcast(&tu->tu_cv);
2186 	MUTEX_UNLOCK(&tu->tu_lock);
2187 }
2188 
2189 
2190 void
2191 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2192 {
2193 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2194 
2195 	if (inst->ri_timeout == NULL)
2196 		return;
2197 
2198 	assert(inst->ri_timeout->te_ctid == cid);
2199 
2200 	MUTEX_LOCK(&timeouts->tq_lock);
2201 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2202 	MUTEX_UNLOCK(&timeouts->tq_lock);
2203 
2204 	free(inst->ri_timeout->te_fmri);
2205 	free(inst->ri_timeout->te_logstem);
2206 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2207 	inst->ri_timeout = NULL;
2208 }
2209 
2210 static int
2211 timeout_now()
2212 {
2213 	timeout_entry_t *e;
2214 	hrtime_t now;
2215 	int ret;
2216 
2217 	now = gethrtime();
2218 
2219 	/*
2220 	 * Walk through the (sorted) timeouts list.  While the timeout
2221 	 * at the head of the list is <= the current time, kill the
2222 	 * method.
2223 	 */
2224 	MUTEX_LOCK(&timeouts->tq_lock);
2225 
2226 	for (e = uu_list_first(timeouts->tq_list);
2227 	    e != NULL && e->te_timeout <= now;
2228 	    e = uu_list_next(timeouts->tq_list, e)) {
2229 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2230 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2231 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2232 		    "Method or service exit timed out.  Killing contract %ld",
2233 		    e->te_ctid);
2234 		e->te_fired = 1;
2235 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2236 	}
2237 
2238 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2239 		ret = 0;
2240 	else
2241 		ret = -1;
2242 
2243 	MUTEX_UNLOCK(&timeouts->tq_lock);
2244 
2245 	return (ret);
2246 }
2247 
2248 /*
2249  * void *restarter_timeouts_event_thread(void *)
2250  *   Responsible for monitoring the method timeouts.  This thread must
2251  *   be started before any methods are called.
2252  */
2253 /*ARGSUSED*/
2254 static void *
2255 restarter_timeouts_event_thread(void *unused)
2256 {
2257 	/*
2258 	 * Timeouts are entered on a priority queue, which is processed by
2259 	 * this thread.  As timeouts are specified in seconds, we'll do
2260 	 * the necessary processing every second, as long as the queue
2261 	 * is not empty.
2262 	 */
2263 
2264 	/*CONSTCOND*/
2265 	while (1) {
2266 		/*
2267 		 * As long as the timeout list isn't empty, process it
2268 		 * every second.
2269 		 */
2270 		if (timeout_now() == 0) {
2271 			(void) sleep(1);
2272 			continue;
2273 		}
2274 
2275 		/* The list is empty, wait until we have more timeouts. */
2276 		MUTEX_LOCK(&tu->tu_lock);
2277 
2278 		while (tu->tu_wakeup == 0)
2279 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2280 
2281 		tu->tu_wakeup = 0;
2282 		MUTEX_UNLOCK(&tu->tu_lock);
2283 	}
2284 
2285 	return (NULL);
2286 }
2287 
2288 void
2289 restarter_start()
2290 {
2291 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2292 	(void) startd_thread_create(restarter_event_thread, NULL);
2293 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2294 	(void) startd_thread_create(wait_thread, NULL);
2295 }
2296 
2297 
2298 void
2299 restarter_init()
2300 {
2301 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2302 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2303 		ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2304 	(void) memset(&instance_list, 0, sizeof (instance_list));
2305 
2306 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2307 	instance_list.ril_instance_list = startd_list_create(
2308 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2309 
2310 	restarter_queue_pool = startd_list_pool_create(
2311 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2312 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2313 	    UU_LIST_POOL_DEBUG);
2314 
2315 	contract_list_pool = startd_list_pool_create(
2316 	    "contract_list", sizeof (contract_entry_t),
2317 	    offsetof(contract_entry_t,  ce_link), NULL,
2318 	    UU_LIST_POOL_DEBUG);
2319 	contract_hash_init();
2320 
2321 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2322 }
2323