xref: /titanic_52/usr/src/cmd/svc/startd/restarter.c (revision 9df12a23948bd40cbe37ce88d84e272c3894e675)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * restarter.c - service manipulation
30  *
31  * This component manages services whose restarter is svc.startd, the standard
32  * restarter.  It translates restarter protocol events from the graph engine
33  * into actions on processes, as a delegated restarter would do.
34  *
35  * The master restarter manages a number of always-running threads:
36  *   - restarter event thread: events from the graph engine
37  *   - timeout thread: thread to fire queued timeouts
38  *   - contract thread: thread to handle contract events
39  *   - wait thread: thread to handle wait-based services
40  *
41  * The other threads are created as-needed:
42  *   - per-instance method threads
43  *   - per-instance event processing threads
44  *
45  * The interaction of all threads must result in the following conditions
46  * being satisfied (on a per-instance basis):
47  *   - restarter events must be processed in order
48  *   - method execution must be serialized
49  *   - instance delete must be held until outstanding methods are complete
50  *   - contract events shouldn't be processed while a method is running
51  *   - timeouts should fire even when a method is running
52  *
53  * Service instances are represented by restarter_inst_t's and are kept in the
54  * instance_list list.
55  *
56  * Service States
57  *   The current state of a service instance is kept in
58  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
59  *   some time, then before we effect the transition we set
60  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
61  *   rotate i_next_state to i_state and set i_next_state to
62  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
63  *   held.  The exception is when we launch methods, which are done with
64  *   a separate thread.  To keep any other threads from grabbing ri_lock before
65  *   method_thread() does, we set ri_method_thread to the thread id of the
66  *   method thread, and when it is nonzero any thread with a different thread id
67  *   waits on ri_method_cv.
68  *
69  * Method execution is serialized by blocking on ri_method_cv in
70  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
71  * also prevents the instance structure from being deleted until all
72  * outstanding operations such as method_thread() have finished.
73  *
74  * Lock ordering:
75  *
76  * dgraph_lock [can be held when taking:]
77  *   utmpx_lock
78  *   dictionary->dict_lock
79  *   st->st_load_lock
80  *   wait_info_lock
81  *   ru->restarter_update_lock
82  *     restarter_queue->rpeq_lock
83  *   instance_list.ril_lock
84  *     inst->ri_lock
85  *   st->st_configd_live_lock
86  *
87  * instance_list.ril_lock
88  *   graph_queue->gpeq_lock
89  *   gu->gu_lock
90  *   st->st_configd_live_lock
91  *   dictionary->dict_lock
92  *   inst->ri_lock
93  *     graph_queue->gpeq_lock
94  *     gu->gu_lock
95  *     tu->tu_lock
96  *     tq->tq_lock
97  *     inst->ri_queue_lock
98  *       wait_info_lock
99  *       bp->cb_lock
100  *     utmpx_lock
101  *
102  * single_user_thread_lock
103  *   wait_info_lock
104  *   utmpx_lock
105  *
106  * gu_freeze_lock
107  *
108  * logbuf_mutex nests inside pretty much everything.
109  */
110 
111 #include <sys/contract/process.h>
112 #include <sys/ctfs.h>
113 #include <sys/stat.h>
114 #include <sys/time.h>
115 #include <sys/types.h>
116 #include <sys/uio.h>
117 #include <sys/wait.h>
118 #include <assert.h>
119 #include <errno.h>
120 #include <fcntl.h>
121 #include <libcontract.h>
122 #include <libcontract_priv.h>
123 #include <libintl.h>
124 #include <librestart.h>
125 #include <librestart_priv.h>
126 #include <libuutil.h>
127 #include <limits.h>
128 #include <poll.h>
129 #include <port.h>
130 #include <pthread.h>
131 #include <stdarg.h>
132 #include <stdio.h>
133 #include <strings.h>
134 #include <unistd.h>
135 
136 #include "startd.h"
137 #include "protocol.h"
138 
/* uu_list pool used to initialize each restarter_inst_t's ri_link node. */
static uu_list_pool_t *restarter_instance_pool;
/* All instances known to this restarter; the list is guarded by ril_lock. */
static restarter_instance_list_t instance_list;

/* uu_list pool from which each instance's ri_queue is created. */
static uu_list_pool_t *restarter_queue_pool;
143 
144 /*ARGSUSED*/
145 static int
146 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
147     void *private)
148 {
149 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
150 	int rc_id = *(int *)rc_arg;
151 
152 	if (lc_id > rc_id)
153 		return (1);
154 	if (lc_id < rc_id)
155 		return (-1);
156 	return (0);
157 }
158 
159 static restarter_inst_t *
160 inst_lookup_by_name(const char *name)
161 {
162 	int id;
163 
164 	id = dict_lookup_byname(name);
165 	if (id == -1)
166 		return (NULL);
167 
168 	return (inst_lookup_by_id(id));
169 }
170 
171 restarter_inst_t *
172 inst_lookup_by_id(int id)
173 {
174 	restarter_inst_t *inst;
175 
176 	MUTEX_LOCK(&instance_list.ril_lock);
177 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
178 	if (inst != NULL)
179 		MUTEX_LOCK(&inst->ri_lock);
180 	MUTEX_UNLOCK(&instance_list.ril_lock);
181 
182 	if (inst != NULL) {
183 		while (inst->ri_method_thread != 0 &&
184 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
185 			++inst->ri_method_waiters;
186 			(void) pthread_cond_wait(&inst->ri_method_cv,
187 			    &inst->ri_lock);
188 			assert(inst->ri_method_waiters > 0);
189 			--inst->ri_method_waiters;
190 		}
191 	}
192 
193 	return (inst);
194 }
195 
196 static restarter_inst_t *
197 inst_lookup_queue(const char *name)
198 {
199 	int id;
200 	restarter_inst_t *inst;
201 
202 	id = dict_lookup_byname(name);
203 	if (id == -1)
204 		return (NULL);
205 
206 	MUTEX_LOCK(&instance_list.ril_lock);
207 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
208 	if (inst != NULL)
209 		MUTEX_LOCK(&inst->ri_queue_lock);
210 	MUTEX_UNLOCK(&instance_list.ril_lock);
211 
212 	return (inst);
213 }
214 
215 const char *
216 service_style(int flags)
217 {
218 	switch (flags & RINST_STYLE_MASK) {
219 	case RINST_CONTRACT:	return ("contract");
220 	case RINST_TRANSIENT:	return ("transient");
221 	case RINST_WAIT:	return ("wait");
222 
223 	default:
224 #ifndef NDEBUG
225 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
226 #endif
227 		abort();
228 		/* NOTREACHED */
229 	}
230 }
231 
232 /*
233  * Fails with ECONNABORTED or ECANCELED.
234  */
235 static int
236 check_contract(restarter_inst_t *inst, boolean_t primary,
237     scf_instance_t *scf_inst)
238 {
239 	ctid_t *ctidp;
240 	int fd, r;
241 
242 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
243 	    &inst->ri_i.i_transient_ctid;
244 
245 	assert(*ctidp >= 1);
246 
247 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
248 	if (fd >= 0) {
249 		r = close(fd);
250 		assert(r == 0);
251 		return (0);
252 	}
253 
254 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
255 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
256 	switch (r) {
257 	case 0:
258 	case ECONNABORTED:
259 	case ECANCELED:
260 		*ctidp = 0;
261 		return (r);
262 
263 	case ENOMEM:
264 		uu_die("Out of memory\n");
265 		/* NOTREACHED */
266 
267 	case EPERM:
268 		uu_die("Insufficient privilege.\n");
269 		/* NOTREACHED */
270 
271 	case EACCES:
272 		uu_die("Repository backend access denied.\n");
273 		/* NOTREACHED */
274 
275 	case EROFS:
276 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
277 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
278 		return (0);
279 
280 	case EINVAL:
281 	case EBADF:
282 	default:
283 		assert(0);
284 		abort();
285 		/* NOTREACHED */
286 	}
287 }
288 
289 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
290 
291 /*
292  * int restarter_insert_inst(scf_handle_t *, char *)
293  *   If the inst is already in the restarter list, return its id.  If the inst
294  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
295  *   states, insert it into the list, and return 0.
296  *
297  *   Fails with
298  *     ENOENT - name is not in the repository
299  */
300 static int
301 restarter_insert_inst(scf_handle_t *h, const char *name)
302 {
303 	int id, r;
304 	restarter_inst_t *inst;
305 	uu_list_index_t idx;
306 	scf_service_t *scf_svc;
307 	scf_instance_t *scf_inst;
308 	scf_snapshot_t *snap = NULL;
309 	scf_propertygroup_t *pg;
310 	char *svc_name, *inst_name;
311 	char logfilebuf[PATH_MAX];
312 	char *c;
313 	boolean_t do_commit_states;
314 	restarter_instance_state_t state, next_state;
315 	protocol_states_t *ps;
316 	pid_t start_pid;
317 
318 	MUTEX_LOCK(&instance_list.ril_lock);
319 
320 	/*
321 	 * We don't use inst_lookup_by_name() here because we want the lookup
322 	 * & insert to be atomic.
323 	 */
324 	id = dict_lookup_byname(name);
325 	if (id != -1) {
326 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
327 		    &idx);
328 		if (inst != NULL) {
329 			MUTEX_UNLOCK(&instance_list.ril_lock);
330 			return (0);
331 		}
332 	}
333 
334 	/* Allocate an instance */
335 	inst = startd_zalloc(sizeof (restarter_inst_t));
336 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
337 	inst->ri_utmpx_prefix[0] = '\0';
338 
339 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
340 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
341 
342 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
343 
344 	/*
345 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
346 	 * just in case.
347 	 */
348 	inst->ri_id = (id != -1 ? id : dict_insert(name));
349 
350 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
351 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
352 
353 	scf_svc = safe_scf_service_create(h);
354 	scf_inst = safe_scf_instance_create(h);
355 	pg = safe_scf_pg_create(h);
356 	svc_name = startd_alloc(max_scf_name_size);
357 	inst_name = startd_alloc(max_scf_name_size);
358 
359 rep_retry:
360 	if (snap != NULL)
361 		scf_snapshot_destroy(snap);
362 	if (inst->ri_logstem != NULL)
363 		startd_free(inst->ri_logstem, PATH_MAX);
364 	if (inst->ri_common_name != NULL)
365 		startd_free(inst->ri_common_name, max_scf_value_size);
366 	if (inst->ri_C_common_name != NULL)
367 		startd_free(inst->ri_C_common_name, max_scf_value_size);
368 	snap = NULL;
369 	inst->ri_logstem = NULL;
370 	inst->ri_common_name = NULL;
371 	inst->ri_C_common_name = NULL;
372 
373 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
374 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
375 		switch (scf_error()) {
376 		case SCF_ERROR_CONNECTION_BROKEN:
377 			libscf_handle_rebind(h);
378 			goto rep_retry;
379 
380 		case SCF_ERROR_NOT_FOUND:
381 			goto deleted;
382 		}
383 
384 		uu_die("Can't decode FMRI %s: %s\n", name,
385 		    scf_strerror(scf_error()));
386 	}
387 
388 	/*
389 	 * If there's no running snapshot, then we execute using the editing
390 	 * snapshot.  Pending snapshots will be taken later.
391 	 */
392 	snap = libscf_get_running_snapshot(scf_inst);
393 
394 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
395 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
396 	    0)) {
397 		switch (scf_error()) {
398 		case SCF_ERROR_NOT_SET:
399 			break;
400 
401 		case SCF_ERROR_CONNECTION_BROKEN:
402 			libscf_handle_rebind(h);
403 			goto rep_retry;
404 
405 		default:
406 			assert(0);
407 			abort();
408 		}
409 
410 		goto deleted;
411 	}
412 
413 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
414 	for (c = logfilebuf; *c != '\0'; c++)
415 		if (*c == '/')
416 			*c = '-';
417 
418 	inst->ri_logstem = startd_alloc(PATH_MAX);
419 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
420 	    LOG_SUFFIX);
421 
422 	/*
423 	 * If the restarter group is missing, use uninit/none.  Otherwise,
424 	 * we're probably being restarted & don't want to mess up the states
425 	 * that are there.
426 	 */
427 	state = RESTARTER_STATE_UNINIT;
428 	next_state = RESTARTER_STATE_NONE;
429 
430 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
431 	if (r != 0) {
432 		switch (scf_error()) {
433 		case SCF_ERROR_CONNECTION_BROKEN:
434 			libscf_handle_rebind(h);
435 			goto rep_retry;
436 
437 		case SCF_ERROR_NOT_SET:
438 			goto deleted;
439 
440 		case SCF_ERROR_NOT_FOUND:
441 			/*
442 			 * This shouldn't happen since the graph engine should
443 			 * have initialized the state to uninitialized/none if
444 			 * there was no restarter pg.  In case somebody
445 			 * deleted it, though....
446 			 */
447 			do_commit_states = B_TRUE;
448 			break;
449 
450 		default:
451 			assert(0);
452 			abort();
453 		}
454 	} else {
455 		r = libscf_read_states(pg, &state, &next_state);
456 		if (r != 0) {
457 			do_commit_states = B_TRUE;
458 		} else {
459 			if (next_state != RESTARTER_STATE_NONE) {
460 				/*
461 				 * Force next_state to _NONE since we
462 				 * don't look for method processes.
463 				 */
464 				next_state = RESTARTER_STATE_NONE;
465 				do_commit_states = B_TRUE;
466 			} else {
467 				/*
468 				 * Inform the restarter of our state without
469 				 * changing the STIME in the repository.
470 				 */
471 				ps = startd_alloc(sizeof (*ps));
472 				inst->ri_i.i_state = ps->ps_state = state;
473 				inst->ri_i.i_next_state = ps->ps_state_next =
474 				    next_state;
475 
476 				graph_protocol_send_event(inst->ri_i.i_fmri,
477 				    GRAPH_UPDATE_STATE_CHANGE, ps);
478 
479 				do_commit_states = B_FALSE;
480 			}
481 		}
482 	}
483 
484 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
485 	    &inst->ri_utmpx_prefix)) {
486 	case 0:
487 		break;
488 
489 	case ECONNABORTED:
490 		libscf_handle_rebind(h);
491 		goto rep_retry;
492 
493 	case ECANCELED:
494 		goto deleted;
495 
496 	case ENOENT:
497 		/*
498 		 * This is odd, because the graph engine should have required
499 		 * the general property group.  So we'll just use default
500 		 * flags in anticipation of the graph engine sending us
501 		 * REMOVE_INSTANCE when it finds out that the general property
502 		 * group has been deleted.
503 		 */
504 		inst->ri_flags = RINST_CONTRACT;
505 		break;
506 
507 	default:
508 		assert(0);
509 		abort();
510 	}
511 
512 	switch (libscf_get_template_values(scf_inst, snap,
513 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
514 	case 0:
515 		break;
516 
517 	case ECONNABORTED:
518 		libscf_handle_rebind(h);
519 		goto rep_retry;
520 
521 	case ECANCELED:
522 		goto deleted;
523 
524 	case ECHILD:
525 	case ENOENT:
526 		break;
527 
528 	default:
529 		assert(0);
530 		abort();
531 	}
532 
533 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
534 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
535 	    &start_pid)) {
536 	case 0:
537 		break;
538 
539 	case ECONNABORTED:
540 		libscf_handle_rebind(h);
541 		goto rep_retry;
542 
543 	case ECANCELED:
544 		goto deleted;
545 
546 	default:
547 		assert(0);
548 		abort();
549 	}
550 
551 	if (inst->ri_i.i_primary_ctid >= 1) {
552 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
553 
554 		switch (check_contract(inst, B_TRUE, scf_inst)) {
555 		case 0:
556 			break;
557 
558 		case ECONNABORTED:
559 			libscf_handle_rebind(h);
560 			goto rep_retry;
561 
562 		case ECANCELED:
563 			goto deleted;
564 
565 		default:
566 			assert(0);
567 			abort();
568 		}
569 	}
570 
571 	if (inst->ri_i.i_transient_ctid >= 1) {
572 		switch (check_contract(inst, B_FALSE, scf_inst)) {
573 		case 0:
574 			break;
575 
576 		case ECONNABORTED:
577 			libscf_handle_rebind(h);
578 			goto rep_retry;
579 
580 		case ECANCELED:
581 			goto deleted;
582 
583 		default:
584 			assert(0);
585 			abort();
586 		}
587 	}
588 
589 	/* No more failures we live through, so add it to the list. */
590 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
591 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
592 	MUTEX_LOCK(&inst->ri_lock);
593 	MUTEX_LOCK(&inst->ri_queue_lock);
594 
595 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
596 
597 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
598 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
599 	MUTEX_UNLOCK(&instance_list.ril_lock);
600 
601 	if (start_pid != -1 &&
602 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
603 		int ret;
604 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
605 		if (ret == -1) {
606 			/*
607 			 * Implication:  if we can't reregister the
608 			 * instance, we will start another one.  Two
609 			 * instances may or may not result in a resource
610 			 * conflict.
611 			 */
612 			log_error(LOG_WARNING,
613 			    "%s: couldn't reregister %ld for wait\n",
614 			    inst->ri_i.i_fmri, start_pid);
615 		} else if (ret == 1) {
616 			/*
617 			 * Leading PID has exited.
618 			 */
619 			(void) stop_instance(h, inst, RSTOP_EXIT);
620 		}
621 	}
622 
623 
624 	scf_pg_destroy(pg);
625 
626 	if (do_commit_states)
627 		(void) restarter_instance_update_states(h, inst, state,
628 		    next_state, RERR_NONE, NULL);
629 
630 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
631 	    service_style(inst->ri_flags));
632 
633 	MUTEX_UNLOCK(&inst->ri_queue_lock);
634 	MUTEX_UNLOCK(&inst->ri_lock);
635 
636 	startd_free(svc_name, max_scf_name_size);
637 	startd_free(inst_name, max_scf_name_size);
638 	scf_snapshot_destroy(snap);
639 	scf_instance_destroy(scf_inst);
640 	scf_service_destroy(scf_svc);
641 
642 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
643 	    name);
644 
645 	return (0);
646 
647 deleted:
648 	MUTEX_UNLOCK(&instance_list.ril_lock);
649 	startd_free(inst_name, max_scf_name_size);
650 	startd_free(svc_name, max_scf_name_size);
651 	if (snap != NULL)
652 		scf_snapshot_destroy(snap);
653 	scf_pg_destroy(pg);
654 	scf_instance_destroy(scf_inst);
655 	scf_service_destroy(scf_svc);
656 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
657 	uu_list_destroy(inst->ri_queue);
658 	if (inst->ri_logstem != NULL)
659 		startd_free(inst->ri_logstem, PATH_MAX);
660 	if (inst->ri_common_name != NULL)
661 		startd_free(inst->ri_common_name, max_scf_value_size);
662 	if (inst->ri_C_common_name != NULL)
663 		startd_free(inst->ri_C_common_name, max_scf_value_size);
664 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
665 	startd_free(inst, sizeof (restarter_inst_t));
666 	return (ENOENT);
667 }
668 
669 static void
670 restarter_delete_inst(restarter_inst_t *ri)
671 {
672 	int id;
673 	restarter_inst_t *rip;
674 	void *cookie = NULL;
675 	restarter_instance_qentry_t *e;
676 
677 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
678 
679 	/*
680 	 * Must drop the instance lock so we can pick up the instance_list
681 	 * lock & remove the instance.
682 	 */
683 	id = ri->ri_id;
684 	MUTEX_UNLOCK(&ri->ri_lock);
685 
686 	MUTEX_LOCK(&instance_list.ril_lock);
687 
688 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
689 	if (rip == NULL) {
690 		MUTEX_UNLOCK(&instance_list.ril_lock);
691 		return;
692 	}
693 
694 	assert(ri == rip);
695 
696 	uu_list_remove(instance_list.ril_instance_list, ri);
697 
698 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
699 	    ri->ri_i.i_fmri);
700 
701 	MUTEX_UNLOCK(&instance_list.ril_lock);
702 
703 	/*
704 	 * We can lock the instance without holding the instance_list lock
705 	 * since we removed the instance from the list.
706 	 */
707 	MUTEX_LOCK(&ri->ri_lock);
708 	MUTEX_LOCK(&ri->ri_queue_lock);
709 
710 	if (ri->ri_i.i_primary_ctid >= 1)
711 		contract_hash_remove(ri->ri_i.i_primary_ctid);
712 
713 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
714 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
715 
716 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
717 		startd_free(e, sizeof (*e));
718 	uu_list_destroy(ri->ri_queue);
719 
720 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
721 	startd_free(ri->ri_logstem, PATH_MAX);
722 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
723 	(void) pthread_mutex_destroy(&ri->ri_lock);
724 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
725 	startd_free(ri, sizeof (restarter_inst_t));
726 }
727 
728 /*
729  * instance_is_wait_style()
730  *
731  *   Returns 1 if the given instance is a "wait-style" service instance.
732  */
733 int
734 instance_is_wait_style(restarter_inst_t *inst)
735 {
736 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
737 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
738 }
739 
740 /*
741  * instance_is_transient_style()
742  *
743  *   Returns 1 if the given instance is a transient service instance.
744  */
745 int
746 instance_is_transient_style(restarter_inst_t *inst)
747 {
748 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
749 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
750 }
751 
752 /*
753  * instance_in_transition()
754  * Returns 1 if instance is in transition, 0 if not
755  */
756 int
757 instance_in_transition(restarter_inst_t *inst)
758 {
759 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
760 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
761 		return (0);
762 	return (1);
763 }
764 
765 /*
766  * returns 1 if instance is already started, 0 if not
767  */
768 static int
769 instance_started(restarter_inst_t *inst)
770 {
771 	int ret;
772 
773 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
774 
775 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
776 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
777 		ret = 1;
778 	else
779 		ret = 0;
780 
781 	return (ret);
782 }
783 
784 /*
785  * Returns
786  *   0 - success
787  *   ECONNRESET - success, but h was rebound
788  */
789 int
790 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
791     restarter_instance_state_t new_state,
792     restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
793 {
794 	protocol_states_t *states;
795 	int e;
796 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
797 	boolean_t rebound = B_FALSE;
798 	int prev_state_online;
799 	int state_online;
800 
801 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
802 
803 	prev_state_online = instance_started(ri);
804 
805 retry:
806 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
807 	    aux);
808 	switch (e) {
809 	case 0:
810 		break;
811 
812 	case ENOMEM:
813 		++retry_count;
814 		if (retry_count < ALLOC_RETRY) {
815 			(void) poll(NULL, 0, msecs);
816 			msecs *= ALLOC_DELAY_MULT;
817 			goto retry;
818 		}
819 
820 		/* Like startd_alloc(). */
821 		uu_die("Insufficient memory.\n");
822 		/* NOTREACHED */
823 
824 	case ECONNABORTED:
825 		libscf_handle_rebind(h);
826 		rebound = B_TRUE;
827 		goto retry;
828 
829 	case EPERM:
830 	case EACCES:
831 	case EROFS:
832 		log_error(LOG_NOTICE, "Could not commit state change for %s "
833 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
834 		/* FALLTHROUGH */
835 
836 	case ENOENT:
837 		ri->ri_i.i_state = new_state;
838 		ri->ri_i.i_next_state = new_state_next;
839 		break;
840 
841 	case EINVAL:
842 	default:
843 		bad_error("_restarter_commit_states", e);
844 	}
845 
846 	states = startd_alloc(sizeof (protocol_states_t));
847 	states->ps_state = new_state;
848 	states->ps_state_next = new_state_next;
849 	states->ps_err = err;
850 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
851 	    (void *)states);
852 
853 	state_online = instance_started(ri);
854 
855 	if (prev_state_online && !state_online)
856 		ri->ri_post_offline_hook();
857 	else if (!prev_state_online && state_online)
858 		ri->ri_post_online_hook();
859 
860 	return (rebound ? ECONNRESET : 0);
861 }
862 
863 void
864 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
865 {
866 	restarter_inst_t *inst;
867 
868 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
869 
870 	inst = inst_lookup_by_name(fmri);
871 	if (inst == NULL)
872 		return;
873 
874 	inst->ri_flags |= flag;
875 
876 	MUTEX_UNLOCK(&inst->ri_lock);
877 }
878 
879 static void
880 restarter_take_pending_snapshots(scf_handle_t *h)
881 {
882 	restarter_inst_t *inst;
883 	int r;
884 
885 	MUTEX_LOCK(&instance_list.ril_lock);
886 
887 	for (inst = uu_list_first(instance_list.ril_instance_list);
888 	    inst != NULL;
889 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
890 		const char *fmri;
891 		scf_instance_t *sinst = NULL;
892 
893 		MUTEX_LOCK(&inst->ri_lock);
894 
895 		/*
896 		 * This is where we'd check inst->ri_method_thread and if it
897 		 * were nonzero we'd wait in anticipation of another thread
898 		 * executing a method for inst.  Doing so with the instance_list
899 		 * locked, though, leads to deadlock.  Since taking a snapshot
900 		 * during that window won't hurt anything, we'll just continue.
901 		 */
902 
903 		fmri = inst->ri_i.i_fmri;
904 
905 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
906 			scf_snapshot_t *rsnap;
907 
908 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
909 
910 			rsnap = libscf_get_or_make_running_snapshot(sinst,
911 			    fmri, B_FALSE);
912 
913 			scf_instance_destroy(sinst);
914 
915 			if (rsnap != NULL)
916 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
917 
918 			scf_snapshot_destroy(rsnap);
919 		}
920 
921 		if (inst->ri_flags & RINST_RETAKE_START) {
922 			switch (r = libscf_snapshots_poststart(h, fmri,
923 			    B_FALSE)) {
924 			case 0:
925 			case ENOENT:
926 				inst->ri_flags &= ~RINST_RETAKE_START;
927 				break;
928 
929 			case ECONNABORTED:
930 				break;
931 
932 			case EACCES:
933 			default:
934 				bad_error("libscf_snapshots_poststart", r);
935 			}
936 		}
937 
938 		MUTEX_UNLOCK(&inst->ri_lock);
939 	}
940 
941 	MUTEX_UNLOCK(&instance_list.ril_lock);
942 }
943 
944 /* ARGSUSED */
945 void *
946 restarter_post_fsminimal_thread(void *unused)
947 {
948 	scf_handle_t *h;
949 	int r;
950 
951 	h = libscf_handle_create_bound_loop();
952 
953 	for (;;) {
954 		r = libscf_create_self(h);
955 		if (r == 0)
956 			break;
957 
958 		assert(r == ECONNABORTED);
959 		libscf_handle_rebind(h);
960 	}
961 
962 	restarter_take_pending_snapshots(h);
963 
964 	(void) scf_handle_unbind(h);
965 	scf_handle_destroy(h);
966 
967 	return (NULL);
968 }
969 
970 /*
971  * int stop_instance()
972  *
973  *   Stop the instance identified by the instance given as the second argument,
974  *   for the cause stated.
975  *
976  *   Returns
977  *     0 - success
978  *     -1 - inst is in transition
979  */
980 static int
981 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
982     stop_cause_t cause)
983 {
984 	fork_info_t *info;
985 	const char *cp;
986 	int err;
987 	restarter_error_t re;
988 
989 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
990 	assert(inst->ri_method_thread == 0);
991 
992 	switch (cause) {
993 	case RSTOP_EXIT:
994 		re = RERR_RESTART;
995 		cp = "all processes in service exited";
996 		break;
997 	case RSTOP_CORE:
998 		re = RERR_FAULT;
999 		cp = "process dumped core";
1000 		break;
1001 	case RSTOP_SIGNAL:
1002 		re = RERR_FAULT;
1003 		cp = "process received fatal signal from outside the service";
1004 		break;
1005 	case RSTOP_HWERR:
1006 		re = RERR_FAULT;
1007 		cp = "process killed due to uncorrectable hardware error";
1008 		break;
1009 	case RSTOP_DEPENDENCY:
1010 		re = RERR_RESTART;
1011 		cp = "dependency activity requires stop";
1012 		break;
1013 	case RSTOP_DISABLE:
1014 		re = RERR_RESTART;
1015 		cp = "service disabled";
1016 		break;
1017 	case RSTOP_RESTART:
1018 		re = RERR_RESTART;
1019 		cp = "service restarting";
1020 		break;
1021 	default:
1022 #ifndef NDEBUG
1023 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1024 		    cause, __FILE__, __LINE__);
1025 #endif
1026 		abort();
1027 	}
1028 
1029 	/* Services in the disabled and maintenance state are ignored */
1030 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1031 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1032 		log_framework(LOG_DEBUG,
1033 		    "%s: stop_instance -> is maint/disabled\n",
1034 		    inst->ri_i.i_fmri);
1035 		return (0);
1036 	}
1037 
1038 	/* Already stopped instances are left alone */
1039 	if (instance_started(inst) == 0) {
1040 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1041 		    inst->ri_i.i_fmri);
1042 		return (0);
1043 	}
1044 
1045 	if (instance_in_transition(inst)) {
1046 		/* requeue event by returning -1 */
1047 		log_framework(LOG_DEBUG,
1048 		    "Restarter: Not stopping %s, in transition.\n",
1049 		    inst->ri_i.i_fmri);
1050 		return (-1);
1051 	}
1052 
1053 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1054 
1055 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1056 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1057 
1058 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1059 		/*
1060 		 * No need to stop instance, as child has exited; remove
1061 		 * contract and move the instance to the offline state.
1062 		 */
1063 		switch (err = restarter_instance_update_states(local_handle,
1064 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1065 		    NULL)) {
1066 		case 0:
1067 		case ECONNRESET:
1068 			break;
1069 
1070 		default:
1071 			bad_error("restarter_instance_update_states", err);
1072 		}
1073 
1074 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1075 
1076 		if (inst->ri_i.i_primary_ctid != 0) {
1077 			inst->ri_m_inst =
1078 			    safe_scf_instance_create(local_handle);
1079 			inst->ri_mi_deleted = B_FALSE;
1080 
1081 			libscf_reget_instance(inst);
1082 			method_remove_contract(inst, B_TRUE, B_TRUE);
1083 
1084 			scf_instance_destroy(inst->ri_m_inst);
1085 			inst->ri_m_inst = NULL;
1086 		}
1087 
1088 		switch (err = restarter_instance_update_states(local_handle,
1089 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1090 		    NULL)) {
1091 		case 0:
1092 		case ECONNRESET:
1093 			break;
1094 
1095 		default:
1096 			bad_error("restarter_instance_update_states", err);
1097 		}
1098 
1099 		return (0);
1100 	}
1101 
1102 	switch (err = restarter_instance_update_states(local_handle, inst,
1103 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1104 	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
1105 	case 0:
1106 	case ECONNRESET:
1107 		break;
1108 
1109 	default:
1110 		bad_error("restarter_instance_update_states", err);
1111 	}
1112 
1113 	info = startd_zalloc(sizeof (fork_info_t));
1114 
1115 	info->sf_id = inst->ri_id;
1116 	info->sf_method_type = METHOD_STOP;
1117 	info->sf_event_type = re;
1118 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1119 
1120 	return (0);
1121 }
1122 
1123 /*
1124  * Returns
1125  *   ENOENT - fmri is not in instance_list
1126  *   0 - success
1127  *   ECONNRESET - success, though handle was rebound
1128  *   -1 - instance is in transition
1129  */
1130 int
1131 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1132 {
1133 	restarter_inst_t *rip;
1134 	int r;
1135 
1136 	rip = inst_lookup_by_name(fmri);
1137 	if (rip == NULL)
1138 		return (ENOENT);
1139 
1140 	r = stop_instance(h, rip, flags);
1141 
1142 	MUTEX_UNLOCK(&rip->ri_lock);
1143 
1144 	return (r);
1145 }
1146 
1147 static void
1148 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1149     unmaint_cause_t cause)
1150 {
1151 	ctid_t ctid;
1152 	scf_instance_t *inst;
1153 	int r;
1154 	uint_t tries = 0, msecs = ALLOC_DELAY;
1155 	const char *cp;
1156 
1157 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1158 
1159 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1160 		log_error(LOG_DEBUG, "Restarter: "
1161 		    "Ignoring maintenance off command because %s is not in the "
1162 		    "maintenance state.\n", rip->ri_i.i_fmri);
1163 		return;
1164 	}
1165 
1166 	switch (cause) {
1167 	case RUNMAINT_CLEAR:
1168 		cp = "clear requested";
1169 		break;
1170 	case RUNMAINT_DISABLE:
1171 		cp = "disable requested";
1172 		break;
1173 	default:
1174 #ifndef NDEBUG
1175 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1176 		    cause, __FILE__, __LINE__);
1177 #endif
1178 		abort();
1179 	}
1180 
1181 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1182 	    cp);
1183 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1184 	    "%s.\n", rip->ri_i.i_fmri, cp);
1185 
1186 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1187 	    RESTARTER_STATE_NONE, RERR_RESTART, NULL);
1188 
1189 	/*
1190 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1191 	 * a primary contract.
1192 	 */
1193 	if (rip->ri_i.i_primary_ctid == 0)
1194 		return;
1195 
1196 	ctid = rip->ri_i.i_primary_ctid;
1197 	contract_abandon(ctid);
1198 	rip->ri_i.i_primary_ctid = 0;
1199 
1200 rep_retry:
1201 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1202 	case 0:
1203 		break;
1204 
1205 	case ECONNABORTED:
1206 		libscf_handle_rebind(h);
1207 		goto rep_retry;
1208 
1209 	case ENOENT:
1210 		/* Must have been deleted. */
1211 		return;
1212 
1213 	case EINVAL:
1214 	case ENOTSUP:
1215 	default:
1216 		bad_error("libscf_handle_rebind", r);
1217 	}
1218 
1219 again:
1220 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1221 	switch (r) {
1222 	case 0:
1223 		break;
1224 
1225 	case ENOMEM:
1226 		++tries;
1227 		if (tries < ALLOC_RETRY) {
1228 			(void) poll(NULL, 0, msecs);
1229 			msecs *= ALLOC_DELAY_MULT;
1230 			goto again;
1231 		}
1232 
1233 		uu_die("Insufficient memory.\n");
1234 		/* NOTREACHED */
1235 
1236 	case ECONNABORTED:
1237 		scf_instance_destroy(inst);
1238 		libscf_handle_rebind(h);
1239 		goto rep_retry;
1240 
1241 	case ECANCELED:
1242 		break;
1243 
1244 	case EPERM:
1245 	case EACCES:
1246 	case EROFS:
1247 		log_error(LOG_INFO,
1248 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1249 		    rip->ri_i.i_fmri, strerror(r));
1250 		break;
1251 
1252 	case EINVAL:
1253 	case EBADF:
1254 	default:
1255 		bad_error("restarter_remove_contract", r);
1256 	}
1257 
1258 	scf_instance_destroy(inst);
1259 }
1260 
1261 /*
1262  * enable_inst()
1263  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1264  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1265  *   disabled, move it to offline.  If the event is _DISABLE or
1266  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1267  *
1268  *   Returns
1269  *     0 - success
1270  *     ECONNRESET - h was rebound
1271  */
1272 static int
1273 enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
1274 {
1275 	restarter_instance_state_t state;
1276 	int r;
1277 
1278 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1279 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1280 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1281 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1282 	assert(instance_in_transition(inst) == 0);
1283 
1284 	state = inst->ri_i.i_state;
1285 
1286 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1287 		inst->ri_i.i_enabled = 1;
1288 
1289 		if (state == RESTARTER_STATE_UNINIT ||
1290 		    state == RESTARTER_STATE_DISABLED) {
1291 			/*
1292 			 * B_FALSE: Don't log an error if the log_instance()
1293 			 * fails because it will fail on the miniroot before
1294 			 * install-discovery runs.
1295 			 */
1296 			log_instance(inst, B_FALSE, "Enabled.");
1297 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1298 			    inst->ri_i.i_fmri);
1299 			(void) restarter_instance_update_states(h, inst,
1300 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1301 			    RERR_NONE, NULL);
1302 		} else {
1303 			log_framework(LOG_DEBUG, "Restarter: "
1304 			    "Not changing state of %s for enable command.\n",
1305 			    inst->ri_i.i_fmri);
1306 		}
1307 	} else {
1308 		inst->ri_i.i_enabled = 0;
1309 
1310 		switch (state) {
1311 		case RESTARTER_STATE_ONLINE:
1312 		case RESTARTER_STATE_DEGRADED:
1313 			r = stop_instance(h, inst, RSTOP_DISABLE);
1314 			return (r == ECONNRESET ? 0 : r);
1315 
1316 		case RESTARTER_STATE_OFFLINE:
1317 		case RESTARTER_STATE_UNINIT:
1318 			if (inst->ri_i.i_primary_ctid != 0) {
1319 				inst->ri_m_inst = safe_scf_instance_create(h);
1320 				inst->ri_mi_deleted = B_FALSE;
1321 
1322 				libscf_reget_instance(inst);
1323 				method_remove_contract(inst, B_TRUE, B_TRUE);
1324 
1325 				scf_instance_destroy(inst->ri_m_inst);
1326 			}
1327 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1328 			log_instance(inst, B_FALSE, "Disabled.");
1329 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1330 			    inst->ri_i.i_fmri);
1331 			(void) restarter_instance_update_states(h, inst,
1332 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1333 			    RERR_RESTART, NULL);
1334 			return (0);
1335 
1336 		case RESTARTER_STATE_DISABLED:
1337 			break;
1338 
1339 		case RESTARTER_STATE_MAINT:
1340 			/*
1341 			 * We only want to pull the instance out of maintenance
1342 			 * if the disable is on adminstrative request.  The
1343 			 * graph engine sends _DISABLE events whenever a
1344 			 * service isn't in the disabled state, and we don't
1345 			 * want to pull the service out of maintenance if,
1346 			 * for example, it is there due to a dependency cycle.
1347 			 */
1348 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1349 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1350 			break;
1351 
1352 		default:
1353 #ifndef NDEBUG
1354 			(void) fprintf(stderr, "Restarter instance %s has "
1355 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1356 #endif
1357 			abort();
1358 		}
1359 	}
1360 
1361 	return (0);
1362 }
1363 
1364 static void
1365 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
1366 {
1367 	fork_info_t *info;
1368 
1369 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1370 	assert(instance_in_transition(inst) == 0);
1371 	assert(inst->ri_method_thread == 0);
1372 
1373 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1374 	    inst->ri_i.i_fmri);
1375 
1376 	/* Services in the disabled and maintenance state are ignored */
1377 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1378 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1379 	    inst->ri_i.i_enabled == 0) {
1380 		log_framework(LOG_DEBUG,
1381 		    "%s: start_instance -> is maint/disabled\n",
1382 		    inst->ri_i.i_fmri);
1383 		return;
1384 	}
1385 
1386 	/* Already started instances are left alone */
1387 	if (instance_started(inst) == 1) {
1388 		log_framework(LOG_DEBUG,
1389 		    "%s: start_instance -> is already started\n",
1390 		    inst->ri_i.i_fmri);
1391 		return;
1392 	}
1393 
1394 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1395 
1396 	(void) restarter_instance_update_states(local_handle, inst,
1397 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, NULL);
1398 
1399 	info = startd_zalloc(sizeof (fork_info_t));
1400 
1401 	info->sf_id = inst->ri_id;
1402 	info->sf_method_type = METHOD_START;
1403 	info->sf_event_type = RERR_NONE;
1404 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1405 }
1406 
1407 static void
1408 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1409     const char *aux)
1410 {
1411 	fork_info_t *info;
1412 
1413 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1414 	assert(aux != NULL);
1415 	assert(rip->ri_method_thread == 0);
1416 
1417 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
1418 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1419 	    rip->ri_i.i_fmri, aux);
1420 
1421 	/* Services in the maintenance state are ignored */
1422 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1423 		log_framework(LOG_DEBUG,
1424 		    "%s: maintain_instance -> is already in maintenance\n",
1425 		    rip->ri_i.i_fmri);
1426 		return;
1427 	}
1428 
1429 	if (immediate || !instance_started(rip)) {
1430 		if (rip->ri_i.i_primary_ctid != 0) {
1431 			rip->ri_m_inst = safe_scf_instance_create(h);
1432 			rip->ri_mi_deleted = B_FALSE;
1433 
1434 			libscf_reget_instance(rip);
1435 			method_remove_contract(rip, B_TRUE, B_TRUE);
1436 
1437 			scf_instance_destroy(rip->ri_m_inst);
1438 		}
1439 
1440 		(void) restarter_instance_update_states(h, rip,
1441 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1442 		    (char *)aux);
1443 		return;
1444 	}
1445 
1446 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1447 	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
1448 
1449 	info = startd_zalloc(sizeof (*info));
1450 	info->sf_id = rip->ri_id;
1451 	info->sf_method_type = METHOD_STOP;
1452 	info->sf_event_type = RERR_RESTART;
1453 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1454 }
1455 
1456 static void
1457 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1458 {
1459 	scf_instance_t *inst;
1460 	scf_snapshot_t *snap;
1461 	fork_info_t *info;
1462 	int r;
1463 
1464 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1465 
1466 	log_instance(rip, B_TRUE, "Rereading configuration.");
1467 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1468 	    rip->ri_i.i_fmri);
1469 
1470 rep_retry:
1471 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1472 	switch (r) {
1473 	case 0:
1474 		break;
1475 
1476 	case ECONNABORTED:
1477 		libscf_handle_rebind(h);
1478 		goto rep_retry;
1479 
1480 	case ENOENT:
1481 		/* Must have been deleted. */
1482 		return;
1483 
1484 	case EINVAL:
1485 	case ENOTSUP:
1486 	default:
1487 		bad_error("libscf_fmri_get_instance", r);
1488 	}
1489 
1490 	snap = libscf_get_running_snapshot(inst);
1491 
1492 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1493 	    &rip->ri_utmpx_prefix);
1494 	switch (r) {
1495 	case 0:
1496 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1497 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1498 		break;
1499 
1500 	case ECONNABORTED:
1501 		scf_instance_destroy(inst);
1502 		scf_snapshot_destroy(snap);
1503 		libscf_handle_rebind(h);
1504 		goto rep_retry;
1505 
1506 	case ECANCELED:
1507 	case ENOENT:
1508 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1509 		break;
1510 
1511 	default:
1512 		bad_error("libscf_get_startd_properties", r);
1513 	}
1514 
1515 	if (instance_started(rip)) {
1516 		/* Refresh does not change the state. */
1517 		(void) restarter_instance_update_states(h, rip,
1518 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
1519 
1520 		info = startd_zalloc(sizeof (*info));
1521 		info->sf_id = rip->ri_id;
1522 		info->sf_method_type = METHOD_REFRESH;
1523 		info->sf_event_type = RERR_REFRESH;
1524 
1525 		assert(rip->ri_method_thread == 0);
1526 		rip->ri_method_thread =
1527 		    startd_thread_create(method_thread, info);
1528 	}
1529 
1530 	scf_snapshot_destroy(snap);
1531 	scf_instance_destroy(inst);
1532 }
1533 
/*
 * Printable names for restarter events.  The table is indexed directly by
 * event type (see the log messages that use event_names[...]), so the order
 * of the entries is significant.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE"
};
1540 
1541 /*
1542  * void *restarter_process_events()
1543  *
1544  *   Called in a separate thread to process the events on an instance's
1545  *   queue.  Empties the queue completely, and tries to keep the thread
1546  *   around for a little while after the queue is empty to save on
1547  *   startup costs.
1548  */
1549 static void *
1550 restarter_process_events(void *arg)
1551 {
1552 	scf_handle_t *h;
1553 	restarter_instance_qentry_t *event;
1554 	restarter_inst_t *rip;
1555 	char *fmri = (char *)arg;
1556 	struct timespec to;
1557 
1558 	assert(fmri != NULL);
1559 
1560 	h = libscf_handle_create_bound_loop();
1561 
1562 	/* grab the queue lock */
1563 	rip = inst_lookup_queue(fmri);
1564 	if (rip == NULL)
1565 		goto out;
1566 
1567 again:
1568 
1569 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1570 		restarter_inst_t *inst;
1571 
1572 		/* drop the queue lock */
1573 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1574 
1575 		/*
1576 		 * Grab the inst lock -- this waits until any outstanding
1577 		 * method finishes running.
1578 		 */
1579 		inst = inst_lookup_by_name(fmri);
1580 		if (inst == NULL) {
1581 			/* Getting deleted in the middle isn't an error. */
1582 			goto cont;
1583 		}
1584 
1585 		assert(instance_in_transition(inst) == 0);
1586 
1587 		/* process the event */
1588 		switch (event->riq_type) {
1589 		case RESTARTER_EVENT_TYPE_ENABLE:
1590 		case RESTARTER_EVENT_TYPE_DISABLE:
1591 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1592 			(void) enable_inst(h, inst, event->riq_type);
1593 			break;
1594 
1595 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1596 			restarter_delete_inst(inst);
1597 			inst = NULL;
1598 			goto cont;
1599 
1600 		case RESTARTER_EVENT_TYPE_STOP:
1601 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1602 			break;
1603 
1604 		case RESTARTER_EVENT_TYPE_START:
1605 			start_instance(h, inst);
1606 			break;
1607 
1608 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1609 			maintain_instance(h, inst, 0, "dependency_cycle");
1610 			break;
1611 
1612 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1613 			maintain_instance(h, inst, 0, "invalid_dependency");
1614 			break;
1615 
1616 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1617 			maintain_instance(h, inst, 0, "administrative_request");
1618 			break;
1619 
1620 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1621 			maintain_instance(h, inst, 1, "administrative_request");
1622 			break;
1623 
1624 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1625 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1626 			break;
1627 
1628 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1629 			refresh_instance(h, inst);
1630 			break;
1631 
1632 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1633 			log_framework(LOG_WARNING, "Restarter: "
1634 			    "%s command (for %s) unimplemented.\n",
1635 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1636 			break;
1637 
1638 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1639 			if (!instance_started(inst)) {
1640 				log_framework(LOG_DEBUG, "Restarter: "
1641 				    "Not restarting %s; not running.\n",
1642 				    inst->ri_i.i_fmri);
1643 			} else {
1644 				/*
1645 				 * Stop the instance.  If it can be restarted,
1646 				 * the graph engine will send a new event.
1647 				 */
1648 				(void) stop_instance(h, inst, RSTOP_RESTART);
1649 			}
1650 			break;
1651 
1652 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1653 		default:
1654 #ifndef NDEBUG
1655 			uu_warn("%s:%d: Bad restarter event %d.  "
1656 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1657 #endif
1658 			abort();
1659 		}
1660 
1661 		assert(inst != NULL);
1662 		MUTEX_UNLOCK(&inst->ri_lock);
1663 
1664 cont:
1665 		/* grab the queue lock */
1666 		rip = inst_lookup_queue(fmri);
1667 		if (rip == NULL)
1668 			goto out;
1669 
1670 		/* delete the event */
1671 		uu_list_remove(rip->ri_queue, event);
1672 		startd_free(event, sizeof (restarter_instance_qentry_t));
1673 	}
1674 
1675 	assert(rip != NULL);
1676 
1677 	/*
1678 	 * Try to preserve the thread for a little while for future use.
1679 	 */
1680 	to.tv_sec = 3;
1681 	to.tv_nsec = 0;
1682 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1683 	    &rip->ri_queue_lock, &to);
1684 
1685 	if (uu_list_first(rip->ri_queue) != NULL)
1686 		goto again;
1687 
1688 	rip->ri_queue_thread = 0;
1689 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1690 out:
1691 	(void) scf_handle_unbind(h);
1692 	scf_handle_destroy(h);
1693 	free(fmri);
1694 	return (NULL);
1695 }
1696 
1697 static int
1698 is_admin_event(restarter_event_type_t t) {
1699 
1700 	switch (t) {
1701 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1702 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1703 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1704 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1705 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1706 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1707 		return (1);
1708 	default:
1709 		return (0);
1710 	}
1711 }
1712 
1713 static void
1714 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1715 {
1716 	restarter_instance_qentry_t *qe;
1717 	int r;
1718 
1719 	assert(PTHREAD_MUTEX_HELD(&ri->ri_queue_lock));
1720 	assert(!PTHREAD_MUTEX_HELD(&ri->ri_lock));
1721 
1722 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1723 	qe->riq_type = e->rpe_type;
1724 
1725 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1726 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1727 	assert(r == 0);
1728 }
1729 
1730 /*
1731  * void *restarter_event_thread()
1732  *
1733  *  Handle incoming graph events by placing them on a per-instance
1734  *  queue.  We can't lock the main part of the instance structure, so
1735  *  just modify the seprarately locked event queue portion.
1736  */
1737 /*ARGSUSED*/
1738 static void *
1739 restarter_event_thread(void *unused)
1740 {
1741 	scf_handle_t *h;
1742 
1743 	/*
1744 	 * This is a new thread, and thus, gets its own handle
1745 	 * to the repository.
1746 	 */
1747 	h = libscf_handle_create_bound_loop();
1748 
1749 	MUTEX_LOCK(&ru->restarter_update_lock);
1750 
1751 	/*CONSTCOND*/
1752 	while (1) {
1753 		restarter_protocol_event_t *e;
1754 
1755 		while (ru->restarter_update_wakeup == 0)
1756 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1757 			    &ru->restarter_update_lock);
1758 
1759 		ru->restarter_update_wakeup = 0;
1760 
1761 		while ((e = restarter_event_dequeue()) != NULL) {
1762 			restarter_inst_t *rip;
1763 			char *fmri;
1764 
1765 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1766 
1767 			/*
1768 			 * ADD_INSTANCE is special: there's likely no
1769 			 * instance structure yet, so we need to handle the
1770 			 * addition synchronously.
1771 			 */
1772 			switch (e->rpe_type) {
1773 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1774 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1775 					log_error(LOG_INFO, "Restarter: "
1776 					    "Could not add %s.\n", e->rpe_inst);
1777 
1778 				MUTEX_LOCK(&st->st_load_lock);
1779 				if (--st->st_load_instances == 0)
1780 					(void) pthread_cond_broadcast(
1781 					    &st->st_load_cv);
1782 				MUTEX_UNLOCK(&st->st_load_lock);
1783 
1784 				goto nolookup;
1785 			}
1786 
1787 			/*
1788 			 * Lookup the instance, locking only the event queue.
1789 			 * Can't grab ri_lock here because it might be held
1790 			 * by a long-running method.
1791 			 */
1792 			rip = inst_lookup_queue(e->rpe_inst);
1793 			if (rip == NULL) {
1794 				log_error(LOG_INFO, "Restarter: "
1795 				    "Ignoring %s command for unknown service "
1796 				    "%s.\n", event_names[e->rpe_type],
1797 				    e->rpe_inst);
1798 				goto nolookup;
1799 			}
1800 
1801 			/* Keep ADMIN events from filling up the queue. */
1802 			if (is_admin_event(e->rpe_type) &&
1803 			    uu_list_numnodes(rip->ri_queue) >
1804 			    RINST_QUEUE_THRESHOLD) {
1805 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1806 				log_instance(rip, B_TRUE, "Instance event "
1807 				    "queue overflow.  Dropping administrative "
1808 				    "request.");
1809 				log_framework(LOG_DEBUG, "%s: Instance event "
1810 				    "queue overflow.  Dropping administrative "
1811 				    "request.\n", rip->ri_i.i_fmri);
1812 				goto nolookup;
1813 			}
1814 
1815 			/* Now add the event to the instance queue. */
1816 			restarter_queue_event(rip, e);
1817 
1818 			if (rip->ri_queue_thread == 0) {
1819 				/*
1820 				 * Start a thread if one isn't already
1821 				 * running.
1822 				 */
1823 				fmri = safe_strdup(e->rpe_inst);
1824 				rip->ri_queue_thread =  startd_thread_create(
1825 				    restarter_process_events, (void *)fmri);
1826 			} else {
1827 				/*
1828 				 * Signal the existing thread that there's
1829 				 * a new event.
1830 				 */
1831 				(void) pthread_cond_broadcast(
1832 				    &rip->ri_queue_cv);
1833 			}
1834 
1835 			MUTEX_UNLOCK(&rip->ri_queue_lock);
1836 nolookup:
1837 			restarter_event_release(e);
1838 
1839 			MUTEX_LOCK(&ru->restarter_update_lock);
1840 		}
1841 	}
1842 
1843 	/*
1844 	 * Unreachable for now -- there's currently no graceful cleanup
1845 	 * called on exit().
1846 	 */
1847 	(void) scf_handle_unbind(h);
1848 	scf_handle_destroy(h);
1849 	return (NULL);
1850 }
1851 
1852 static restarter_inst_t *
1853 contract_to_inst(ctid_t ctid)
1854 {
1855 	restarter_inst_t *inst;
1856 	int id;
1857 
1858 	id = lookup_inst_by_contract(ctid);
1859 	if (id == -1)
1860 		return (NULL);
1861 
1862 	inst = inst_lookup_by_id(id);
1863 	if (inst != NULL) {
1864 		/*
1865 		 * Since ri_lock isn't held by the contract id lookup, this
1866 		 * instance may have been restarted and now be in a new
1867 		 * contract, making the old contract no longer valid for this
1868 		 * instance.
1869 		 */
1870 		if (ctid != inst->ri_i.i_primary_ctid) {
1871 			MUTEX_UNLOCK(&inst->ri_lock);
1872 			inst = NULL;
1873 		}
1874 	}
1875 	return (inst);
1876 }
1877 
1878 /*
1879  * void contract_action()
1880  *   Take action on contract events.
1881  */
1882 static void
1883 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
1884     uint32_t type)
1885 {
1886 	const char *fmri = inst->ri_i.i_fmri;
1887 
1888 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1889 
1890 	/*
1891 	 * If startd has stopped this contract, there is no need to
1892 	 * stop it again.
1893 	 */
1894 	if (inst->ri_i.i_primary_ctid > 0 &&
1895 	    inst->ri_i.i_primary_ctid_stopped)
1896 		return;
1897 
1898 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
1899 	    | CT_PR_EV_HWERR)) == 0) {
1900 		/*
1901 		 * There shouldn't be other events, since that's not how we set
1902 		 * the terms. Thus, just log an error and drive on.
1903 		 */
1904 		log_framework(LOG_NOTICE,
1905 		    "%s: contract %ld received unexpected critical event "
1906 		    "(%d)\n", fmri, id, type);
1907 		    return;
1908 	}
1909 
1910 	assert(instance_in_transition(inst) == 0);
1911 
1912 	if (instance_is_wait_style(inst)) {
1913 		/*
1914 		 * We ignore all events; if they impact the
1915 		 * process we're monitoring, then the
1916 		 * wait_thread will stop the instance.
1917 		 */
1918 		log_framework(LOG_DEBUG,
1919 		    "%s: ignoring contract event on wait-style service\n",
1920 		    fmri);
1921 	} else {
1922 		/*
1923 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
1924 		 */
1925 		switch (type) {
1926 		case CT_PR_EV_EMPTY:
1927 			(void) stop_instance(h, inst, RSTOP_EXIT);
1928 			break;
1929 		case CT_PR_EV_CORE:
1930 			(void) stop_instance(h, inst, RSTOP_CORE);
1931 			break;
1932 		case CT_PR_EV_SIGNAL:
1933 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
1934 			break;
1935 		case CT_PR_EV_HWERR:
1936 			(void) stop_instance(h, inst, RSTOP_HWERR);
1937 			break;
1938 		}
1939 	}
1940 }
1941 
1942 /*
1943  * void *restarter_contract_event_thread(void *)
1944  *   Listens to the process contract bundle for critical events, taking action
1945  *   on events from contracts we know we are responsible for.
1946  */
1947 /*ARGSUSED*/
1948 static void *
1949 restarter_contracts_event_thread(void *unused)
1950 {
1951 	int fd, err;
1952 	scf_handle_t *local_handle;
1953 
1954 	/*
1955 	 * Await graph load completion.  That is, stop here, until we've scanned
1956 	 * the repository for contract - instance associations.
1957 	 */
1958 	MUTEX_LOCK(&st->st_load_lock);
1959 	while (!(st->st_load_complete && st->st_load_instances == 0))
1960 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
1961 	MUTEX_UNLOCK(&st->st_load_lock);
1962 
1963 	/*
1964 	 * This is a new thread, and thus, gets its own handle
1965 	 * to the repository.
1966 	 */
1967 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
1968 		uu_die("Unable to bind a new repository handle: %s\n",
1969 		    scf_strerror(scf_error()));
1970 
1971 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
1972 	if (fd == -1)
1973 		uu_die("process bundle open failed");
1974 
1975 	/*
1976 	 * Make sure we get all events (including those generated by configd
1977 	 * before this thread was started).
1978 	 */
1979 	err = ct_event_reset(fd);
1980 	assert(err == 0);
1981 
1982 	for (;;) {
1983 		int efd, sfd;
1984 		ct_evthdl_t ev;
1985 		uint32_t type;
1986 		ctevid_t evid;
1987 		ct_stathdl_t status;
1988 		ctid_t ctid;
1989 		restarter_inst_t *inst;
1990 		uint64_t cookie;
1991 
1992 		if (err = ct_event_read_critical(fd, &ev)) {
1993 			log_error(LOG_WARNING,
1994 			    "Error reading next contract event: %s",
1995 			    strerror(err));
1996 			continue;
1997 		}
1998 
1999 		evid = ct_event_get_evid(ev);
2000 		ctid = ct_event_get_ctid(ev);
2001 		type = ct_event_get_type(ev);
2002 
2003 		/* Fetch cookie. */
2004 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2005 		    < 0) {
2006 			ct_event_free(ev);
2007 			continue;
2008 		}
2009 
2010 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2011 			log_framework(LOG_WARNING, "Could not get status for "
2012 			    "contract %ld: %s\n", ctid, strerror(err));
2013 
2014 			startd_close(sfd);
2015 			ct_event_free(ev);
2016 			continue;
2017 		}
2018 
2019 		cookie = ct_status_get_cookie(status);
2020 
2021 		ct_status_free(status);
2022 
2023 		startd_close(sfd);
2024 
2025 		/*
2026 		 * svc.configd(1M) restart handling performed by the
2027 		 * fork_configd_thread.  We don't acknowledge, as that thread
2028 		 * will do so.
2029 		 */
2030 		if (cookie == CONFIGD_COOKIE) {
2031 			ct_event_free(ev);
2032 			continue;
2033 		}
2034 
2035 		inst = contract_to_inst(ctid);
2036 		if (inst == NULL) {
2037 			/*
2038 			 * This can happen if we receive an EMPTY
2039 			 * event for an abandoned contract.
2040 			 */
2041 			log_framework(LOG_DEBUG,
2042 			    "Received event %d for unknown contract id "
2043 			    "%ld\n", type, ctid);
2044 		} else {
2045 			log_framework(LOG_DEBUG,
2046 			    "Received event %d for contract id "
2047 			    "%ld (%s)\n", type, ctid,
2048 			    inst->ri_i.i_fmri);
2049 
2050 			contract_action(local_handle, inst, ctid, type);
2051 
2052 			MUTEX_UNLOCK(&inst->ri_lock);
2053 		}
2054 
2055 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2056 		    O_WRONLY);
2057 		if (efd != -1) {
2058 			(void) ct_ctl_ack(efd, evid);
2059 			startd_close(efd);
2060 		}
2061 
2062 		ct_event_free(ev);
2063 
2064 	}
2065 
2066 	/*NOTREACHED*/
2067 	return (NULL);
2068 }
2069 
2070 /*
2071  * Timeout queue, processed by restarter_timeouts_event_thread().
2072  */
2073 timeout_queue_t *timeouts;
2074 static uu_list_pool_t *timeout_pool;
2075 
2076 typedef struct timeout_update {
2077 	pthread_mutex_t		tu_lock;
2078 	pthread_cond_t		tu_cv;
2079 	int			tu_wakeup;
2080 } timeout_update_t;
2081 
2082 timeout_update_t *tu;
2083 
2084 static const char *timeout_ovr_svcs[] = {
2085 	"svc:/system/manifest-import:default",
2086 	"svc:/network/initial:default",
2087 	"svc:/network/service:default",
2088 	"svc:/system/rmtmpfiles:default",
2089 	"svc:/network/loopback:default",
2090 	"svc:/network/physical:default",
2091 	"svc:/system/device/local:default",
2092 	"svc:/system/metainit:default",
2093 	"svc:/system/filesystem/usr:default",
2094 	"svc:/system/filesystem/minimal:default",
2095 	"svc:/system/filesystem/local:default",
2096 	NULL
2097 };
2098 
2099 int
2100 is_timeout_ovr(restarter_inst_t *inst)
2101 {
2102 	int i;
2103 
2104 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2105 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2106 			log_instance(inst, B_TRUE, "Timeout override by "
2107 			    "svc.startd.  Using infinite timeout");
2108 			return (1);
2109 		}
2110 	}
2111 
2112 	return (0);
2113 }
2114 
2115 /*ARGSUSED*/
2116 static int
2117 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2118 {
2119 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2120 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2121 
2122 	if (t1 > t2)
2123 		return (1);
2124 	else if (t1 < t2)
2125 		return (-1);
2126 	return (0);
2127 }
2128 
2129 void
2130 timeout_init()
2131 {
2132 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2133 
2134 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2135 
2136 	timeout_pool = startd_list_pool_create("timeouts",
2137 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2138 	    timeout_compare, UU_LIST_POOL_DEBUG);
2139 	assert(timeout_pool != NULL);
2140 
2141 	timeouts->tq_list = startd_list_create(timeout_pool,
2142 	    timeouts, UU_LIST_SORTED);
2143 	assert(timeouts->tq_list != NULL);
2144 
2145 	tu = startd_zalloc(sizeof (timeout_update_t));
2146 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2147 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2148 }
2149 
2150 void
2151 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2152 {
2153 	hrtime_t now, timeout;
2154 	timeout_entry_t *entry;
2155 	uu_list_index_t idx;
2156 
2157 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2158 
2159 	now = gethrtime();
2160 
2161 	/*
2162 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2163 	 * just return.
2164 	 */
2165 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2166 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2167 		    "treating as infinite.");
2168 		return;
2169 	}
2170 
2171 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2172 	timeout = now + (timeout_sec * 1000000000LL);
2173 
2174 	entry = startd_alloc(sizeof (timeout_entry_t));
2175 	entry->te_timeout = timeout;
2176 	entry->te_ctid = cid;
2177 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2178 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2179 	entry->te_fired = 0;
2180 	/* Insert the calculated timeout time onto the queue. */
2181 	MUTEX_LOCK(&timeouts->tq_lock);
2182 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2183 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2184 	uu_list_insert(timeouts->tq_list, entry, idx);
2185 	MUTEX_UNLOCK(&timeouts->tq_lock);
2186 
2187 	assert(inst->ri_timeout == NULL);
2188 	inst->ri_timeout = entry;
2189 
2190 	MUTEX_LOCK(&tu->tu_lock);
2191 	tu->tu_wakeup = 1;
2192 	(void) pthread_cond_broadcast(&tu->tu_cv);
2193 	MUTEX_UNLOCK(&tu->tu_lock);
2194 }
2195 
2196 
2197 void
2198 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2199 {
2200 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2201 
2202 	if (inst->ri_timeout == NULL)
2203 		return;
2204 
2205 	assert(inst->ri_timeout->te_ctid == cid);
2206 
2207 	MUTEX_LOCK(&timeouts->tq_lock);
2208 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2209 	MUTEX_UNLOCK(&timeouts->tq_lock);
2210 
2211 	free(inst->ri_timeout->te_fmri);
2212 	free(inst->ri_timeout->te_logstem);
2213 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2214 	inst->ri_timeout = NULL;
2215 }
2216 
2217 static int
2218 timeout_now()
2219 {
2220 	timeout_entry_t *e;
2221 	hrtime_t now;
2222 	int ret;
2223 
2224 	now = gethrtime();
2225 
2226 	/*
2227 	 * Walk through the (sorted) timeouts list.  While the timeout
2228 	 * at the head of the list is <= the current time, kill the
2229 	 * method.
2230 	 */
2231 	MUTEX_LOCK(&timeouts->tq_lock);
2232 
2233 	for (e = uu_list_first(timeouts->tq_list);
2234 	    e != NULL && e->te_timeout <= now;
2235 	    e = uu_list_next(timeouts->tq_list, e)) {
2236 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2237 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2238 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2239 		    "Method or service exit timed out.  Killing contract %ld",
2240 		    e->te_ctid);
2241 		e->te_fired = 1;
2242 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2243 	}
2244 
2245 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2246 		ret = 0;
2247 	else
2248 		ret = -1;
2249 
2250 	MUTEX_UNLOCK(&timeouts->tq_lock);
2251 
2252 	return (ret);
2253 }
2254 
2255 /*
2256  * void *restarter_timeouts_event_thread(void *)
2257  *   Responsible for monitoring the method timeouts.  This thread must
2258  *   be started before any methods are called.
2259  */
2260 /*ARGSUSED*/
2261 static void *
2262 restarter_timeouts_event_thread(void *unused)
2263 {
2264 	/*
2265 	 * Timeouts are entered on a priority queue, which is processed by
2266 	 * this thread.  As timeouts are specified in seconds, we'll do
2267 	 * the necessary processing every second, as long as the queue
2268 	 * is not empty.
2269 	 */
2270 
2271 	/*CONSTCOND*/
2272 	while (1) {
2273 		/*
2274 		 * As long as the timeout list isn't empty, process it
2275 		 * every second.
2276 		 */
2277 		if (timeout_now() == 0) {
2278 			(void) sleep(1);
2279 			continue;
2280 		}
2281 
2282 		/* The list is empty, wait until we have more timeouts. */
2283 		MUTEX_LOCK(&tu->tu_lock);
2284 
2285 		while (tu->tu_wakeup == 0)
2286 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2287 
2288 		tu->tu_wakeup = 0;
2289 		MUTEX_UNLOCK(&tu->tu_lock);
2290 	}
2291 
2292 	return (NULL);
2293 }
2294 
2295 void
2296 restarter_start()
2297 {
2298 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2299 	(void) startd_thread_create(restarter_event_thread, NULL);
2300 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2301 	(void) startd_thread_create(wait_thread, NULL);
2302 }
2303 
2304 
2305 void
2306 restarter_init()
2307 {
2308 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2309 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2310 		ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2311 	(void) memset(&instance_list, 0, sizeof (instance_list));
2312 
2313 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2314 	instance_list.ril_instance_list = startd_list_create(
2315 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2316 
2317 	restarter_queue_pool = startd_list_pool_create(
2318 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2319 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2320 	    UU_LIST_POOL_DEBUG);
2321 
2322 	contract_list_pool = startd_list_pool_create(
2323 	    "contract_list", sizeof (contract_entry_t),
2324 	    offsetof(contract_entry_t,  ce_link), NULL,
2325 	    UU_LIST_POOL_DEBUG);
2326 	contract_hash_init();
2327 
2328 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2329 }
2330