xref: /titanic_51/usr/src/cmd/svc/startd/restarter.c (revision 2c8230b0dc207870ae2a092351f10fe53091275b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * restarter.c - service manipulation
29  *
30  * This component manages services whose restarter is svc.startd, the standard
31  * restarter.  It translates restarter protocol events from the graph engine
32  * into actions on processes, as a delegated restarter would do.
33  *
34  * The master restarter manages a number of always-running threads:
35  *   - restarter event thread: events from the graph engine
36  *   - timeout thread: thread to fire queued timeouts
37  *   - contract thread: thread to handle contract events
38  *   - wait thread: thread to handle wait-based services
39  *
40  * The other threads are created as-needed:
41  *   - per-instance method threads
42  *   - per-instance event processing threads
43  *
44  * The interaction of all threads must result in the following conditions
45  * being satisfied (on a per-instance basis):
46  *   - restarter events must be processed in order
47  *   - method execution must be serialized
48  *   - instance delete must be held until outstanding methods are complete
49  *   - contract events shouldn't be processed while a method is running
50  *   - timeouts should fire even when a method is running
51  *
52  * Service instances are represented by restarter_inst_t's and are kept in the
53  * instance_list list.
54  *
55  * Service States
56  *   The current state of a service instance is kept in
57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
58  *   some time, then before we effect the transition we set
59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60  *   rotate i_next_state to i_state and set i_next_state to
61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
62  *   held.  The exception is when we launch methods, which are done with
63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
64  *   method_thread() does, we set ri_method_thread to the thread id of the
65  *   method thread, and when it is nonzero any thread with a different thread id
66  *   waits on ri_method_cv.
67  *
68  * Method execution is serialized by blocking on ri_method_cv in
69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
70  * also prevents the instance structure from being deleted until all
71  * outstanding operations such as method_thread() have finished.
72  *
73  * Lock ordering:
74  *
75  * dgraph_lock [can be held when taking:]
76  *   utmpx_lock
77  *   dictionary->dict_lock
78  *   st->st_load_lock
79  *   wait_info_lock
80  *   ru->restarter_update_lock
81  *     restarter_queue->rpeq_lock
82  *   instance_list.ril_lock
83  *     inst->ri_lock
84  *   st->st_configd_live_lock
85  *
86  * instance_list.ril_lock
87  *   graph_queue->gpeq_lock
88  *   gu->gu_lock
89  *   st->st_configd_live_lock
90  *   dictionary->dict_lock
91  *   inst->ri_lock
92  *     graph_queue->gpeq_lock
93  *     gu->gu_lock
94  *     tu->tu_lock
95  *     tq->tq_lock
96  *     inst->ri_queue_lock
97  *       wait_info_lock
98  *       bp->cb_lock
99  *     utmpx_lock
100  *
101  * single_user_thread_lock
102  *   wait_info_lock
103  *   utmpx_lock
104  *
105  * gu_freeze_lock
106  *
107  * logbuf_mutex nests inside pretty much everything.
108  */
109 
110 #include <sys/contract/process.h>
111 #include <sys/ctfs.h>
112 #include <sys/stat.h>
113 #include <sys/time.h>
114 #include <sys/types.h>
115 #include <sys/uio.h>
116 #include <sys/wait.h>
117 #include <assert.h>
118 #include <errno.h>
119 #include <fcntl.h>
120 #include <libcontract.h>
121 #include <libcontract_priv.h>
122 #include <libintl.h>
123 #include <librestart.h>
124 #include <librestart_priv.h>
125 #include <libuutil.h>
126 #include <limits.h>
127 #include <poll.h>
128 #include <port.h>
129 #include <pthread.h>
130 #include <stdarg.h>
131 #include <stdio.h>
132 #include <strings.h>
133 #include <unistd.h>
134 
135 #include "startd.h"
136 #include "protocol.h"
137 
/* uu_list pool used to initialize the ri_link node of each restarter_inst_t. */
static uu_list_pool_t *restarter_instance_pool;
/* The managed instances (ril_instance_list), guarded by ril_lock. */
static restarter_instance_list_t instance_list;

/* uu_list pool from which each instance's ri_queue list is created. */
static uu_list_pool_t *restarter_queue_pool;
142 
143 /*
144  * Function used to reset the restart times for an instance, when
145  * an administrative task comes along and essentially makes the times
146  * in this array ineffective.
147  */
148 static void
149 reset_start_times(restarter_inst_t *inst)
150 {
151 	inst->ri_start_index = 0;
152 	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
153 }
154 
155 /*ARGSUSED*/
156 static int
157 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
158     void *private)
159 {
160 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
161 	int rc_id = *(int *)rc_arg;
162 
163 	if (lc_id > rc_id)
164 		return (1);
165 	if (lc_id < rc_id)
166 		return (-1);
167 	return (0);
168 }
169 
170 static restarter_inst_t *
171 inst_lookup_by_name(const char *name)
172 {
173 	int id;
174 
175 	id = dict_lookup_byname(name);
176 	if (id == -1)
177 		return (NULL);
178 
179 	return (inst_lookup_by_id(id));
180 }
181 
182 restarter_inst_t *
183 inst_lookup_by_id(int id)
184 {
185 	restarter_inst_t *inst;
186 
187 	MUTEX_LOCK(&instance_list.ril_lock);
188 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
189 	if (inst != NULL)
190 		MUTEX_LOCK(&inst->ri_lock);
191 	MUTEX_UNLOCK(&instance_list.ril_lock);
192 
193 	if (inst != NULL) {
194 		while (inst->ri_method_thread != 0 &&
195 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
196 			++inst->ri_method_waiters;
197 			(void) pthread_cond_wait(&inst->ri_method_cv,
198 			    &inst->ri_lock);
199 			assert(inst->ri_method_waiters > 0);
200 			--inst->ri_method_waiters;
201 		}
202 	}
203 
204 	return (inst);
205 }
206 
207 static restarter_inst_t *
208 inst_lookup_queue(const char *name)
209 {
210 	int id;
211 	restarter_inst_t *inst;
212 
213 	id = dict_lookup_byname(name);
214 	if (id == -1)
215 		return (NULL);
216 
217 	MUTEX_LOCK(&instance_list.ril_lock);
218 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
219 	if (inst != NULL)
220 		MUTEX_LOCK(&inst->ri_queue_lock);
221 	MUTEX_UNLOCK(&instance_list.ril_lock);
222 
223 	return (inst);
224 }
225 
226 const char *
227 service_style(int flags)
228 {
229 	switch (flags & RINST_STYLE_MASK) {
230 	case RINST_CONTRACT:	return ("contract");
231 	case RINST_TRANSIENT:	return ("transient");
232 	case RINST_WAIT:	return ("wait");
233 
234 	default:
235 #ifndef NDEBUG
236 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
237 #endif
238 		abort();
239 		/* NOTREACHED */
240 	}
241 }
242 
243 /*
244  * Fails with ECONNABORTED or ECANCELED.
245  */
246 static int
247 check_contract(restarter_inst_t *inst, boolean_t primary,
248     scf_instance_t *scf_inst)
249 {
250 	ctid_t *ctidp;
251 	int fd, r;
252 
253 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
254 	    &inst->ri_i.i_transient_ctid;
255 
256 	assert(*ctidp >= 1);
257 
258 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
259 	if (fd >= 0) {
260 		r = close(fd);
261 		assert(r == 0);
262 		return (0);
263 	}
264 
265 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
266 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
267 	switch (r) {
268 	case 0:
269 	case ECONNABORTED:
270 	case ECANCELED:
271 		*ctidp = 0;
272 		return (r);
273 
274 	case ENOMEM:
275 		uu_die("Out of memory\n");
276 		/* NOTREACHED */
277 
278 	case EPERM:
279 		uu_die("Insufficient privilege.\n");
280 		/* NOTREACHED */
281 
282 	case EACCES:
283 		uu_die("Repository backend access denied.\n");
284 		/* NOTREACHED */
285 
286 	case EROFS:
287 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
288 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
289 		return (0);
290 
291 	case EINVAL:
292 	case EBADF:
293 	default:
294 		assert(0);
295 		abort();
296 		/* NOTREACHED */
297 	}
298 }
299 
/* Forward declaration: restarter_insert_inst() calls this before its definition. */
static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
301 
302 /*
303  * int restarter_insert_inst(scf_handle_t *, char *)
304  *   If the inst is already in the restarter list, return its id.  If the inst
305  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
306  *   states, insert it into the list, and return 0.
307  *
308  *   Fails with
309  *     ENOENT - name is not in the repository
310  */
311 static int
312 restarter_insert_inst(scf_handle_t *h, const char *name)
313 {
314 	int id, r;
315 	restarter_inst_t *inst;
316 	uu_list_index_t idx;
317 	scf_service_t *scf_svc;
318 	scf_instance_t *scf_inst;
319 	scf_snapshot_t *snap = NULL;
320 	scf_propertygroup_t *pg;
321 	char *svc_name, *inst_name;
322 	char logfilebuf[PATH_MAX];
323 	char *c;
324 	boolean_t do_commit_states;
325 	restarter_instance_state_t state, next_state;
326 	protocol_states_t *ps;
327 	pid_t start_pid;
328 
329 	MUTEX_LOCK(&instance_list.ril_lock);
330 
331 	/*
332 	 * We don't use inst_lookup_by_name() here because we want the lookup
333 	 * & insert to be atomic.
334 	 */
335 	id = dict_lookup_byname(name);
336 	if (id != -1) {
337 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
338 		    &idx);
339 		if (inst != NULL) {
340 			MUTEX_UNLOCK(&instance_list.ril_lock);
341 			return (0);
342 		}
343 	}
344 
345 	/* Allocate an instance */
346 	inst = startd_zalloc(sizeof (restarter_inst_t));
347 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
348 	inst->ri_utmpx_prefix[0] = '\0';
349 
350 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
351 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
352 
353 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
354 
355 	/*
356 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
357 	 * just in case.
358 	 */
359 	inst->ri_id = (id != -1 ? id : dict_insert(name));
360 
361 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
362 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
363 
364 	scf_svc = safe_scf_service_create(h);
365 	scf_inst = safe_scf_instance_create(h);
366 	pg = safe_scf_pg_create(h);
367 	svc_name = startd_alloc(max_scf_name_size);
368 	inst_name = startd_alloc(max_scf_name_size);
369 
370 rep_retry:
371 	if (snap != NULL)
372 		scf_snapshot_destroy(snap);
373 	if (inst->ri_logstem != NULL)
374 		startd_free(inst->ri_logstem, PATH_MAX);
375 	if (inst->ri_common_name != NULL)
376 		startd_free(inst->ri_common_name, max_scf_value_size);
377 	if (inst->ri_C_common_name != NULL)
378 		startd_free(inst->ri_C_common_name, max_scf_value_size);
379 	snap = NULL;
380 	inst->ri_logstem = NULL;
381 	inst->ri_common_name = NULL;
382 	inst->ri_C_common_name = NULL;
383 
384 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
385 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
386 		switch (scf_error()) {
387 		case SCF_ERROR_CONNECTION_BROKEN:
388 			libscf_handle_rebind(h);
389 			goto rep_retry;
390 
391 		case SCF_ERROR_NOT_FOUND:
392 			goto deleted;
393 		}
394 
395 		uu_die("Can't decode FMRI %s: %s\n", name,
396 		    scf_strerror(scf_error()));
397 	}
398 
399 	/*
400 	 * If there's no running snapshot, then we execute using the editing
401 	 * snapshot.  Pending snapshots will be taken later.
402 	 */
403 	snap = libscf_get_running_snapshot(scf_inst);
404 
405 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
406 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
407 	    0)) {
408 		switch (scf_error()) {
409 		case SCF_ERROR_NOT_SET:
410 			break;
411 
412 		case SCF_ERROR_CONNECTION_BROKEN:
413 			libscf_handle_rebind(h);
414 			goto rep_retry;
415 
416 		default:
417 			assert(0);
418 			abort();
419 		}
420 
421 		goto deleted;
422 	}
423 
424 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
425 	for (c = logfilebuf; *c != '\0'; c++)
426 		if (*c == '/')
427 			*c = '-';
428 
429 	inst->ri_logstem = startd_alloc(PATH_MAX);
430 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
431 	    LOG_SUFFIX);
432 
433 	/*
434 	 * If the restarter group is missing, use uninit/none.  Otherwise,
435 	 * we're probably being restarted & don't want to mess up the states
436 	 * that are there.
437 	 */
438 	state = RESTARTER_STATE_UNINIT;
439 	next_state = RESTARTER_STATE_NONE;
440 
441 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
442 	if (r != 0) {
443 		switch (scf_error()) {
444 		case SCF_ERROR_CONNECTION_BROKEN:
445 			libscf_handle_rebind(h);
446 			goto rep_retry;
447 
448 		case SCF_ERROR_NOT_SET:
449 			goto deleted;
450 
451 		case SCF_ERROR_NOT_FOUND:
452 			/*
453 			 * This shouldn't happen since the graph engine should
454 			 * have initialized the state to uninitialized/none if
455 			 * there was no restarter pg.  In case somebody
456 			 * deleted it, though....
457 			 */
458 			do_commit_states = B_TRUE;
459 			break;
460 
461 		default:
462 			assert(0);
463 			abort();
464 		}
465 	} else {
466 		r = libscf_read_states(pg, &state, &next_state);
467 		if (r != 0) {
468 			do_commit_states = B_TRUE;
469 		} else {
470 			if (next_state != RESTARTER_STATE_NONE) {
471 				/*
472 				 * Force next_state to _NONE since we
473 				 * don't look for method processes.
474 				 */
475 				next_state = RESTARTER_STATE_NONE;
476 				do_commit_states = B_TRUE;
477 			} else {
478 				/*
479 				 * Inform the restarter of our state without
480 				 * changing the STIME in the repository.
481 				 */
482 				ps = startd_alloc(sizeof (*ps));
483 				inst->ri_i.i_state = ps->ps_state = state;
484 				inst->ri_i.i_next_state = ps->ps_state_next =
485 				    next_state;
486 
487 				graph_protocol_send_event(inst->ri_i.i_fmri,
488 				    GRAPH_UPDATE_STATE_CHANGE, ps);
489 
490 				do_commit_states = B_FALSE;
491 			}
492 		}
493 	}
494 
495 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
496 	    &inst->ri_utmpx_prefix)) {
497 	case 0:
498 		break;
499 
500 	case ECONNABORTED:
501 		libscf_handle_rebind(h);
502 		goto rep_retry;
503 
504 	case ECANCELED:
505 		goto deleted;
506 
507 	case ENOENT:
508 		/*
509 		 * This is odd, because the graph engine should have required
510 		 * the general property group.  So we'll just use default
511 		 * flags in anticipation of the graph engine sending us
512 		 * REMOVE_INSTANCE when it finds out that the general property
513 		 * group has been deleted.
514 		 */
515 		inst->ri_flags = RINST_CONTRACT;
516 		break;
517 
518 	default:
519 		assert(0);
520 		abort();
521 	}
522 
523 	switch (libscf_get_template_values(scf_inst, snap,
524 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
525 	case 0:
526 		break;
527 
528 	case ECONNABORTED:
529 		libscf_handle_rebind(h);
530 		goto rep_retry;
531 
532 	case ECANCELED:
533 		goto deleted;
534 
535 	case ECHILD:
536 	case ENOENT:
537 		break;
538 
539 	default:
540 		assert(0);
541 		abort();
542 	}
543 
544 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
545 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
546 	    &start_pid)) {
547 	case 0:
548 		break;
549 
550 	case ECONNABORTED:
551 		libscf_handle_rebind(h);
552 		goto rep_retry;
553 
554 	case ECANCELED:
555 		goto deleted;
556 
557 	default:
558 		assert(0);
559 		abort();
560 	}
561 
562 	if (inst->ri_i.i_primary_ctid >= 1) {
563 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
564 
565 		switch (check_contract(inst, B_TRUE, scf_inst)) {
566 		case 0:
567 			break;
568 
569 		case ECONNABORTED:
570 			libscf_handle_rebind(h);
571 			goto rep_retry;
572 
573 		case ECANCELED:
574 			goto deleted;
575 
576 		default:
577 			assert(0);
578 			abort();
579 		}
580 	}
581 
582 	if (inst->ri_i.i_transient_ctid >= 1) {
583 		switch (check_contract(inst, B_FALSE, scf_inst)) {
584 		case 0:
585 			break;
586 
587 		case ECONNABORTED:
588 			libscf_handle_rebind(h);
589 			goto rep_retry;
590 
591 		case ECANCELED:
592 			goto deleted;
593 
594 		default:
595 			assert(0);
596 			abort();
597 		}
598 	}
599 
600 	/* No more failures we live through, so add it to the list. */
601 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
602 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
603 	MUTEX_LOCK(&inst->ri_lock);
604 	MUTEX_LOCK(&inst->ri_queue_lock);
605 
606 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
607 
608 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
609 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
610 	MUTEX_UNLOCK(&instance_list.ril_lock);
611 
612 	if (start_pid != -1 &&
613 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
614 		int ret;
615 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
616 		if (ret == -1) {
617 			/*
618 			 * Implication:  if we can't reregister the
619 			 * instance, we will start another one.  Two
620 			 * instances may or may not result in a resource
621 			 * conflict.
622 			 */
623 			log_error(LOG_WARNING,
624 			    "%s: couldn't reregister %ld for wait\n",
625 			    inst->ri_i.i_fmri, start_pid);
626 		} else if (ret == 1) {
627 			/*
628 			 * Leading PID has exited.
629 			 */
630 			(void) stop_instance(h, inst, RSTOP_EXIT);
631 		}
632 	}
633 
634 
635 	scf_pg_destroy(pg);
636 
637 	if (do_commit_states)
638 		(void) restarter_instance_update_states(h, inst, state,
639 		    next_state, RERR_NONE, NULL);
640 
641 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
642 	    service_style(inst->ri_flags));
643 
644 	MUTEX_UNLOCK(&inst->ri_queue_lock);
645 	MUTEX_UNLOCK(&inst->ri_lock);
646 
647 	startd_free(svc_name, max_scf_name_size);
648 	startd_free(inst_name, max_scf_name_size);
649 	scf_snapshot_destroy(snap);
650 	scf_instance_destroy(scf_inst);
651 	scf_service_destroy(scf_svc);
652 
653 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
654 	    name);
655 
656 	return (0);
657 
658 deleted:
659 	MUTEX_UNLOCK(&instance_list.ril_lock);
660 	startd_free(inst_name, max_scf_name_size);
661 	startd_free(svc_name, max_scf_name_size);
662 	if (snap != NULL)
663 		scf_snapshot_destroy(snap);
664 	scf_pg_destroy(pg);
665 	scf_instance_destroy(scf_inst);
666 	scf_service_destroy(scf_svc);
667 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
668 	uu_list_destroy(inst->ri_queue);
669 	if (inst->ri_logstem != NULL)
670 		startd_free(inst->ri_logstem, PATH_MAX);
671 	if (inst->ri_common_name != NULL)
672 		startd_free(inst->ri_common_name, max_scf_value_size);
673 	if (inst->ri_C_common_name != NULL)
674 		startd_free(inst->ri_C_common_name, max_scf_value_size);
675 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
676 	startd_free(inst, sizeof (restarter_inst_t));
677 	return (ENOENT);
678 }
679 
680 static void
681 restarter_delete_inst(restarter_inst_t *ri)
682 {
683 	int id;
684 	restarter_inst_t *rip;
685 	void *cookie = NULL;
686 	restarter_instance_qentry_t *e;
687 
688 	assert(MUTEX_HELD(&ri->ri_lock));
689 
690 	/*
691 	 * Must drop the instance lock so we can pick up the instance_list
692 	 * lock & remove the instance.
693 	 */
694 	id = ri->ri_id;
695 	MUTEX_UNLOCK(&ri->ri_lock);
696 
697 	MUTEX_LOCK(&instance_list.ril_lock);
698 
699 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
700 	if (rip == NULL) {
701 		MUTEX_UNLOCK(&instance_list.ril_lock);
702 		return;
703 	}
704 
705 	assert(ri == rip);
706 
707 	uu_list_remove(instance_list.ril_instance_list, ri);
708 
709 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
710 	    ri->ri_i.i_fmri);
711 
712 	MUTEX_UNLOCK(&instance_list.ril_lock);
713 
714 	/*
715 	 * We can lock the instance without holding the instance_list lock
716 	 * since we removed the instance from the list.
717 	 */
718 	MUTEX_LOCK(&ri->ri_lock);
719 	MUTEX_LOCK(&ri->ri_queue_lock);
720 
721 	if (ri->ri_i.i_primary_ctid >= 1)
722 		contract_hash_remove(ri->ri_i.i_primary_ctid);
723 
724 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
725 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
726 
727 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
728 		startd_free(e, sizeof (*e));
729 	uu_list_destroy(ri->ri_queue);
730 
731 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
732 	startd_free(ri->ri_logstem, PATH_MAX);
733 	if (ri->ri_common_name != NULL)
734 		startd_free(ri->ri_common_name, max_scf_value_size);
735 	if (ri->ri_C_common_name != NULL)
736 		startd_free(ri->ri_C_common_name, max_scf_value_size);
737 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
738 	(void) pthread_mutex_destroy(&ri->ri_lock);
739 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
740 	startd_free(ri, sizeof (restarter_inst_t));
741 }
742 
743 /*
744  * instance_is_wait_style()
745  *
746  *   Returns 1 if the given instance is a "wait-style" service instance.
747  */
748 int
749 instance_is_wait_style(restarter_inst_t *inst)
750 {
751 	assert(MUTEX_HELD(&inst->ri_lock));
752 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
753 }
754 
755 /*
756  * instance_is_transient_style()
757  *
758  *   Returns 1 if the given instance is a transient service instance.
759  */
760 int
761 instance_is_transient_style(restarter_inst_t *inst)
762 {
763 	assert(MUTEX_HELD(&inst->ri_lock));
764 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
765 }
766 
767 /*
768  * instance_in_transition()
769  * Returns 1 if instance is in transition, 0 if not
770  */
771 int
772 instance_in_transition(restarter_inst_t *inst)
773 {
774 	assert(MUTEX_HELD(&inst->ri_lock));
775 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
776 		return (0);
777 	return (1);
778 }
779 
780 /*
781  * returns 1 if instance is already started, 0 if not
782  */
783 static int
784 instance_started(restarter_inst_t *inst)
785 {
786 	int ret;
787 
788 	assert(MUTEX_HELD(&inst->ri_lock));
789 
790 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
791 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
792 		ret = 1;
793 	else
794 		ret = 0;
795 
796 	return (ret);
797 }
798 
799 /*
800  * Returns
801  *   0 - success
802  *   ECONNRESET - success, but h was rebound
803  */
804 int
805 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
806     restarter_instance_state_t new_state,
807     restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
808 {
809 	protocol_states_t *states;
810 	int e;
811 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
812 	boolean_t rebound = B_FALSE;
813 	int prev_state_online;
814 	int state_online;
815 
816 	assert(MUTEX_HELD(&ri->ri_lock));
817 
818 	prev_state_online = instance_started(ri);
819 
820 retry:
821 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
822 	    aux);
823 	switch (e) {
824 	case 0:
825 		break;
826 
827 	case ENOMEM:
828 		++retry_count;
829 		if (retry_count < ALLOC_RETRY) {
830 			(void) poll(NULL, 0, msecs);
831 			msecs *= ALLOC_DELAY_MULT;
832 			goto retry;
833 		}
834 
835 		/* Like startd_alloc(). */
836 		uu_die("Insufficient memory.\n");
837 		/* NOTREACHED */
838 
839 	case ECONNABORTED:
840 		libscf_handle_rebind(h);
841 		rebound = B_TRUE;
842 		goto retry;
843 
844 	case EPERM:
845 	case EACCES:
846 	case EROFS:
847 		log_error(LOG_NOTICE, "Could not commit state change for %s "
848 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
849 		/* FALLTHROUGH */
850 
851 	case ENOENT:
852 		ri->ri_i.i_state = new_state;
853 		ri->ri_i.i_next_state = new_state_next;
854 		break;
855 
856 	case EINVAL:
857 	default:
858 		bad_error("_restarter_commit_states", e);
859 	}
860 
861 	states = startd_alloc(sizeof (protocol_states_t));
862 	states->ps_state = new_state;
863 	states->ps_state_next = new_state_next;
864 	states->ps_err = err;
865 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
866 	    (void *)states);
867 
868 	state_online = instance_started(ri);
869 
870 	if (prev_state_online && !state_online)
871 		ri->ri_post_offline_hook();
872 	else if (!prev_state_online && state_online)
873 		ri->ri_post_online_hook();
874 
875 	return (rebound ? ECONNRESET : 0);
876 }
877 
878 void
879 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
880 {
881 	restarter_inst_t *inst;
882 
883 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
884 
885 	inst = inst_lookup_by_name(fmri);
886 	if (inst == NULL)
887 		return;
888 
889 	inst->ri_flags |= flag;
890 
891 	MUTEX_UNLOCK(&inst->ri_lock);
892 }
893 
894 static void
895 restarter_take_pending_snapshots(scf_handle_t *h)
896 {
897 	restarter_inst_t *inst;
898 	int r;
899 
900 	MUTEX_LOCK(&instance_list.ril_lock);
901 
902 	for (inst = uu_list_first(instance_list.ril_instance_list);
903 	    inst != NULL;
904 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
905 		const char *fmri;
906 		scf_instance_t *sinst = NULL;
907 
908 		MUTEX_LOCK(&inst->ri_lock);
909 
910 		/*
911 		 * This is where we'd check inst->ri_method_thread and if it
912 		 * were nonzero we'd wait in anticipation of another thread
913 		 * executing a method for inst.  Doing so with the instance_list
914 		 * locked, though, leads to deadlock.  Since taking a snapshot
915 		 * during that window won't hurt anything, we'll just continue.
916 		 */
917 
918 		fmri = inst->ri_i.i_fmri;
919 
920 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
921 			scf_snapshot_t *rsnap;
922 
923 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
924 
925 			rsnap = libscf_get_or_make_running_snapshot(sinst,
926 			    fmri, B_FALSE);
927 
928 			scf_instance_destroy(sinst);
929 
930 			if (rsnap != NULL)
931 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
932 
933 			scf_snapshot_destroy(rsnap);
934 		}
935 
936 		if (inst->ri_flags & RINST_RETAKE_START) {
937 			switch (r = libscf_snapshots_poststart(h, fmri,
938 			    B_FALSE)) {
939 			case 0:
940 			case ENOENT:
941 				inst->ri_flags &= ~RINST_RETAKE_START;
942 				break;
943 
944 			case ECONNABORTED:
945 				break;
946 
947 			case EACCES:
948 			default:
949 				bad_error("libscf_snapshots_poststart", r);
950 			}
951 		}
952 
953 		MUTEX_UNLOCK(&inst->ri_lock);
954 	}
955 
956 	MUTEX_UNLOCK(&instance_list.ril_lock);
957 }
958 
959 /* ARGSUSED */
960 void *
961 restarter_post_fsminimal_thread(void *unused)
962 {
963 	scf_handle_t *h;
964 	int r;
965 
966 	h = libscf_handle_create_bound_loop();
967 
968 	for (;;) {
969 		r = libscf_create_self(h);
970 		if (r == 0)
971 			break;
972 
973 		assert(r == ECONNABORTED);
974 		libscf_handle_rebind(h);
975 	}
976 
977 	restarter_take_pending_snapshots(h);
978 
979 	(void) scf_handle_unbind(h);
980 	scf_handle_destroy(h);
981 
982 	return (NULL);
983 }
984 
985 /*
986  * int stop_instance()
987  *
988  *   Stop the instance identified by the instance given as the second argument,
989  *   for the cause stated.
990  *
991  *   Returns
992  *     0 - success
993  *     -1 - inst is in transition
994  */
995 static int
996 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
997     stop_cause_t cause)
998 {
999 	fork_info_t *info;
1000 	const char *cp;
1001 	int err;
1002 	restarter_error_t re;
1003 
1004 	assert(MUTEX_HELD(&inst->ri_lock));
1005 	assert(inst->ri_method_thread == 0);
1006 
1007 	switch (cause) {
1008 	case RSTOP_EXIT:
1009 		re = RERR_RESTART;
1010 		cp = "all processes in service exited";
1011 		break;
1012 	case RSTOP_CORE:
1013 		re = RERR_FAULT;
1014 		cp = "process dumped core";
1015 		break;
1016 	case RSTOP_SIGNAL:
1017 		re = RERR_FAULT;
1018 		cp = "process received fatal signal from outside the service";
1019 		break;
1020 	case RSTOP_HWERR:
1021 		re = RERR_FAULT;
1022 		cp = "process killed due to uncorrectable hardware error";
1023 		break;
1024 	case RSTOP_DEPENDENCY:
1025 		re = RERR_RESTART;
1026 		cp = "dependency activity requires stop";
1027 		break;
1028 	case RSTOP_DISABLE:
1029 		re = RERR_RESTART;
1030 		cp = "service disabled";
1031 		break;
1032 	case RSTOP_RESTART:
1033 		re = RERR_RESTART;
1034 		cp = "service restarting";
1035 		break;
1036 	default:
1037 #ifndef NDEBUG
1038 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1039 		    cause, __FILE__, __LINE__);
1040 #endif
1041 		abort();
1042 	}
1043 
1044 	/* Services in the disabled and maintenance state are ignored */
1045 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1046 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1047 		log_framework(LOG_DEBUG,
1048 		    "%s: stop_instance -> is maint/disabled\n",
1049 		    inst->ri_i.i_fmri);
1050 		return (0);
1051 	}
1052 
1053 	/* Already stopped instances are left alone */
1054 	if (instance_started(inst) == 0) {
1055 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1056 		    inst->ri_i.i_fmri);
1057 		return (0);
1058 	}
1059 
1060 	if (instance_in_transition(inst)) {
1061 		/* requeue event by returning -1 */
1062 		log_framework(LOG_DEBUG,
1063 		    "Restarter: Not stopping %s, in transition.\n",
1064 		    inst->ri_i.i_fmri);
1065 		return (-1);
1066 	}
1067 
1068 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1069 
1070 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1071 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1072 
1073 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1074 		/*
1075 		 * No need to stop instance, as child has exited; remove
1076 		 * contract and move the instance to the offline state.
1077 		 */
1078 		switch (err = restarter_instance_update_states(local_handle,
1079 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1080 		    NULL)) {
1081 		case 0:
1082 		case ECONNRESET:
1083 			break;
1084 
1085 		default:
1086 			bad_error("restarter_instance_update_states", err);
1087 		}
1088 
1089 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1090 
1091 		if (inst->ri_i.i_primary_ctid != 0) {
1092 			inst->ri_m_inst =
1093 			    safe_scf_instance_create(local_handle);
1094 			inst->ri_mi_deleted = B_FALSE;
1095 
1096 			libscf_reget_instance(inst);
1097 			method_remove_contract(inst, B_TRUE, B_TRUE);
1098 
1099 			scf_instance_destroy(inst->ri_m_inst);
1100 			inst->ri_m_inst = NULL;
1101 		}
1102 
1103 		switch (err = restarter_instance_update_states(local_handle,
1104 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1105 		    NULL)) {
1106 		case 0:
1107 		case ECONNRESET:
1108 			break;
1109 
1110 		default:
1111 			bad_error("restarter_instance_update_states", err);
1112 		}
1113 
1114 		return (0);
1115 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1116 		/*
1117 		 * Stopping a wait service through means other than the pid
1118 		 * exiting should keep wait_thread() from restarting the
1119 		 * service, by removing it from the wait list.
1120 		 * We cannot remove it right now otherwise the process will
1121 		 * end up <defunct> so mark it to be ignored.
1122 		 */
1123 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1124 	}
1125 
1126 	switch (err = restarter_instance_update_states(local_handle, inst,
1127 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1128 	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
1129 	case 0:
1130 	case ECONNRESET:
1131 		break;
1132 
1133 	default:
1134 		bad_error("restarter_instance_update_states", err);
1135 	}
1136 
1137 	info = startd_zalloc(sizeof (fork_info_t));
1138 
1139 	info->sf_id = inst->ri_id;
1140 	info->sf_method_type = METHOD_STOP;
1141 	info->sf_event_type = re;
1142 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1143 
1144 	return (0);
1145 }
1146 
1147 /*
1148  * Returns
1149  *   ENOENT - fmri is not in instance_list
1150  *   0 - success
1151  *   ECONNRESET - success, though handle was rebound
1152  *   -1 - instance is in transition
1153  */
1154 int
1155 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1156 {
1157 	restarter_inst_t *rip;
1158 	int r;
1159 
1160 	rip = inst_lookup_by_name(fmri);
1161 	if (rip == NULL)
1162 		return (ENOENT);
1163 
1164 	r = stop_instance(h, rip, flags);
1165 
1166 	MUTEX_UNLOCK(&rip->ri_lock);
1167 
1168 	return (r);
1169 }
1170 
1171 static void
1172 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1173     unmaint_cause_t cause)
1174 {
1175 	ctid_t ctid;
1176 	scf_instance_t *inst;
1177 	int r;
1178 	uint_t tries = 0, msecs = ALLOC_DELAY;
1179 	const char *cp;
1180 
1181 	assert(MUTEX_HELD(&rip->ri_lock));
1182 
1183 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1184 		log_error(LOG_DEBUG, "Restarter: "
1185 		    "Ignoring maintenance off command because %s is not in the "
1186 		    "maintenance state.\n", rip->ri_i.i_fmri);
1187 		return;
1188 	}
1189 
1190 	switch (cause) {
1191 	case RUNMAINT_CLEAR:
1192 		cp = "clear requested";
1193 		break;
1194 	case RUNMAINT_DISABLE:
1195 		cp = "disable requested";
1196 		break;
1197 	default:
1198 #ifndef NDEBUG
1199 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1200 		    cause, __FILE__, __LINE__);
1201 #endif
1202 		abort();
1203 	}
1204 
1205 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1206 	    cp);
1207 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1208 	    "%s.\n", rip->ri_i.i_fmri, cp);
1209 
1210 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1211 	    RESTARTER_STATE_NONE, RERR_RESTART, "none");
1212 
1213 	/*
1214 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1215 	 * a primary contract.
1216 	 */
1217 	if (rip->ri_i.i_primary_ctid == 0)
1218 		return;
1219 
1220 	ctid = rip->ri_i.i_primary_ctid;
1221 	contract_abandon(ctid);
1222 	rip->ri_i.i_primary_ctid = 0;
1223 
1224 rep_retry:
1225 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1226 	case 0:
1227 		break;
1228 
1229 	case ECONNABORTED:
1230 		libscf_handle_rebind(h);
1231 		goto rep_retry;
1232 
1233 	case ENOENT:
1234 		/* Must have been deleted. */
1235 		return;
1236 
1237 	case EINVAL:
1238 	case ENOTSUP:
1239 	default:
1240 		bad_error("libscf_handle_rebind", r);
1241 	}
1242 
1243 again:
1244 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1245 	switch (r) {
1246 	case 0:
1247 		break;
1248 
1249 	case ENOMEM:
1250 		++tries;
1251 		if (tries < ALLOC_RETRY) {
1252 			(void) poll(NULL, 0, msecs);
1253 			msecs *= ALLOC_DELAY_MULT;
1254 			goto again;
1255 		}
1256 
1257 		uu_die("Insufficient memory.\n");
1258 		/* NOTREACHED */
1259 
1260 	case ECONNABORTED:
1261 		scf_instance_destroy(inst);
1262 		libscf_handle_rebind(h);
1263 		goto rep_retry;
1264 
1265 	case ECANCELED:
1266 		break;
1267 
1268 	case EPERM:
1269 	case EACCES:
1270 	case EROFS:
1271 		log_error(LOG_INFO,
1272 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1273 		    rip->ri_i.i_fmri, strerror(r));
1274 		break;
1275 
1276 	case EINVAL:
1277 	case EBADF:
1278 	default:
1279 		bad_error("restarter_remove_contract", r);
1280 	}
1281 
1282 	scf_instance_destroy(inst);
1283 }
1284 
1285 /*
1286  * enable_inst()
1287  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1288  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1289  *   disabled, move it to offline.  If the event is _DISABLE or
1290  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1291  *
1292  *   Returns
1293  *     0 - success
1294  *     ECONNRESET - h was rebound
1295  */
1296 static int
1297 enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
1298 {
1299 	restarter_instance_state_t state;
1300 	int r;
1301 
1302 	assert(MUTEX_HELD(&inst->ri_lock));
1303 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1304 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1305 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1306 	assert(instance_in_transition(inst) == 0);
1307 
1308 	state = inst->ri_i.i_state;
1309 
1310 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1311 		inst->ri_i.i_enabled = 1;
1312 
1313 		if (state == RESTARTER_STATE_UNINIT ||
1314 		    state == RESTARTER_STATE_DISABLED) {
1315 			/*
1316 			 * B_FALSE: Don't log an error if the log_instance()
1317 			 * fails because it will fail on the miniroot before
1318 			 * install-discovery runs.
1319 			 */
1320 			log_instance(inst, B_FALSE, "Enabled.");
1321 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1322 			    inst->ri_i.i_fmri);
1323 			(void) restarter_instance_update_states(h, inst,
1324 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1325 			    RERR_NONE, NULL);
1326 		} else {
1327 			log_framework(LOG_DEBUG, "Restarter: "
1328 			    "Not changing state of %s for enable command.\n",
1329 			    inst->ri_i.i_fmri);
1330 		}
1331 	} else {
1332 		inst->ri_i.i_enabled = 0;
1333 
1334 		switch (state) {
1335 		case RESTARTER_STATE_ONLINE:
1336 		case RESTARTER_STATE_DEGRADED:
1337 			r = stop_instance(h, inst, RSTOP_DISABLE);
1338 			return (r == ECONNRESET ? 0 : r);
1339 
1340 		case RESTARTER_STATE_OFFLINE:
1341 		case RESTARTER_STATE_UNINIT:
1342 			if (inst->ri_i.i_primary_ctid != 0) {
1343 				inst->ri_m_inst = safe_scf_instance_create(h);
1344 				inst->ri_mi_deleted = B_FALSE;
1345 
1346 				libscf_reget_instance(inst);
1347 				method_remove_contract(inst, B_TRUE, B_TRUE);
1348 
1349 				scf_instance_destroy(inst->ri_m_inst);
1350 			}
1351 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1352 			log_instance(inst, B_FALSE, "Disabled.");
1353 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1354 			    inst->ri_i.i_fmri);
1355 			(void) restarter_instance_update_states(h, inst,
1356 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1357 			    RERR_RESTART, NULL);
1358 			return (0);
1359 
1360 		case RESTARTER_STATE_DISABLED:
1361 			break;
1362 
1363 		case RESTARTER_STATE_MAINT:
1364 			/*
1365 			 * We only want to pull the instance out of maintenance
1366 			 * if the disable is on adminstrative request.  The
1367 			 * graph engine sends _DISABLE events whenever a
1368 			 * service isn't in the disabled state, and we don't
1369 			 * want to pull the service out of maintenance if,
1370 			 * for example, it is there due to a dependency cycle.
1371 			 */
1372 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1373 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1374 			break;
1375 
1376 		default:
1377 #ifndef NDEBUG
1378 			(void) fprintf(stderr, "Restarter instance %s has "
1379 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1380 #endif
1381 			abort();
1382 		}
1383 	}
1384 
1385 	return (0);
1386 }
1387 
1388 static void
1389 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
1390 {
1391 	fork_info_t *info;
1392 
1393 	assert(MUTEX_HELD(&inst->ri_lock));
1394 	assert(instance_in_transition(inst) == 0);
1395 	assert(inst->ri_method_thread == 0);
1396 
1397 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1398 	    inst->ri_i.i_fmri);
1399 
1400 	/* Services in the disabled and maintenance state are ignored */
1401 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1402 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1403 	    inst->ri_i.i_enabled == 0) {
1404 		log_framework(LOG_DEBUG,
1405 		    "%s: start_instance -> is maint/disabled\n",
1406 		    inst->ri_i.i_fmri);
1407 		return;
1408 	}
1409 
1410 	/* Already started instances are left alone */
1411 	if (instance_started(inst) == 1) {
1412 		log_framework(LOG_DEBUG,
1413 		    "%s: start_instance -> is already started\n",
1414 		    inst->ri_i.i_fmri);
1415 		return;
1416 	}
1417 
1418 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1419 
1420 	(void) restarter_instance_update_states(local_handle, inst,
1421 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, "none");
1422 
1423 	info = startd_zalloc(sizeof (fork_info_t));
1424 
1425 	info->sf_id = inst->ri_id;
1426 	info->sf_method_type = METHOD_START;
1427 	info->sf_event_type = RERR_NONE;
1428 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1429 }
1430 
1431 static int
1432 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1433 {
1434 	scf_instance_t *inst;
1435 	int ret = 0;
1436 
1437 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1438 		return (-1);
1439 
1440 	ret = restarter_inst_ractions_from_tty(inst);
1441 
1442 	scf_instance_destroy(inst);
1443 	return (ret);
1444 }
1445 
1446 static void
1447 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1448     const char *aux)
1449 {
1450 	fork_info_t *info;
1451 	scf_instance_t *scf_inst = NULL;
1452 
1453 	assert(MUTEX_HELD(&rip->ri_lock));
1454 	assert(aux != NULL);
1455 	assert(rip->ri_method_thread == 0);
1456 
1457 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
1458 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1459 	    rip->ri_i.i_fmri, aux);
1460 
1461 	/* Services in the maintenance state are ignored */
1462 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1463 		log_framework(LOG_DEBUG,
1464 		    "%s: maintain_instance -> is already in maintenance\n",
1465 		    rip->ri_i.i_fmri);
1466 		return;
1467 	}
1468 
1469 	/*
1470 	 * If aux state is "service_request" and
1471 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1472 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1473 	 */
1474 	if (strcmp(aux, "service_request") == 0 && libscf_fmri_get_instance(h,
1475 	    rip->ri_i.i_fmri, &scf_inst) == 0) {
1476 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1477 			if (restarter_inst_set_aux_fmri(scf_inst))
1478 				log_framework(LOG_DEBUG, "%s: "
1479 				    "restarter_inst_set_aux_fmri failed: ",
1480 				    rip->ri_i.i_fmri);
1481 		} else {
1482 			log_framework(LOG_DEBUG, "%s: "
1483 			    "restarter_inst_validate_ractions_aux_fmri "
1484 			    "failed: ", rip->ri_i.i_fmri);
1485 
1486 			if (restarter_inst_reset_aux_fmri(scf_inst))
1487 				log_framework(LOG_DEBUG, "%s: "
1488 				    "restarter_inst_reset_aux_fmri failed: ",
1489 				    rip->ri_i.i_fmri);
1490 		}
1491 		scf_instance_destroy(scf_inst);
1492 	}
1493 
1494 	if (immediate || !instance_started(rip)) {
1495 		if (rip->ri_i.i_primary_ctid != 0) {
1496 			rip->ri_m_inst = safe_scf_instance_create(h);
1497 			rip->ri_mi_deleted = B_FALSE;
1498 
1499 			libscf_reget_instance(rip);
1500 			method_remove_contract(rip, B_TRUE, B_TRUE);
1501 
1502 			scf_instance_destroy(rip->ri_m_inst);
1503 		}
1504 
1505 		(void) restarter_instance_update_states(h, rip,
1506 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1507 		    (char *)aux);
1508 		return;
1509 	}
1510 
1511 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1512 	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
1513 
1514 	log_transition(rip, MAINT_REQUESTED);
1515 
1516 	info = startd_zalloc(sizeof (*info));
1517 	info->sf_id = rip->ri_id;
1518 	info->sf_method_type = METHOD_STOP;
1519 	info->sf_event_type = RERR_RESTART;
1520 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1521 }
1522 
1523 static void
1524 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1525 {
1526 	scf_instance_t *inst;
1527 	scf_snapshot_t *snap;
1528 	fork_info_t *info;
1529 	int r;
1530 
1531 	assert(MUTEX_HELD(&rip->ri_lock));
1532 
1533 	log_instance(rip, B_TRUE, "Rereading configuration.");
1534 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1535 	    rip->ri_i.i_fmri);
1536 
1537 rep_retry:
1538 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1539 	switch (r) {
1540 	case 0:
1541 		break;
1542 
1543 	case ECONNABORTED:
1544 		libscf_handle_rebind(h);
1545 		goto rep_retry;
1546 
1547 	case ENOENT:
1548 		/* Must have been deleted. */
1549 		return;
1550 
1551 	case EINVAL:
1552 	case ENOTSUP:
1553 	default:
1554 		bad_error("libscf_fmri_get_instance", r);
1555 	}
1556 
1557 	snap = libscf_get_running_snapshot(inst);
1558 
1559 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1560 	    &rip->ri_utmpx_prefix);
1561 	switch (r) {
1562 	case 0:
1563 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1564 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1565 		break;
1566 
1567 	case ECONNABORTED:
1568 		scf_instance_destroy(inst);
1569 		scf_snapshot_destroy(snap);
1570 		libscf_handle_rebind(h);
1571 		goto rep_retry;
1572 
1573 	case ECANCELED:
1574 	case ENOENT:
1575 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1576 		break;
1577 
1578 	default:
1579 		bad_error("libscf_get_startd_properties", r);
1580 	}
1581 
1582 	if (instance_started(rip)) {
1583 		/* Refresh does not change the state. */
1584 		(void) restarter_instance_update_states(h, rip,
1585 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
1586 
1587 		info = startd_zalloc(sizeof (*info));
1588 		info->sf_id = rip->ri_id;
1589 		info->sf_method_type = METHOD_REFRESH;
1590 		info->sf_event_type = RERR_REFRESH;
1591 
1592 		assert(rip->ri_method_thread == 0);
1593 		rip->ri_method_thread =
1594 		    startd_thread_create(method_thread, info);
1595 	}
1596 
1597 	scf_snapshot_destroy(snap);
1598 	scf_instance_destroy(inst);
1599 }
1600 
/*
 * Printable names for restarter event types, indexed by the numeric event
 * type (used as event_names[type] in log messages).
 *
 * "STOP_RESET" is included because RESTARTER_EVENT_TYPE_STOP_RESET events
 * are processed by this file; without it, looking up that type's name would
 * read past the end of the table.
 * NOTE(review): assumes STOP_RESET is the event type immediately following
 * ADMIN_DISABLE -- confirm against the restarter_event_type_t definition.
 */
const char *event_names[] = { "INVALID", "ADD_INSTANCE", "REMOVE_INSTANCE",
	"ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
	"ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY", "ADMIN_DISABLE", "STOP_RESET"
};
1607 
1608 /*
1609  * void *restarter_process_events()
1610  *
1611  *   Called in a separate thread to process the events on an instance's
1612  *   queue.  Empties the queue completely, and tries to keep the thread
1613  *   around for a little while after the queue is empty to save on
1614  *   startup costs.
1615  */
1616 static void *
1617 restarter_process_events(void *arg)
1618 {
1619 	scf_handle_t *h;
1620 	restarter_instance_qentry_t *event;
1621 	restarter_inst_t *rip;
1622 	char *fmri = (char *)arg;
1623 	struct timespec to;
1624 
1625 	assert(fmri != NULL);
1626 
1627 	h = libscf_handle_create_bound_loop();
1628 
1629 	/* grab the queue lock */
1630 	rip = inst_lookup_queue(fmri);
1631 	if (rip == NULL)
1632 		goto out;
1633 
1634 again:
1635 
1636 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1637 		restarter_inst_t *inst;
1638 
1639 		/* drop the queue lock */
1640 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1641 
1642 		/*
1643 		 * Grab the inst lock -- this waits until any outstanding
1644 		 * method finishes running.
1645 		 */
1646 		inst = inst_lookup_by_name(fmri);
1647 		if (inst == NULL) {
1648 			/* Getting deleted in the middle isn't an error. */
1649 			goto cont;
1650 		}
1651 
1652 		assert(instance_in_transition(inst) == 0);
1653 
1654 		/* process the event */
1655 		switch (event->riq_type) {
1656 		case RESTARTER_EVENT_TYPE_ENABLE:
1657 		case RESTARTER_EVENT_TYPE_DISABLE:
1658 			(void) enable_inst(h, inst, event->riq_type);
1659 			break;
1660 
1661 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1662 			if (enable_inst(h, inst, event->riq_type) == 0)
1663 				reset_start_times(inst);
1664 			break;
1665 
1666 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1667 			restarter_delete_inst(inst);
1668 			inst = NULL;
1669 			goto cont;
1670 
1671 		case RESTARTER_EVENT_TYPE_STOP_RESET:
1672 			reset_start_times(inst);
1673 			/* FALLTHROUGH */
1674 		case RESTARTER_EVENT_TYPE_STOP:
1675 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1676 			break;
1677 
1678 		case RESTARTER_EVENT_TYPE_START:
1679 			start_instance(h, inst);
1680 			break;
1681 
1682 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1683 			maintain_instance(h, inst, 0, "dependency_cycle");
1684 			break;
1685 
1686 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1687 			maintain_instance(h, inst, 0, "invalid_dependency");
1688 			break;
1689 
1690 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1691 			if (event_from_tty(h, inst) == 0)
1692 				maintain_instance(h, inst, 0,
1693 				    "service_request");
1694 			else
1695 				maintain_instance(h, inst, 0,
1696 				    "administrative_request");
1697 			break;
1698 
1699 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1700 			if (event_from_tty(h, inst) == 0)
1701 				maintain_instance(h, inst, 1,
1702 				    "service_request");
1703 			else
1704 				maintain_instance(h, inst, 1,
1705 				    "administrative_request");
1706 			break;
1707 
1708 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1709 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1710 			break;
1711 
1712 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1713 			refresh_instance(h, inst);
1714 			break;
1715 
1716 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1717 			log_framework(LOG_WARNING, "Restarter: "
1718 			    "%s command (for %s) unimplemented.\n",
1719 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1720 			break;
1721 
1722 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1723 			if (!instance_started(inst)) {
1724 				log_framework(LOG_DEBUG, "Restarter: "
1725 				    "Not restarting %s; not running.\n",
1726 				    inst->ri_i.i_fmri);
1727 			} else {
1728 				/*
1729 				 * Stop the instance.  If it can be restarted,
1730 				 * the graph engine will send a new event.
1731 				 */
1732 				if (stop_instance(h, inst, RSTOP_RESTART) == 0)
1733 					reset_start_times(inst);
1734 			}
1735 			break;
1736 
1737 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1738 		default:
1739 #ifndef NDEBUG
1740 			uu_warn("%s:%d: Bad restarter event %d.  "
1741 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1742 #endif
1743 			abort();
1744 		}
1745 
1746 		assert(inst != NULL);
1747 		MUTEX_UNLOCK(&inst->ri_lock);
1748 
1749 cont:
1750 		/* grab the queue lock */
1751 		rip = inst_lookup_queue(fmri);
1752 		if (rip == NULL)
1753 			goto out;
1754 
1755 		/* delete the event */
1756 		uu_list_remove(rip->ri_queue, event);
1757 		startd_free(event, sizeof (restarter_instance_qentry_t));
1758 	}
1759 
1760 	assert(rip != NULL);
1761 
1762 	/*
1763 	 * Try to preserve the thread for a little while for future use.
1764 	 */
1765 	to.tv_sec = 3;
1766 	to.tv_nsec = 0;
1767 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1768 	    &rip->ri_queue_lock, &to);
1769 
1770 	if (uu_list_first(rip->ri_queue) != NULL)
1771 		goto again;
1772 
1773 	rip->ri_queue_thread = 0;
1774 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1775 out:
1776 	(void) scf_handle_unbind(h);
1777 	scf_handle_destroy(h);
1778 	free(fmri);
1779 	return (NULL);
1780 }
1781 
1782 static int
1783 is_admin_event(restarter_event_type_t t) {
1784 
1785 	switch (t) {
1786 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1787 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1788 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1789 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1790 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1791 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1792 		return (1);
1793 	default:
1794 		return (0);
1795 	}
1796 }
1797 
1798 static void
1799 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1800 {
1801 	restarter_instance_qentry_t *qe;
1802 	int r;
1803 
1804 	assert(MUTEX_HELD(&ri->ri_queue_lock));
1805 	assert(!MUTEX_HELD(&ri->ri_lock));
1806 
1807 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1808 	qe->riq_type = e->rpe_type;
1809 
1810 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1811 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1812 	assert(r == 0);
1813 }
1814 
1815 /*
1816  * void *restarter_event_thread()
1817  *
1818  *  Handle incoming graph events by placing them on a per-instance
1819  *  queue.  We can't lock the main part of the instance structure, so
1820  *  just modify the seprarately locked event queue portion.
1821  */
1822 /*ARGSUSED*/
1823 static void *
1824 restarter_event_thread(void *unused)
1825 {
1826 	scf_handle_t *h;
1827 
1828 	/*
1829 	 * This is a new thread, and thus, gets its own handle
1830 	 * to the repository.
1831 	 */
1832 	h = libscf_handle_create_bound_loop();
1833 
1834 	MUTEX_LOCK(&ru->restarter_update_lock);
1835 
1836 	/*CONSTCOND*/
1837 	while (1) {
1838 		restarter_protocol_event_t *e;
1839 
1840 		while (ru->restarter_update_wakeup == 0)
1841 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1842 			    &ru->restarter_update_lock);
1843 
1844 		ru->restarter_update_wakeup = 0;
1845 
1846 		while ((e = restarter_event_dequeue()) != NULL) {
1847 			restarter_inst_t *rip;
1848 			char *fmri;
1849 
1850 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1851 
1852 			/*
1853 			 * ADD_INSTANCE is special: there's likely no
1854 			 * instance structure yet, so we need to handle the
1855 			 * addition synchronously.
1856 			 */
1857 			switch (e->rpe_type) {
1858 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1859 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1860 					log_error(LOG_INFO, "Restarter: "
1861 					    "Could not add %s.\n", e->rpe_inst);
1862 
1863 				MUTEX_LOCK(&st->st_load_lock);
1864 				if (--st->st_load_instances == 0)
1865 					(void) pthread_cond_broadcast(
1866 					    &st->st_load_cv);
1867 				MUTEX_UNLOCK(&st->st_load_lock);
1868 
1869 				goto nolookup;
1870 			}
1871 
1872 			/*
1873 			 * Lookup the instance, locking only the event queue.
1874 			 * Can't grab ri_lock here because it might be held
1875 			 * by a long-running method.
1876 			 */
1877 			rip = inst_lookup_queue(e->rpe_inst);
1878 			if (rip == NULL) {
1879 				log_error(LOG_INFO, "Restarter: "
1880 				    "Ignoring %s command for unknown service "
1881 				    "%s.\n", event_names[e->rpe_type],
1882 				    e->rpe_inst);
1883 				goto nolookup;
1884 			}
1885 
1886 			/* Keep ADMIN events from filling up the queue. */
1887 			if (is_admin_event(e->rpe_type) &&
1888 			    uu_list_numnodes(rip->ri_queue) >
1889 			    RINST_QUEUE_THRESHOLD) {
1890 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1891 				log_instance(rip, B_TRUE, "Instance event "
1892 				    "queue overflow.  Dropping administrative "
1893 				    "request.");
1894 				log_framework(LOG_DEBUG, "%s: Instance event "
1895 				    "queue overflow.  Dropping administrative "
1896 				    "request.\n", rip->ri_i.i_fmri);
1897 				goto nolookup;
1898 			}
1899 
1900 			/* Now add the event to the instance queue. */
1901 			restarter_queue_event(rip, e);
1902 
1903 			if (rip->ri_queue_thread == 0) {
1904 				/*
1905 				 * Start a thread if one isn't already
1906 				 * running.
1907 				 */
1908 				fmri = safe_strdup(e->rpe_inst);
1909 				rip->ri_queue_thread =  startd_thread_create(
1910 				    restarter_process_events, (void *)fmri);
1911 			} else {
1912 				/*
1913 				 * Signal the existing thread that there's
1914 				 * a new event.
1915 				 */
1916 				(void) pthread_cond_broadcast(
1917 				    &rip->ri_queue_cv);
1918 			}
1919 
1920 			MUTEX_UNLOCK(&rip->ri_queue_lock);
1921 nolookup:
1922 			restarter_event_release(e);
1923 
1924 			MUTEX_LOCK(&ru->restarter_update_lock);
1925 		}
1926 	}
1927 
1928 	/*
1929 	 * Unreachable for now -- there's currently no graceful cleanup
1930 	 * called on exit().
1931 	 */
1932 	(void) scf_handle_unbind(h);
1933 	scf_handle_destroy(h);
1934 	return (NULL);
1935 }
1936 
1937 static restarter_inst_t *
1938 contract_to_inst(ctid_t ctid)
1939 {
1940 	restarter_inst_t *inst;
1941 	int id;
1942 
1943 	id = lookup_inst_by_contract(ctid);
1944 	if (id == -1)
1945 		return (NULL);
1946 
1947 	inst = inst_lookup_by_id(id);
1948 	if (inst != NULL) {
1949 		/*
1950 		 * Since ri_lock isn't held by the contract id lookup, this
1951 		 * instance may have been restarted and now be in a new
1952 		 * contract, making the old contract no longer valid for this
1953 		 * instance.
1954 		 */
1955 		if (ctid != inst->ri_i.i_primary_ctid) {
1956 			MUTEX_UNLOCK(&inst->ri_lock);
1957 			inst = NULL;
1958 		}
1959 	}
1960 	return (inst);
1961 }
1962 
1963 /*
1964  * void contract_action()
1965  *   Take action on contract events.
1966  */
1967 static void
1968 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
1969     uint32_t type)
1970 {
1971 	const char *fmri = inst->ri_i.i_fmri;
1972 
1973 	assert(MUTEX_HELD(&inst->ri_lock));
1974 
1975 	/*
1976 	 * If startd has stopped this contract, there is no need to
1977 	 * stop it again.
1978 	 */
1979 	if (inst->ri_i.i_primary_ctid > 0 &&
1980 	    inst->ri_i.i_primary_ctid_stopped)
1981 		return;
1982 
1983 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
1984 	    | CT_PR_EV_HWERR)) == 0) {
1985 		/*
1986 		 * There shouldn't be other events, since that's not how we set
1987 		 * the terms. Thus, just log an error and drive on.
1988 		 */
1989 		log_framework(LOG_NOTICE,
1990 		    "%s: contract %ld received unexpected critical event "
1991 		    "(%d)\n", fmri, id, type);
1992 		return;
1993 	}
1994 
1995 	assert(instance_in_transition(inst) == 0);
1996 
1997 	if (instance_is_wait_style(inst)) {
1998 		/*
1999 		 * We ignore all events; if they impact the
2000 		 * process we're monitoring, then the
2001 		 * wait_thread will stop the instance.
2002 		 */
2003 		if (type == CT_PR_EV_EMPTY)
2004 			reset_start_times(inst);
2005 
2006 		log_framework(LOG_DEBUG,
2007 		    "%s: ignoring contract event on wait-style service\n",
2008 		    fmri);
2009 	} else {
2010 		/*
2011 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2012 		 */
2013 		switch (type) {
2014 		case CT_PR_EV_EMPTY:
2015 			(void) stop_instance(h, inst, RSTOP_EXIT);
2016 			break;
2017 		case CT_PR_EV_CORE:
2018 			(void) stop_instance(h, inst, RSTOP_CORE);
2019 			break;
2020 		case CT_PR_EV_SIGNAL:
2021 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
2022 			break;
2023 		case CT_PR_EV_HWERR:
2024 			(void) stop_instance(h, inst, RSTOP_HWERR);
2025 			break;
2026 		}
2027 	}
2028 }
2029 
2030 /*
2031  * void *restarter_contract_event_thread(void *)
2032  *   Listens to the process contract bundle for critical events, taking action
2033  *   on events from contracts we know we are responsible for.
2034  */
2035 /*ARGSUSED*/
2036 static void *
2037 restarter_contracts_event_thread(void *unused)
2038 {
2039 	int fd, err;
2040 	scf_handle_t *local_handle;
2041 
2042 	/*
2043 	 * Await graph load completion.  That is, stop here, until we've scanned
2044 	 * the repository for contract - instance associations.
2045 	 */
2046 	MUTEX_LOCK(&st->st_load_lock);
2047 	while (!(st->st_load_complete && st->st_load_instances == 0))
2048 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2049 	MUTEX_UNLOCK(&st->st_load_lock);
2050 
2051 	/*
2052 	 * This is a new thread, and thus, gets its own handle
2053 	 * to the repository.
2054 	 */
2055 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2056 		uu_die("Unable to bind a new repository handle: %s\n",
2057 		    scf_strerror(scf_error()));
2058 
2059 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2060 	if (fd == -1)
2061 		uu_die("process bundle open failed");
2062 
2063 	/*
2064 	 * Make sure we get all events (including those generated by configd
2065 	 * before this thread was started).
2066 	 */
2067 	err = ct_event_reset(fd);
2068 	assert(err == 0);
2069 
2070 	for (;;) {
2071 		int efd, sfd;
2072 		ct_evthdl_t ev;
2073 		uint32_t type;
2074 		ctevid_t evid;
2075 		ct_stathdl_t status;
2076 		ctid_t ctid;
2077 		restarter_inst_t *inst;
2078 		uint64_t cookie;
2079 
2080 		if (err = ct_event_read_critical(fd, &ev)) {
2081 			log_error(LOG_WARNING,
2082 			    "Error reading next contract event: %s",
2083 			    strerror(err));
2084 			continue;
2085 		}
2086 
2087 		evid = ct_event_get_evid(ev);
2088 		ctid = ct_event_get_ctid(ev);
2089 		type = ct_event_get_type(ev);
2090 
2091 		/* Fetch cookie. */
2092 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2093 		    < 0) {
2094 			ct_event_free(ev);
2095 			continue;
2096 		}
2097 
2098 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2099 			log_framework(LOG_WARNING, "Could not get status for "
2100 			    "contract %ld: %s\n", ctid, strerror(err));
2101 
2102 			startd_close(sfd);
2103 			ct_event_free(ev);
2104 			continue;
2105 		}
2106 
2107 		cookie = ct_status_get_cookie(status);
2108 
2109 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2110 		    "cookie %lld\n", type, ctid, cookie);
2111 
2112 		ct_status_free(status);
2113 
2114 		startd_close(sfd);
2115 
2116 		/*
2117 		 * svc.configd(1M) restart handling performed by the
2118 		 * fork_configd_thread.  We don't acknowledge, as that thread
2119 		 * will do so.
2120 		 */
2121 		if (cookie == CONFIGD_COOKIE) {
2122 			ct_event_free(ev);
2123 			continue;
2124 		}
2125 
2126 		inst = NULL;
2127 		if (storing_contract != 0 &&
2128 		    (inst = contract_to_inst(ctid)) == NULL) {
2129 			/*
2130 			 * This can happen for two reasons:
2131 			 * - method_run() has not yet stored the
2132 			 *    the contract into the internal hash table.
2133 			 * - we receive an EMPTY event for an abandoned
2134 			 *    contract.
2135 			 * If there is any contract in the process of
2136 			 * being stored into the hash table then re-read
2137 			 * the event later.
2138 			 */
2139 			log_framework(LOG_DEBUG,
2140 			    "Reset event %d for unknown "
2141 			    "contract id %ld\n", type, ctid);
2142 
2143 			/* don't go too fast */
2144 			(void) poll(NULL, 0, 100);
2145 
2146 			(void) ct_event_reset(fd);
2147 			ct_event_free(ev);
2148 			continue;
2149 		}
2150 
2151 		/*
2152 		 * Do not call contract_to_inst() again if first
2153 		 * call succeeded.
2154 		 */
2155 		if (inst == NULL)
2156 			inst = contract_to_inst(ctid);
2157 		if (inst == NULL) {
2158 			/*
2159 			 * This can happen if we receive an EMPTY
2160 			 * event for an abandoned contract.
2161 			 */
2162 			log_framework(LOG_DEBUG,
2163 			    "Received event %d for unknown contract id "
2164 			    "%ld\n", type, ctid);
2165 		} else {
2166 			log_framework(LOG_DEBUG,
2167 			    "Received event %d for contract id "
2168 			    "%ld (%s)\n", type, ctid,
2169 			    inst->ri_i.i_fmri);
2170 
2171 			contract_action(local_handle, inst, ctid, type);
2172 
2173 			MUTEX_UNLOCK(&inst->ri_lock);
2174 		}
2175 
2176 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2177 		    O_WRONLY);
2178 		if (efd != -1) {
2179 			(void) ct_ctl_ack(efd, evid);
2180 			startd_close(efd);
2181 		}
2182 
2183 		ct_event_free(ev);
2184 
2185 	}
2186 
2187 	/*NOTREACHED*/
2188 	return (NULL);
2189 }
2190 
/*
 * Timeout queue, processed by restarter_timeouts_event_thread().
 *
 * timeouts is allocated by timeout_init().  Its tq_list is a sorted list of
 * timeout_entry_t nodes ordered by te_timeout (see timeout_compare()), and
 * every access to that list in this file is made with tq_lock held.
 * timeout_pool is the list pool those nodes are initialized against.
 */
timeout_queue_t *timeouts;
static uu_list_pool_t *timeout_pool;

/*
 * Wakeup state shared between timeout_insert() and the timeouts thread.
 *
 * tu_wakeup is set to 1 by timeout_insert(), which then broadcasts tu_cv.
 * The timeouts thread waits on tu_cv while tu_wakeup is 0 and clears the
 * flag once it has been woken.  Both sides hold tu_lock while touching
 * tu_wakeup.
 */
typedef struct timeout_update {
	pthread_mutex_t		tu_lock;
	pthread_cond_t		tu_cv;
	int			tu_wakeup;
} timeout_update_t;

/* The single timeout_update_t instance; allocated by timeout_init(). */
timeout_update_t *tu;
2204 
/*
 * NULL-terminated list of instance FMRIs for which is_timeout_ovr() reports
 * a timeout override.  Matching is by exact string comparison.
 *
 * Both the strings and the pointer slots are const: the table is only ever
 * read, so nothing should be able to repoint an entry at run time.
 */
static const char * const timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2219 
2220 int
2221 is_timeout_ovr(restarter_inst_t *inst)
2222 {
2223 	int i;
2224 
2225 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2226 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2227 			log_instance(inst, B_TRUE, "Timeout override by "
2228 			    "svc.startd.  Using infinite timeout.");
2229 			return (1);
2230 		}
2231 	}
2232 
2233 	return (0);
2234 }
2235 
2236 /*ARGSUSED*/
2237 static int
2238 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2239 {
2240 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2241 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2242 
2243 	if (t1 > t2)
2244 		return (1);
2245 	else if (t1 < t2)
2246 		return (-1);
2247 	return (0);
2248 }
2249 
2250 void
2251 timeout_init()
2252 {
2253 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2254 
2255 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2256 
2257 	timeout_pool = startd_list_pool_create("timeouts",
2258 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2259 	    timeout_compare, UU_LIST_POOL_DEBUG);
2260 	assert(timeout_pool != NULL);
2261 
2262 	timeouts->tq_list = startd_list_create(timeout_pool,
2263 	    timeouts, UU_LIST_SORTED);
2264 	assert(timeouts->tq_list != NULL);
2265 
2266 	tu = startd_zalloc(sizeof (timeout_update_t));
2267 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2268 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2269 }
2270 
2271 void
2272 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2273 {
2274 	hrtime_t now, timeout;
2275 	timeout_entry_t *entry;
2276 	uu_list_index_t idx;
2277 
2278 	assert(MUTEX_HELD(&inst->ri_lock));
2279 
2280 	now = gethrtime();
2281 
2282 	/*
2283 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2284 	 * just return.
2285 	 */
2286 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2287 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2288 		    "treating as infinite.");
2289 		return;
2290 	}
2291 
2292 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2293 	timeout = now + (timeout_sec * 1000000000LL);
2294 
2295 	entry = startd_alloc(sizeof (timeout_entry_t));
2296 	entry->te_timeout = timeout;
2297 	entry->te_ctid = cid;
2298 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2299 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2300 	entry->te_fired = 0;
2301 	/* Insert the calculated timeout time onto the queue. */
2302 	MUTEX_LOCK(&timeouts->tq_lock);
2303 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2304 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2305 	uu_list_insert(timeouts->tq_list, entry, idx);
2306 	MUTEX_UNLOCK(&timeouts->tq_lock);
2307 
2308 	assert(inst->ri_timeout == NULL);
2309 	inst->ri_timeout = entry;
2310 
2311 	MUTEX_LOCK(&tu->tu_lock);
2312 	tu->tu_wakeup = 1;
2313 	(void) pthread_cond_broadcast(&tu->tu_cv);
2314 	MUTEX_UNLOCK(&tu->tu_lock);
2315 }
2316 
2317 
2318 void
2319 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2320 {
2321 	assert(MUTEX_HELD(&inst->ri_lock));
2322 
2323 	if (inst->ri_timeout == NULL)
2324 		return;
2325 
2326 	assert(inst->ri_timeout->te_ctid == cid);
2327 
2328 	MUTEX_LOCK(&timeouts->tq_lock);
2329 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2330 	MUTEX_UNLOCK(&timeouts->tq_lock);
2331 
2332 	free(inst->ri_timeout->te_fmri);
2333 	free(inst->ri_timeout->te_logstem);
2334 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2335 	inst->ri_timeout = NULL;
2336 }
2337 
2338 static int
2339 timeout_now()
2340 {
2341 	timeout_entry_t *e;
2342 	hrtime_t now;
2343 	int ret;
2344 
2345 	now = gethrtime();
2346 
2347 	/*
2348 	 * Walk through the (sorted) timeouts list.  While the timeout
2349 	 * at the head of the list is <= the current time, kill the
2350 	 * method.
2351 	 */
2352 	MUTEX_LOCK(&timeouts->tq_lock);
2353 
2354 	for (e = uu_list_first(timeouts->tq_list);
2355 	    e != NULL && e->te_timeout <= now;
2356 	    e = uu_list_next(timeouts->tq_list, e)) {
2357 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2358 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2359 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2360 		    "Method or service exit timed out.  Killing contract %ld.",
2361 		    e->te_ctid);
2362 		e->te_fired = 1;
2363 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2364 	}
2365 
2366 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2367 		ret = 0;
2368 	else
2369 		ret = -1;
2370 
2371 	MUTEX_UNLOCK(&timeouts->tq_lock);
2372 
2373 	return (ret);
2374 }
2375 
2376 /*
2377  * void *restarter_timeouts_event_thread(void *)
2378  *   Responsible for monitoring the method timeouts.  This thread must
2379  *   be started before any methods are called.
2380  */
2381 /*ARGSUSED*/
2382 static void *
2383 restarter_timeouts_event_thread(void *unused)
2384 {
2385 	/*
2386 	 * Timeouts are entered on a priority queue, which is processed by
2387 	 * this thread.  As timeouts are specified in seconds, we'll do
2388 	 * the necessary processing every second, as long as the queue
2389 	 * is not empty.
2390 	 */
2391 
2392 	/*CONSTCOND*/
2393 	while (1) {
2394 		/*
2395 		 * As long as the timeout list isn't empty, process it
2396 		 * every second.
2397 		 */
2398 		if (timeout_now() == 0) {
2399 			(void) sleep(1);
2400 			continue;
2401 		}
2402 
2403 		/* The list is empty, wait until we have more timeouts. */
2404 		MUTEX_LOCK(&tu->tu_lock);
2405 
2406 		while (tu->tu_wakeup == 0)
2407 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2408 
2409 		tu->tu_wakeup = 0;
2410 		MUTEX_UNLOCK(&tu->tu_lock);
2411 	}
2412 
2413 	return (NULL);
2414 }
2415 
2416 void
2417 restarter_start()
2418 {
2419 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2420 	(void) startd_thread_create(restarter_event_thread, NULL);
2421 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2422 	(void) startd_thread_create(wait_thread, NULL);
2423 }
2424 
2425 
2426 void
2427 restarter_init()
2428 {
2429 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2430 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2431 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2432 	(void) memset(&instance_list, 0, sizeof (instance_list));
2433 
2434 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2435 	instance_list.ril_instance_list = startd_list_create(
2436 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2437 
2438 	restarter_queue_pool = startd_list_pool_create(
2439 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2440 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2441 	    UU_LIST_POOL_DEBUG);
2442 
2443 	contract_list_pool = startd_list_pool_create(
2444 	    "contract_list", sizeof (contract_entry_t),
2445 	    offsetof(contract_entry_t,  ce_link), NULL,
2446 	    UU_LIST_POOL_DEBUG);
2447 	contract_hash_init();
2448 
2449 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2450 }
2451