xref: /titanic_50/usr/src/cmd/svc/startd/restarter.c (revision 4c273cfa4ad8398f4157cd1d6fa54fc1cbc266ff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
25  */
26 
27 /*
28  * restarter.c - service manipulation
29  *
30  * This component manages services whose restarter is svc.startd, the standard
31  * restarter.  It translates restarter protocol events from the graph engine
32  * into actions on processes, as a delegated restarter would do.
33  *
34  * The master restarter manages a number of always-running threads:
35  *   - restarter event thread: events from the graph engine
36  *   - timeout thread: thread to fire queued timeouts
37  *   - contract thread: thread to handle contract events
38  *   - wait thread: thread to handle wait-based services
39  *
40  * The other threads are created as-needed:
41  *   - per-instance method threads
42  *   - per-instance event processing threads
43  *
44  * The interaction of all threads must result in the following conditions
45  * being satisfied (on a per-instance basis):
46  *   - restarter events must be processed in order
47  *   - method execution must be serialized
48  *   - instance delete must be held until outstanding methods are complete
49  *   - contract events shouldn't be processed while a method is running
50  *   - timeouts should fire even when a method is running
51  *
52  * Service instances are represented by restarter_inst_t's and are kept in the
53  * instance_list list.
54  *
55  * Service States
56  *   The current state of a service instance is kept in
57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
58  *   some time, then before we effect the transition we set
59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60  *   rotate i_next_state to i_state and set i_next_state to
61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
62  *   held.  The exception is when we launch methods, which are done with
63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
64  *   method_thread() does, we set ri_method_thread to the thread id of the
65  *   method thread, and when it is nonzero any thread with a different thread id
66  *   waits on ri_method_cv.
67  *
68  * Method execution is serialized by blocking on ri_method_cv in
69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
70  * also prevents the instance structure from being deleted until all
71  * outstanding operations such as method_thread() have finished.
72  *
73  * Lock ordering:
74  *
75  * dgraph_lock [can be held when taking:]
76  *   utmpx_lock
77  *   dictionary->dict_lock
78  *   st->st_load_lock
79  *   wait_info_lock
80  *   ru->restarter_update_lock
81  *     restarter_queue->rpeq_lock
82  *   instance_list.ril_lock
83  *     inst->ri_lock
84  *   st->st_configd_live_lock
85  *
86  * instance_list.ril_lock
87  *   graph_queue->gpeq_lock
88  *   gu->gu_lock
89  *   st->st_configd_live_lock
90  *   dictionary->dict_lock
91  *   inst->ri_lock
92  *     graph_queue->gpeq_lock
93  *     gu->gu_lock
94  *     tu->tu_lock
95  *     tq->tq_lock
96  *     inst->ri_queue_lock
97  *       wait_info_lock
98  *       bp->cb_lock
99  *     utmpx_lock
100  *
101  * single_user_thread_lock
102  *   wait_info_lock
103  *   utmpx_lock
104  *
105  * gu_freeze_lock
106  *
107  * logbuf_mutex nests inside pretty much everything.
108  */
109 
110 #include <sys/contract/process.h>
111 #include <sys/ctfs.h>
112 #include <sys/stat.h>
113 #include <sys/time.h>
114 #include <sys/types.h>
115 #include <sys/uio.h>
116 #include <sys/wait.h>
117 #include <assert.h>
118 #include <errno.h>
119 #include <fcntl.h>
120 #include <libcontract.h>
121 #include <libcontract_priv.h>
122 #include <libintl.h>
123 #include <librestart.h>
124 #include <librestart_priv.h>
125 #include <libuutil.h>
126 #include <limits.h>
127 #include <poll.h>
128 #include <port.h>
129 #include <pthread.h>
130 #include <stdarg.h>
131 #include <stdio.h>
132 #include <strings.h>
133 #include <unistd.h>
134 
135 #include "startd.h"
136 #include "protocol.h"
137 
/* Pool used to initialize the ri_link node of every instance we insert. */
static uu_list_pool_t *restarter_instance_pool;
/* The instances we manage (ril_instance_list), protected by ril_lock. */
static restarter_instance_list_t instance_list;

/* Pool from which each instance's ri_queue list is created. */
static uu_list_pool_t *restarter_queue_pool;
142 
143 /*
144  * Function used to reset the restart times for an instance, when
145  * an administrative task comes along and essentially makes the times
146  * in this array ineffective.
147  */
148 static void
149 reset_start_times(restarter_inst_t *inst)
150 {
151 	inst->ri_start_index = 0;
152 	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
153 }
154 
155 /*ARGSUSED*/
156 static int
157 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
158     void *private)
159 {
160 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
161 	int rc_id = *(int *)rc_arg;
162 
163 	if (lc_id > rc_id)
164 		return (1);
165 	if (lc_id < rc_id)
166 		return (-1);
167 	return (0);
168 }
169 
170 static restarter_inst_t *
171 inst_lookup_by_name(const char *name)
172 {
173 	int id;
174 
175 	id = dict_lookup_byname(name);
176 	if (id == -1)
177 		return (NULL);
178 
179 	return (inst_lookup_by_id(id));
180 }
181 
182 restarter_inst_t *
183 inst_lookup_by_id(int id)
184 {
185 	restarter_inst_t *inst;
186 
187 	MUTEX_LOCK(&instance_list.ril_lock);
188 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
189 	if (inst != NULL)
190 		MUTEX_LOCK(&inst->ri_lock);
191 	MUTEX_UNLOCK(&instance_list.ril_lock);
192 
193 	if (inst != NULL) {
194 		while (inst->ri_method_thread != 0 &&
195 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
196 			++inst->ri_method_waiters;
197 			(void) pthread_cond_wait(&inst->ri_method_cv,
198 			    &inst->ri_lock);
199 			assert(inst->ri_method_waiters > 0);
200 			--inst->ri_method_waiters;
201 		}
202 	}
203 
204 	return (inst);
205 }
206 
207 static restarter_inst_t *
208 inst_lookup_queue(const char *name)
209 {
210 	int id;
211 	restarter_inst_t *inst;
212 
213 	id = dict_lookup_byname(name);
214 	if (id == -1)
215 		return (NULL);
216 
217 	MUTEX_LOCK(&instance_list.ril_lock);
218 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
219 	if (inst != NULL)
220 		MUTEX_LOCK(&inst->ri_queue_lock);
221 	MUTEX_UNLOCK(&instance_list.ril_lock);
222 
223 	return (inst);
224 }
225 
226 const char *
227 service_style(int flags)
228 {
229 	switch (flags & RINST_STYLE_MASK) {
230 	case RINST_CONTRACT:	return ("contract");
231 	case RINST_TRANSIENT:	return ("transient");
232 	case RINST_WAIT:	return ("wait");
233 
234 	default:
235 #ifndef NDEBUG
236 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
237 #endif
238 		abort();
239 		/* NOTREACHED */
240 	}
241 }
242 
243 /*
244  * Fails with ECONNABORTED or ECANCELED.
245  */
246 static int
247 check_contract(restarter_inst_t *inst, boolean_t primary,
248     scf_instance_t *scf_inst)
249 {
250 	ctid_t *ctidp;
251 	int fd, r;
252 
253 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
254 	    &inst->ri_i.i_transient_ctid;
255 
256 	assert(*ctidp >= 1);
257 
258 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
259 	if (fd >= 0) {
260 		r = close(fd);
261 		assert(r == 0);
262 		return (0);
263 	}
264 
265 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
266 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
267 	switch (r) {
268 	case 0:
269 	case ECONNABORTED:
270 	case ECANCELED:
271 		*ctidp = 0;
272 		return (r);
273 
274 	case ENOMEM:
275 		uu_die("Out of memory\n");
276 		/* NOTREACHED */
277 
278 	case EPERM:
279 		uu_die("Insufficient privilege.\n");
280 		/* NOTREACHED */
281 
282 	case EACCES:
283 		uu_die("Repository backend access denied.\n");
284 		/* NOTREACHED */
285 
286 	case EROFS:
287 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
288 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
289 		return (0);
290 
291 	case EINVAL:
292 	case EBADF:
293 	default:
294 		assert(0);
295 		abort();
296 		/* NOTREACHED */
297 	}
298 }
299 
/*
 * Forward declaration: restarter_insert_inst() calls stop_instance() before
 * the definition appears further down in this file.
 */
static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
301 
302 /*
303  * int restarter_insert_inst(scf_handle_t *, char *)
304  *   If the inst is already in the restarter list, return its id.  If the inst
305  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
306  *   states, insert it into the list, and return 0.
307  *
308  *   Fails with
309  *     ENOENT - name is not in the repository
310  */
311 static int
312 restarter_insert_inst(scf_handle_t *h, const char *name)
313 {
314 	int id, r;
315 	restarter_inst_t *inst;
316 	uu_list_index_t idx;
317 	scf_service_t *scf_svc;
318 	scf_instance_t *scf_inst;
319 	scf_snapshot_t *snap = NULL;
320 	scf_propertygroup_t *pg;
321 	char *svc_name, *inst_name;
322 	char logfilebuf[PATH_MAX];
323 	char *c;
324 	boolean_t do_commit_states;
325 	restarter_instance_state_t state, next_state;
326 	protocol_states_t *ps;
327 	pid_t start_pid;
328 	restarter_str_t reason = restarter_str_insert_in_graph;
329 
330 	MUTEX_LOCK(&instance_list.ril_lock);
331 
332 	/*
333 	 * We don't use inst_lookup_by_name() here because we want the lookup
334 	 * & insert to be atomic.
335 	 */
336 	id = dict_lookup_byname(name);
337 	if (id != -1) {
338 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
339 		    &idx);
340 		if (inst != NULL) {
341 			MUTEX_UNLOCK(&instance_list.ril_lock);
342 			return (0);
343 		}
344 	}
345 
346 	/* Allocate an instance */
347 	inst = startd_zalloc(sizeof (restarter_inst_t));
348 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
349 	inst->ri_utmpx_prefix[0] = '\0';
350 
351 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
352 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
353 
354 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
355 
356 	/*
357 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
358 	 * just in case.
359 	 */
360 	inst->ri_id = (id != -1 ? id : dict_insert(name));
361 
362 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
363 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
364 
365 	scf_svc = safe_scf_service_create(h);
366 	scf_inst = safe_scf_instance_create(h);
367 	pg = safe_scf_pg_create(h);
368 	svc_name = startd_alloc(max_scf_name_size);
369 	inst_name = startd_alloc(max_scf_name_size);
370 
371 rep_retry:
372 	if (snap != NULL)
373 		scf_snapshot_destroy(snap);
374 	if (inst->ri_logstem != NULL)
375 		startd_free(inst->ri_logstem, PATH_MAX);
376 	if (inst->ri_common_name != NULL)
377 		free(inst->ri_common_name);
378 	if (inst->ri_C_common_name != NULL)
379 		free(inst->ri_C_common_name);
380 	snap = NULL;
381 	inst->ri_logstem = NULL;
382 	inst->ri_common_name = NULL;
383 	inst->ri_C_common_name = NULL;
384 
385 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
386 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
387 		switch (scf_error()) {
388 		case SCF_ERROR_CONNECTION_BROKEN:
389 			libscf_handle_rebind(h);
390 			goto rep_retry;
391 
392 		case SCF_ERROR_NOT_FOUND:
393 			goto deleted;
394 		}
395 
396 		uu_die("Can't decode FMRI %s: %s\n", name,
397 		    scf_strerror(scf_error()));
398 	}
399 
400 	/*
401 	 * If there's no running snapshot, then we execute using the editing
402 	 * snapshot.  Pending snapshots will be taken later.
403 	 */
404 	snap = libscf_get_running_snapshot(scf_inst);
405 
406 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
407 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
408 	    0)) {
409 		switch (scf_error()) {
410 		case SCF_ERROR_NOT_SET:
411 			break;
412 
413 		case SCF_ERROR_CONNECTION_BROKEN:
414 			libscf_handle_rebind(h);
415 			goto rep_retry;
416 
417 		default:
418 			assert(0);
419 			abort();
420 		}
421 
422 		goto deleted;
423 	}
424 
425 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
426 	for (c = logfilebuf; *c != '\0'; c++)
427 		if (*c == '/')
428 			*c = '-';
429 
430 	inst->ri_logstem = startd_alloc(PATH_MAX);
431 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
432 	    LOG_SUFFIX);
433 
434 	/*
435 	 * If the restarter group is missing, use uninit/none.  Otherwise,
436 	 * we're probably being restarted & don't want to mess up the states
437 	 * that are there.
438 	 */
439 	state = RESTARTER_STATE_UNINIT;
440 	next_state = RESTARTER_STATE_NONE;
441 
442 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
443 	if (r != 0) {
444 		switch (scf_error()) {
445 		case SCF_ERROR_CONNECTION_BROKEN:
446 			libscf_handle_rebind(h);
447 			goto rep_retry;
448 
449 		case SCF_ERROR_NOT_SET:
450 			goto deleted;
451 
452 		case SCF_ERROR_NOT_FOUND:
453 			/*
454 			 * This shouldn't happen since the graph engine should
455 			 * have initialized the state to uninitialized/none if
456 			 * there was no restarter pg.  In case somebody
457 			 * deleted it, though....
458 			 */
459 			do_commit_states = B_TRUE;
460 			break;
461 
462 		default:
463 			assert(0);
464 			abort();
465 		}
466 	} else {
467 		r = libscf_read_states(pg, &state, &next_state);
468 		if (r != 0) {
469 			do_commit_states = B_TRUE;
470 		} else {
471 			if (next_state != RESTARTER_STATE_NONE) {
472 				/*
473 				 * Force next_state to _NONE since we
474 				 * don't look for method processes.
475 				 */
476 				next_state = RESTARTER_STATE_NONE;
477 				do_commit_states = B_TRUE;
478 			} else {
479 				/*
480 				 * The reason for transition will depend on
481 				 * state.
482 				 */
483 				if (st->st_initial == 0)
484 					reason = restarter_str_startd_restart;
485 				else if (state == RESTARTER_STATE_MAINT)
486 					reason = restarter_str_bad_repo_state;
487 				/*
488 				 * Inform the restarter of our state without
489 				 * changing the STIME in the repository.
490 				 */
491 				ps = startd_alloc(sizeof (*ps));
492 				inst->ri_i.i_state = ps->ps_state = state;
493 				inst->ri_i.i_next_state = ps->ps_state_next =
494 				    next_state;
495 				ps->ps_reason = reason;
496 
497 				graph_protocol_send_event(inst->ri_i.i_fmri,
498 				    GRAPH_UPDATE_STATE_CHANGE, ps);
499 
500 				do_commit_states = B_FALSE;
501 			}
502 		}
503 	}
504 
505 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
506 	    &inst->ri_utmpx_prefix)) {
507 	case 0:
508 		break;
509 
510 	case ECONNABORTED:
511 		libscf_handle_rebind(h);
512 		goto rep_retry;
513 
514 	case ECANCELED:
515 		goto deleted;
516 
517 	case ENOENT:
518 		/*
519 		 * This is odd, because the graph engine should have required
520 		 * the general property group.  So we'll just use default
521 		 * flags in anticipation of the graph engine sending us
522 		 * REMOVE_INSTANCE when it finds out that the general property
523 		 * group has been deleted.
524 		 */
525 		inst->ri_flags = RINST_CONTRACT;
526 		break;
527 
528 	default:
529 		assert(0);
530 		abort();
531 	}
532 
533 	r = libscf_get_template_values(scf_inst, snap,
534 	    &inst->ri_common_name, &inst->ri_C_common_name);
535 
536 	/*
537 	 * Copy our names to smaller buffers to reduce our memory footprint.
538 	 */
539 	if (inst->ri_common_name != NULL) {
540 		char *tmp = safe_strdup(inst->ri_common_name);
541 		startd_free(inst->ri_common_name, max_scf_value_size);
542 		inst->ri_common_name = tmp;
543 	}
544 
545 	if (inst->ri_C_common_name != NULL) {
546 		char *tmp = safe_strdup(inst->ri_C_common_name);
547 		startd_free(inst->ri_C_common_name, max_scf_value_size);
548 		inst->ri_C_common_name = tmp;
549 	}
550 
551 	switch (r) {
552 	case 0:
553 		break;
554 
555 	case ECONNABORTED:
556 		libscf_handle_rebind(h);
557 		goto rep_retry;
558 
559 	case ECANCELED:
560 		goto deleted;
561 
562 	case ECHILD:
563 	case ENOENT:
564 		break;
565 
566 	default:
567 		assert(0);
568 		abort();
569 	}
570 
571 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
572 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
573 	    &start_pid)) {
574 	case 0:
575 		break;
576 
577 	case ECONNABORTED:
578 		libscf_handle_rebind(h);
579 		goto rep_retry;
580 
581 	case ECANCELED:
582 		goto deleted;
583 
584 	default:
585 		assert(0);
586 		abort();
587 	}
588 
589 	if (inst->ri_i.i_primary_ctid >= 1) {
590 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
591 
592 		switch (check_contract(inst, B_TRUE, scf_inst)) {
593 		case 0:
594 			break;
595 
596 		case ECONNABORTED:
597 			libscf_handle_rebind(h);
598 			goto rep_retry;
599 
600 		case ECANCELED:
601 			goto deleted;
602 
603 		default:
604 			assert(0);
605 			abort();
606 		}
607 	}
608 
609 	if (inst->ri_i.i_transient_ctid >= 1) {
610 		switch (check_contract(inst, B_FALSE, scf_inst)) {
611 		case 0:
612 			break;
613 
614 		case ECONNABORTED:
615 			libscf_handle_rebind(h);
616 			goto rep_retry;
617 
618 		case ECANCELED:
619 			goto deleted;
620 
621 		default:
622 			assert(0);
623 			abort();
624 		}
625 	}
626 
627 	/* No more failures we live through, so add it to the list. */
628 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
629 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
630 	MUTEX_LOCK(&inst->ri_lock);
631 	MUTEX_LOCK(&inst->ri_queue_lock);
632 
633 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
634 
635 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
636 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
637 	MUTEX_UNLOCK(&instance_list.ril_lock);
638 
639 	if (start_pid != -1 &&
640 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
641 		int ret;
642 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
643 		if (ret == -1) {
644 			/*
645 			 * Implication:  if we can't reregister the
646 			 * instance, we will start another one.  Two
647 			 * instances may or may not result in a resource
648 			 * conflict.
649 			 */
650 			log_error(LOG_WARNING,
651 			    "%s: couldn't reregister %ld for wait\n",
652 			    inst->ri_i.i_fmri, start_pid);
653 		} else if (ret == 1) {
654 			/*
655 			 * Leading PID has exited.
656 			 */
657 			(void) stop_instance(h, inst, RSTOP_EXIT);
658 		}
659 	}
660 
661 
662 	scf_pg_destroy(pg);
663 
664 	if (do_commit_states)
665 		(void) restarter_instance_update_states(h, inst, state,
666 		    next_state, RERR_NONE, reason);
667 
668 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
669 	    service_style(inst->ri_flags));
670 
671 	MUTEX_UNLOCK(&inst->ri_queue_lock);
672 	MUTEX_UNLOCK(&inst->ri_lock);
673 
674 	startd_free(svc_name, max_scf_name_size);
675 	startd_free(inst_name, max_scf_name_size);
676 	scf_snapshot_destroy(snap);
677 	scf_instance_destroy(scf_inst);
678 	scf_service_destroy(scf_svc);
679 
680 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
681 	    name);
682 
683 	return (0);
684 
685 deleted:
686 	MUTEX_UNLOCK(&instance_list.ril_lock);
687 	startd_free(inst_name, max_scf_name_size);
688 	startd_free(svc_name, max_scf_name_size);
689 	if (snap != NULL)
690 		scf_snapshot_destroy(snap);
691 	scf_pg_destroy(pg);
692 	scf_instance_destroy(scf_inst);
693 	scf_service_destroy(scf_svc);
694 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
695 	uu_list_destroy(inst->ri_queue);
696 	if (inst->ri_logstem != NULL)
697 		startd_free(inst->ri_logstem, PATH_MAX);
698 	if (inst->ri_common_name != NULL)
699 		free(inst->ri_common_name);
700 	if (inst->ri_C_common_name != NULL)
701 		free(inst->ri_C_common_name);
702 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
703 	startd_free(inst, sizeof (restarter_inst_t));
704 	return (ENOENT);
705 }
706 
707 static void
708 restarter_delete_inst(restarter_inst_t *ri)
709 {
710 	int id;
711 	restarter_inst_t *rip;
712 	void *cookie = NULL;
713 	restarter_instance_qentry_t *e;
714 
715 	assert(MUTEX_HELD(&ri->ri_lock));
716 
717 	/*
718 	 * Must drop the instance lock so we can pick up the instance_list
719 	 * lock & remove the instance.
720 	 */
721 	id = ri->ri_id;
722 	MUTEX_UNLOCK(&ri->ri_lock);
723 
724 	MUTEX_LOCK(&instance_list.ril_lock);
725 
726 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
727 	if (rip == NULL) {
728 		MUTEX_UNLOCK(&instance_list.ril_lock);
729 		return;
730 	}
731 
732 	assert(ri == rip);
733 
734 	uu_list_remove(instance_list.ril_instance_list, ri);
735 
736 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
737 	    ri->ri_i.i_fmri);
738 
739 	MUTEX_UNLOCK(&instance_list.ril_lock);
740 
741 	/*
742 	 * We can lock the instance without holding the instance_list lock
743 	 * since we removed the instance from the list.
744 	 */
745 	MUTEX_LOCK(&ri->ri_lock);
746 	MUTEX_LOCK(&ri->ri_queue_lock);
747 
748 	if (ri->ri_i.i_primary_ctid >= 1)
749 		contract_hash_remove(ri->ri_i.i_primary_ctid);
750 
751 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
752 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
753 
754 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
755 		startd_free(e, sizeof (*e));
756 	uu_list_destroy(ri->ri_queue);
757 
758 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
759 	startd_free(ri->ri_logstem, PATH_MAX);
760 	if (ri->ri_common_name != NULL)
761 		free(ri->ri_common_name);
762 	if (ri->ri_C_common_name != NULL)
763 		free(ri->ri_C_common_name);
764 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
765 	(void) pthread_mutex_destroy(&ri->ri_lock);
766 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
767 	startd_free(ri, sizeof (restarter_inst_t));
768 }
769 
770 /*
771  * instance_is_wait_style()
772  *
773  *   Returns 1 if the given instance is a "wait-style" service instance.
774  */
775 int
776 instance_is_wait_style(restarter_inst_t *inst)
777 {
778 	assert(MUTEX_HELD(&inst->ri_lock));
779 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
780 }
781 
782 /*
783  * instance_is_transient_style()
784  *
785  *   Returns 1 if the given instance is a transient service instance.
786  */
787 int
788 instance_is_transient_style(restarter_inst_t *inst)
789 {
790 	assert(MUTEX_HELD(&inst->ri_lock));
791 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
792 }
793 
794 /*
795  * instance_in_transition()
796  * Returns 1 if instance is in transition, 0 if not
797  */
798 int
799 instance_in_transition(restarter_inst_t *inst)
800 {
801 	assert(MUTEX_HELD(&inst->ri_lock));
802 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
803 		return (0);
804 	return (1);
805 }
806 
807 /*
808  * returns 1 if instance is already started, 0 if not
809  */
810 static int
811 instance_started(restarter_inst_t *inst)
812 {
813 	int ret;
814 
815 	assert(MUTEX_HELD(&inst->ri_lock));
816 
817 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
818 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
819 		ret = 1;
820 	else
821 		ret = 0;
822 
823 	return (ret);
824 }
825 
826 /*
827  * Returns
828  *   0 - success
829  *   ECONNRESET - success, but h was rebound
830  */
831 int
832 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
833     restarter_instance_state_t new_state,
834     restarter_instance_state_t new_state_next, restarter_error_t err,
835     restarter_str_t reason)
836 {
837 	protocol_states_t *states;
838 	int e;
839 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
840 	boolean_t rebound = B_FALSE;
841 	int prev_state_online;
842 	int state_online;
843 
844 	assert(MUTEX_HELD(&ri->ri_lock));
845 
846 	prev_state_online = instance_started(ri);
847 
848 retry:
849 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
850 	    restarter_get_str_short(reason));
851 	switch (e) {
852 	case 0:
853 		break;
854 
855 	case ENOMEM:
856 		++retry_count;
857 		if (retry_count < ALLOC_RETRY) {
858 			(void) poll(NULL, 0, msecs);
859 			msecs *= ALLOC_DELAY_MULT;
860 			goto retry;
861 		}
862 
863 		/* Like startd_alloc(). */
864 		uu_die("Insufficient memory.\n");
865 		/* NOTREACHED */
866 
867 	case ECONNABORTED:
868 		libscf_handle_rebind(h);
869 		rebound = B_TRUE;
870 		goto retry;
871 
872 	case EPERM:
873 	case EACCES:
874 	case EROFS:
875 		log_error(LOG_NOTICE, "Could not commit state change for %s "
876 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
877 		/* FALLTHROUGH */
878 
879 	case ENOENT:
880 		ri->ri_i.i_state = new_state;
881 		ri->ri_i.i_next_state = new_state_next;
882 		break;
883 
884 	case EINVAL:
885 	default:
886 		bad_error("_restarter_commit_states", e);
887 	}
888 
889 	states = startd_alloc(sizeof (protocol_states_t));
890 	states->ps_state = new_state;
891 	states->ps_state_next = new_state_next;
892 	states->ps_err = err;
893 	states->ps_reason = reason;
894 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
895 	    (void *)states);
896 
897 	state_online = instance_started(ri);
898 
899 	if (prev_state_online && !state_online)
900 		ri->ri_post_offline_hook();
901 	else if (!prev_state_online && state_online)
902 		ri->ri_post_online_hook();
903 
904 	return (rebound ? ECONNRESET : 0);
905 }
906 
907 void
908 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
909 {
910 	restarter_inst_t *inst;
911 
912 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
913 
914 	inst = inst_lookup_by_name(fmri);
915 	if (inst == NULL)
916 		return;
917 
918 	inst->ri_flags |= flag;
919 
920 	MUTEX_UNLOCK(&inst->ri_lock);
921 }
922 
923 static void
924 restarter_take_pending_snapshots(scf_handle_t *h)
925 {
926 	restarter_inst_t *inst;
927 	int r;
928 
929 	MUTEX_LOCK(&instance_list.ril_lock);
930 
931 	for (inst = uu_list_first(instance_list.ril_instance_list);
932 	    inst != NULL;
933 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
934 		const char *fmri;
935 		scf_instance_t *sinst = NULL;
936 
937 		MUTEX_LOCK(&inst->ri_lock);
938 
939 		/*
940 		 * This is where we'd check inst->ri_method_thread and if it
941 		 * were nonzero we'd wait in anticipation of another thread
942 		 * executing a method for inst.  Doing so with the instance_list
943 		 * locked, though, leads to deadlock.  Since taking a snapshot
944 		 * during that window won't hurt anything, we'll just continue.
945 		 */
946 
947 		fmri = inst->ri_i.i_fmri;
948 
949 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
950 			scf_snapshot_t *rsnap;
951 
952 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
953 
954 			rsnap = libscf_get_or_make_running_snapshot(sinst,
955 			    fmri, B_FALSE);
956 
957 			scf_instance_destroy(sinst);
958 
959 			if (rsnap != NULL)
960 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
961 
962 			scf_snapshot_destroy(rsnap);
963 		}
964 
965 		if (inst->ri_flags & RINST_RETAKE_START) {
966 			switch (r = libscf_snapshots_poststart(h, fmri,
967 			    B_FALSE)) {
968 			case 0:
969 			case ENOENT:
970 				inst->ri_flags &= ~RINST_RETAKE_START;
971 				break;
972 
973 			case ECONNABORTED:
974 				break;
975 
976 			case EACCES:
977 			default:
978 				bad_error("libscf_snapshots_poststart", r);
979 			}
980 		}
981 
982 		MUTEX_UNLOCK(&inst->ri_lock);
983 	}
984 
985 	MUTEX_UNLOCK(&instance_list.ril_lock);
986 }
987 
988 /* ARGSUSED */
989 void *
990 restarter_post_fsminimal_thread(void *unused)
991 {
992 	scf_handle_t *h;
993 	int r;
994 
995 	h = libscf_handle_create_bound_loop();
996 
997 	for (;;) {
998 		r = libscf_create_self(h);
999 		if (r == 0)
1000 			break;
1001 
1002 		assert(r == ECONNABORTED);
1003 		libscf_handle_rebind(h);
1004 	}
1005 
1006 	restarter_take_pending_snapshots(h);
1007 
1008 	(void) scf_handle_unbind(h);
1009 	scf_handle_destroy(h);
1010 
1011 	return (NULL);
1012 }
1013 
1014 /*
1015  * int stop_instance()
1016  *
1017  *   Stop the instance identified by the instance given as the second argument,
1018  *   for the cause stated.
1019  *
1020  *   Returns
1021  *     0 - success
1022  *     -1 - inst is in transition
1023  */
1024 static int
1025 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1026     stop_cause_t cause)
1027 {
1028 	fork_info_t *info;
1029 	const char *cp;
1030 	int err;
1031 	restarter_error_t re;
1032 	restarter_str_t	reason;
1033 
1034 	assert(MUTEX_HELD(&inst->ri_lock));
1035 	assert(inst->ri_method_thread == 0);
1036 
1037 	switch (cause) {
1038 	case RSTOP_EXIT:
1039 		re = RERR_RESTART;
1040 		reason = restarter_str_ct_ev_exit;
1041 		cp = "all processes in service exited";
1042 		break;
1043 	case RSTOP_CORE:
1044 		re = RERR_FAULT;
1045 		reason = restarter_str_ct_ev_core;
1046 		cp = "process dumped core";
1047 		break;
1048 	case RSTOP_SIGNAL:
1049 		re = RERR_FAULT;
1050 		reason = restarter_str_ct_ev_signal;
1051 		cp = "process received fatal signal from outside the service";
1052 		break;
1053 	case RSTOP_HWERR:
1054 		re = RERR_FAULT;
1055 		reason = restarter_str_ct_ev_hwerr;
1056 		cp = "process killed due to uncorrectable hardware error";
1057 		break;
1058 	case RSTOP_DEPENDENCY:
1059 		re = RERR_RESTART;
1060 		reason = restarter_str_dependency_activity;
1061 		cp = "dependency activity requires stop";
1062 		break;
1063 	case RSTOP_DISABLE:
1064 		re = RERR_RESTART;
1065 		reason = restarter_str_disable_request;
1066 		cp = "service disabled";
1067 		break;
1068 	case RSTOP_RESTART:
1069 		re = RERR_RESTART;
1070 		reason = restarter_str_restart_request;
1071 		cp = "service restarting";
1072 		break;
1073 	default:
1074 #ifndef NDEBUG
1075 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1076 		    cause, __FILE__, __LINE__);
1077 #endif
1078 		abort();
1079 	}
1080 
1081 	/* Services in the disabled and maintenance state are ignored */
1082 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1083 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1084 		log_framework(LOG_DEBUG,
1085 		    "%s: stop_instance -> is maint/disabled\n",
1086 		    inst->ri_i.i_fmri);
1087 		return (0);
1088 	}
1089 
1090 	/* Already stopped instances are left alone */
1091 	if (instance_started(inst) == 0) {
1092 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1093 		    inst->ri_i.i_fmri);
1094 		return (0);
1095 	}
1096 
1097 	if (instance_in_transition(inst)) {
1098 		/* requeue event by returning -1 */
1099 		log_framework(LOG_DEBUG,
1100 		    "Restarter: Not stopping %s, in transition.\n",
1101 		    inst->ri_i.i_fmri);
1102 		return (-1);
1103 	}
1104 
1105 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1106 
1107 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1108 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1109 
1110 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1111 		/*
1112 		 * No need to stop instance, as child has exited; remove
1113 		 * contract and move the instance to the offline state.
1114 		 */
1115 		switch (err = restarter_instance_update_states(local_handle,
1116 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1117 		    reason)) {
1118 		case 0:
1119 		case ECONNRESET:
1120 			break;
1121 
1122 		default:
1123 			bad_error("restarter_instance_update_states", err);
1124 		}
1125 
1126 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1127 		reset_start_times(inst);
1128 
1129 		if (inst->ri_i.i_primary_ctid != 0) {
1130 			inst->ri_m_inst =
1131 			    safe_scf_instance_create(local_handle);
1132 			inst->ri_mi_deleted = B_FALSE;
1133 
1134 			libscf_reget_instance(inst);
1135 			method_remove_contract(inst, B_TRUE, B_TRUE);
1136 
1137 			scf_instance_destroy(inst->ri_m_inst);
1138 			inst->ri_m_inst = NULL;
1139 		}
1140 
1141 		switch (err = restarter_instance_update_states(local_handle,
1142 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1143 		    reason)) {
1144 		case 0:
1145 		case ECONNRESET:
1146 			break;
1147 
1148 		default:
1149 			bad_error("restarter_instance_update_states", err);
1150 		}
1151 
1152 		return (0);
1153 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1154 		/*
1155 		 * Stopping a wait service through means other than the pid
1156 		 * exiting should keep wait_thread() from restarting the
1157 		 * service, by removing it from the wait list.
1158 		 * We cannot remove it right now otherwise the process will
1159 		 * end up <defunct> so mark it to be ignored.
1160 		 */
1161 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1162 	}
1163 
1164 	switch (err = restarter_instance_update_states(local_handle, inst,
1165 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1166 	    RESTARTER_STATE_DISABLED, RERR_NONE, reason)) {
1167 	case 0:
1168 	case ECONNRESET:
1169 		break;
1170 
1171 	default:
1172 		bad_error("restarter_instance_update_states", err);
1173 	}
1174 
1175 	info = startd_zalloc(sizeof (fork_info_t));
1176 
1177 	info->sf_id = inst->ri_id;
1178 	info->sf_method_type = METHOD_STOP;
1179 	info->sf_event_type = re;
1180 	info->sf_reason = reason;
1181 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1182 
1183 	return (0);
1184 }
1185 
1186 /*
1187  * Returns
1188  *   ENOENT - fmri is not in instance_list
1189  *   0 - success
1190  *   ECONNRESET - success, though handle was rebound
1191  *   -1 - instance is in transition
1192  */
1193 int
1194 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1195 {
1196 	restarter_inst_t *rip;
1197 	int r;
1198 
1199 	rip = inst_lookup_by_name(fmri);
1200 	if (rip == NULL)
1201 		return (ENOENT);
1202 
1203 	r = stop_instance(h, rip, flags);
1204 
1205 	MUTEX_UNLOCK(&rip->ri_lock);
1206 
1207 	return (r);
1208 }
1209 
1210 static void
1211 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1212     unmaint_cause_t cause)
1213 {
1214 	ctid_t ctid;
1215 	scf_instance_t *inst;
1216 	int r;
1217 	uint_t tries = 0, msecs = ALLOC_DELAY;
1218 	const char *cp;
1219 	restarter_str_t	reason;
1220 
1221 	assert(MUTEX_HELD(&rip->ri_lock));
1222 
1223 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1224 		log_error(LOG_DEBUG, "Restarter: "
1225 		    "Ignoring maintenance off command because %s is not in the "
1226 		    "maintenance state.\n", rip->ri_i.i_fmri);
1227 		return;
1228 	}
1229 
1230 	switch (cause) {
1231 	case RUNMAINT_CLEAR:
1232 		cp = "clear requested";
1233 		reason = restarter_str_clear_request;
1234 		break;
1235 	case RUNMAINT_DISABLE:
1236 		cp = "disable requested";
1237 		reason = restarter_str_disable_request;
1238 		break;
1239 	default:
1240 #ifndef NDEBUG
1241 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1242 		    cause, __FILE__, __LINE__);
1243 #endif
1244 		abort();
1245 	}
1246 
1247 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1248 	    cp);
1249 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1250 	    "%s.\n", rip->ri_i.i_fmri, cp);
1251 
1252 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1253 	    RESTARTER_STATE_NONE, RERR_RESTART, reason);
1254 
1255 	/*
1256 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1257 	 * a primary contract.
1258 	 */
1259 	if (rip->ri_i.i_primary_ctid == 0)
1260 		return;
1261 
1262 	ctid = rip->ri_i.i_primary_ctid;
1263 	contract_abandon(ctid);
1264 	rip->ri_i.i_primary_ctid = 0;
1265 
1266 rep_retry:
1267 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1268 	case 0:
1269 		break;
1270 
1271 	case ECONNABORTED:
1272 		libscf_handle_rebind(h);
1273 		goto rep_retry;
1274 
1275 	case ENOENT:
1276 		/* Must have been deleted. */
1277 		return;
1278 
1279 	case EINVAL:
1280 	case ENOTSUP:
1281 	default:
1282 		bad_error("libscf_handle_rebind", r);
1283 	}
1284 
1285 again:
1286 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1287 	switch (r) {
1288 	case 0:
1289 		break;
1290 
1291 	case ENOMEM:
1292 		++tries;
1293 		if (tries < ALLOC_RETRY) {
1294 			(void) poll(NULL, 0, msecs);
1295 			msecs *= ALLOC_DELAY_MULT;
1296 			goto again;
1297 		}
1298 
1299 		uu_die("Insufficient memory.\n");
1300 		/* NOTREACHED */
1301 
1302 	case ECONNABORTED:
1303 		scf_instance_destroy(inst);
1304 		libscf_handle_rebind(h);
1305 		goto rep_retry;
1306 
1307 	case ECANCELED:
1308 		break;
1309 
1310 	case EPERM:
1311 	case EACCES:
1312 	case EROFS:
1313 		log_error(LOG_INFO,
1314 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1315 		    rip->ri_i.i_fmri, strerror(r));
1316 		break;
1317 
1318 	case EINVAL:
1319 	case EBADF:
1320 	default:
1321 		bad_error("restarter_remove_contract", r);
1322 	}
1323 
1324 	scf_instance_destroy(inst);
1325 }
1326 
1327 /*
1328  * enable_inst()
1329  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1330  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1331  *   disabled, move it to offline.  If the event is _DISABLE or
1332  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1333  *
1334  *   Returns
1335  *     0 - success
1336  *     ECONNRESET - h was rebound
1337  */
1338 static int
1339 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1340     restarter_instance_qentry_t *riq)
1341 {
1342 	restarter_instance_state_t state;
1343 	restarter_event_type_t e = riq->riq_type;
1344 	restarter_str_t reason = restarter_str_per_configuration;
1345 	int r;
1346 
1347 	assert(MUTEX_HELD(&inst->ri_lock));
1348 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1349 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1350 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1351 	assert(instance_in_transition(inst) == 0);
1352 
1353 	state = inst->ri_i.i_state;
1354 
1355 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1356 		inst->ri_i.i_enabled = 1;
1357 
1358 		if (state == RESTARTER_STATE_UNINIT ||
1359 		    state == RESTARTER_STATE_DISABLED) {
1360 			/*
1361 			 * B_FALSE: Don't log an error if the log_instance()
1362 			 * fails because it will fail on the miniroot before
1363 			 * install-discovery runs.
1364 			 */
1365 			log_instance(inst, B_FALSE, "Enabled.");
1366 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1367 			    inst->ri_i.i_fmri);
1368 
1369 			/*
1370 			 * If we are coming from DISABLED, it was obviously an
1371 			 * enable request. If we are coming from UNINIT, it may
1372 			 * have been a sevice in MAINT that was cleared.
1373 			 */
1374 			if (riq->riq_reason == restarter_str_clear_request)
1375 				reason = restarter_str_clear_request;
1376 			else if (state == RESTARTER_STATE_DISABLED)
1377 				reason = restarter_str_enable_request;
1378 			(void) restarter_instance_update_states(h, inst,
1379 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1380 			    RERR_NONE, reason);
1381 		} else {
1382 			log_framework(LOG_DEBUG, "Restarter: "
1383 			    "Not changing state of %s for enable command.\n",
1384 			    inst->ri_i.i_fmri);
1385 		}
1386 	} else {
1387 		inst->ri_i.i_enabled = 0;
1388 
1389 		switch (state) {
1390 		case RESTARTER_STATE_ONLINE:
1391 		case RESTARTER_STATE_DEGRADED:
1392 			r = stop_instance(h, inst, RSTOP_DISABLE);
1393 			return (r == ECONNRESET ? 0 : r);
1394 
1395 		case RESTARTER_STATE_OFFLINE:
1396 		case RESTARTER_STATE_UNINIT:
1397 			if (inst->ri_i.i_primary_ctid != 0) {
1398 				inst->ri_m_inst = safe_scf_instance_create(h);
1399 				inst->ri_mi_deleted = B_FALSE;
1400 
1401 				libscf_reget_instance(inst);
1402 				method_remove_contract(inst, B_TRUE, B_TRUE);
1403 
1404 				scf_instance_destroy(inst->ri_m_inst);
1405 			}
1406 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1407 			log_instance(inst, B_FALSE, "Disabled.");
1408 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1409 			    inst->ri_i.i_fmri);
1410 
1411 			/*
1412 			 * If we are coming from OFFLINE, it was obviously a
1413 			 * disable request. But if we are coming from
1414 			 * UNINIT, it may have been a disable request for a
1415 			 * service in MAINT.
1416 			 */
1417 			if (riq->riq_reason == restarter_str_disable_request ||
1418 			    state == RESTARTER_STATE_OFFLINE)
1419 				reason = restarter_str_disable_request;
1420 			(void) restarter_instance_update_states(h, inst,
1421 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1422 			    RERR_RESTART, reason);
1423 			return (0);
1424 
1425 		case RESTARTER_STATE_DISABLED:
1426 			break;
1427 
1428 		case RESTARTER_STATE_MAINT:
1429 			/*
1430 			 * We only want to pull the instance out of maintenance
1431 			 * if the disable is on adminstrative request.  The
1432 			 * graph engine sends _DISABLE events whenever a
1433 			 * service isn't in the disabled state, and we don't
1434 			 * want to pull the service out of maintenance if,
1435 			 * for example, it is there due to a dependency cycle.
1436 			 */
1437 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1438 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1439 			break;
1440 
1441 		default:
1442 #ifndef NDEBUG
1443 			(void) fprintf(stderr, "Restarter instance %s has "
1444 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1445 #endif
1446 			abort();
1447 		}
1448 	}
1449 
1450 	return (0);
1451 }
1452 
1453 static void
1454 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1455     int32_t reason)
1456 {
1457 	fork_info_t *info;
1458 	restarter_str_t	new_reason;
1459 
1460 	assert(MUTEX_HELD(&inst->ri_lock));
1461 	assert(instance_in_transition(inst) == 0);
1462 	assert(inst->ri_method_thread == 0);
1463 
1464 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1465 	    inst->ri_i.i_fmri);
1466 
1467 	/*
1468 	 * We want to keep the original reason for restarts and clear actions
1469 	 */
1470 	switch (reason) {
1471 	case restarter_str_restart_request:
1472 	case restarter_str_clear_request:
1473 		new_reason = reason;
1474 		break;
1475 	default:
1476 		new_reason = restarter_str_dependencies_satisfied;
1477 	}
1478 
1479 	/* Services in the disabled and maintenance state are ignored */
1480 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1481 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1482 	    inst->ri_i.i_enabled == 0) {
1483 		log_framework(LOG_DEBUG,
1484 		    "%s: start_instance -> is maint/disabled\n",
1485 		    inst->ri_i.i_fmri);
1486 		return;
1487 	}
1488 
1489 	/* Already started instances are left alone */
1490 	if (instance_started(inst) == 1) {
1491 		log_framework(LOG_DEBUG,
1492 		    "%s: start_instance -> is already started\n",
1493 		    inst->ri_i.i_fmri);
1494 		return;
1495 	}
1496 
1497 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1498 
1499 	(void) restarter_instance_update_states(local_handle, inst,
1500 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1501 
1502 	info = startd_zalloc(sizeof (fork_info_t));
1503 
1504 	info->sf_id = inst->ri_id;
1505 	info->sf_method_type = METHOD_START;
1506 	info->sf_event_type = RERR_NONE;
1507 	info->sf_reason = new_reason;
1508 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1509 }
1510 
1511 static int
1512 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1513 {
1514 	scf_instance_t *inst;
1515 	int ret = 0;
1516 
1517 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1518 		return (-1);
1519 
1520 	ret = restarter_inst_ractions_from_tty(inst);
1521 
1522 	scf_instance_destroy(inst);
1523 	return (ret);
1524 }
1525 
1526 static void
1527 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1528     restarter_str_t reason)
1529 {
1530 	fork_info_t *info;
1531 	scf_instance_t *scf_inst = NULL;
1532 
1533 	assert(MUTEX_HELD(&rip->ri_lock));
1534 	assert(reason != restarter_str_none);
1535 	assert(rip->ri_method_thread == 0);
1536 
1537 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1538 	    restarter_get_str_short(reason));
1539 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1540 	    rip->ri_i.i_fmri, restarter_get_str_short(reason));
1541 
1542 	/* Services in the maintenance state are ignored */
1543 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1544 		log_framework(LOG_DEBUG,
1545 		    "%s: maintain_instance -> is already in maintenance\n",
1546 		    rip->ri_i.i_fmri);
1547 		return;
1548 	}
1549 
1550 	/*
1551 	 * If reason state is restarter_str_service_request and
1552 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1553 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1554 	 */
1555 	if (reason == restarter_str_service_request &&
1556 	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1557 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1558 			if (restarter_inst_set_aux_fmri(scf_inst))
1559 				log_framework(LOG_DEBUG, "%s: "
1560 				    "restarter_inst_set_aux_fmri failed: ",
1561 				    rip->ri_i.i_fmri);
1562 		} else {
1563 			log_framework(LOG_DEBUG, "%s: "
1564 			    "restarter_inst_validate_ractions_aux_fmri "
1565 			    "failed: ", rip->ri_i.i_fmri);
1566 
1567 			if (restarter_inst_reset_aux_fmri(scf_inst))
1568 				log_framework(LOG_DEBUG, "%s: "
1569 				    "restarter_inst_reset_aux_fmri failed: ",
1570 				    rip->ri_i.i_fmri);
1571 		}
1572 		scf_instance_destroy(scf_inst);
1573 	}
1574 
1575 	if (immediate || !instance_started(rip)) {
1576 		if (rip->ri_i.i_primary_ctid != 0) {
1577 			rip->ri_m_inst = safe_scf_instance_create(h);
1578 			rip->ri_mi_deleted = B_FALSE;
1579 
1580 			libscf_reget_instance(rip);
1581 			method_remove_contract(rip, B_TRUE, B_TRUE);
1582 
1583 			scf_instance_destroy(rip->ri_m_inst);
1584 		}
1585 
1586 		(void) restarter_instance_update_states(h, rip,
1587 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1588 		    reason);
1589 		return;
1590 	}
1591 
1592 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1593 	    RESTARTER_STATE_MAINT, RERR_NONE, reason);
1594 
1595 	log_transition(rip, MAINT_REQUESTED);
1596 
1597 	info = startd_zalloc(sizeof (*info));
1598 	info->sf_id = rip->ri_id;
1599 	info->sf_method_type = METHOD_STOP;
1600 	info->sf_event_type = RERR_RESTART;
1601 	info->sf_reason = reason;
1602 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1603 }
1604 
1605 static void
1606 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1607 {
1608 	scf_instance_t *inst;
1609 	scf_snapshot_t *snap;
1610 	fork_info_t *info;
1611 	int r;
1612 
1613 	assert(MUTEX_HELD(&rip->ri_lock));
1614 
1615 	log_instance(rip, B_TRUE, "Rereading configuration.");
1616 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1617 	    rip->ri_i.i_fmri);
1618 
1619 rep_retry:
1620 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1621 	switch (r) {
1622 	case 0:
1623 		break;
1624 
1625 	case ECONNABORTED:
1626 		libscf_handle_rebind(h);
1627 		goto rep_retry;
1628 
1629 	case ENOENT:
1630 		/* Must have been deleted. */
1631 		return;
1632 
1633 	case EINVAL:
1634 	case ENOTSUP:
1635 	default:
1636 		bad_error("libscf_fmri_get_instance", r);
1637 	}
1638 
1639 	snap = libscf_get_running_snapshot(inst);
1640 
1641 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1642 	    &rip->ri_utmpx_prefix);
1643 	switch (r) {
1644 	case 0:
1645 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1646 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1647 		break;
1648 
1649 	case ECONNABORTED:
1650 		scf_instance_destroy(inst);
1651 		scf_snapshot_destroy(snap);
1652 		libscf_handle_rebind(h);
1653 		goto rep_retry;
1654 
1655 	case ECANCELED:
1656 	case ENOENT:
1657 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1658 		break;
1659 
1660 	default:
1661 		bad_error("libscf_get_startd_properties", r);
1662 	}
1663 
1664 	if (instance_started(rip)) {
1665 		/* Refresh does not change the state. */
1666 		(void) restarter_instance_update_states(h, rip,
1667 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1668 		    restarter_str_refresh);
1669 
1670 		info = startd_zalloc(sizeof (*info));
1671 		info->sf_id = rip->ri_id;
1672 		info->sf_method_type = METHOD_REFRESH;
1673 		info->sf_event_type = RERR_REFRESH;
1674 		info->sf_reason = NULL;
1675 
1676 		assert(rip->ri_method_thread == 0);
1677 		rip->ri_method_thread =
1678 		    startd_thread_create(method_thread, info);
1679 	}
1680 
1681 	scf_snapshot_destroy(snap);
1682 	scf_instance_destroy(inst);
1683 }
1684 
/*
 * Printable names for restarter events.  The table is indexed directly by
 * event type when events are logged, so the order of the entries matters.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE",
	"STOP_RESET"
};
1691 
1692 /*
1693  * void *restarter_process_events()
1694  *
1695  *   Called in a separate thread to process the events on an instance's
1696  *   queue.  Empties the queue completely, and tries to keep the thread
1697  *   around for a little while after the queue is empty to save on
1698  *   startup costs.
1699  */
1700 static void *
1701 restarter_process_events(void *arg)
1702 {
1703 	scf_handle_t *h;
1704 	restarter_instance_qentry_t *event;
1705 	restarter_inst_t *rip;
1706 	char *fmri = (char *)arg;
1707 	struct timespec to;
1708 
1709 	assert(fmri != NULL);
1710 
1711 	h = libscf_handle_create_bound_loop();
1712 
1713 	/* grab the queue lock */
1714 	rip = inst_lookup_queue(fmri);
1715 	if (rip == NULL)
1716 		goto out;
1717 
1718 again:
1719 
1720 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1721 		restarter_inst_t *inst;
1722 
1723 		/* drop the queue lock */
1724 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1725 
1726 		/*
1727 		 * Grab the inst lock -- this waits until any outstanding
1728 		 * method finishes running.
1729 		 */
1730 		inst = inst_lookup_by_name(fmri);
1731 		if (inst == NULL) {
1732 			/* Getting deleted in the middle isn't an error. */
1733 			goto cont;
1734 		}
1735 
1736 		assert(instance_in_transition(inst) == 0);
1737 
1738 		/* process the event */
1739 		switch (event->riq_type) {
1740 		case RESTARTER_EVENT_TYPE_ENABLE:
1741 		case RESTARTER_EVENT_TYPE_DISABLE:
1742 			(void) enable_inst(h, inst, event);
1743 			break;
1744 
1745 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1746 			if (enable_inst(h, inst, event) == 0)
1747 				reset_start_times(inst);
1748 			break;
1749 
1750 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1751 			restarter_delete_inst(inst);
1752 			inst = NULL;
1753 			goto cont;
1754 
1755 		case RESTARTER_EVENT_TYPE_STOP_RESET:
1756 			reset_start_times(inst);
1757 			/* FALLTHROUGH */
1758 		case RESTARTER_EVENT_TYPE_STOP:
1759 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1760 			break;
1761 
1762 		case RESTARTER_EVENT_TYPE_START:
1763 			start_instance(h, inst, event->riq_reason);
1764 			break;
1765 
1766 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1767 			maintain_instance(h, inst, 0,
1768 			    restarter_str_dependency_cycle);
1769 			break;
1770 
1771 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1772 			maintain_instance(h, inst, 0,
1773 			    restarter_str_invalid_dependency);
1774 			break;
1775 
1776 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1777 			if (event_from_tty(h, inst) == 0)
1778 				maintain_instance(h, inst, 0,
1779 				    restarter_str_service_request);
1780 			else
1781 				maintain_instance(h, inst, 0,
1782 				    restarter_str_administrative_request);
1783 			break;
1784 
1785 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1786 			if (event_from_tty(h, inst) == 0)
1787 				maintain_instance(h, inst, 1,
1788 				    restarter_str_service_request);
1789 			else
1790 				maintain_instance(h, inst, 1,
1791 				    restarter_str_administrative_request);
1792 			break;
1793 
1794 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1795 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1796 			reset_start_times(inst);
1797 			break;
1798 
1799 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1800 			refresh_instance(h, inst);
1801 			break;
1802 
1803 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1804 			log_framework(LOG_WARNING, "Restarter: "
1805 			    "%s command (for %s) unimplemented.\n",
1806 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1807 			break;
1808 
1809 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1810 			if (!instance_started(inst)) {
1811 				log_framework(LOG_DEBUG, "Restarter: "
1812 				    "Not restarting %s; not running.\n",
1813 				    inst->ri_i.i_fmri);
1814 			} else {
1815 				/*
1816 				 * Stop the instance.  If it can be restarted,
1817 				 * the graph engine will send a new event.
1818 				 */
1819 				if (stop_instance(h, inst, RSTOP_RESTART) == 0)
1820 					reset_start_times(inst);
1821 			}
1822 			break;
1823 
1824 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1825 		default:
1826 #ifndef NDEBUG
1827 			uu_warn("%s:%d: Bad restarter event %d.  "
1828 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1829 #endif
1830 			abort();
1831 		}
1832 
1833 		assert(inst != NULL);
1834 		MUTEX_UNLOCK(&inst->ri_lock);
1835 
1836 cont:
1837 		/* grab the queue lock */
1838 		rip = inst_lookup_queue(fmri);
1839 		if (rip == NULL)
1840 			goto out;
1841 
1842 		/* delete the event */
1843 		uu_list_remove(rip->ri_queue, event);
1844 		startd_free(event, sizeof (restarter_instance_qentry_t));
1845 	}
1846 
1847 	assert(rip != NULL);
1848 
1849 	/*
1850 	 * Try to preserve the thread for a little while for future use.
1851 	 */
1852 	to.tv_sec = 3;
1853 	to.tv_nsec = 0;
1854 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1855 	    &rip->ri_queue_lock, &to);
1856 
1857 	if (uu_list_first(rip->ri_queue) != NULL)
1858 		goto again;
1859 
1860 	rip->ri_queue_thread = 0;
1861 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1862 
1863 out:
1864 	(void) scf_handle_unbind(h);
1865 	scf_handle_destroy(h);
1866 	free(fmri);
1867 	return (NULL);
1868 }
1869 
1870 static int
1871 is_admin_event(restarter_event_type_t t) {
1872 
1873 	switch (t) {
1874 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1875 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1876 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1877 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1878 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1879 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1880 		return (1);
1881 	default:
1882 		return (0);
1883 	}
1884 }
1885 
1886 static void
1887 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1888 {
1889 	restarter_instance_qentry_t *qe;
1890 	int r;
1891 
1892 	assert(MUTEX_HELD(&ri->ri_queue_lock));
1893 	assert(!MUTEX_HELD(&ri->ri_lock));
1894 
1895 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1896 	qe->riq_type = e->rpe_type;
1897 	qe->riq_reason = e->rpe_reason;
1898 
1899 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1900 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1901 	assert(r == 0);
1902 }
1903 
1904 /*
1905  * void *restarter_event_thread()
1906  *
1907  *  Handle incoming graph events by placing them on a per-instance
1908  *  queue.  We can't lock the main part of the instance structure, so
1909  *  just modify the seprarately locked event queue portion.
1910  */
1911 /*ARGSUSED*/
1912 static void *
1913 restarter_event_thread(void *unused)
1914 {
1915 	scf_handle_t *h;
1916 
1917 	/*
1918 	 * This is a new thread, and thus, gets its own handle
1919 	 * to the repository.
1920 	 */
1921 	h = libscf_handle_create_bound_loop();
1922 
1923 	MUTEX_LOCK(&ru->restarter_update_lock);
1924 
1925 	/*CONSTCOND*/
1926 	while (1) {
1927 		restarter_protocol_event_t *e;
1928 
1929 		while (ru->restarter_update_wakeup == 0)
1930 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1931 			    &ru->restarter_update_lock);
1932 
1933 		ru->restarter_update_wakeup = 0;
1934 
1935 		while ((e = restarter_event_dequeue()) != NULL) {
1936 			restarter_inst_t *rip;
1937 			char *fmri;
1938 
1939 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1940 
1941 			/*
1942 			 * ADD_INSTANCE is special: there's likely no
1943 			 * instance structure yet, so we need to handle the
1944 			 * addition synchronously.
1945 			 */
1946 			switch (e->rpe_type) {
1947 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1948 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1949 					log_error(LOG_INFO, "Restarter: "
1950 					    "Could not add %s.\n", e->rpe_inst);
1951 
1952 				MUTEX_LOCK(&st->st_load_lock);
1953 				if (--st->st_load_instances == 0)
1954 					(void) pthread_cond_broadcast(
1955 					    &st->st_load_cv);
1956 				MUTEX_UNLOCK(&st->st_load_lock);
1957 
1958 				goto nolookup;
1959 			}
1960 
1961 			/*
1962 			 * Lookup the instance, locking only the event queue.
1963 			 * Can't grab ri_lock here because it might be held
1964 			 * by a long-running method.
1965 			 */
1966 			rip = inst_lookup_queue(e->rpe_inst);
1967 			if (rip == NULL) {
1968 				log_error(LOG_INFO, "Restarter: "
1969 				    "Ignoring %s command for unknown service "
1970 				    "%s.\n", event_names[e->rpe_type],
1971 				    e->rpe_inst);
1972 				goto nolookup;
1973 			}
1974 
1975 			/* Keep ADMIN events from filling up the queue. */
1976 			if (is_admin_event(e->rpe_type) &&
1977 			    uu_list_numnodes(rip->ri_queue) >
1978 			    RINST_QUEUE_THRESHOLD) {
1979 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1980 				log_instance(rip, B_TRUE, "Instance event "
1981 				    "queue overflow.  Dropping administrative "
1982 				    "request.");
1983 				log_framework(LOG_DEBUG, "%s: Instance event "
1984 				    "queue overflow.  Dropping administrative "
1985 				    "request.\n", rip->ri_i.i_fmri);
1986 				goto nolookup;
1987 			}
1988 
1989 			/* Now add the event to the instance queue. */
1990 			restarter_queue_event(rip, e);
1991 
1992 			if (rip->ri_queue_thread == 0) {
1993 				/*
1994 				 * Start a thread if one isn't already
1995 				 * running.
1996 				 */
1997 				fmri = safe_strdup(e->rpe_inst);
1998 				rip->ri_queue_thread =  startd_thread_create(
1999 				    restarter_process_events, (void *)fmri);
2000 			} else {
2001 				/*
2002 				 * Signal the existing thread that there's
2003 				 * a new event.
2004 				 */
2005 				(void) pthread_cond_broadcast(
2006 				    &rip->ri_queue_cv);
2007 			}
2008 
2009 			MUTEX_UNLOCK(&rip->ri_queue_lock);
2010 nolookup:
2011 			restarter_event_release(e);
2012 
2013 			MUTEX_LOCK(&ru->restarter_update_lock);
2014 		}
2015 	}
2016 
2017 	/*
2018 	 * Unreachable for now -- there's currently no graceful cleanup
2019 	 * called on exit().
2020 	 */
2021 	(void) scf_handle_unbind(h);
2022 	scf_handle_destroy(h);
2023 	return (NULL);
2024 }
2025 
2026 static restarter_inst_t *
2027 contract_to_inst(ctid_t ctid)
2028 {
2029 	restarter_inst_t *inst;
2030 	int id;
2031 
2032 	id = lookup_inst_by_contract(ctid);
2033 	if (id == -1)
2034 		return (NULL);
2035 
2036 	inst = inst_lookup_by_id(id);
2037 	if (inst != NULL) {
2038 		/*
2039 		 * Since ri_lock isn't held by the contract id lookup, this
2040 		 * instance may have been restarted and now be in a new
2041 		 * contract, making the old contract no longer valid for this
2042 		 * instance.
2043 		 */
2044 		if (ctid != inst->ri_i.i_primary_ctid) {
2045 			MUTEX_UNLOCK(&inst->ri_lock);
2046 			inst = NULL;
2047 		}
2048 	}
2049 	return (inst);
2050 }
2051 
2052 /*
2053  * void contract_action()
2054  *   Take action on contract events.
2055  */
2056 static void
2057 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2058     uint32_t type)
2059 {
2060 	const char *fmri = inst->ri_i.i_fmri;
2061 
2062 	assert(MUTEX_HELD(&inst->ri_lock));
2063 
2064 	/*
2065 	 * If startd has stopped this contract, there is no need to
2066 	 * stop it again.
2067 	 */
2068 	if (inst->ri_i.i_primary_ctid > 0 &&
2069 	    inst->ri_i.i_primary_ctid_stopped)
2070 		return;
2071 
2072 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2073 	    | CT_PR_EV_HWERR)) == 0) {
2074 		/*
2075 		 * There shouldn't be other events, since that's not how we set
2076 		 * the terms. Thus, just log an error and drive on.
2077 		 */
2078 		log_framework(LOG_NOTICE,
2079 		    "%s: contract %ld received unexpected critical event "
2080 		    "(%d)\n", fmri, id, type);
2081 		return;
2082 	}
2083 
2084 	assert(instance_in_transition(inst) == 0);
2085 
2086 	if (instance_is_wait_style(inst)) {
2087 		/*
2088 		 * We ignore all events; if they impact the
2089 		 * process we're monitoring, then the
2090 		 * wait_thread will stop the instance.
2091 		 */
2092 		log_framework(LOG_DEBUG,
2093 		    "%s: ignoring contract event on wait-style service\n",
2094 		    fmri);
2095 	} else {
2096 		/*
2097 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2098 		 */
2099 		switch (type) {
2100 		case CT_PR_EV_EMPTY:
2101 			(void) stop_instance(h, inst, RSTOP_EXIT);
2102 			break;
2103 		case CT_PR_EV_CORE:
2104 			(void) stop_instance(h, inst, RSTOP_CORE);
2105 			break;
2106 		case CT_PR_EV_SIGNAL:
2107 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
2108 			break;
2109 		case CT_PR_EV_HWERR:
2110 			(void) stop_instance(h, inst, RSTOP_HWERR);
2111 			break;
2112 		}
2113 	}
2114 }
2115 
2116 /*
 * void *restarter_contracts_event_thread(void *)
2118  *   Listens to the process contract bundle for critical events, taking action
2119  *   on events from contracts we know we are responsible for.
2120  */
2121 /*ARGSUSED*/
2122 static void *
2123 restarter_contracts_event_thread(void *unused)
2124 {
2125 	int fd, err;
2126 	scf_handle_t *local_handle;
2127 
2128 	/*
2129 	 * Await graph load completion.  That is, stop here, until we've scanned
2130 	 * the repository for contract - instance associations.
2131 	 */
2132 	MUTEX_LOCK(&st->st_load_lock);
2133 	while (!(st->st_load_complete && st->st_load_instances == 0))
2134 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2135 	MUTEX_UNLOCK(&st->st_load_lock);
2136 
2137 	/*
2138 	 * This is a new thread, and thus, gets its own handle
2139 	 * to the repository.
2140 	 */
2141 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2142 		uu_die("Unable to bind a new repository handle: %s\n",
2143 		    scf_strerror(scf_error()));
2144 
2145 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2146 	if (fd == -1)
2147 		uu_die("process bundle open failed");
2148 
2149 	/*
2150 	 * Make sure we get all events (including those generated by configd
2151 	 * before this thread was started).
2152 	 */
2153 	err = ct_event_reset(fd);
2154 	assert(err == 0);
2155 
2156 	for (;;) {
2157 		int efd, sfd;
2158 		ct_evthdl_t ev;
2159 		uint32_t type;
2160 		ctevid_t evid;
2161 		ct_stathdl_t status;
2162 		ctid_t ctid;
2163 		restarter_inst_t *inst;
2164 		uint64_t cookie;
2165 
2166 		if (err = ct_event_read_critical(fd, &ev)) {
2167 			log_error(LOG_WARNING,
2168 			    "Error reading next contract event: %s",
2169 			    strerror(err));
2170 			continue;
2171 		}
2172 
2173 		evid = ct_event_get_evid(ev);
2174 		ctid = ct_event_get_ctid(ev);
2175 		type = ct_event_get_type(ev);
2176 
2177 		/* Fetch cookie. */
2178 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2179 		    < 0) {
2180 			ct_event_free(ev);
2181 			continue;
2182 		}
2183 
2184 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2185 			log_framework(LOG_WARNING, "Could not get status for "
2186 			    "contract %ld: %s\n", ctid, strerror(err));
2187 
2188 			startd_close(sfd);
2189 			ct_event_free(ev);
2190 			continue;
2191 		}
2192 
2193 		cookie = ct_status_get_cookie(status);
2194 
2195 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2196 		    "cookie %lld\n", type, ctid, cookie);
2197 
2198 		ct_status_free(status);
2199 
2200 		startd_close(sfd);
2201 
2202 		/*
2203 		 * svc.configd(1M) restart handling performed by the
2204 		 * fork_configd_thread.  We don't acknowledge, as that thread
2205 		 * will do so.
2206 		 */
2207 		if (cookie == CONFIGD_COOKIE) {
2208 			ct_event_free(ev);
2209 			continue;
2210 		}
2211 
2212 		inst = NULL;
2213 		if (storing_contract != 0 &&
2214 		    (inst = contract_to_inst(ctid)) == NULL) {
2215 			/*
2216 			 * This can happen for two reasons:
2217 			 * - method_run() has not yet stored the
2218 			 *    the contract into the internal hash table.
2219 			 * - we receive an EMPTY event for an abandoned
2220 			 *    contract.
2221 			 * If there is any contract in the process of
2222 			 * being stored into the hash table then re-read
2223 			 * the event later.
2224 			 */
2225 			log_framework(LOG_DEBUG,
2226 			    "Reset event %d for unknown "
2227 			    "contract id %ld\n", type, ctid);
2228 
2229 			/* don't go too fast */
2230 			(void) poll(NULL, 0, 100);
2231 
2232 			(void) ct_event_reset(fd);
2233 			ct_event_free(ev);
2234 			continue;
2235 		}
2236 
2237 		/*
2238 		 * Do not call contract_to_inst() again if first
2239 		 * call succeeded.
2240 		 */
2241 		if (inst == NULL)
2242 			inst = contract_to_inst(ctid);
2243 		if (inst == NULL) {
2244 			/*
2245 			 * This can happen if we receive an EMPTY
2246 			 * event for an abandoned contract.
2247 			 */
2248 			log_framework(LOG_DEBUG,
2249 			    "Received event %d for unknown contract id "
2250 			    "%ld\n", type, ctid);
2251 		} else {
2252 			log_framework(LOG_DEBUG,
2253 			    "Received event %d for contract id "
2254 			    "%ld (%s)\n", type, ctid,
2255 			    inst->ri_i.i_fmri);
2256 
2257 			contract_action(local_handle, inst, ctid, type);
2258 
2259 			MUTEX_UNLOCK(&inst->ri_lock);
2260 		}
2261 
2262 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2263 		    O_WRONLY);
2264 		if (efd != -1) {
2265 			(void) ct_ctl_ack(efd, evid);
2266 			startd_close(efd);
2267 		}
2268 
2269 		ct_event_free(ev);
2270 
2271 	}
2272 
2273 	/*NOTREACHED*/
2274 	return (NULL);
2275 }
2276 
2277 /*
2278  * Timeout queue, processed by restarter_timeouts_event_thread().
      *
      * timeouts is the queue itself and timeout_pool is the list pool its
      * entries are initialized against; both are set up by timeout_init().
      * Accesses to timeouts->tq_list in this file are made with
      * timeouts->tq_lock held.
2279  */
2280 timeout_queue_t *timeouts;
2281 static uu_list_pool_t *timeout_pool;
2282 
     /*
      * Wakeup channel for the timeout thread.  timeout_insert() sets tu_wakeup
      * and broadcasts tu_cv while holding tu_lock; the timeout thread, once it
      * finds the queue empty, waits on tu_cv until tu_wakeup is non-zero and
      * then clears it.
      */
2283 typedef struct timeout_update {
2284 	pthread_mutex_t		tu_lock;	/* held around tu_wakeup accesses */
2285 	pthread_cond_t		tu_cv;		/* broadcast by timeout_insert() */
2286 	int			tu_wakeup;	/* non-zero: a timeout was queued */
2287 } timeout_update_t;
2288 
     /* The single wakeup channel instance, allocated by timeout_init(). */
2289 timeout_update_t *tu;
2290 
/*
 * NULL-terminated table of instance FMRIs that is_timeout_ovr() matches
 * against.  An instance whose FMRI appears here is reported as having its
 * timeout overridden by svc.startd.
 *
 * The table is never modified at run time, so both the strings and the
 * array of pointers are const-qualified.
 */
static const char * const timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2305 
2306 int
2307 is_timeout_ovr(restarter_inst_t *inst)
2308 {
2309 	int i;
2310 
2311 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2312 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2313 			log_instance(inst, B_TRUE, "Timeout override by "
2314 			    "svc.startd.  Using infinite timeout.");
2315 			return (1);
2316 		}
2317 	}
2318 
2319 	return (0);
2320 }
2321 
2322 /*ARGSUSED*/
2323 static int
2324 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2325 {
2326 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2327 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2328 
2329 	if (t1 > t2)
2330 		return (1);
2331 	else if (t1 < t2)
2332 		return (-1);
2333 	return (0);
2334 }
2335 
2336 void
2337 timeout_init()
2338 {
2339 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2340 
2341 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2342 
2343 	timeout_pool = startd_list_pool_create("timeouts",
2344 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2345 	    timeout_compare, UU_LIST_POOL_DEBUG);
2346 	assert(timeout_pool != NULL);
2347 
2348 	timeouts->tq_list = startd_list_create(timeout_pool,
2349 	    timeouts, UU_LIST_SORTED);
2350 	assert(timeouts->tq_list != NULL);
2351 
2352 	tu = startd_zalloc(sizeof (timeout_update_t));
2353 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2354 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2355 }
2356 
2357 void
2358 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2359 {
2360 	hrtime_t now, timeout;
2361 	timeout_entry_t *entry;
2362 	uu_list_index_t idx;
2363 
2364 	assert(MUTEX_HELD(&inst->ri_lock));
2365 
2366 	now = gethrtime();
2367 
2368 	/*
2369 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2370 	 * just return.
2371 	 */
2372 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2373 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2374 		    "treating as infinite.");
2375 		return;
2376 	}
2377 
2378 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2379 	timeout = now + (timeout_sec * 1000000000LL);
2380 
2381 	entry = startd_alloc(sizeof (timeout_entry_t));
2382 	entry->te_timeout = timeout;
2383 	entry->te_ctid = cid;
2384 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2385 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2386 	entry->te_fired = 0;
2387 	/* Insert the calculated timeout time onto the queue. */
2388 	MUTEX_LOCK(&timeouts->tq_lock);
2389 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2390 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2391 	uu_list_insert(timeouts->tq_list, entry, idx);
2392 	MUTEX_UNLOCK(&timeouts->tq_lock);
2393 
2394 	assert(inst->ri_timeout == NULL);
2395 	inst->ri_timeout = entry;
2396 
2397 	MUTEX_LOCK(&tu->tu_lock);
2398 	tu->tu_wakeup = 1;
2399 	(void) pthread_cond_broadcast(&tu->tu_cv);
2400 	MUTEX_UNLOCK(&tu->tu_lock);
2401 }
2402 
2403 
2404 void
2405 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2406 {
2407 	assert(MUTEX_HELD(&inst->ri_lock));
2408 
2409 	if (inst->ri_timeout == NULL)
2410 		return;
2411 
2412 	assert(inst->ri_timeout->te_ctid == cid);
2413 
2414 	MUTEX_LOCK(&timeouts->tq_lock);
2415 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2416 	MUTEX_UNLOCK(&timeouts->tq_lock);
2417 
2418 	free(inst->ri_timeout->te_fmri);
2419 	free(inst->ri_timeout->te_logstem);
2420 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2421 	inst->ri_timeout = NULL;
2422 }
2423 
2424 static int
2425 timeout_now()
2426 {
2427 	timeout_entry_t *e;
2428 	hrtime_t now;
2429 	int ret;
2430 
2431 	now = gethrtime();
2432 
2433 	/*
2434 	 * Walk through the (sorted) timeouts list.  While the timeout
2435 	 * at the head of the list is <= the current time, kill the
2436 	 * method.
2437 	 */
2438 	MUTEX_LOCK(&timeouts->tq_lock);
2439 
2440 	for (e = uu_list_first(timeouts->tq_list);
2441 	    e != NULL && e->te_timeout <= now;
2442 	    e = uu_list_next(timeouts->tq_list, e)) {
2443 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2444 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2445 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2446 		    "Method or service exit timed out.  Killing contract %ld.",
2447 		    e->te_ctid);
2448 		e->te_fired = 1;
2449 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2450 	}
2451 
2452 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2453 		ret = 0;
2454 	else
2455 		ret = -1;
2456 
2457 	MUTEX_UNLOCK(&timeouts->tq_lock);
2458 
2459 	return (ret);
2460 }
2461 
2462 /*
2463  * void *restarter_timeouts_event_thread(void *)
2464  *   Responsible for monitoring the method timeouts.  This thread must
2465  *   be started before any methods are called.
2466  */
2467 /*ARGSUSED*/
2468 static void *
2469 restarter_timeouts_event_thread(void *unused)
2470 {
2471 	/*
2472 	 * Timeouts are entered on a priority queue, which is processed by
2473 	 * this thread.  As timeouts are specified in seconds, we'll do
2474 	 * the necessary processing every second, as long as the queue
2475 	 * is not empty.
2476 	 */
2477 
2478 	/*CONSTCOND*/
2479 	while (1) {
2480 		/*
2481 		 * As long as the timeout list isn't empty, process it
2482 		 * every second.
2483 		 */
2484 		if (timeout_now() == 0) {
2485 			(void) sleep(1);
2486 			continue;
2487 		}
2488 
2489 		/* The list is empty, wait until we have more timeouts. */
2490 		MUTEX_LOCK(&tu->tu_lock);
2491 
2492 		while (tu->tu_wakeup == 0)
2493 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2494 
2495 		tu->tu_wakeup = 0;
2496 		MUTEX_UNLOCK(&tu->tu_lock);
2497 	}
2498 
2499 	return (NULL);
2500 }
2501 
2502 void
2503 restarter_start()
2504 {
2505 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2506 	(void) startd_thread_create(restarter_event_thread, NULL);
2507 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2508 	(void) startd_thread_create(wait_thread, NULL);
2509 }
2510 
2511 
2512 void
2513 restarter_init()
2514 {
2515 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2516 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2517 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2518 	(void) memset(&instance_list, 0, sizeof (instance_list));
2519 
2520 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2521 	instance_list.ril_instance_list = startd_list_create(
2522 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2523 
2524 	restarter_queue_pool = startd_list_pool_create(
2525 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2526 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2527 	    UU_LIST_POOL_DEBUG);
2528 
2529 	contract_list_pool = startd_list_pool_create(
2530 	    "contract_list", sizeof (contract_entry_t),
2531 	    offsetof(contract_entry_t,  ce_link), NULL,
2532 	    UU_LIST_POOL_DEBUG);
2533 	contract_hash_init();
2534 
2535 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2536 }
2537