xref: /illumos-gate/usr/src/cmd/svc/startd/restarter.c (revision 9514bcf4c37a9b87200462594803414d12cdd29d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2019 Joyent, Inc.
25  */
26 
27 /*
28  * restarter.c - service manipulation
29  *
30  * This component manages services whose restarter is svc.startd, the standard
31  * restarter.  It translates restarter protocol events from the graph engine
32  * into actions on processes, as a delegated restarter would do.
33  *
34  * The master restarter manages a number of always-running threads:
35  *   - restarter event thread: events from the graph engine
36  *   - timeout thread: thread to fire queued timeouts
37  *   - contract thread: thread to handle contract events
38  *   - wait thread: thread to handle wait-based services
39  *
40  * The other threads are created as-needed:
41  *   - per-instance method threads
42  *   - per-instance event processing threads
43  *
44  * The interaction of all threads must result in the following conditions
45  * being satisfied (on a per-instance basis):
46  *   - restarter events must be processed in order
47  *   - method execution must be serialized
48  *   - instance delete must be held until outstanding methods are complete
49  *   - contract events shouldn't be processed while a method is running
50  *   - timeouts should fire even when a method is running
51  *
52  * Service instances are represented by restarter_inst_t's and are kept in the
53  * instance_list list.
54  *
55  * Service States
56  *   The current state of a service instance is kept in
57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
58  *   some time, then before we effect the transition we set
59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60  *   rotate i_next_state to i_state and set i_next_state to
61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
62  *   held.  The exception is when we launch methods, which are done with
63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
64  *   method_thread() does, we set ri_method_thread to the thread id of the
65  *   method thread, and when it is nonzero any thread with a different thread id
66  *   waits on ri_method_cv.
67  *
68  * Method execution is serialized by blocking on ri_method_cv in
69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
70  * also prevents the instance structure from being deleted until all
71  * outstanding operations such as method_thread() have finished.
72  *
73  * Lock ordering:
74  *
75  * dgraph_lock [can be held when taking:]
76  *   utmpx_lock
77  *   dictionary->dict_lock
78  *   st->st_load_lock
79  *   wait_info_lock
80  *   ru->restarter_update_lock
81  *     restarter_queue->rpeq_lock
82  *   instance_list.ril_lock
83  *     inst->ri_lock
84  *   st->st_configd_live_lock
85  *
86  * instance_list.ril_lock
87  *   graph_queue->gpeq_lock
88  *   gu->gu_lock
89  *   st->st_configd_live_lock
90  *   dictionary->dict_lock
91  *   inst->ri_lock
92  *     graph_queue->gpeq_lock
93  *     gu->gu_lock
94  *     tu->tu_lock
95  *     tq->tq_lock
96  *     inst->ri_queue_lock
97  *       wait_info_lock
98  *       bp->cb_lock
99  *     utmpx_lock
100  *
101  * single_user_thread_lock
102  *   wait_info_lock
103  *   utmpx_lock
104  *
105  * gu_freeze_lock
106  *
107  * logbuf_mutex nests inside pretty much everything.
108  */
109 
110 #include <sys/contract/process.h>
111 #include <sys/ctfs.h>
112 #include <sys/stat.h>
113 #include <sys/time.h>
114 #include <sys/types.h>
115 #include <sys/uio.h>
116 #include <sys/wait.h>
117 #include <assert.h>
118 #include <errno.h>
119 #include <fcntl.h>
120 #include <libcontract.h>
121 #include <libcontract_priv.h>
122 #include <libintl.h>
123 #include <librestart.h>
124 #include <librestart_priv.h>
125 #include <libuutil.h>
126 #include <limits.h>
127 #include <poll.h>
128 #include <port.h>
129 #include <pthread.h>
130 #include <stdarg.h>
131 #include <stdio.h>
132 #include <strings.h>
133 #include <unistd.h>
134 
135 #include "startd.h"
136 #include "protocol.h"
137 
/*
 * Not static, so visible to other files.  Presumably the list pool for
 * contract lists; it is not referenced in this part of the file -- TODO
 * confirm against its users.
 */
uu_list_pool_t *contract_list_pool;
/* Pool used to initialize each restarter_inst_t's ri_link list node. */
static uu_list_pool_t *restarter_instance_pool;
/* All known instances (ril_instance_list), protected by ril_lock. */
static restarter_instance_list_t instance_list;

/* Pool from which each instance's ri_queue list is created. */
static uu_list_pool_t *restarter_queue_pool;

#define	WT_SVC_ERR_THROTTLE	1	/* 1 sec delay for erroring wait svc */
145 
146 /*
147  * Function used to reset the restart times for an instance, when
148  * an administrative task comes along and essentially makes the times
149  * in this array ineffective.
150  */
151 static void
152 reset_start_times(restarter_inst_t *inst)
153 {
154 	inst->ri_start_index = 0;
155 	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
156 }
157 
158 /*ARGSUSED*/
159 static int
160 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
161     void *private)
162 {
163 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
164 	int rc_id = *(int *)rc_arg;
165 
166 	if (lc_id > rc_id)
167 		return (1);
168 	if (lc_id < rc_id)
169 		return (-1);
170 	return (0);
171 }
172 
173 static restarter_inst_t *
174 inst_lookup_by_name(const char *name)
175 {
176 	int id;
177 
178 	id = dict_lookup_byname(name);
179 	if (id == -1)
180 		return (NULL);
181 
182 	return (inst_lookup_by_id(id));
183 }
184 
185 restarter_inst_t *
186 inst_lookup_by_id(int id)
187 {
188 	restarter_inst_t *inst;
189 
190 	MUTEX_LOCK(&instance_list.ril_lock);
191 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
192 	if (inst != NULL)
193 		MUTEX_LOCK(&inst->ri_lock);
194 	MUTEX_UNLOCK(&instance_list.ril_lock);
195 
196 	if (inst != NULL) {
197 		while (inst->ri_method_thread != 0 &&
198 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
199 			++inst->ri_method_waiters;
200 			(void) pthread_cond_wait(&inst->ri_method_cv,
201 			    &inst->ri_lock);
202 			assert(inst->ri_method_waiters > 0);
203 			--inst->ri_method_waiters;
204 		}
205 	}
206 
207 	return (inst);
208 }
209 
210 static restarter_inst_t *
211 inst_lookup_queue(const char *name)
212 {
213 	int id;
214 	restarter_inst_t *inst;
215 
216 	id = dict_lookup_byname(name);
217 	if (id == -1)
218 		return (NULL);
219 
220 	MUTEX_LOCK(&instance_list.ril_lock);
221 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
222 	if (inst != NULL)
223 		MUTEX_LOCK(&inst->ri_queue_lock);
224 	MUTEX_UNLOCK(&instance_list.ril_lock);
225 
226 	return (inst);
227 }
228 
229 const char *
230 service_style(int flags)
231 {
232 	switch (flags & RINST_STYLE_MASK) {
233 	case RINST_CONTRACT:	return ("contract");
234 	case RINST_TRANSIENT:	return ("transient");
235 	case RINST_WAIT:	return ("wait");
236 
237 	default:
238 #ifndef NDEBUG
239 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
240 #endif
241 		abort();
242 		/* NOTREACHED */
243 	}
244 }
245 
246 /*
247  * Fails with ECONNABORTED or ECANCELED.
248  */
249 static int
250 check_contract(restarter_inst_t *inst, boolean_t primary,
251     scf_instance_t *scf_inst)
252 {
253 	ctid_t *ctidp;
254 	int fd, r;
255 
256 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
257 	    &inst->ri_i.i_transient_ctid;
258 
259 	assert(*ctidp >= 1);
260 
261 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
262 	if (fd >= 0) {
263 		r = close(fd);
264 		assert(r == 0);
265 		return (0);
266 	}
267 
268 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
269 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
270 	switch (r) {
271 	case 0:
272 	case ECONNABORTED:
273 	case ECANCELED:
274 		*ctidp = 0;
275 		return (r);
276 
277 	case ENOMEM:
278 		uu_die("Out of memory\n");
279 		/* NOTREACHED */
280 
281 	case EPERM:
282 		uu_die("Insufficient privilege.\n");
283 		/* NOTREACHED */
284 
285 	case EACCES:
286 		uu_die("Repository backend access denied.\n");
287 		/* NOTREACHED */
288 
289 	case EROFS:
290 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
291 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
292 		return (0);
293 
294 	case EINVAL:
295 	case EBADF:
296 	default:
297 		assert(0);
298 		abort();
299 		/* NOTREACHED */
300 	}
301 }
302 
/*
 * Forward declaration: restarter_insert_inst() calls stop_instance(), which
 * is defined later in this file.
 */
static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
304 
305 /*
306  * int restarter_insert_inst(scf_handle_t *, char *)
307  *   If the inst is already in the restarter list, return its id.  If the inst
308  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
309  *   states, insert it into the list, and return 0.
310  *
311  *   Fails with
312  *     ENOENT - name is not in the repository
313  */
314 static int
315 restarter_insert_inst(scf_handle_t *h, const char *name)
316 {
317 	int id, r;
318 	restarter_inst_t *inst;
319 	uu_list_index_t idx;
320 	scf_service_t *scf_svc;
321 	scf_instance_t *scf_inst;
322 	scf_snapshot_t *snap = NULL;
323 	scf_propertygroup_t *pg;
324 	char *svc_name, *inst_name;
325 	char logfilebuf[PATH_MAX];
326 	char *c;
327 	boolean_t do_commit_states;
328 	restarter_instance_state_t state, next_state;
329 	protocol_states_t *ps;
330 	pid_t start_pid;
331 	restarter_str_t reason = restarter_str_insert_in_graph;
332 
333 	MUTEX_LOCK(&instance_list.ril_lock);
334 
335 	/*
336 	 * We don't use inst_lookup_by_name() here because we want the lookup
337 	 * & insert to be atomic.
338 	 */
339 	id = dict_lookup_byname(name);
340 	if (id != -1) {
341 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
342 		    &idx);
343 		if (inst != NULL) {
344 			MUTEX_UNLOCK(&instance_list.ril_lock);
345 			return (0);
346 		}
347 	}
348 
349 	/* Allocate an instance */
350 	inst = startd_zalloc(sizeof (restarter_inst_t));
351 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
352 	inst->ri_utmpx_prefix[0] = '\0';
353 
354 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
355 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
356 
357 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
358 
359 	/*
360 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
361 	 * just in case.
362 	 */
363 	inst->ri_id = (id != -1 ? id : dict_insert(name));
364 
365 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
366 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
367 
368 	scf_svc = safe_scf_service_create(h);
369 	scf_inst = safe_scf_instance_create(h);
370 	pg = safe_scf_pg_create(h);
371 	svc_name = startd_alloc(max_scf_name_size);
372 	inst_name = startd_alloc(max_scf_name_size);
373 
374 rep_retry:
375 	if (snap != NULL)
376 		scf_snapshot_destroy(snap);
377 	if (inst->ri_logstem != NULL)
378 		startd_free(inst->ri_logstem, PATH_MAX);
379 	if (inst->ri_common_name != NULL)
380 		free(inst->ri_common_name);
381 	if (inst->ri_C_common_name != NULL)
382 		free(inst->ri_C_common_name);
383 	snap = NULL;
384 	inst->ri_logstem = NULL;
385 	inst->ri_common_name = NULL;
386 	inst->ri_C_common_name = NULL;
387 
388 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
389 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
390 		switch (scf_error()) {
391 		case SCF_ERROR_CONNECTION_BROKEN:
392 			libscf_handle_rebind(h);
393 			goto rep_retry;
394 
395 		case SCF_ERROR_NOT_FOUND:
396 			goto deleted;
397 		}
398 
399 		uu_die("Can't decode FMRI %s: %s\n", name,
400 		    scf_strerror(scf_error()));
401 	}
402 
403 	/*
404 	 * If there's no running snapshot, then we execute using the editing
405 	 * snapshot.  Pending snapshots will be taken later.
406 	 */
407 	snap = libscf_get_running_snapshot(scf_inst);
408 
409 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
410 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
411 	    0)) {
412 		switch (scf_error()) {
413 		case SCF_ERROR_NOT_SET:
414 			break;
415 
416 		case SCF_ERROR_CONNECTION_BROKEN:
417 			libscf_handle_rebind(h);
418 			goto rep_retry;
419 
420 		default:
421 			assert(0);
422 			abort();
423 		}
424 
425 		goto deleted;
426 	}
427 
428 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
429 	for (c = logfilebuf; *c != '\0'; c++)
430 		if (*c == '/')
431 			*c = '-';
432 
433 	inst->ri_logstem = startd_alloc(PATH_MAX);
434 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
435 	    LOG_SUFFIX);
436 
437 	/*
438 	 * If the restarter group is missing, use uninit/none.  Otherwise,
439 	 * we're probably being restarted & don't want to mess up the states
440 	 * that are there.
441 	 */
442 	state = RESTARTER_STATE_UNINIT;
443 	next_state = RESTARTER_STATE_NONE;
444 
445 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
446 	if (r != 0) {
447 		switch (scf_error()) {
448 		case SCF_ERROR_CONNECTION_BROKEN:
449 			libscf_handle_rebind(h);
450 			goto rep_retry;
451 
452 		case SCF_ERROR_NOT_SET:
453 			goto deleted;
454 
455 		case SCF_ERROR_NOT_FOUND:
456 			/*
457 			 * This shouldn't happen since the graph engine should
458 			 * have initialized the state to uninitialized/none if
459 			 * there was no restarter pg.  In case somebody
460 			 * deleted it, though....
461 			 */
462 			do_commit_states = B_TRUE;
463 			break;
464 
465 		default:
466 			assert(0);
467 			abort();
468 		}
469 	} else {
470 		r = libscf_read_states(pg, &state, &next_state);
471 		if (r != 0) {
472 			do_commit_states = B_TRUE;
473 		} else {
474 			if (next_state != RESTARTER_STATE_NONE) {
475 				/*
476 				 * Force next_state to _NONE since we
477 				 * don't look for method processes.
478 				 */
479 				next_state = RESTARTER_STATE_NONE;
480 				do_commit_states = B_TRUE;
481 			} else {
482 				/*
483 				 * The reason for transition will depend on
484 				 * state.
485 				 */
486 				if (st->st_initial == 0)
487 					reason = restarter_str_startd_restart;
488 				else if (state == RESTARTER_STATE_MAINT)
489 					reason = restarter_str_bad_repo_state;
490 				/*
491 				 * Inform the restarter of our state without
492 				 * changing the STIME in the repository.
493 				 */
494 				ps = startd_alloc(sizeof (*ps));
495 				inst->ri_i.i_state = ps->ps_state = state;
496 				inst->ri_i.i_next_state = ps->ps_state_next =
497 				    next_state;
498 				ps->ps_reason = reason;
499 
500 				graph_protocol_send_event(inst->ri_i.i_fmri,
501 				    GRAPH_UPDATE_STATE_CHANGE, ps);
502 
503 				do_commit_states = B_FALSE;
504 			}
505 		}
506 	}
507 
508 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
509 	    &inst->ri_utmpx_prefix)) {
510 	case 0:
511 		break;
512 
513 	case ECONNABORTED:
514 		libscf_handle_rebind(h);
515 		goto rep_retry;
516 
517 	case ECANCELED:
518 		goto deleted;
519 
520 	case ENOENT:
521 		/*
522 		 * This is odd, because the graph engine should have required
523 		 * the general property group.  So we'll just use default
524 		 * flags in anticipation of the graph engine sending us
525 		 * REMOVE_INSTANCE when it finds out that the general property
526 		 * group has been deleted.
527 		 */
528 		inst->ri_flags = RINST_CONTRACT;
529 		break;
530 
531 	default:
532 		assert(0);
533 		abort();
534 	}
535 
536 	r = libscf_get_template_values(scf_inst, snap,
537 	    &inst->ri_common_name, &inst->ri_C_common_name);
538 
539 	/*
540 	 * Copy our names to smaller buffers to reduce our memory footprint.
541 	 */
542 	if (inst->ri_common_name != NULL) {
543 		char *tmp = safe_strdup(inst->ri_common_name);
544 		startd_free(inst->ri_common_name, max_scf_value_size);
545 		inst->ri_common_name = tmp;
546 	}
547 
548 	if (inst->ri_C_common_name != NULL) {
549 		char *tmp = safe_strdup(inst->ri_C_common_name);
550 		startd_free(inst->ri_C_common_name, max_scf_value_size);
551 		inst->ri_C_common_name = tmp;
552 	}
553 
554 	switch (r) {
555 	case 0:
556 		break;
557 
558 	case ECONNABORTED:
559 		libscf_handle_rebind(h);
560 		goto rep_retry;
561 
562 	case ECANCELED:
563 		goto deleted;
564 
565 	case ECHILD:
566 	case ENOENT:
567 		break;
568 
569 	default:
570 		assert(0);
571 		abort();
572 	}
573 
574 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
575 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
576 	    &start_pid)) {
577 	case 0:
578 		break;
579 
580 	case ECONNABORTED:
581 		libscf_handle_rebind(h);
582 		goto rep_retry;
583 
584 	case ECANCELED:
585 		goto deleted;
586 
587 	default:
588 		assert(0);
589 		abort();
590 	}
591 
592 	if (inst->ri_i.i_primary_ctid >= 1) {
593 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
594 
595 		switch (check_contract(inst, B_TRUE, scf_inst)) {
596 		case 0:
597 			break;
598 
599 		case ECONNABORTED:
600 			libscf_handle_rebind(h);
601 			goto rep_retry;
602 
603 		case ECANCELED:
604 			goto deleted;
605 
606 		default:
607 			assert(0);
608 			abort();
609 		}
610 	}
611 
612 	if (inst->ri_i.i_transient_ctid >= 1) {
613 		switch (check_contract(inst, B_FALSE, scf_inst)) {
614 		case 0:
615 			break;
616 
617 		case ECONNABORTED:
618 			libscf_handle_rebind(h);
619 			goto rep_retry;
620 
621 		case ECANCELED:
622 			goto deleted;
623 
624 		default:
625 			assert(0);
626 			abort();
627 		}
628 	}
629 
630 	/* No more failures we live through, so add it to the list. */
631 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
632 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
633 	MUTEX_LOCK(&inst->ri_lock);
634 	MUTEX_LOCK(&inst->ri_queue_lock);
635 
636 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
637 
638 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
639 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
640 	MUTEX_UNLOCK(&instance_list.ril_lock);
641 
642 	if (start_pid != -1 &&
643 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
644 		int ret;
645 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
646 		if (ret == -1) {
647 			/*
648 			 * Implication:  if we can't reregister the
649 			 * instance, we will start another one.  Two
650 			 * instances may or may not result in a resource
651 			 * conflict.
652 			 */
653 			log_error(LOG_WARNING,
654 			    "%s: couldn't reregister %ld for wait\n",
655 			    inst->ri_i.i_fmri, start_pid);
656 		} else if (ret == 1) {
657 			/*
658 			 * Leading PID has exited.
659 			 */
660 			(void) stop_instance(h, inst, RSTOP_EXIT);
661 		}
662 	}
663 
664 
665 	scf_pg_destroy(pg);
666 
667 	if (do_commit_states)
668 		(void) restarter_instance_update_states(h, inst, state,
669 		    next_state, RERR_NONE, reason);
670 
671 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
672 	    service_style(inst->ri_flags));
673 
674 	MUTEX_UNLOCK(&inst->ri_queue_lock);
675 	MUTEX_UNLOCK(&inst->ri_lock);
676 
677 	startd_free(svc_name, max_scf_name_size);
678 	startd_free(inst_name, max_scf_name_size);
679 	scf_snapshot_destroy(snap);
680 	scf_instance_destroy(scf_inst);
681 	scf_service_destroy(scf_svc);
682 
683 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
684 	    name);
685 
686 	return (0);
687 
688 deleted:
689 	MUTEX_UNLOCK(&instance_list.ril_lock);
690 	startd_free(inst_name, max_scf_name_size);
691 	startd_free(svc_name, max_scf_name_size);
692 	if (snap != NULL)
693 		scf_snapshot_destroy(snap);
694 	scf_pg_destroy(pg);
695 	scf_instance_destroy(scf_inst);
696 	scf_service_destroy(scf_svc);
697 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
698 	uu_list_destroy(inst->ri_queue);
699 	if (inst->ri_logstem != NULL)
700 		startd_free(inst->ri_logstem, PATH_MAX);
701 	if (inst->ri_common_name != NULL)
702 		free(inst->ri_common_name);
703 	if (inst->ri_C_common_name != NULL)
704 		free(inst->ri_C_common_name);
705 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
706 	startd_free(inst, sizeof (restarter_inst_t));
707 	return (ENOENT);
708 }
709 
710 static void
711 restarter_delete_inst(restarter_inst_t *ri)
712 {
713 	int id;
714 	restarter_inst_t *rip;
715 	void *cookie = NULL;
716 	restarter_instance_qentry_t *e;
717 
718 	assert(MUTEX_HELD(&ri->ri_lock));
719 
720 	/*
721 	 * Must drop the instance lock so we can pick up the instance_list
722 	 * lock & remove the instance.
723 	 */
724 	id = ri->ri_id;
725 	MUTEX_UNLOCK(&ri->ri_lock);
726 
727 	MUTEX_LOCK(&instance_list.ril_lock);
728 
729 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
730 	if (rip == NULL) {
731 		MUTEX_UNLOCK(&instance_list.ril_lock);
732 		return;
733 	}
734 
735 	assert(ri == rip);
736 
737 	uu_list_remove(instance_list.ril_instance_list, ri);
738 
739 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
740 	    ri->ri_i.i_fmri);
741 
742 	MUTEX_UNLOCK(&instance_list.ril_lock);
743 
744 	/*
745 	 * We can lock the instance without holding the instance_list lock
746 	 * since we removed the instance from the list.
747 	 */
748 	MUTEX_LOCK(&ri->ri_lock);
749 	MUTEX_LOCK(&ri->ri_queue_lock);
750 
751 	if (ri->ri_i.i_primary_ctid >= 1)
752 		contract_hash_remove(ri->ri_i.i_primary_ctid);
753 
754 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
755 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
756 
757 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
758 		startd_free(e, sizeof (*e));
759 	uu_list_destroy(ri->ri_queue);
760 
761 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
762 	startd_free(ri->ri_logstem, PATH_MAX);
763 	if (ri->ri_common_name != NULL)
764 		free(ri->ri_common_name);
765 	if (ri->ri_C_common_name != NULL)
766 		free(ri->ri_C_common_name);
767 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
768 	(void) pthread_mutex_destroy(&ri->ri_lock);
769 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
770 	startd_free(ri, sizeof (restarter_inst_t));
771 }
772 
773 /*
774  * instance_is_wait_style()
775  *
776  *   Returns 1 if the given instance is a "wait-style" service instance.
777  */
778 int
779 instance_is_wait_style(restarter_inst_t *inst)
780 {
781 	assert(MUTEX_HELD(&inst->ri_lock));
782 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
783 }
784 
785 /*
786  * instance_is_transient_style()
787  *
788  *   Returns 1 if the given instance is a transient service instance.
789  */
790 int
791 instance_is_transient_style(restarter_inst_t *inst)
792 {
793 	assert(MUTEX_HELD(&inst->ri_lock));
794 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
795 }
796 
797 /*
798  * instance_in_transition()
799  * Returns 1 if instance is in transition, 0 if not
800  */
801 int
802 instance_in_transition(restarter_inst_t *inst)
803 {
804 	assert(MUTEX_HELD(&inst->ri_lock));
805 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
806 		return (0);
807 	return (1);
808 }
809 
810 /*
811  * returns 1 if instance is already started, 0 if not
812  */
813 static int
814 instance_started(restarter_inst_t *inst)
815 {
816 	int ret;
817 
818 	assert(MUTEX_HELD(&inst->ri_lock));
819 
820 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
821 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
822 		ret = 1;
823 	else
824 		ret = 0;
825 
826 	return (ret);
827 }
828 
829 /*
830  * Returns
831  *   0 - success
832  *   ECONNRESET - success, but h was rebound
833  */
834 int
835 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
836     restarter_instance_state_t new_state,
837     restarter_instance_state_t new_state_next, restarter_error_t err,
838     restarter_str_t reason)
839 {
840 	protocol_states_t *states;
841 	int e;
842 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
843 	boolean_t rebound = B_FALSE;
844 	int prev_state_online;
845 	int state_online;
846 
847 	assert(MUTEX_HELD(&ri->ri_lock));
848 
849 	prev_state_online = instance_started(ri);
850 
851 retry:
852 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
853 	    restarter_get_str_short(reason));
854 	switch (e) {
855 	case 0:
856 		break;
857 
858 	case ENOMEM:
859 		++retry_count;
860 		if (retry_count < ALLOC_RETRY) {
861 			(void) poll(NULL, 0, msecs);
862 			msecs *= ALLOC_DELAY_MULT;
863 			goto retry;
864 		}
865 
866 		/* Like startd_alloc(). */
867 		uu_die("Insufficient memory.\n");
868 		/* NOTREACHED */
869 
870 	case ECONNABORTED:
871 		libscf_handle_rebind(h);
872 		rebound = B_TRUE;
873 		goto retry;
874 
875 	case EPERM:
876 	case EACCES:
877 	case EROFS:
878 		log_error(LOG_NOTICE, "Could not commit state change for %s "
879 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
880 		/* FALLTHROUGH */
881 
882 	case ENOENT:
883 		ri->ri_i.i_state = new_state;
884 		ri->ri_i.i_next_state = new_state_next;
885 		break;
886 
887 	case EINVAL:
888 	default:
889 		bad_error("_restarter_commit_states", e);
890 	}
891 
892 	states = startd_alloc(sizeof (protocol_states_t));
893 	states->ps_state = new_state;
894 	states->ps_state_next = new_state_next;
895 	states->ps_err = err;
896 	states->ps_reason = reason;
897 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
898 	    (void *)states);
899 
900 	state_online = instance_started(ri);
901 
902 	if (prev_state_online && !state_online)
903 		ri->ri_post_offline_hook();
904 	else if (!prev_state_online && state_online)
905 		ri->ri_post_online_hook();
906 
907 	return (rebound ? ECONNRESET : 0);
908 }
909 
910 void
911 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
912 {
913 	restarter_inst_t *inst;
914 
915 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
916 
917 	inst = inst_lookup_by_name(fmri);
918 	if (inst == NULL)
919 		return;
920 
921 	inst->ri_flags |= flag;
922 
923 	MUTEX_UNLOCK(&inst->ri_lock);
924 }
925 
926 static void
927 restarter_take_pending_snapshots(scf_handle_t *h)
928 {
929 	restarter_inst_t *inst;
930 	int r;
931 
932 	MUTEX_LOCK(&instance_list.ril_lock);
933 
934 	for (inst = uu_list_first(instance_list.ril_instance_list);
935 	    inst != NULL;
936 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
937 		const char *fmri;
938 		scf_instance_t *sinst = NULL;
939 
940 		MUTEX_LOCK(&inst->ri_lock);
941 
942 		/*
943 		 * This is where we'd check inst->ri_method_thread and if it
944 		 * were nonzero we'd wait in anticipation of another thread
945 		 * executing a method for inst.  Doing so with the instance_list
946 		 * locked, though, leads to deadlock.  Since taking a snapshot
947 		 * during that window won't hurt anything, we'll just continue.
948 		 */
949 
950 		fmri = inst->ri_i.i_fmri;
951 
952 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
953 			scf_snapshot_t *rsnap;
954 
955 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
956 
957 			rsnap = libscf_get_or_make_running_snapshot(sinst,
958 			    fmri, B_FALSE);
959 
960 			scf_instance_destroy(sinst);
961 
962 			if (rsnap != NULL)
963 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
964 
965 			scf_snapshot_destroy(rsnap);
966 		}
967 
968 		if (inst->ri_flags & RINST_RETAKE_START) {
969 			switch (r = libscf_snapshots_poststart(h, fmri,
970 			    B_FALSE)) {
971 			case 0:
972 			case ENOENT:
973 				inst->ri_flags &= ~RINST_RETAKE_START;
974 				break;
975 
976 			case ECONNABORTED:
977 				break;
978 
979 			case EACCES:
980 			default:
981 				bad_error("libscf_snapshots_poststart", r);
982 			}
983 		}
984 
985 		MUTEX_UNLOCK(&inst->ri_lock);
986 	}
987 
988 	MUTEX_UNLOCK(&instance_list.ril_lock);
989 }
990 
991 /* ARGSUSED */
992 void *
993 restarter_post_fsminimal_thread(void *unused)
994 {
995 	scf_handle_t *h;
996 	int r;
997 
998 	(void) pthread_setname_np(pthread_self(), "restarter_post_fsmin");
999 
1000 	h = libscf_handle_create_bound_loop();
1001 
1002 	for (;;) {
1003 		r = libscf_create_self(h);
1004 		if (r == 0)
1005 			break;
1006 
1007 		assert(r == ECONNABORTED);
1008 		libscf_handle_rebind(h);
1009 	}
1010 
1011 	restarter_take_pending_snapshots(h);
1012 
1013 	(void) scf_handle_unbind(h);
1014 	scf_handle_destroy(h);
1015 
1016 	return (NULL);
1017 }
1018 
1019 /*
1020  * int stop_instance()
1021  *
1022  *   Stop the instance identified by the instance given as the second argument,
1023  *   for the cause stated.
1024  *
1025  *   Returns
1026  *     0 - success
1027  *     -1 - inst is in transition
1028  */
1029 static int
1030 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1031     stop_cause_t cause)
1032 {
1033 	fork_info_t *info;
1034 	const char *cp;
1035 	int err;
1036 	restarter_error_t re;
1037 	restarter_str_t	reason;
1038 	restarter_instance_state_t new_state;
1039 
1040 	assert(MUTEX_HELD(&inst->ri_lock));
1041 	assert(inst->ri_method_thread == 0);
1042 
1043 	switch (cause) {
1044 	case RSTOP_EXIT:
1045 		re = RERR_RESTART;
1046 		reason = restarter_str_ct_ev_exit;
1047 		cp = "all processes in service exited";
1048 		break;
1049 	case RSTOP_ERR_CFG:
1050 		re = RERR_FAULT;
1051 		reason = restarter_str_method_failed;
1052 		cp = "service exited with a configuration error";
1053 		break;
1054 	case RSTOP_ERR_EXIT:
1055 		re = RERR_RESTART;
1056 		reason = restarter_str_ct_ev_exit;
1057 		cp = "service exited with an error";
1058 		break;
1059 	case RSTOP_CORE:
1060 		re = RERR_FAULT;
1061 		reason = restarter_str_ct_ev_core;
1062 		cp = "process dumped core";
1063 		break;
1064 	case RSTOP_SIGNAL:
1065 		re = RERR_FAULT;
1066 		reason = restarter_str_ct_ev_signal;
1067 		cp = "process received fatal signal from outside the service";
1068 		break;
1069 	case RSTOP_HWERR:
1070 		re = RERR_FAULT;
1071 		reason = restarter_str_ct_ev_hwerr;
1072 		cp = "process killed due to uncorrectable hardware error";
1073 		break;
1074 	case RSTOP_DEPENDENCY:
1075 		re = RERR_RESTART;
1076 		reason = restarter_str_dependency_activity;
1077 		cp = "dependency activity requires stop";
1078 		break;
1079 	case RSTOP_DISABLE:
1080 		re = RERR_RESTART;
1081 		reason = restarter_str_disable_request;
1082 		cp = "service disabled";
1083 		break;
1084 	case RSTOP_RESTART:
1085 		re = RERR_RESTART;
1086 		reason = restarter_str_restart_request;
1087 		cp = "service restarting";
1088 		break;
1089 	default:
1090 #ifndef NDEBUG
1091 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1092 		    cause, __FILE__, __LINE__);
1093 #endif
1094 		abort();
1095 	}
1096 
1097 	/* Services in the disabled and maintenance state are ignored */
1098 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1099 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1100 		log_framework(LOG_DEBUG,
1101 		    "%s: stop_instance -> is maint/disabled\n",
1102 		    inst->ri_i.i_fmri);
1103 		return (0);
1104 	}
1105 
1106 	/* Already stopped instances are left alone */
1107 	if (instance_started(inst) == 0) {
1108 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1109 		    inst->ri_i.i_fmri);
1110 		return (0);
1111 	}
1112 
1113 	if (instance_in_transition(inst)) {
1114 		/* requeue event by returning -1 */
1115 		log_framework(LOG_DEBUG,
1116 		    "Restarter: Not stopping %s, in transition.\n",
1117 		    inst->ri_i.i_fmri);
1118 		return (-1);
1119 	}
1120 
1121 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1122 
1123 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1124 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1125 
1126 	if (instance_is_wait_style(inst) &&
1127 	    (cause == RSTOP_EXIT ||
1128 	    cause == RSTOP_ERR_CFG ||
1129 	    cause == RSTOP_ERR_EXIT)) {
1130 		/*
1131 		 * No need to stop instance, as child has exited; remove
1132 		 * contract and move the instance to the offline state.
1133 		 */
1134 		switch (err = restarter_instance_update_states(local_handle,
1135 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1136 		    reason)) {
1137 		case 0:
1138 		case ECONNRESET:
1139 			break;
1140 
1141 		default:
1142 			bad_error("restarter_instance_update_states", err);
1143 		}
1144 
1145 		if (cause == RSTOP_ERR_EXIT) {
1146 			/*
1147 			 * The RSTOP_ERR_EXIT cause is set via the
1148 			 * wait_thread -> wait_remove code path when we have
1149 			 * a "wait" style svc that exited with an error. If
1150 			 * the svc is failing too quickly, we throttle it so
1151 			 * that we don't restart it more than once/second.
1152 			 * Since we know we're running in the wait thread its
1153 			 * ok to throttle it right here.
1154 			 */
1155 			(void) update_fault_count(inst, FAULT_COUNT_INCR);
1156 			if (method_rate_critical(inst)) {
1157 				log_instance(inst, B_TRUE, "Failing too "
1158 				    "quickly, throttling.");
1159 				(void) sleep(WT_SVC_ERR_THROTTLE);
1160 			}
1161 		} else {
1162 			(void) update_fault_count(inst, FAULT_COUNT_RESET);
1163 			reset_start_times(inst);
1164 		}
1165 
1166 		if (inst->ri_i.i_primary_ctid != 0) {
1167 			inst->ri_m_inst =
1168 			    safe_scf_instance_create(local_handle);
1169 			inst->ri_mi_deleted = B_FALSE;
1170 
1171 			libscf_reget_instance(inst);
1172 			method_remove_contract(inst, B_TRUE, B_TRUE);
1173 
1174 			scf_instance_destroy(inst->ri_m_inst);
1175 			inst->ri_m_inst = NULL;
1176 		}
1177 
1178 		switch (err = restarter_instance_update_states(local_handle,
1179 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1180 		    reason)) {
1181 		case 0:
1182 		case ECONNRESET:
1183 			break;
1184 
1185 		default:
1186 			bad_error("restarter_instance_update_states", err);
1187 		}
1188 
1189 		if (cause != RSTOP_ERR_CFG)
1190 			return (0);
1191 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1192 		/*
1193 		 * Stopping a wait service through means other than the pid
1194 		 * exiting should keep wait_thread() from restarting the
1195 		 * service, by removing it from the wait list.
1196 		 * We cannot remove it right now otherwise the process will
1197 		 * end up <defunct> so mark it to be ignored.
1198 		 */
1199 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1200 	}
1201 
1202 	/*
1203 	 * There are some configuration errors which we cannot detect until we
1204 	 * try to run the method.  For example, see exec_method() where the
1205 	 * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1206 	 * in several cases. If this happens for a "wait-style" svc,
1207 	 * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1208 	 * the configuration error and go into maintenance, even though it is
1209 	 * a "wait-style" svc.
1210 	 */
1211 	if (cause == RSTOP_ERR_CFG)
1212 		new_state = RESTARTER_STATE_MAINT;
1213 	else
1214 		new_state = inst->ri_i.i_enabled ?
1215 		    RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1216 
1217 	switch (err = restarter_instance_update_states(local_handle, inst,
1218 	    inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1219 	case 0:
1220 	case ECONNRESET:
1221 		break;
1222 
1223 	default:
1224 		bad_error("restarter_instance_update_states", err);
1225 	}
1226 
1227 	info = startd_zalloc(sizeof (fork_info_t));
1228 
1229 	info->sf_id = inst->ri_id;
1230 	info->sf_method_type = METHOD_STOP;
1231 	info->sf_event_type = re;
1232 	info->sf_reason = reason;
1233 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1234 
1235 	return (0);
1236 }
1237 
1238 /*
1239  * Returns
1240  *   ENOENT - fmri is not in instance_list
1241  *   0 - success
1242  *   ECONNRESET - success, though handle was rebound
1243  *   -1 - instance is in transition
1244  */
1245 int
1246 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1247 {
1248 	restarter_inst_t *rip;
1249 	int r;
1250 
1251 	rip = inst_lookup_by_name(fmri);
1252 	if (rip == NULL)
1253 		return (ENOENT);
1254 
1255 	r = stop_instance(h, rip, flags);
1256 
1257 	MUTEX_UNLOCK(&rip->ri_lock);
1258 
1259 	return (r);
1260 }
1261 
1262 static void
1263 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1264     unmaint_cause_t cause)
1265 {
1266 	ctid_t ctid;
1267 	scf_instance_t *inst;
1268 	int r;
1269 	uint_t tries = 0, msecs = ALLOC_DELAY;
1270 	const char *cp;
1271 	restarter_str_t	reason;
1272 
1273 	assert(MUTEX_HELD(&rip->ri_lock));
1274 
1275 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1276 		log_error(LOG_DEBUG, "Restarter: "
1277 		    "Ignoring maintenance off command because %s is not in the "
1278 		    "maintenance state.\n", rip->ri_i.i_fmri);
1279 		return;
1280 	}
1281 
1282 	switch (cause) {
1283 	case RUNMAINT_CLEAR:
1284 		cp = "clear requested";
1285 		reason = restarter_str_clear_request;
1286 		break;
1287 	case RUNMAINT_DISABLE:
1288 		cp = "disable requested";
1289 		reason = restarter_str_disable_request;
1290 		break;
1291 	default:
1292 #ifndef NDEBUG
1293 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1294 		    cause, __FILE__, __LINE__);
1295 #endif
1296 		abort();
1297 	}
1298 
1299 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1300 	    cp);
1301 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1302 	    "%s.\n", rip->ri_i.i_fmri, cp);
1303 
1304 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1305 	    RESTARTER_STATE_NONE, RERR_RESTART, reason);
1306 
1307 	/*
1308 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1309 	 * a primary contract.
1310 	 */
1311 	if (rip->ri_i.i_primary_ctid == 0)
1312 		return;
1313 
1314 	ctid = rip->ri_i.i_primary_ctid;
1315 	contract_abandon(ctid);
1316 	rip->ri_i.i_primary_ctid = 0;
1317 
1318 rep_retry:
1319 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1320 	case 0:
1321 		break;
1322 
1323 	case ECONNABORTED:
1324 		libscf_handle_rebind(h);
1325 		goto rep_retry;
1326 
1327 	case ENOENT:
1328 		/* Must have been deleted. */
1329 		return;
1330 
1331 	case EINVAL:
1332 	case ENOTSUP:
1333 	default:
1334 		bad_error("libscf_handle_rebind", r);
1335 	}
1336 
1337 again:
1338 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1339 	switch (r) {
1340 	case 0:
1341 		break;
1342 
1343 	case ENOMEM:
1344 		++tries;
1345 		if (tries < ALLOC_RETRY) {
1346 			(void) poll(NULL, 0, msecs);
1347 			msecs *= ALLOC_DELAY_MULT;
1348 			goto again;
1349 		}
1350 
1351 		uu_die("Insufficient memory.\n");
1352 		/* NOTREACHED */
1353 
1354 	case ECONNABORTED:
1355 		scf_instance_destroy(inst);
1356 		libscf_handle_rebind(h);
1357 		goto rep_retry;
1358 
1359 	case ECANCELED:
1360 		break;
1361 
1362 	case EPERM:
1363 	case EACCES:
1364 	case EROFS:
1365 		log_error(LOG_INFO,
1366 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1367 		    rip->ri_i.i_fmri, strerror(r));
1368 		break;
1369 
1370 	case EINVAL:
1371 	case EBADF:
1372 	default:
1373 		bad_error("restarter_remove_contract", r);
1374 	}
1375 
1376 	scf_instance_destroy(inst);
1377 }
1378 
1379 /*
1380  * enable_inst()
1381  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1382  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1383  *   disabled, move it to offline.  If the event is _DISABLE or
1384  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1385  *
1386  *   Returns
1387  *     0 - success
1388  *     ECONNRESET - h was rebound
1389  */
1390 static int
1391 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1392     restarter_instance_qentry_t *riq)
1393 {
1394 	restarter_instance_state_t state;
1395 	restarter_event_type_t e = riq->riq_type;
1396 	restarter_str_t reason = restarter_str_per_configuration;
1397 	int r;
1398 
1399 	assert(MUTEX_HELD(&inst->ri_lock));
1400 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1401 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1402 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1403 	assert(instance_in_transition(inst) == 0);
1404 
1405 	state = inst->ri_i.i_state;
1406 
1407 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1408 		inst->ri_i.i_enabled = 1;
1409 
1410 		if (state == RESTARTER_STATE_UNINIT ||
1411 		    state == RESTARTER_STATE_DISABLED) {
1412 			/*
1413 			 * B_FALSE: Don't log an error if the log_instance()
1414 			 * fails because it will fail on the miniroot before
1415 			 * install-discovery runs.
1416 			 */
1417 			log_instance(inst, B_FALSE, "Enabled.");
1418 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1419 			    inst->ri_i.i_fmri);
1420 
1421 			/*
1422 			 * If we are coming from DISABLED, it was obviously an
1423 			 * enable request. If we are coming from UNINIT, it may
1424 			 * have been a sevice in MAINT that was cleared.
1425 			 */
1426 			if (riq->riq_reason == restarter_str_clear_request)
1427 				reason = restarter_str_clear_request;
1428 			else if (state == RESTARTER_STATE_DISABLED)
1429 				reason = restarter_str_enable_request;
1430 			(void) restarter_instance_update_states(h, inst,
1431 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1432 			    RERR_NONE, reason);
1433 		} else {
1434 			log_framework(LOG_DEBUG, "Restarter: "
1435 			    "Not changing state of %s for enable command.\n",
1436 			    inst->ri_i.i_fmri);
1437 		}
1438 	} else {
1439 		inst->ri_i.i_enabled = 0;
1440 
1441 		switch (state) {
1442 		case RESTARTER_STATE_ONLINE:
1443 		case RESTARTER_STATE_DEGRADED:
1444 			r = stop_instance(h, inst, RSTOP_DISABLE);
1445 			return (r == ECONNRESET ? 0 : r);
1446 
1447 		case RESTARTER_STATE_OFFLINE:
1448 		case RESTARTER_STATE_UNINIT:
1449 			if (inst->ri_i.i_primary_ctid != 0) {
1450 				inst->ri_m_inst = safe_scf_instance_create(h);
1451 				inst->ri_mi_deleted = B_FALSE;
1452 
1453 				libscf_reget_instance(inst);
1454 				method_remove_contract(inst, B_TRUE, B_TRUE);
1455 
1456 				scf_instance_destroy(inst->ri_m_inst);
1457 			}
1458 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1459 			log_instance(inst, B_FALSE, "Disabled.");
1460 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1461 			    inst->ri_i.i_fmri);
1462 
1463 			/*
1464 			 * If we are coming from OFFLINE, it was obviously a
1465 			 * disable request. But if we are coming from
1466 			 * UNINIT, it may have been a disable request for a
1467 			 * service in MAINT.
1468 			 */
1469 			if (riq->riq_reason == restarter_str_disable_request ||
1470 			    state == RESTARTER_STATE_OFFLINE)
1471 				reason = restarter_str_disable_request;
1472 			(void) restarter_instance_update_states(h, inst,
1473 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1474 			    RERR_RESTART, reason);
1475 			return (0);
1476 
1477 		case RESTARTER_STATE_DISABLED:
1478 			break;
1479 
1480 		case RESTARTER_STATE_MAINT:
1481 			/*
1482 			 * We only want to pull the instance out of maintenance
1483 			 * if the disable is on adminstrative request.  The
1484 			 * graph engine sends _DISABLE events whenever a
1485 			 * service isn't in the disabled state, and we don't
1486 			 * want to pull the service out of maintenance if,
1487 			 * for example, it is there due to a dependency cycle.
1488 			 */
1489 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1490 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1491 			break;
1492 
1493 		default:
1494 #ifndef NDEBUG
1495 			(void) fprintf(stderr, "Restarter instance %s has "
1496 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1497 #endif
1498 			abort();
1499 		}
1500 	}
1501 
1502 	return (0);
1503 }
1504 
1505 static void
1506 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1507     int32_t reason)
1508 {
1509 	fork_info_t *info;
1510 	restarter_str_t	new_reason;
1511 
1512 	assert(MUTEX_HELD(&inst->ri_lock));
1513 	assert(instance_in_transition(inst) == 0);
1514 	assert(inst->ri_method_thread == 0);
1515 
1516 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1517 	    inst->ri_i.i_fmri);
1518 
1519 	/*
1520 	 * We want to keep the original reason for restarts and clear actions
1521 	 */
1522 	switch (reason) {
1523 	case restarter_str_restart_request:
1524 	case restarter_str_clear_request:
1525 		new_reason = reason;
1526 		break;
1527 	default:
1528 		new_reason = restarter_str_dependencies_satisfied;
1529 	}
1530 
1531 	/* Services in the disabled and maintenance state are ignored */
1532 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1533 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1534 	    inst->ri_i.i_enabled == 0) {
1535 		log_framework(LOG_DEBUG,
1536 		    "%s: start_instance -> is maint/disabled\n",
1537 		    inst->ri_i.i_fmri);
1538 		return;
1539 	}
1540 
1541 	/* Already started instances are left alone */
1542 	if (instance_started(inst) == 1) {
1543 		log_framework(LOG_DEBUG,
1544 		    "%s: start_instance -> is already started\n",
1545 		    inst->ri_i.i_fmri);
1546 		return;
1547 	}
1548 
1549 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1550 
1551 	(void) restarter_instance_update_states(local_handle, inst,
1552 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1553 
1554 	info = startd_zalloc(sizeof (fork_info_t));
1555 
1556 	info->sf_id = inst->ri_id;
1557 	info->sf_method_type = METHOD_START;
1558 	info->sf_event_type = RERR_NONE;
1559 	info->sf_reason = new_reason;
1560 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1561 }
1562 
1563 static int
1564 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1565 {
1566 	scf_instance_t *inst;
1567 	int ret = 0;
1568 
1569 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1570 		return (-1);
1571 
1572 	ret = restarter_inst_ractions_from_tty(inst);
1573 
1574 	scf_instance_destroy(inst);
1575 	return (ret);
1576 }
1577 
1578 static boolean_t
1579 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1580 {
1581 	scf_instance_t *inst;
1582 	boolean_t ret = B_FALSE;
1583 
1584 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1585 		return (-1);
1586 
1587 	if (restarter_inst_dump(inst) == 1)
1588 		ret = B_TRUE;
1589 
1590 	scf_instance_destroy(inst);
1591 	return (ret);
1592 }
1593 
1594 static void
1595 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1596     restarter_str_t reason)
1597 {
1598 	fork_info_t *info;
1599 	scf_instance_t *scf_inst = NULL;
1600 
1601 	assert(MUTEX_HELD(&rip->ri_lock));
1602 	assert(reason != restarter_str_none);
1603 	assert(rip->ri_method_thread == 0);
1604 
1605 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1606 	    restarter_get_str_short(reason));
1607 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1608 	    rip->ri_i.i_fmri, restarter_get_str_short(reason));
1609 
1610 	/* Services in the maintenance state are ignored */
1611 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1612 		log_framework(LOG_DEBUG,
1613 		    "%s: maintain_instance -> is already in maintenance\n",
1614 		    rip->ri_i.i_fmri);
1615 		return;
1616 	}
1617 
1618 	/*
1619 	 * If reason state is restarter_str_service_request and
1620 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1621 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1622 	 */
1623 	if (reason == restarter_str_service_request &&
1624 	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1625 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1626 			if (restarter_inst_set_aux_fmri(scf_inst))
1627 				log_framework(LOG_DEBUG, "%s: "
1628 				    "restarter_inst_set_aux_fmri failed: ",
1629 				    rip->ri_i.i_fmri);
1630 		} else {
1631 			log_framework(LOG_DEBUG, "%s: "
1632 			    "restarter_inst_validate_ractions_aux_fmri "
1633 			    "failed: ", rip->ri_i.i_fmri);
1634 
1635 			if (restarter_inst_reset_aux_fmri(scf_inst))
1636 				log_framework(LOG_DEBUG, "%s: "
1637 				    "restarter_inst_reset_aux_fmri failed: ",
1638 				    rip->ri_i.i_fmri);
1639 		}
1640 		scf_instance_destroy(scf_inst);
1641 	}
1642 
1643 	if (immediate || !instance_started(rip)) {
1644 		if (rip->ri_i.i_primary_ctid != 0) {
1645 			rip->ri_m_inst = safe_scf_instance_create(h);
1646 			rip->ri_mi_deleted = B_FALSE;
1647 
1648 			libscf_reget_instance(rip);
1649 			method_remove_contract(rip, B_TRUE, B_TRUE);
1650 
1651 			scf_instance_destroy(rip->ri_m_inst);
1652 		}
1653 
1654 		(void) restarter_instance_update_states(h, rip,
1655 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1656 		    reason);
1657 		return;
1658 	}
1659 
1660 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1661 	    RESTARTER_STATE_MAINT, RERR_NONE, reason);
1662 
1663 	log_transition(rip, MAINT_REQUESTED);
1664 
1665 	info = startd_zalloc(sizeof (*info));
1666 	info->sf_id = rip->ri_id;
1667 	info->sf_method_type = METHOD_STOP;
1668 	info->sf_event_type = RERR_RESTART;
1669 	info->sf_reason = reason;
1670 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1671 }
1672 
1673 static void
1674 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1675 {
1676 	scf_instance_t *inst;
1677 	scf_snapshot_t *snap;
1678 	fork_info_t *info;
1679 	int r;
1680 
1681 	assert(MUTEX_HELD(&rip->ri_lock));
1682 
1683 	log_instance(rip, B_TRUE, "Rereading configuration.");
1684 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1685 	    rip->ri_i.i_fmri);
1686 
1687 rep_retry:
1688 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1689 	switch (r) {
1690 	case 0:
1691 		break;
1692 
1693 	case ECONNABORTED:
1694 		libscf_handle_rebind(h);
1695 		goto rep_retry;
1696 
1697 	case ENOENT:
1698 		/* Must have been deleted. */
1699 		return;
1700 
1701 	case EINVAL:
1702 	case ENOTSUP:
1703 	default:
1704 		bad_error("libscf_fmri_get_instance", r);
1705 	}
1706 
1707 	snap = libscf_get_running_snapshot(inst);
1708 
1709 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1710 	    &rip->ri_utmpx_prefix);
1711 	switch (r) {
1712 	case 0:
1713 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1714 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1715 		break;
1716 
1717 	case ECONNABORTED:
1718 		scf_instance_destroy(inst);
1719 		scf_snapshot_destroy(snap);
1720 		libscf_handle_rebind(h);
1721 		goto rep_retry;
1722 
1723 	case ECANCELED:
1724 	case ENOENT:
1725 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1726 		break;
1727 
1728 	default:
1729 		bad_error("libscf_get_startd_properties", r);
1730 	}
1731 
1732 	if (instance_started(rip)) {
1733 		/* Refresh does not change the state. */
1734 		(void) restarter_instance_update_states(h, rip,
1735 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1736 		    restarter_str_refresh);
1737 
1738 		info = startd_zalloc(sizeof (*info));
1739 		info->sf_id = rip->ri_id;
1740 		info->sf_method_type = METHOD_REFRESH;
1741 		info->sf_event_type = RERR_REFRESH;
1742 		info->sf_reason = 0;
1743 
1744 		assert(rip->ri_method_thread == 0);
1745 		rip->ri_method_thread =
1746 		    startd_thread_create(method_thread, info);
1747 	}
1748 
1749 	scf_snapshot_destroy(snap);
1750 	scf_instance_destroy(inst);
1751 }
1752 
/*
 * Printable names of restarter events.  Code in this file indexes the table
 * with an event's type value (riq_type / rpe_type) when writing log
 * messages, so the order of the entries matters.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE",
	"STOP_RESET"
};
1759 
1760 /*
1761  * void *restarter_process_events()
1762  *
1763  *   Called in a separate thread to process the events on an instance's
1764  *   queue.  Empties the queue completely, and tries to keep the thread
1765  *   around for a little while after the queue is empty to save on
1766  *   startup costs.
1767  */
1768 static void *
1769 restarter_process_events(void *arg)
1770 {
1771 	scf_handle_t *h;
1772 	restarter_instance_qentry_t *event;
1773 	restarter_inst_t *rip;
1774 	char *fmri = (char *)arg;
1775 	struct timespec to;
1776 
1777 	(void) pthread_setname_np(pthread_self(), "restarter_process_events");
1778 
1779 	assert(fmri != NULL);
1780 
1781 	h = libscf_handle_create_bound_loop();
1782 
1783 	/* grab the queue lock */
1784 	rip = inst_lookup_queue(fmri);
1785 	if (rip == NULL)
1786 		goto out;
1787 
1788 again:
1789 
1790 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1791 		restarter_inst_t *inst;
1792 
1793 		/* drop the queue lock */
1794 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1795 
1796 		/*
1797 		 * Grab the inst lock -- this waits until any outstanding
1798 		 * method finishes running.
1799 		 */
1800 		inst = inst_lookup_by_name(fmri);
1801 		if (inst == NULL) {
1802 			/* Getting deleted in the middle isn't an error. */
1803 			goto cont;
1804 		}
1805 
1806 		assert(instance_in_transition(inst) == 0);
1807 
1808 		/* process the event */
1809 		switch (event->riq_type) {
1810 		case RESTARTER_EVENT_TYPE_ENABLE:
1811 		case RESTARTER_EVENT_TYPE_DISABLE:
1812 			(void) enable_inst(h, inst, event);
1813 			break;
1814 
1815 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1816 			if (enable_inst(h, inst, event) == 0)
1817 				reset_start_times(inst);
1818 			break;
1819 
1820 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1821 			restarter_delete_inst(inst);
1822 			inst = NULL;
1823 			goto cont;
1824 
1825 		case RESTARTER_EVENT_TYPE_STOP_RESET:
1826 			reset_start_times(inst);
1827 			/* FALLTHROUGH */
1828 		case RESTARTER_EVENT_TYPE_STOP:
1829 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1830 			break;
1831 
1832 		case RESTARTER_EVENT_TYPE_START:
1833 			start_instance(h, inst, event->riq_reason);
1834 			break;
1835 
1836 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1837 			maintain_instance(h, inst, 0,
1838 			    restarter_str_dependency_cycle);
1839 			break;
1840 
1841 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1842 			maintain_instance(h, inst, 0,
1843 			    restarter_str_invalid_dependency);
1844 			break;
1845 
1846 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1847 			if (event_from_tty(h, inst) == 0)
1848 				maintain_instance(h, inst, 0,
1849 				    restarter_str_service_request);
1850 			else
1851 				maintain_instance(h, inst, 0,
1852 				    restarter_str_administrative_request);
1853 			break;
1854 
1855 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1856 			if (event_from_tty(h, inst) == 0)
1857 				maintain_instance(h, inst, 1,
1858 				    restarter_str_service_request);
1859 			else
1860 				maintain_instance(h, inst, 1,
1861 				    restarter_str_administrative_request);
1862 			break;
1863 
1864 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1865 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1866 			reset_start_times(inst);
1867 			break;
1868 
1869 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1870 			refresh_instance(h, inst);
1871 			break;
1872 
1873 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1874 			log_framework(LOG_WARNING, "Restarter: "
1875 			    "%s command (for %s) unimplemented.\n",
1876 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1877 			break;
1878 
1879 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1880 			if (!instance_started(inst)) {
1881 				log_framework(LOG_DEBUG, "Restarter: "
1882 				    "Not restarting %s; not running.\n",
1883 				    inst->ri_i.i_fmri);
1884 			} else {
1885 				/*
1886 				 * Stop the instance.  If it can be restarted,
1887 				 * the graph engine will send a new event.
1888 				 */
1889 				if (restart_dump(h, inst)) {
1890 					(void) contract_kill(
1891 					    inst->ri_i.i_primary_ctid, SIGABRT,
1892 					    inst->ri_i.i_fmri);
1893 				} else if (stop_instance(h, inst,
1894 				    RSTOP_RESTART) == 0) {
1895 					reset_start_times(inst);
1896 				}
1897 			}
1898 			break;
1899 
1900 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1901 		default:
1902 #ifndef NDEBUG
1903 			uu_warn("%s:%d: Bad restarter event %d.  "
1904 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1905 #endif
1906 			abort();
1907 		}
1908 
1909 		assert(inst != NULL);
1910 		MUTEX_UNLOCK(&inst->ri_lock);
1911 
1912 cont:
1913 		/* grab the queue lock */
1914 		rip = inst_lookup_queue(fmri);
1915 		if (rip == NULL)
1916 			goto out;
1917 
1918 		/* delete the event */
1919 		uu_list_remove(rip->ri_queue, event);
1920 		startd_free(event, sizeof (restarter_instance_qentry_t));
1921 	}
1922 
1923 	assert(rip != NULL);
1924 
1925 	/*
1926 	 * Try to preserve the thread for a little while for future use.
1927 	 */
1928 	to.tv_sec = 3;
1929 	to.tv_nsec = 0;
1930 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1931 	    &rip->ri_queue_lock, &to);
1932 
1933 	if (uu_list_first(rip->ri_queue) != NULL)
1934 		goto again;
1935 
1936 	rip->ri_queue_thread = 0;
1937 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1938 
1939 out:
1940 	(void) scf_handle_unbind(h);
1941 	scf_handle_destroy(h);
1942 	free(fmri);
1943 	return (NULL);
1944 }
1945 
1946 static int
1947 is_admin_event(restarter_event_type_t t)
1948 {
1949 	switch (t) {
1950 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1951 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1952 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1953 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1954 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1955 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1956 		return (1);
1957 	default:
1958 		return (0);
1959 	}
1960 }
1961 
1962 static void
1963 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1964 {
1965 	restarter_instance_qentry_t *qe;
1966 	int r;
1967 
1968 	assert(MUTEX_HELD(&ri->ri_queue_lock));
1969 	assert(!MUTEX_HELD(&ri->ri_lock));
1970 
1971 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1972 	qe->riq_type = e->rpe_type;
1973 	qe->riq_reason = e->rpe_reason;
1974 
1975 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1976 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1977 	assert(r == 0);
1978 }
1979 
1980 /*
1981  * void *restarter_event_thread()
1982  *
1983  *  Handle incoming graph events by placing them on a per-instance
1984  *  queue.  We can't lock the main part of the instance structure, so
1985  *  just modify the seprarately locked event queue portion.
1986  */
1987 /*ARGSUSED*/
1988 static void *
1989 restarter_event_thread(void *unused)
1990 {
1991 	scf_handle_t *h;
1992 
1993 	(void) pthread_setname_np(pthread_self(), "restarter_event");
1994 
1995 	/*
1996 	 * This is a new thread, and thus, gets its own handle
1997 	 * to the repository.
1998 	 */
1999 	h = libscf_handle_create_bound_loop();
2000 
2001 	MUTEX_LOCK(&ru->restarter_update_lock);
2002 
2003 	/*CONSTCOND*/
2004 	while (1) {
2005 		restarter_protocol_event_t *e;
2006 
2007 		while (ru->restarter_update_wakeup == 0)
2008 			(void) pthread_cond_wait(&ru->restarter_update_cv,
2009 			    &ru->restarter_update_lock);
2010 
2011 		ru->restarter_update_wakeup = 0;
2012 
2013 		while ((e = restarter_event_dequeue()) != NULL) {
2014 			restarter_inst_t *rip;
2015 			char *fmri;
2016 
2017 			MUTEX_UNLOCK(&ru->restarter_update_lock);
2018 
2019 			/*
2020 			 * ADD_INSTANCE is special: there's likely no
2021 			 * instance structure yet, so we need to handle the
2022 			 * addition synchronously.
2023 			 */
2024 			switch (e->rpe_type) {
2025 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
2026 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
2027 					log_error(LOG_INFO, "Restarter: "
2028 					    "Could not add %s.\n", e->rpe_inst);
2029 
2030 				MUTEX_LOCK(&st->st_load_lock);
2031 				if (--st->st_load_instances == 0)
2032 					(void) pthread_cond_broadcast(
2033 					    &st->st_load_cv);
2034 				MUTEX_UNLOCK(&st->st_load_lock);
2035 
2036 				goto nolookup;
2037 			}
2038 
2039 			/*
2040 			 * Lookup the instance, locking only the event queue.
2041 			 * Can't grab ri_lock here because it might be held
2042 			 * by a long-running method.
2043 			 */
2044 			rip = inst_lookup_queue(e->rpe_inst);
2045 			if (rip == NULL) {
2046 				log_error(LOG_INFO, "Restarter: "
2047 				    "Ignoring %s command for unknown service "
2048 				    "%s.\n", event_names[e->rpe_type],
2049 				    e->rpe_inst);
2050 				goto nolookup;
2051 			}
2052 
2053 			/* Keep ADMIN events from filling up the queue. */
2054 			if (is_admin_event(e->rpe_type) &&
2055 			    uu_list_numnodes(rip->ri_queue) >
2056 			    RINST_QUEUE_THRESHOLD) {
2057 				MUTEX_UNLOCK(&rip->ri_queue_lock);
2058 				log_instance(rip, B_TRUE, "Instance event "
2059 				    "queue overflow.  Dropping administrative "
2060 				    "request.");
2061 				log_framework(LOG_DEBUG, "%s: Instance event "
2062 				    "queue overflow.  Dropping administrative "
2063 				    "request.\n", rip->ri_i.i_fmri);
2064 				goto nolookup;
2065 			}
2066 
2067 			/* Now add the event to the instance queue. */
2068 			restarter_queue_event(rip, e);
2069 
2070 			if (rip->ri_queue_thread == 0) {
2071 				/*
2072 				 * Start a thread if one isn't already
2073 				 * running.
2074 				 */
2075 				fmri = safe_strdup(e->rpe_inst);
2076 				rip->ri_queue_thread =  startd_thread_create(
2077 				    restarter_process_events, (void *)fmri);
2078 			} else {
2079 				/*
2080 				 * Signal the existing thread that there's
2081 				 * a new event.
2082 				 */
2083 				(void) pthread_cond_broadcast(
2084 				    &rip->ri_queue_cv);
2085 			}
2086 
2087 			MUTEX_UNLOCK(&rip->ri_queue_lock);
2088 nolookup:
2089 			restarter_event_release(e);
2090 
2091 			MUTEX_LOCK(&ru->restarter_update_lock);
2092 		}
2093 	}
2094 }
2095 
2096 static restarter_inst_t *
2097 contract_to_inst(ctid_t ctid)
2098 {
2099 	restarter_inst_t *inst;
2100 	int id;
2101 
2102 	id = lookup_inst_by_contract(ctid);
2103 	if (id == -1)
2104 		return (NULL);
2105 
2106 	inst = inst_lookup_by_id(id);
2107 	if (inst != NULL) {
2108 		/*
2109 		 * Since ri_lock isn't held by the contract id lookup, this
2110 		 * instance may have been restarted and now be in a new
2111 		 * contract, making the old contract no longer valid for this
2112 		 * instance.
2113 		 */
2114 		if (ctid != inst->ri_i.i_primary_ctid) {
2115 			MUTEX_UNLOCK(&inst->ri_lock);
2116 			inst = NULL;
2117 		}
2118 	}
2119 	return (inst);
2120 }
2121 
2122 /*
2123  * void contract_action()
2124  *   Take action on contract events.
2125  */
2126 static void
2127 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2128     uint32_t type)
2129 {
2130 	const char *fmri = inst->ri_i.i_fmri;
2131 
2132 	assert(MUTEX_HELD(&inst->ri_lock));
2133 
2134 	/*
2135 	 * If startd has stopped this contract, there is no need to
2136 	 * stop it again.
2137 	 */
2138 	if (inst->ri_i.i_primary_ctid > 0 &&
2139 	    inst->ri_i.i_primary_ctid_stopped)
2140 		return;
2141 
2142 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2143 	    | CT_PR_EV_HWERR)) == 0) {
2144 		/*
2145 		 * There shouldn't be other events, since that's not how we set
2146 		 * the terms. Thus, just log an error and drive on.
2147 		 */
2148 		log_framework(LOG_NOTICE,
2149 		    "%s: contract %ld received unexpected critical event "
2150 		    "(%d)\n", fmri, id, type);
2151 		return;
2152 	}
2153 
2154 	assert(instance_in_transition(inst) == 0);
2155 
2156 	if (instance_is_wait_style(inst)) {
2157 		/*
2158 		 * We ignore all events; if they impact the
2159 		 * process we're monitoring, then the
2160 		 * wait_thread will stop the instance.
2161 		 */
2162 		log_framework(LOG_DEBUG,
2163 		    "%s: ignoring contract event on wait-style service\n",
2164 		    fmri);
2165 	} else {
2166 		/*
2167 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2168 		 */
2169 		switch (type) {
2170 		case CT_PR_EV_EMPTY:
2171 			(void) stop_instance(h, inst, RSTOP_EXIT);
2172 			break;
2173 		case CT_PR_EV_CORE:
2174 			(void) stop_instance(h, inst, RSTOP_CORE);
2175 			break;
2176 		case CT_PR_EV_SIGNAL:
2177 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
2178 			break;
2179 		case CT_PR_EV_HWERR:
2180 			(void) stop_instance(h, inst, RSTOP_HWERR);
2181 			break;
2182 		}
2183 	}
2184 }
2185 
2186 /*
2187  * void *restarter_contract_event_thread(void *)
2188  *   Listens to the process contract bundle for critical events, taking action
2189  *   on events from contracts we know we are responsible for.
2190  */
2191 /*ARGSUSED*/
2192 static void *
2193 restarter_contracts_event_thread(void *unused)
2194 {
2195 	int fd, err;
2196 	scf_handle_t *local_handle;
2197 
2198 	(void) pthread_setname_np(pthread_self(), "restarter_contracts_event");
2199 
2200 	/*
2201 	 * Await graph load completion.  That is, stop here, until we've scanned
2202 	 * the repository for contract - instance associations.
2203 	 */
2204 	MUTEX_LOCK(&st->st_load_lock);
2205 	while (!(st->st_load_complete && st->st_load_instances == 0))
2206 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2207 	MUTEX_UNLOCK(&st->st_load_lock);
2208 
2209 	/*
2210 	 * This is a new thread, and thus, gets its own handle
2211 	 * to the repository.
2212 	 */
2213 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2214 		uu_die("Unable to bind a new repository handle: %s\n",
2215 		    scf_strerror(scf_error()));
2216 
2217 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2218 	if (fd == -1)
2219 		uu_die("process bundle open failed");
2220 
2221 	/*
2222 	 * Make sure we get all events (including those generated by configd
2223 	 * before this thread was started).
2224 	 */
2225 	err = ct_event_reset(fd);
2226 	assert(err == 0);
2227 
2228 	for (;;) {
2229 		int efd, sfd;
2230 		ct_evthdl_t ev;
2231 		uint32_t type;
2232 		ctevid_t evid;
2233 		ct_stathdl_t status;
2234 		ctid_t ctid;
2235 		restarter_inst_t *inst;
2236 		uint64_t cookie;
2237 
2238 		if (err = ct_event_read_critical(fd, &ev)) {
2239 			log_error(LOG_WARNING,
2240 			    "Error reading next contract event: %s",
2241 			    strerror(err));
2242 			continue;
2243 		}
2244 
2245 		evid = ct_event_get_evid(ev);
2246 		ctid = ct_event_get_ctid(ev);
2247 		type = ct_event_get_type(ev);
2248 
2249 		/* Fetch cookie. */
2250 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2251 		    < 0) {
2252 			ct_event_free(ev);
2253 			continue;
2254 		}
2255 
2256 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2257 			log_framework(LOG_WARNING, "Could not get status for "
2258 			    "contract %ld: %s\n", ctid, strerror(err));
2259 
2260 			startd_close(sfd);
2261 			ct_event_free(ev);
2262 			continue;
2263 		}
2264 
2265 		cookie = ct_status_get_cookie(status);
2266 
2267 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2268 		    "cookie %lld\n", type, ctid, cookie);
2269 
2270 		ct_status_free(status);
2271 
2272 		startd_close(sfd);
2273 
2274 		/*
2275 		 * svc.configd(1M) restart handling performed by the
2276 		 * fork_configd_thread.  We don't acknowledge, as that thread
2277 		 * will do so.
2278 		 */
2279 		if (cookie == CONFIGD_COOKIE) {
2280 			ct_event_free(ev);
2281 			continue;
2282 		}
2283 
2284 		inst = NULL;
2285 		if (storing_contract != 0 &&
2286 		    (inst = contract_to_inst(ctid)) == NULL) {
2287 			/*
2288 			 * This can happen for two reasons:
2289 			 * - method_run() has not yet stored the
2290 			 *    the contract into the internal hash table.
2291 			 * - we receive an EMPTY event for an abandoned
2292 			 *    contract.
2293 			 * If there is any contract in the process of
2294 			 * being stored into the hash table then re-read
2295 			 * the event later.
2296 			 */
2297 			log_framework(LOG_DEBUG,
2298 			    "Reset event %d for unknown "
2299 			    "contract id %ld\n", type, ctid);
2300 
2301 			/* don't go too fast */
2302 			(void) poll(NULL, 0, 100);
2303 
2304 			(void) ct_event_reset(fd);
2305 			ct_event_free(ev);
2306 			continue;
2307 		}
2308 
2309 		/*
2310 		 * Do not call contract_to_inst() again if first
2311 		 * call succeeded.
2312 		 */
2313 		if (inst == NULL)
2314 			inst = contract_to_inst(ctid);
2315 		if (inst == NULL) {
2316 			/*
2317 			 * This can happen if we receive an EMPTY
2318 			 * event for an abandoned contract.
2319 			 */
2320 			log_framework(LOG_DEBUG,
2321 			    "Received event %d for unknown contract id "
2322 			    "%ld\n", type, ctid);
2323 		} else {
2324 			log_framework(LOG_DEBUG,
2325 			    "Received event %d for contract id "
2326 			    "%ld (%s)\n", type, ctid,
2327 			    inst->ri_i.i_fmri);
2328 
2329 			contract_action(local_handle, inst, ctid, type);
2330 
2331 			MUTEX_UNLOCK(&inst->ri_lock);
2332 		}
2333 
2334 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2335 		    O_WRONLY);
2336 		if (efd != -1) {
2337 			(void) ct_ctl_ack(efd, evid);
2338 			startd_close(efd);
2339 		}
2340 
2341 		ct_event_free(ev);
2342 
2343 	}
2344 
2345 	/*NOTREACHED*/
2346 	return (NULL);
2347 }
2348 
/*
 * Timeout queue, processed by restarter_timeouts_event_thread().
 *
 * Both the queue and the pool its entries are linked through are set up by
 * timeout_init().  Operations on the queue's list are performed with
 * timeouts->tq_lock held.
 */
timeout_queue_t *timeouts;
static uu_list_pool_t *timeout_pool;

/*
 * Wakeup channel for the timeouts thread: timeout_insert() sets tu_wakeup
 * and broadcasts on tu_cv; restarter_timeouts_event_thread() sleeps on
 * tu_cv until tu_wakeup is non-zero and then clears it.
 */
typedef struct timeout_update {
	pthread_mutex_t		tu_lock;	/* held around tu_wakeup and tu_cv use */
	pthread_cond_t		tu_cv;		/* broadcast when a timeout is queued */
	int			tu_wakeup;	/* non-zero: a timeout was queued */
} timeout_update_t;

/* The single wakeup channel instance, allocated by timeout_init(). */
timeout_update_t *tu;
2362 
/*
 * NULL-terminated list of instance FMRIs for which svc.startd overrides the
 * configured method timeout with an infinite one (see is_timeout_ovr()).
 * The table is never modified, so both the strings and the pointer array
 * itself are const.
 */
static const char *const timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2376 
2377 int
2378 is_timeout_ovr(restarter_inst_t *inst)
2379 {
2380 	int i;
2381 
2382 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2383 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2384 			log_instance(inst, B_TRUE, "Timeout override by "
2385 			    "svc.startd.  Using infinite timeout.");
2386 			return (1);
2387 		}
2388 	}
2389 
2390 	return (0);
2391 }
2392 
2393 /*ARGSUSED*/
2394 static int
2395 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2396 {
2397 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2398 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2399 
2400 	if (t1 > t2)
2401 		return (1);
2402 	else if (t1 < t2)
2403 		return (-1);
2404 	return (0);
2405 }
2406 
2407 void
2408 timeout_init()
2409 {
2410 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2411 
2412 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2413 
2414 	timeout_pool = startd_list_pool_create("timeouts",
2415 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2416 	    timeout_compare, UU_LIST_POOL_DEBUG);
2417 	assert(timeout_pool != NULL);
2418 
2419 	timeouts->tq_list = startd_list_create(timeout_pool,
2420 	    timeouts, UU_LIST_SORTED);
2421 	assert(timeouts->tq_list != NULL);
2422 
2423 	tu = startd_zalloc(sizeof (timeout_update_t));
2424 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2425 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2426 }
2427 
2428 void
2429 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2430 {
2431 	hrtime_t now, timeout;
2432 	timeout_entry_t *entry;
2433 	uu_list_index_t idx;
2434 
2435 	assert(MUTEX_HELD(&inst->ri_lock));
2436 
2437 	now = gethrtime();
2438 
2439 	/*
2440 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2441 	 * just return.
2442 	 */
2443 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2444 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2445 		    "treating as infinite.");
2446 		return;
2447 	}
2448 
2449 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2450 	timeout = now + (timeout_sec * 1000000000LL);
2451 
2452 	entry = startd_alloc(sizeof (timeout_entry_t));
2453 	entry->te_timeout = timeout;
2454 	entry->te_ctid = cid;
2455 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2456 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2457 	entry->te_fired = 0;
2458 	/* Insert the calculated timeout time onto the queue. */
2459 	MUTEX_LOCK(&timeouts->tq_lock);
2460 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2461 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2462 	uu_list_insert(timeouts->tq_list, entry, idx);
2463 	MUTEX_UNLOCK(&timeouts->tq_lock);
2464 
2465 	assert(inst->ri_timeout == NULL);
2466 	inst->ri_timeout = entry;
2467 
2468 	MUTEX_LOCK(&tu->tu_lock);
2469 	tu->tu_wakeup = 1;
2470 	(void) pthread_cond_broadcast(&tu->tu_cv);
2471 	MUTEX_UNLOCK(&tu->tu_lock);
2472 }
2473 
2474 
2475 void
2476 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2477 {
2478 	assert(MUTEX_HELD(&inst->ri_lock));
2479 
2480 	if (inst->ri_timeout == NULL)
2481 		return;
2482 
2483 	assert(inst->ri_timeout->te_ctid == cid);
2484 
2485 	MUTEX_LOCK(&timeouts->tq_lock);
2486 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2487 	MUTEX_UNLOCK(&timeouts->tq_lock);
2488 
2489 	free(inst->ri_timeout->te_fmri);
2490 	free(inst->ri_timeout->te_logstem);
2491 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2492 	inst->ri_timeout = NULL;
2493 }
2494 
2495 static int
2496 timeout_now()
2497 {
2498 	timeout_entry_t *e;
2499 	hrtime_t now;
2500 	int ret;
2501 
2502 	now = gethrtime();
2503 
2504 	/*
2505 	 * Walk through the (sorted) timeouts list.  While the timeout
2506 	 * at the head of the list is <= the current time, kill the
2507 	 * method.
2508 	 */
2509 	MUTEX_LOCK(&timeouts->tq_lock);
2510 
2511 	for (e = uu_list_first(timeouts->tq_list);
2512 	    e != NULL && e->te_timeout <= now;
2513 	    e = uu_list_next(timeouts->tq_list, e)) {
2514 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2515 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2516 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2517 		    "Method or service exit timed out.  Killing contract %ld.",
2518 		    e->te_ctid);
2519 		e->te_fired = 1;
2520 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2521 	}
2522 
2523 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2524 		ret = 0;
2525 	else
2526 		ret = -1;
2527 
2528 	MUTEX_UNLOCK(&timeouts->tq_lock);
2529 
2530 	return (ret);
2531 }
2532 
2533 /*
2534  * void *restarter_timeouts_event_thread(void *)
2535  *   Responsible for monitoring the method timeouts.  This thread must
2536  *   be started before any methods are called.
2537  */
2538 /*ARGSUSED*/
2539 static void *
2540 restarter_timeouts_event_thread(void *unused)
2541 {
2542 	/*
2543 	 * Timeouts are entered on a priority queue, which is processed by
2544 	 * this thread.  As timeouts are specified in seconds, we'll do
2545 	 * the necessary processing every second, as long as the queue
2546 	 * is not empty.
2547 	 */
2548 
2549 	(void) pthread_setname_np(pthread_self(), "restarter_timeouts_event");
2550 
2551 	/*CONSTCOND*/
2552 	while (1) {
2553 		/*
2554 		 * As long as the timeout list isn't empty, process it
2555 		 * every second.
2556 		 */
2557 		if (timeout_now() == 0) {
2558 			(void) sleep(1);
2559 			continue;
2560 		}
2561 
2562 		/* The list is empty, wait until we have more timeouts. */
2563 		MUTEX_LOCK(&tu->tu_lock);
2564 
2565 		while (tu->tu_wakeup == 0)
2566 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2567 
2568 		tu->tu_wakeup = 0;
2569 		MUTEX_UNLOCK(&tu->tu_lock);
2570 	}
2571 
2572 	return (NULL);
2573 }
2574 
2575 void
2576 restarter_start()
2577 {
2578 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2579 	(void) startd_thread_create(restarter_event_thread, NULL);
2580 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2581 	(void) startd_thread_create(wait_thread, NULL);
2582 }
2583 
2584 
2585 void
2586 restarter_init()
2587 {
2588 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2589 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2590 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2591 	(void) memset(&instance_list, 0, sizeof (instance_list));
2592 
2593 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2594 	instance_list.ril_instance_list = startd_list_create(
2595 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2596 
2597 	restarter_queue_pool = startd_list_pool_create(
2598 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2599 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2600 	    UU_LIST_POOL_DEBUG);
2601 
2602 	contract_list_pool = startd_list_pool_create(
2603 	    "contract_list", sizeof (contract_entry_t),
2604 	    offsetof(contract_entry_t,  ce_link), NULL,
2605 	    UU_LIST_POOL_DEBUG);
2606 	contract_hash_init();
2607 
2608 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2609 }
2610