xref: /titanic_50/usr/src/cmd/svc/startd/restarter.c (revision 2e107de79998f3036decec2454002940afb9a6ff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * restarter.c - service manipulation
28  *
29  * This component manages services whose restarter is svc.startd, the standard
30  * restarter.  It translates restarter protocol events from the graph engine
31  * into actions on processes, as a delegated restarter would do.
32  *
33  * The master restarter manages a number of always-running threads:
34  *   - restarter event thread: events from the graph engine
35  *   - timeout thread: thread to fire queued timeouts
36  *   - contract thread: thread to handle contract events
37  *   - wait thread: thread to handle wait-based services
38  *
39  * The other threads are created as-needed:
40  *   - per-instance method threads
41  *   - per-instance event processing threads
42  *
43  * The interaction of all threads must result in the following conditions
44  * being satisfied (on a per-instance basis):
45  *   - restarter events must be processed in order
46  *   - method execution must be serialized
47  *   - instance delete must be held until outstanding methods are complete
48  *   - contract events shouldn't be processed while a method is running
49  *   - timeouts should fire even when a method is running
50  *
51  * Service instances are represented by restarter_inst_t's and are kept in the
52  * instance_list list.
53  *
54  * Service States
55  *   The current state of a service instance is kept in
56  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
57  *   some time, then before we effect the transition we set
58  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
59  *   rotate i_next_state to i_state and set i_next_state to
60  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
61  *   held.  The exception is when we launch methods, which are done with
62  *   a separate thread.  To keep any other threads from grabbing ri_lock before
63  *   method_thread() does, we set ri_method_thread to the thread id of the
64  *   method thread, and when it is nonzero any thread with a different thread id
65  *   waits on ri_method_cv.
66  *
67  * Method execution is serialized by blocking on ri_method_cv in
68  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
69  * also prevents the instance structure from being deleted until all
70  * outstanding operations such as method_thread() have finished.
71  *
72  * Lock ordering:
73  *
74  * dgraph_lock [can be held when taking:]
75  *   utmpx_lock
76  *   dictionary->dict_lock
77  *   st->st_load_lock
78  *   wait_info_lock
79  *   ru->restarter_update_lock
80  *     restarter_queue->rpeq_lock
81  *   instance_list.ril_lock
82  *     inst->ri_lock
83  *   st->st_configd_live_lock
84  *
85  * instance_list.ril_lock
86  *   graph_queue->gpeq_lock
87  *   gu->gu_lock
88  *   st->st_configd_live_lock
89  *   dictionary->dict_lock
90  *   inst->ri_lock
91  *     graph_queue->gpeq_lock
92  *     gu->gu_lock
93  *     tu->tu_lock
94  *     tq->tq_lock
95  *     inst->ri_queue_lock
96  *       wait_info_lock
97  *       bp->cb_lock
98  *     utmpx_lock
99  *
100  * single_user_thread_lock
101  *   wait_info_lock
102  *   utmpx_lock
103  *
104  * gu_freeze_lock
105  *
106  * logbuf_mutex nests inside pretty much everything.
107  */
108 
109 #include <sys/contract/process.h>
110 #include <sys/ctfs.h>
111 #include <sys/stat.h>
112 #include <sys/time.h>
113 #include <sys/types.h>
114 #include <sys/uio.h>
115 #include <sys/wait.h>
116 #include <assert.h>
117 #include <errno.h>
118 #include <fcntl.h>
119 #include <libcontract.h>
120 #include <libcontract_priv.h>
121 #include <libintl.h>
122 #include <librestart.h>
123 #include <librestart_priv.h>
124 #include <libuutil.h>
125 #include <limits.h>
126 #include <poll.h>
127 #include <port.h>
128 #include <pthread.h>
129 #include <stdarg.h>
130 #include <stdio.h>
131 #include <strings.h>
132 #include <unistd.h>
133 
134 #include "startd.h"
135 #include "protocol.h"
136 
/* Pool used to initialize each instance's ri_link list node. */
static uu_list_pool_t *restarter_instance_pool;
/* The instance list; ril_instance_list is accessed under ril_lock. */
static restarter_instance_list_t instance_list;

/* Pool used when creating each instance's ri_queue event list. */
static uu_list_pool_t *restarter_queue_pool;
141 
142 /*ARGSUSED*/
143 static int
144 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
145     void *private)
146 {
147 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
148 	int rc_id = *(int *)rc_arg;
149 
150 	if (lc_id > rc_id)
151 		return (1);
152 	if (lc_id < rc_id)
153 		return (-1);
154 	return (0);
155 }
156 
157 static restarter_inst_t *
158 inst_lookup_by_name(const char *name)
159 {
160 	int id;
161 
162 	id = dict_lookup_byname(name);
163 	if (id == -1)
164 		return (NULL);
165 
166 	return (inst_lookup_by_id(id));
167 }
168 
169 restarter_inst_t *
170 inst_lookup_by_id(int id)
171 {
172 	restarter_inst_t *inst;
173 
174 	MUTEX_LOCK(&instance_list.ril_lock);
175 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
176 	if (inst != NULL)
177 		MUTEX_LOCK(&inst->ri_lock);
178 	MUTEX_UNLOCK(&instance_list.ril_lock);
179 
180 	if (inst != NULL) {
181 		while (inst->ri_method_thread != 0 &&
182 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
183 			++inst->ri_method_waiters;
184 			(void) pthread_cond_wait(&inst->ri_method_cv,
185 			    &inst->ri_lock);
186 			assert(inst->ri_method_waiters > 0);
187 			--inst->ri_method_waiters;
188 		}
189 	}
190 
191 	return (inst);
192 }
193 
194 static restarter_inst_t *
195 inst_lookup_queue(const char *name)
196 {
197 	int id;
198 	restarter_inst_t *inst;
199 
200 	id = dict_lookup_byname(name);
201 	if (id == -1)
202 		return (NULL);
203 
204 	MUTEX_LOCK(&instance_list.ril_lock);
205 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
206 	if (inst != NULL)
207 		MUTEX_LOCK(&inst->ri_queue_lock);
208 	MUTEX_UNLOCK(&instance_list.ril_lock);
209 
210 	return (inst);
211 }
212 
213 const char *
214 service_style(int flags)
215 {
216 	switch (flags & RINST_STYLE_MASK) {
217 	case RINST_CONTRACT:	return ("contract");
218 	case RINST_TRANSIENT:	return ("transient");
219 	case RINST_WAIT:	return ("wait");
220 
221 	default:
222 #ifndef NDEBUG
223 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
224 #endif
225 		abort();
226 		/* NOTREACHED */
227 	}
228 }
229 
230 /*
231  * Fails with ECONNABORTED or ECANCELED.
232  */
233 static int
234 check_contract(restarter_inst_t *inst, boolean_t primary,
235     scf_instance_t *scf_inst)
236 {
237 	ctid_t *ctidp;
238 	int fd, r;
239 
240 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
241 	    &inst->ri_i.i_transient_ctid;
242 
243 	assert(*ctidp >= 1);
244 
245 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
246 	if (fd >= 0) {
247 		r = close(fd);
248 		assert(r == 0);
249 		return (0);
250 	}
251 
252 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
253 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
254 	switch (r) {
255 	case 0:
256 	case ECONNABORTED:
257 	case ECANCELED:
258 		*ctidp = 0;
259 		return (r);
260 
261 	case ENOMEM:
262 		uu_die("Out of memory\n");
263 		/* NOTREACHED */
264 
265 	case EPERM:
266 		uu_die("Insufficient privilege.\n");
267 		/* NOTREACHED */
268 
269 	case EACCES:
270 		uu_die("Repository backend access denied.\n");
271 		/* NOTREACHED */
272 
273 	case EROFS:
274 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
275 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
276 		return (0);
277 
278 	case EINVAL:
279 	case EBADF:
280 	default:
281 		assert(0);
282 		abort();
283 		/* NOTREACHED */
284 	}
285 }
286 
287 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
288 
289 /*
290  * int restarter_insert_inst(scf_handle_t *, char *)
291  *   If the inst is already in the restarter list, return its id.  If the inst
292  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
293  *   states, insert it into the list, and return 0.
294  *
295  *   Fails with
296  *     ENOENT - name is not in the repository
297  */
298 static int
299 restarter_insert_inst(scf_handle_t *h, const char *name)
300 {
301 	int id, r;
302 	restarter_inst_t *inst;
303 	uu_list_index_t idx;
304 	scf_service_t *scf_svc;
305 	scf_instance_t *scf_inst;
306 	scf_snapshot_t *snap = NULL;
307 	scf_propertygroup_t *pg;
308 	char *svc_name, *inst_name;
309 	char logfilebuf[PATH_MAX];
310 	char *c;
311 	boolean_t do_commit_states;
312 	restarter_instance_state_t state, next_state;
313 	protocol_states_t *ps;
314 	pid_t start_pid;
315 
316 	MUTEX_LOCK(&instance_list.ril_lock);
317 
318 	/*
319 	 * We don't use inst_lookup_by_name() here because we want the lookup
320 	 * & insert to be atomic.
321 	 */
322 	id = dict_lookup_byname(name);
323 	if (id != -1) {
324 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
325 		    &idx);
326 		if (inst != NULL) {
327 			MUTEX_UNLOCK(&instance_list.ril_lock);
328 			return (0);
329 		}
330 	}
331 
332 	/* Allocate an instance */
333 	inst = startd_zalloc(sizeof (restarter_inst_t));
334 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
335 	inst->ri_utmpx_prefix[0] = '\0';
336 
337 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
338 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
339 
340 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
341 
342 	/*
343 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
344 	 * just in case.
345 	 */
346 	inst->ri_id = (id != -1 ? id : dict_insert(name));
347 
348 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
349 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
350 
351 	scf_svc = safe_scf_service_create(h);
352 	scf_inst = safe_scf_instance_create(h);
353 	pg = safe_scf_pg_create(h);
354 	svc_name = startd_alloc(max_scf_name_size);
355 	inst_name = startd_alloc(max_scf_name_size);
356 
357 rep_retry:
358 	if (snap != NULL)
359 		scf_snapshot_destroy(snap);
360 	if (inst->ri_logstem != NULL)
361 		startd_free(inst->ri_logstem, PATH_MAX);
362 	if (inst->ri_common_name != NULL)
363 		startd_free(inst->ri_common_name, max_scf_value_size);
364 	if (inst->ri_C_common_name != NULL)
365 		startd_free(inst->ri_C_common_name, max_scf_value_size);
366 	snap = NULL;
367 	inst->ri_logstem = NULL;
368 	inst->ri_common_name = NULL;
369 	inst->ri_C_common_name = NULL;
370 
371 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
372 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
373 		switch (scf_error()) {
374 		case SCF_ERROR_CONNECTION_BROKEN:
375 			libscf_handle_rebind(h);
376 			goto rep_retry;
377 
378 		case SCF_ERROR_NOT_FOUND:
379 			goto deleted;
380 		}
381 
382 		uu_die("Can't decode FMRI %s: %s\n", name,
383 		    scf_strerror(scf_error()));
384 	}
385 
386 	/*
387 	 * If there's no running snapshot, then we execute using the editing
388 	 * snapshot.  Pending snapshots will be taken later.
389 	 */
390 	snap = libscf_get_running_snapshot(scf_inst);
391 
392 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
393 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
394 	    0)) {
395 		switch (scf_error()) {
396 		case SCF_ERROR_NOT_SET:
397 			break;
398 
399 		case SCF_ERROR_CONNECTION_BROKEN:
400 			libscf_handle_rebind(h);
401 			goto rep_retry;
402 
403 		default:
404 			assert(0);
405 			abort();
406 		}
407 
408 		goto deleted;
409 	}
410 
411 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
412 	for (c = logfilebuf; *c != '\0'; c++)
413 		if (*c == '/')
414 			*c = '-';
415 
416 	inst->ri_logstem = startd_alloc(PATH_MAX);
417 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
418 	    LOG_SUFFIX);
419 
420 	/*
421 	 * If the restarter group is missing, use uninit/none.  Otherwise,
422 	 * we're probably being restarted & don't want to mess up the states
423 	 * that are there.
424 	 */
425 	state = RESTARTER_STATE_UNINIT;
426 	next_state = RESTARTER_STATE_NONE;
427 
428 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
429 	if (r != 0) {
430 		switch (scf_error()) {
431 		case SCF_ERROR_CONNECTION_BROKEN:
432 			libscf_handle_rebind(h);
433 			goto rep_retry;
434 
435 		case SCF_ERROR_NOT_SET:
436 			goto deleted;
437 
438 		case SCF_ERROR_NOT_FOUND:
439 			/*
440 			 * This shouldn't happen since the graph engine should
441 			 * have initialized the state to uninitialized/none if
442 			 * there was no restarter pg.  In case somebody
443 			 * deleted it, though....
444 			 */
445 			do_commit_states = B_TRUE;
446 			break;
447 
448 		default:
449 			assert(0);
450 			abort();
451 		}
452 	} else {
453 		r = libscf_read_states(pg, &state, &next_state);
454 		if (r != 0) {
455 			do_commit_states = B_TRUE;
456 		} else {
457 			if (next_state != RESTARTER_STATE_NONE) {
458 				/*
459 				 * Force next_state to _NONE since we
460 				 * don't look for method processes.
461 				 */
462 				next_state = RESTARTER_STATE_NONE;
463 				do_commit_states = B_TRUE;
464 			} else {
465 				/*
466 				 * Inform the restarter of our state without
467 				 * changing the STIME in the repository.
468 				 */
469 				ps = startd_alloc(sizeof (*ps));
470 				inst->ri_i.i_state = ps->ps_state = state;
471 				inst->ri_i.i_next_state = ps->ps_state_next =
472 				    next_state;
473 
474 				graph_protocol_send_event(inst->ri_i.i_fmri,
475 				    GRAPH_UPDATE_STATE_CHANGE, ps);
476 
477 				do_commit_states = B_FALSE;
478 			}
479 		}
480 	}
481 
482 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
483 	    &inst->ri_utmpx_prefix)) {
484 	case 0:
485 		break;
486 
487 	case ECONNABORTED:
488 		libscf_handle_rebind(h);
489 		goto rep_retry;
490 
491 	case ECANCELED:
492 		goto deleted;
493 
494 	case ENOENT:
495 		/*
496 		 * This is odd, because the graph engine should have required
497 		 * the general property group.  So we'll just use default
498 		 * flags in anticipation of the graph engine sending us
499 		 * REMOVE_INSTANCE when it finds out that the general property
500 		 * group has been deleted.
501 		 */
502 		inst->ri_flags = RINST_CONTRACT;
503 		break;
504 
505 	default:
506 		assert(0);
507 		abort();
508 	}
509 
510 	switch (libscf_get_template_values(scf_inst, snap,
511 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
512 	case 0:
513 		break;
514 
515 	case ECONNABORTED:
516 		libscf_handle_rebind(h);
517 		goto rep_retry;
518 
519 	case ECANCELED:
520 		goto deleted;
521 
522 	case ECHILD:
523 	case ENOENT:
524 		break;
525 
526 	default:
527 		assert(0);
528 		abort();
529 	}
530 
531 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
532 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
533 	    &start_pid)) {
534 	case 0:
535 		break;
536 
537 	case ECONNABORTED:
538 		libscf_handle_rebind(h);
539 		goto rep_retry;
540 
541 	case ECANCELED:
542 		goto deleted;
543 
544 	default:
545 		assert(0);
546 		abort();
547 	}
548 
549 	if (inst->ri_i.i_primary_ctid >= 1) {
550 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
551 
552 		switch (check_contract(inst, B_TRUE, scf_inst)) {
553 		case 0:
554 			break;
555 
556 		case ECONNABORTED:
557 			libscf_handle_rebind(h);
558 			goto rep_retry;
559 
560 		case ECANCELED:
561 			goto deleted;
562 
563 		default:
564 			assert(0);
565 			abort();
566 		}
567 	}
568 
569 	if (inst->ri_i.i_transient_ctid >= 1) {
570 		switch (check_contract(inst, B_FALSE, scf_inst)) {
571 		case 0:
572 			break;
573 
574 		case ECONNABORTED:
575 			libscf_handle_rebind(h);
576 			goto rep_retry;
577 
578 		case ECANCELED:
579 			goto deleted;
580 
581 		default:
582 			assert(0);
583 			abort();
584 		}
585 	}
586 
587 	/* No more failures we live through, so add it to the list. */
588 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
589 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
590 	MUTEX_LOCK(&inst->ri_lock);
591 	MUTEX_LOCK(&inst->ri_queue_lock);
592 
593 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
594 
595 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
596 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
597 	MUTEX_UNLOCK(&instance_list.ril_lock);
598 
599 	if (start_pid != -1 &&
600 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
601 		int ret;
602 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
603 		if (ret == -1) {
604 			/*
605 			 * Implication:  if we can't reregister the
606 			 * instance, we will start another one.  Two
607 			 * instances may or may not result in a resource
608 			 * conflict.
609 			 */
610 			log_error(LOG_WARNING,
611 			    "%s: couldn't reregister %ld for wait\n",
612 			    inst->ri_i.i_fmri, start_pid);
613 		} else if (ret == 1) {
614 			/*
615 			 * Leading PID has exited.
616 			 */
617 			(void) stop_instance(h, inst, RSTOP_EXIT);
618 		}
619 	}
620 
621 
622 	scf_pg_destroy(pg);
623 
624 	if (do_commit_states)
625 		(void) restarter_instance_update_states(h, inst, state,
626 		    next_state, RERR_NONE, NULL);
627 
628 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
629 	    service_style(inst->ri_flags));
630 
631 	MUTEX_UNLOCK(&inst->ri_queue_lock);
632 	MUTEX_UNLOCK(&inst->ri_lock);
633 
634 	startd_free(svc_name, max_scf_name_size);
635 	startd_free(inst_name, max_scf_name_size);
636 	scf_snapshot_destroy(snap);
637 	scf_instance_destroy(scf_inst);
638 	scf_service_destroy(scf_svc);
639 
640 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
641 	    name);
642 
643 	return (0);
644 
645 deleted:
646 	MUTEX_UNLOCK(&instance_list.ril_lock);
647 	startd_free(inst_name, max_scf_name_size);
648 	startd_free(svc_name, max_scf_name_size);
649 	if (snap != NULL)
650 		scf_snapshot_destroy(snap);
651 	scf_pg_destroy(pg);
652 	scf_instance_destroy(scf_inst);
653 	scf_service_destroy(scf_svc);
654 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
655 	uu_list_destroy(inst->ri_queue);
656 	if (inst->ri_logstem != NULL)
657 		startd_free(inst->ri_logstem, PATH_MAX);
658 	if (inst->ri_common_name != NULL)
659 		startd_free(inst->ri_common_name, max_scf_value_size);
660 	if (inst->ri_C_common_name != NULL)
661 		startd_free(inst->ri_C_common_name, max_scf_value_size);
662 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
663 	startd_free(inst, sizeof (restarter_inst_t));
664 	return (ENOENT);
665 }
666 
667 static void
668 restarter_delete_inst(restarter_inst_t *ri)
669 {
670 	int id;
671 	restarter_inst_t *rip;
672 	void *cookie = NULL;
673 	restarter_instance_qentry_t *e;
674 
675 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
676 
677 	/*
678 	 * Must drop the instance lock so we can pick up the instance_list
679 	 * lock & remove the instance.
680 	 */
681 	id = ri->ri_id;
682 	MUTEX_UNLOCK(&ri->ri_lock);
683 
684 	MUTEX_LOCK(&instance_list.ril_lock);
685 
686 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
687 	if (rip == NULL) {
688 		MUTEX_UNLOCK(&instance_list.ril_lock);
689 		return;
690 	}
691 
692 	assert(ri == rip);
693 
694 	uu_list_remove(instance_list.ril_instance_list, ri);
695 
696 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
697 	    ri->ri_i.i_fmri);
698 
699 	MUTEX_UNLOCK(&instance_list.ril_lock);
700 
701 	/*
702 	 * We can lock the instance without holding the instance_list lock
703 	 * since we removed the instance from the list.
704 	 */
705 	MUTEX_LOCK(&ri->ri_lock);
706 	MUTEX_LOCK(&ri->ri_queue_lock);
707 
708 	if (ri->ri_i.i_primary_ctid >= 1)
709 		contract_hash_remove(ri->ri_i.i_primary_ctid);
710 
711 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
712 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
713 
714 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
715 		startd_free(e, sizeof (*e));
716 	uu_list_destroy(ri->ri_queue);
717 
718 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
719 	startd_free(ri->ri_logstem, PATH_MAX);
720 	if (ri->ri_common_name != NULL)
721 		startd_free(ri->ri_common_name, max_scf_value_size);
722 	if (ri->ri_C_common_name != NULL)
723 		startd_free(ri->ri_C_common_name, max_scf_value_size);
724 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
725 	(void) pthread_mutex_destroy(&ri->ri_lock);
726 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
727 	startd_free(ri, sizeof (restarter_inst_t));
728 }
729 
730 /*
731  * instance_is_wait_style()
732  *
733  *   Returns 1 if the given instance is a "wait-style" service instance.
734  */
735 int
736 instance_is_wait_style(restarter_inst_t *inst)
737 {
738 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
739 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
740 }
741 
742 /*
743  * instance_is_transient_style()
744  *
745  *   Returns 1 if the given instance is a transient service instance.
746  */
747 int
748 instance_is_transient_style(restarter_inst_t *inst)
749 {
750 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
751 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
752 }
753 
754 /*
755  * instance_in_transition()
756  * Returns 1 if instance is in transition, 0 if not
757  */
758 int
759 instance_in_transition(restarter_inst_t *inst)
760 {
761 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
762 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
763 		return (0);
764 	return (1);
765 }
766 
767 /*
768  * returns 1 if instance is already started, 0 if not
769  */
770 static int
771 instance_started(restarter_inst_t *inst)
772 {
773 	int ret;
774 
775 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
776 
777 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
778 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
779 		ret = 1;
780 	else
781 		ret = 0;
782 
783 	return (ret);
784 }
785 
786 /*
787  * Returns
788  *   0 - success
789  *   ECONNRESET - success, but h was rebound
790  */
791 int
792 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
793     restarter_instance_state_t new_state,
794     restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
795 {
796 	protocol_states_t *states;
797 	int e;
798 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
799 	boolean_t rebound = B_FALSE;
800 	int prev_state_online;
801 	int state_online;
802 
803 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
804 
805 	prev_state_online = instance_started(ri);
806 
807 retry:
808 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
809 	    aux);
810 	switch (e) {
811 	case 0:
812 		break;
813 
814 	case ENOMEM:
815 		++retry_count;
816 		if (retry_count < ALLOC_RETRY) {
817 			(void) poll(NULL, 0, msecs);
818 			msecs *= ALLOC_DELAY_MULT;
819 			goto retry;
820 		}
821 
822 		/* Like startd_alloc(). */
823 		uu_die("Insufficient memory.\n");
824 		/* NOTREACHED */
825 
826 	case ECONNABORTED:
827 		libscf_handle_rebind(h);
828 		rebound = B_TRUE;
829 		goto retry;
830 
831 	case EPERM:
832 	case EACCES:
833 	case EROFS:
834 		log_error(LOG_NOTICE, "Could not commit state change for %s "
835 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
836 		/* FALLTHROUGH */
837 
838 	case ENOENT:
839 		ri->ri_i.i_state = new_state;
840 		ri->ri_i.i_next_state = new_state_next;
841 		break;
842 
843 	case EINVAL:
844 	default:
845 		bad_error("_restarter_commit_states", e);
846 	}
847 
848 	states = startd_alloc(sizeof (protocol_states_t));
849 	states->ps_state = new_state;
850 	states->ps_state_next = new_state_next;
851 	states->ps_err = err;
852 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
853 	    (void *)states);
854 
855 	state_online = instance_started(ri);
856 
857 	if (prev_state_online && !state_online)
858 		ri->ri_post_offline_hook();
859 	else if (!prev_state_online && state_online)
860 		ri->ri_post_online_hook();
861 
862 	return (rebound ? ECONNRESET : 0);
863 }
864 
865 void
866 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
867 {
868 	restarter_inst_t *inst;
869 
870 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
871 
872 	inst = inst_lookup_by_name(fmri);
873 	if (inst == NULL)
874 		return;
875 
876 	inst->ri_flags |= flag;
877 
878 	MUTEX_UNLOCK(&inst->ri_lock);
879 }
880 
881 static void
882 restarter_take_pending_snapshots(scf_handle_t *h)
883 {
884 	restarter_inst_t *inst;
885 	int r;
886 
887 	MUTEX_LOCK(&instance_list.ril_lock);
888 
889 	for (inst = uu_list_first(instance_list.ril_instance_list);
890 	    inst != NULL;
891 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
892 		const char *fmri;
893 		scf_instance_t *sinst = NULL;
894 
895 		MUTEX_LOCK(&inst->ri_lock);
896 
897 		/*
898 		 * This is where we'd check inst->ri_method_thread and if it
899 		 * were nonzero we'd wait in anticipation of another thread
900 		 * executing a method for inst.  Doing so with the instance_list
901 		 * locked, though, leads to deadlock.  Since taking a snapshot
902 		 * during that window won't hurt anything, we'll just continue.
903 		 */
904 
905 		fmri = inst->ri_i.i_fmri;
906 
907 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
908 			scf_snapshot_t *rsnap;
909 
910 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
911 
912 			rsnap = libscf_get_or_make_running_snapshot(sinst,
913 			    fmri, B_FALSE);
914 
915 			scf_instance_destroy(sinst);
916 
917 			if (rsnap != NULL)
918 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
919 
920 			scf_snapshot_destroy(rsnap);
921 		}
922 
923 		if (inst->ri_flags & RINST_RETAKE_START) {
924 			switch (r = libscf_snapshots_poststart(h, fmri,
925 			    B_FALSE)) {
926 			case 0:
927 			case ENOENT:
928 				inst->ri_flags &= ~RINST_RETAKE_START;
929 				break;
930 
931 			case ECONNABORTED:
932 				break;
933 
934 			case EACCES:
935 			default:
936 				bad_error("libscf_snapshots_poststart", r);
937 			}
938 		}
939 
940 		MUTEX_UNLOCK(&inst->ri_lock);
941 	}
942 
943 	MUTEX_UNLOCK(&instance_list.ril_lock);
944 }
945 
946 /* ARGSUSED */
947 void *
948 restarter_post_fsminimal_thread(void *unused)
949 {
950 	scf_handle_t *h;
951 	int r;
952 
953 	h = libscf_handle_create_bound_loop();
954 
955 	for (;;) {
956 		r = libscf_create_self(h);
957 		if (r == 0)
958 			break;
959 
960 		assert(r == ECONNABORTED);
961 		libscf_handle_rebind(h);
962 	}
963 
964 	restarter_take_pending_snapshots(h);
965 
966 	(void) scf_handle_unbind(h);
967 	scf_handle_destroy(h);
968 
969 	return (NULL);
970 }
971 
972 /*
973  * int stop_instance()
974  *
975  *   Stop the instance identified by the instance given as the second argument,
976  *   for the cause stated.
977  *
978  *   Returns
979  *     0 - success
980  *     -1 - inst is in transition
981  */
982 static int
983 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
984     stop_cause_t cause)
985 {
986 	fork_info_t *info;
987 	const char *cp;
988 	int err;
989 	restarter_error_t re;
990 
991 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
992 	assert(inst->ri_method_thread == 0);
993 
994 	switch (cause) {
995 	case RSTOP_EXIT:
996 		re = RERR_RESTART;
997 		cp = "all processes in service exited";
998 		break;
999 	case RSTOP_CORE:
1000 		re = RERR_FAULT;
1001 		cp = "process dumped core";
1002 		break;
1003 	case RSTOP_SIGNAL:
1004 		re = RERR_FAULT;
1005 		cp = "process received fatal signal from outside the service";
1006 		break;
1007 	case RSTOP_HWERR:
1008 		re = RERR_FAULT;
1009 		cp = "process killed due to uncorrectable hardware error";
1010 		break;
1011 	case RSTOP_DEPENDENCY:
1012 		re = RERR_RESTART;
1013 		cp = "dependency activity requires stop";
1014 		break;
1015 	case RSTOP_DISABLE:
1016 		re = RERR_RESTART;
1017 		cp = "service disabled";
1018 		break;
1019 	case RSTOP_RESTART:
1020 		re = RERR_RESTART;
1021 		cp = "service restarting";
1022 		break;
1023 	default:
1024 #ifndef NDEBUG
1025 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1026 		    cause, __FILE__, __LINE__);
1027 #endif
1028 		abort();
1029 	}
1030 
1031 	/* Services in the disabled and maintenance state are ignored */
1032 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1033 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1034 		log_framework(LOG_DEBUG,
1035 		    "%s: stop_instance -> is maint/disabled\n",
1036 		    inst->ri_i.i_fmri);
1037 		return (0);
1038 	}
1039 
1040 	/* Already stopped instances are left alone */
1041 	if (instance_started(inst) == 0) {
1042 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1043 		    inst->ri_i.i_fmri);
1044 		return (0);
1045 	}
1046 
1047 	if (instance_in_transition(inst)) {
1048 		/* requeue event by returning -1 */
1049 		log_framework(LOG_DEBUG,
1050 		    "Restarter: Not stopping %s, in transition.\n",
1051 		    inst->ri_i.i_fmri);
1052 		return (-1);
1053 	}
1054 
1055 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1056 
1057 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1058 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1059 
1060 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1061 		/*
1062 		 * No need to stop instance, as child has exited; remove
1063 		 * contract and move the instance to the offline state.
1064 		 */
1065 		switch (err = restarter_instance_update_states(local_handle,
1066 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1067 		    NULL)) {
1068 		case 0:
1069 		case ECONNRESET:
1070 			break;
1071 
1072 		default:
1073 			bad_error("restarter_instance_update_states", err);
1074 		}
1075 
1076 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1077 
1078 		if (inst->ri_i.i_primary_ctid != 0) {
1079 			inst->ri_m_inst =
1080 			    safe_scf_instance_create(local_handle);
1081 			inst->ri_mi_deleted = B_FALSE;
1082 
1083 			libscf_reget_instance(inst);
1084 			method_remove_contract(inst, B_TRUE, B_TRUE);
1085 
1086 			scf_instance_destroy(inst->ri_m_inst);
1087 			inst->ri_m_inst = NULL;
1088 		}
1089 
1090 		switch (err = restarter_instance_update_states(local_handle,
1091 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1092 		    NULL)) {
1093 		case 0:
1094 		case ECONNRESET:
1095 			break;
1096 
1097 		default:
1098 			bad_error("restarter_instance_update_states", err);
1099 		}
1100 
1101 		return (0);
1102 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1103 		/*
1104 		 * Stopping a wait service through means other than the pid
1105 		 * exiting should keep wait_thread() from restarting the
1106 		 * service, by removing it from the wait list.
1107 		 * We cannot remove it right now otherwise the process will
1108 		 * end up <defunct> so mark it to be ignored.
1109 		 */
1110 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1111 	}
1112 
1113 	switch (err = restarter_instance_update_states(local_handle, inst,
1114 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1115 	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
1116 	case 0:
1117 	case ECONNRESET:
1118 		break;
1119 
1120 	default:
1121 		bad_error("restarter_instance_update_states", err);
1122 	}
1123 
1124 	info = startd_zalloc(sizeof (fork_info_t));
1125 
1126 	info->sf_id = inst->ri_id;
1127 	info->sf_method_type = METHOD_STOP;
1128 	info->sf_event_type = re;
1129 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1130 
1131 	return (0);
1132 }
1133 
1134 /*
1135  * Returns
1136  *   ENOENT - fmri is not in instance_list
1137  *   0 - success
1138  *   ECONNRESET - success, though handle was rebound
1139  *   -1 - instance is in transition
1140  */
1141 int
1142 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1143 {
1144 	restarter_inst_t *rip;
1145 	int r;
1146 
1147 	rip = inst_lookup_by_name(fmri);
1148 	if (rip == NULL)
1149 		return (ENOENT);
1150 
1151 	r = stop_instance(h, rip, flags);
1152 
1153 	MUTEX_UNLOCK(&rip->ri_lock);
1154 
1155 	return (r);
1156 }
1157 
1158 static void
1159 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1160     unmaint_cause_t cause)
1161 {
1162 	ctid_t ctid;
1163 	scf_instance_t *inst;
1164 	int r;
1165 	uint_t tries = 0, msecs = ALLOC_DELAY;
1166 	const char *cp;
1167 
1168 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1169 
1170 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1171 		log_error(LOG_DEBUG, "Restarter: "
1172 		    "Ignoring maintenance off command because %s is not in the "
1173 		    "maintenance state.\n", rip->ri_i.i_fmri);
1174 		return;
1175 	}
1176 
1177 	switch (cause) {
1178 	case RUNMAINT_CLEAR:
1179 		cp = "clear requested";
1180 		break;
1181 	case RUNMAINT_DISABLE:
1182 		cp = "disable requested";
1183 		break;
1184 	default:
1185 #ifndef NDEBUG
1186 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1187 		    cause, __FILE__, __LINE__);
1188 #endif
1189 		abort();
1190 	}
1191 
1192 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1193 	    cp);
1194 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1195 	    "%s.\n", rip->ri_i.i_fmri, cp);
1196 
1197 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1198 	    RESTARTER_STATE_NONE, RERR_RESTART, "none");
1199 
1200 	/*
1201 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1202 	 * a primary contract.
1203 	 */
1204 	if (rip->ri_i.i_primary_ctid == 0)
1205 		return;
1206 
1207 	ctid = rip->ri_i.i_primary_ctid;
1208 	contract_abandon(ctid);
1209 	rip->ri_i.i_primary_ctid = 0;
1210 
1211 rep_retry:
1212 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1213 	case 0:
1214 		break;
1215 
1216 	case ECONNABORTED:
1217 		libscf_handle_rebind(h);
1218 		goto rep_retry;
1219 
1220 	case ENOENT:
1221 		/* Must have been deleted. */
1222 		return;
1223 
1224 	case EINVAL:
1225 	case ENOTSUP:
1226 	default:
1227 		bad_error("libscf_handle_rebind", r);
1228 	}
1229 
1230 again:
1231 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1232 	switch (r) {
1233 	case 0:
1234 		break;
1235 
1236 	case ENOMEM:
1237 		++tries;
1238 		if (tries < ALLOC_RETRY) {
1239 			(void) poll(NULL, 0, msecs);
1240 			msecs *= ALLOC_DELAY_MULT;
1241 			goto again;
1242 		}
1243 
1244 		uu_die("Insufficient memory.\n");
1245 		/* NOTREACHED */
1246 
1247 	case ECONNABORTED:
1248 		scf_instance_destroy(inst);
1249 		libscf_handle_rebind(h);
1250 		goto rep_retry;
1251 
1252 	case ECANCELED:
1253 		break;
1254 
1255 	case EPERM:
1256 	case EACCES:
1257 	case EROFS:
1258 		log_error(LOG_INFO,
1259 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1260 		    rip->ri_i.i_fmri, strerror(r));
1261 		break;
1262 
1263 	case EINVAL:
1264 	case EBADF:
1265 	default:
1266 		bad_error("restarter_remove_contract", r);
1267 	}
1268 
1269 	scf_instance_destroy(inst);
1270 }
1271 
1272 /*
1273  * enable_inst()
1274  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1275  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1276  *   disabled, move it to offline.  If the event is _DISABLE or
1277  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1278  *
1279  *   Returns
1280  *     0 - success
1281  *     ECONNRESET - h was rebound
1282  */
1283 static int
1284 enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
1285 {
1286 	restarter_instance_state_t state;
1287 	int r;
1288 
1289 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1290 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1291 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1292 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1293 	assert(instance_in_transition(inst) == 0);
1294 
1295 	state = inst->ri_i.i_state;
1296 
1297 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1298 		inst->ri_i.i_enabled = 1;
1299 
1300 		if (state == RESTARTER_STATE_UNINIT ||
1301 		    state == RESTARTER_STATE_DISABLED) {
1302 			/*
1303 			 * B_FALSE: Don't log an error if the log_instance()
1304 			 * fails because it will fail on the miniroot before
1305 			 * install-discovery runs.
1306 			 */
1307 			log_instance(inst, B_FALSE, "Enabled.");
1308 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1309 			    inst->ri_i.i_fmri);
1310 			(void) restarter_instance_update_states(h, inst,
1311 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1312 			    RERR_NONE, NULL);
1313 		} else {
1314 			log_framework(LOG_DEBUG, "Restarter: "
1315 			    "Not changing state of %s for enable command.\n",
1316 			    inst->ri_i.i_fmri);
1317 		}
1318 	} else {
1319 		inst->ri_i.i_enabled = 0;
1320 
1321 		switch (state) {
1322 		case RESTARTER_STATE_ONLINE:
1323 		case RESTARTER_STATE_DEGRADED:
1324 			r = stop_instance(h, inst, RSTOP_DISABLE);
1325 			return (r == ECONNRESET ? 0 : r);
1326 
1327 		case RESTARTER_STATE_OFFLINE:
1328 		case RESTARTER_STATE_UNINIT:
1329 			if (inst->ri_i.i_primary_ctid != 0) {
1330 				inst->ri_m_inst = safe_scf_instance_create(h);
1331 				inst->ri_mi_deleted = B_FALSE;
1332 
1333 				libscf_reget_instance(inst);
1334 				method_remove_contract(inst, B_TRUE, B_TRUE);
1335 
1336 				scf_instance_destroy(inst->ri_m_inst);
1337 			}
1338 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1339 			log_instance(inst, B_FALSE, "Disabled.");
1340 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1341 			    inst->ri_i.i_fmri);
1342 			(void) restarter_instance_update_states(h, inst,
1343 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1344 			    RERR_RESTART, NULL);
1345 			return (0);
1346 
1347 		case RESTARTER_STATE_DISABLED:
1348 			break;
1349 
1350 		case RESTARTER_STATE_MAINT:
1351 			/*
1352 			 * We only want to pull the instance out of maintenance
1353 			 * if the disable is on adminstrative request.  The
1354 			 * graph engine sends _DISABLE events whenever a
1355 			 * service isn't in the disabled state, and we don't
1356 			 * want to pull the service out of maintenance if,
1357 			 * for example, it is there due to a dependency cycle.
1358 			 */
1359 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1360 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1361 			break;
1362 
1363 		default:
1364 #ifndef NDEBUG
1365 			(void) fprintf(stderr, "Restarter instance %s has "
1366 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1367 #endif
1368 			abort();
1369 		}
1370 	}
1371 
1372 	return (0);
1373 }
1374 
1375 static void
1376 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
1377 {
1378 	fork_info_t *info;
1379 
1380 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1381 	assert(instance_in_transition(inst) == 0);
1382 	assert(inst->ri_method_thread == 0);
1383 
1384 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1385 	    inst->ri_i.i_fmri);
1386 
1387 	/* Services in the disabled and maintenance state are ignored */
1388 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1389 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1390 	    inst->ri_i.i_enabled == 0) {
1391 		log_framework(LOG_DEBUG,
1392 		    "%s: start_instance -> is maint/disabled\n",
1393 		    inst->ri_i.i_fmri);
1394 		return;
1395 	}
1396 
1397 	/* Already started instances are left alone */
1398 	if (instance_started(inst) == 1) {
1399 		log_framework(LOG_DEBUG,
1400 		    "%s: start_instance -> is already started\n",
1401 		    inst->ri_i.i_fmri);
1402 		return;
1403 	}
1404 
1405 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1406 
1407 	(void) restarter_instance_update_states(local_handle, inst,
1408 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, "none");
1409 
1410 	info = startd_zalloc(sizeof (fork_info_t));
1411 
1412 	info->sf_id = inst->ri_id;
1413 	info->sf_method_type = METHOD_START;
1414 	info->sf_event_type = RERR_NONE;
1415 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1416 }
1417 
1418 static int
1419 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1420 {
1421 	scf_instance_t *inst;
1422 	int ret = 0;
1423 
1424 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1425 		return (-1);
1426 
1427 	ret = restarter_inst_ractions_from_tty(inst);
1428 
1429 	scf_instance_destroy(inst);
1430 	return (ret);
1431 }
1432 
1433 static void
1434 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1435     const char *aux)
1436 {
1437 	fork_info_t *info;
1438 	scf_instance_t *scf_inst = NULL;
1439 
1440 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1441 	assert(aux != NULL);
1442 	assert(rip->ri_method_thread == 0);
1443 
1444 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
1445 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1446 	    rip->ri_i.i_fmri, aux);
1447 
1448 	/* Services in the maintenance state are ignored */
1449 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1450 		log_framework(LOG_DEBUG,
1451 		    "%s: maintain_instance -> is already in maintenance\n",
1452 		    rip->ri_i.i_fmri);
1453 		return;
1454 	}
1455 
1456 	/*
1457 	 * If aux state is "service_request" and
1458 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1459 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1460 	 */
1461 	if (strcmp(aux, "service_request") == 0 && libscf_fmri_get_instance(h,
1462 	    rip->ri_i.i_fmri, &scf_inst) == 0) {
1463 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1464 			if (restarter_inst_set_aux_fmri(scf_inst))
1465 				log_framework(LOG_DEBUG, "%s: "
1466 				    "restarter_inst_set_aux_fmri failed: ",
1467 				    rip->ri_i.i_fmri);
1468 		} else {
1469 			log_framework(LOG_DEBUG, "%s: "
1470 			    "restarter_inst_validate_ractions_aux_fmri "
1471 			    "failed: ", rip->ri_i.i_fmri);
1472 
1473 			if (restarter_inst_reset_aux_fmri(scf_inst))
1474 				log_framework(LOG_DEBUG, "%s: "
1475 				    "restarter_inst_reset_aux_fmri failed: ",
1476 				    rip->ri_i.i_fmri);
1477 		}
1478 		scf_instance_destroy(scf_inst);
1479 	}
1480 
1481 	if (immediate || !instance_started(rip)) {
1482 		if (rip->ri_i.i_primary_ctid != 0) {
1483 			rip->ri_m_inst = safe_scf_instance_create(h);
1484 			rip->ri_mi_deleted = B_FALSE;
1485 
1486 			libscf_reget_instance(rip);
1487 			method_remove_contract(rip, B_TRUE, B_TRUE);
1488 
1489 			scf_instance_destroy(rip->ri_m_inst);
1490 		}
1491 
1492 		(void) restarter_instance_update_states(h, rip,
1493 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1494 		    (char *)aux);
1495 		return;
1496 	}
1497 
1498 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1499 	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
1500 
1501 	log_transition(rip, MAINT_REQUESTED);
1502 
1503 	info = startd_zalloc(sizeof (*info));
1504 	info->sf_id = rip->ri_id;
1505 	info->sf_method_type = METHOD_STOP;
1506 	info->sf_event_type = RERR_RESTART;
1507 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1508 }
1509 
1510 static void
1511 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1512 {
1513 	scf_instance_t *inst;
1514 	scf_snapshot_t *snap;
1515 	fork_info_t *info;
1516 	int r;
1517 
1518 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1519 
1520 	log_instance(rip, B_TRUE, "Rereading configuration.");
1521 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1522 	    rip->ri_i.i_fmri);
1523 
1524 rep_retry:
1525 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1526 	switch (r) {
1527 	case 0:
1528 		break;
1529 
1530 	case ECONNABORTED:
1531 		libscf_handle_rebind(h);
1532 		goto rep_retry;
1533 
1534 	case ENOENT:
1535 		/* Must have been deleted. */
1536 		return;
1537 
1538 	case EINVAL:
1539 	case ENOTSUP:
1540 	default:
1541 		bad_error("libscf_fmri_get_instance", r);
1542 	}
1543 
1544 	snap = libscf_get_running_snapshot(inst);
1545 
1546 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1547 	    &rip->ri_utmpx_prefix);
1548 	switch (r) {
1549 	case 0:
1550 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1551 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1552 		break;
1553 
1554 	case ECONNABORTED:
1555 		scf_instance_destroy(inst);
1556 		scf_snapshot_destroy(snap);
1557 		libscf_handle_rebind(h);
1558 		goto rep_retry;
1559 
1560 	case ECANCELED:
1561 	case ENOENT:
1562 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1563 		break;
1564 
1565 	default:
1566 		bad_error("libscf_get_startd_properties", r);
1567 	}
1568 
1569 	if (instance_started(rip)) {
1570 		/* Refresh does not change the state. */
1571 		(void) restarter_instance_update_states(h, rip,
1572 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
1573 
1574 		info = startd_zalloc(sizeof (*info));
1575 		info->sf_id = rip->ri_id;
1576 		info->sf_method_type = METHOD_REFRESH;
1577 		info->sf_event_type = RERR_REFRESH;
1578 
1579 		assert(rip->ri_method_thread == 0);
1580 		rip->ri_method_thread =
1581 		    startd_thread_create(method_thread, info);
1582 	}
1583 
1584 	scf_snapshot_destroy(snap);
1585 	scf_instance_destroy(inst);
1586 }
1587 
/*
 * Printable names for restarter events.  The table is indexed with the
 * event type value when events are logged, so the order of the entries
 * matters.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE"
};
1594 
1595 /*
1596  * void *restarter_process_events()
1597  *
1598  *   Called in a separate thread to process the events on an instance's
1599  *   queue.  Empties the queue completely, and tries to keep the thread
1600  *   around for a little while after the queue is empty to save on
1601  *   startup costs.
1602  */
1603 static void *
1604 restarter_process_events(void *arg)
1605 {
1606 	scf_handle_t *h;
1607 	restarter_instance_qentry_t *event;
1608 	restarter_inst_t *rip;
1609 	char *fmri = (char *)arg;
1610 	struct timespec to;
1611 
1612 	assert(fmri != NULL);
1613 
1614 	h = libscf_handle_create_bound_loop();
1615 
1616 	/* grab the queue lock */
1617 	rip = inst_lookup_queue(fmri);
1618 	if (rip == NULL)
1619 		goto out;
1620 
1621 again:
1622 
1623 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1624 		restarter_inst_t *inst;
1625 
1626 		/* drop the queue lock */
1627 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1628 
1629 		/*
1630 		 * Grab the inst lock -- this waits until any outstanding
1631 		 * method finishes running.
1632 		 */
1633 		inst = inst_lookup_by_name(fmri);
1634 		if (inst == NULL) {
1635 			/* Getting deleted in the middle isn't an error. */
1636 			goto cont;
1637 		}
1638 
1639 		assert(instance_in_transition(inst) == 0);
1640 
1641 		/* process the event */
1642 		switch (event->riq_type) {
1643 		case RESTARTER_EVENT_TYPE_ENABLE:
1644 		case RESTARTER_EVENT_TYPE_DISABLE:
1645 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1646 			(void) enable_inst(h, inst, event->riq_type);
1647 			break;
1648 
1649 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1650 			restarter_delete_inst(inst);
1651 			inst = NULL;
1652 			goto cont;
1653 
1654 		case RESTARTER_EVENT_TYPE_STOP:
1655 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1656 			break;
1657 
1658 		case RESTARTER_EVENT_TYPE_START:
1659 			start_instance(h, inst);
1660 			break;
1661 
1662 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1663 			maintain_instance(h, inst, 0, "dependency_cycle");
1664 			break;
1665 
1666 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1667 			maintain_instance(h, inst, 0, "invalid_dependency");
1668 			break;
1669 
1670 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1671 			if (event_from_tty(h, inst) == 0)
1672 				maintain_instance(h, inst, 0,
1673 				    "service_request");
1674 			else
1675 				maintain_instance(h, inst, 0,
1676 				    "administrative_request");
1677 			break;
1678 
1679 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1680 			if (event_from_tty(h, inst) == 0)
1681 				maintain_instance(h, inst, 1,
1682 				    "service_request");
1683 			else
1684 				maintain_instance(h, inst, 1,
1685 				    "administrative_request");
1686 			break;
1687 
1688 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1689 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1690 			break;
1691 
1692 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1693 			refresh_instance(h, inst);
1694 			break;
1695 
1696 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1697 			log_framework(LOG_WARNING, "Restarter: "
1698 			    "%s command (for %s) unimplemented.\n",
1699 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1700 			break;
1701 
1702 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1703 			if (!instance_started(inst)) {
1704 				log_framework(LOG_DEBUG, "Restarter: "
1705 				    "Not restarting %s; not running.\n",
1706 				    inst->ri_i.i_fmri);
1707 			} else {
1708 				/*
1709 				 * Stop the instance.  If it can be restarted,
1710 				 * the graph engine will send a new event.
1711 				 */
1712 				(void) stop_instance(h, inst, RSTOP_RESTART);
1713 			}
1714 			break;
1715 
1716 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1717 		default:
1718 #ifndef NDEBUG
1719 			uu_warn("%s:%d: Bad restarter event %d.  "
1720 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1721 #endif
1722 			abort();
1723 		}
1724 
1725 		assert(inst != NULL);
1726 		MUTEX_UNLOCK(&inst->ri_lock);
1727 
1728 cont:
1729 		/* grab the queue lock */
1730 		rip = inst_lookup_queue(fmri);
1731 		if (rip == NULL)
1732 			goto out;
1733 
1734 		/* delete the event */
1735 		uu_list_remove(rip->ri_queue, event);
1736 		startd_free(event, sizeof (restarter_instance_qentry_t));
1737 	}
1738 
1739 	assert(rip != NULL);
1740 
1741 	/*
1742 	 * Try to preserve the thread for a little while for future use.
1743 	 */
1744 	to.tv_sec = 3;
1745 	to.tv_nsec = 0;
1746 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1747 	    &rip->ri_queue_lock, &to);
1748 
1749 	if (uu_list_first(rip->ri_queue) != NULL)
1750 		goto again;
1751 
1752 	rip->ri_queue_thread = 0;
1753 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1754 out:
1755 	(void) scf_handle_unbind(h);
1756 	scf_handle_destroy(h);
1757 	free(fmri);
1758 	return (NULL);
1759 }
1760 
1761 static int
1762 is_admin_event(restarter_event_type_t t) {
1763 
1764 	switch (t) {
1765 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1766 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1767 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1768 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1769 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1770 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1771 		return (1);
1772 	default:
1773 		return (0);
1774 	}
1775 }
1776 
1777 static void
1778 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1779 {
1780 	restarter_instance_qentry_t *qe;
1781 	int r;
1782 
1783 	assert(PTHREAD_MUTEX_HELD(&ri->ri_queue_lock));
1784 	assert(!PTHREAD_MUTEX_HELD(&ri->ri_lock));
1785 
1786 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1787 	qe->riq_type = e->rpe_type;
1788 
1789 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1790 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1791 	assert(r == 0);
1792 }
1793 
1794 /*
1795  * void *restarter_event_thread()
1796  *
1797  *  Handle incoming graph events by placing them on a per-instance
1798  *  queue.  We can't lock the main part of the instance structure, so
1799  *  just modify the seprarately locked event queue portion.
1800  */
1801 /*ARGSUSED*/
1802 static void *
1803 restarter_event_thread(void *unused)
1804 {
1805 	scf_handle_t *h;
1806 
1807 	/*
1808 	 * This is a new thread, and thus, gets its own handle
1809 	 * to the repository.
1810 	 */
1811 	h = libscf_handle_create_bound_loop();
1812 
1813 	MUTEX_LOCK(&ru->restarter_update_lock);
1814 
1815 	/*CONSTCOND*/
1816 	while (1) {
1817 		restarter_protocol_event_t *e;
1818 
1819 		while (ru->restarter_update_wakeup == 0)
1820 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1821 			    &ru->restarter_update_lock);
1822 
1823 		ru->restarter_update_wakeup = 0;
1824 
1825 		while ((e = restarter_event_dequeue()) != NULL) {
1826 			restarter_inst_t *rip;
1827 			char *fmri;
1828 
1829 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1830 
1831 			/*
1832 			 * ADD_INSTANCE is special: there's likely no
1833 			 * instance structure yet, so we need to handle the
1834 			 * addition synchronously.
1835 			 */
1836 			switch (e->rpe_type) {
1837 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1838 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1839 					log_error(LOG_INFO, "Restarter: "
1840 					    "Could not add %s.\n", e->rpe_inst);
1841 
1842 				MUTEX_LOCK(&st->st_load_lock);
1843 				if (--st->st_load_instances == 0)
1844 					(void) pthread_cond_broadcast(
1845 					    &st->st_load_cv);
1846 				MUTEX_UNLOCK(&st->st_load_lock);
1847 
1848 				goto nolookup;
1849 			}
1850 
1851 			/*
1852 			 * Lookup the instance, locking only the event queue.
1853 			 * Can't grab ri_lock here because it might be held
1854 			 * by a long-running method.
1855 			 */
1856 			rip = inst_lookup_queue(e->rpe_inst);
1857 			if (rip == NULL) {
1858 				log_error(LOG_INFO, "Restarter: "
1859 				    "Ignoring %s command for unknown service "
1860 				    "%s.\n", event_names[e->rpe_type],
1861 				    e->rpe_inst);
1862 				goto nolookup;
1863 			}
1864 
1865 			/* Keep ADMIN events from filling up the queue. */
1866 			if (is_admin_event(e->rpe_type) &&
1867 			    uu_list_numnodes(rip->ri_queue) >
1868 			    RINST_QUEUE_THRESHOLD) {
1869 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1870 				log_instance(rip, B_TRUE, "Instance event "
1871 				    "queue overflow.  Dropping administrative "
1872 				    "request.");
1873 				log_framework(LOG_DEBUG, "%s: Instance event "
1874 				    "queue overflow.  Dropping administrative "
1875 				    "request.\n", rip->ri_i.i_fmri);
1876 				goto nolookup;
1877 			}
1878 
1879 			/* Now add the event to the instance queue. */
1880 			restarter_queue_event(rip, e);
1881 
1882 			if (rip->ri_queue_thread == 0) {
1883 				/*
1884 				 * Start a thread if one isn't already
1885 				 * running.
1886 				 */
1887 				fmri = safe_strdup(e->rpe_inst);
1888 				rip->ri_queue_thread =  startd_thread_create(
1889 				    restarter_process_events, (void *)fmri);
1890 			} else {
1891 				/*
1892 				 * Signal the existing thread that there's
1893 				 * a new event.
1894 				 */
1895 				(void) pthread_cond_broadcast(
1896 				    &rip->ri_queue_cv);
1897 			}
1898 
1899 			MUTEX_UNLOCK(&rip->ri_queue_lock);
1900 nolookup:
1901 			restarter_event_release(e);
1902 
1903 			MUTEX_LOCK(&ru->restarter_update_lock);
1904 		}
1905 	}
1906 
1907 	/*
1908 	 * Unreachable for now -- there's currently no graceful cleanup
1909 	 * called on exit().
1910 	 */
1911 	(void) scf_handle_unbind(h);
1912 	scf_handle_destroy(h);
1913 	return (NULL);
1914 }
1915 
1916 static restarter_inst_t *
1917 contract_to_inst(ctid_t ctid)
1918 {
1919 	restarter_inst_t *inst;
1920 	int id;
1921 
1922 	id = lookup_inst_by_contract(ctid);
1923 	if (id == -1)
1924 		return (NULL);
1925 
1926 	inst = inst_lookup_by_id(id);
1927 	if (inst != NULL) {
1928 		/*
1929 		 * Since ri_lock isn't held by the contract id lookup, this
1930 		 * instance may have been restarted and now be in a new
1931 		 * contract, making the old contract no longer valid for this
1932 		 * instance.
1933 		 */
1934 		if (ctid != inst->ri_i.i_primary_ctid) {
1935 			MUTEX_UNLOCK(&inst->ri_lock);
1936 			inst = NULL;
1937 		}
1938 	}
1939 	return (inst);
1940 }
1941 
1942 /*
1943  * void contract_action()
1944  *   Take action on contract events.
1945  */
1946 static void
1947 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
1948     uint32_t type)
1949 {
1950 	const char *fmri = inst->ri_i.i_fmri;
1951 
1952 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1953 
1954 	/*
1955 	 * If startd has stopped this contract, there is no need to
1956 	 * stop it again.
1957 	 */
1958 	if (inst->ri_i.i_primary_ctid > 0 &&
1959 	    inst->ri_i.i_primary_ctid_stopped)
1960 		return;
1961 
1962 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
1963 	    | CT_PR_EV_HWERR)) == 0) {
1964 		/*
1965 		 * There shouldn't be other events, since that's not how we set
1966 		 * the terms. Thus, just log an error and drive on.
1967 		 */
1968 		log_framework(LOG_NOTICE,
1969 		    "%s: contract %ld received unexpected critical event "
1970 		    "(%d)\n", fmri, id, type);
1971 		return;
1972 	}
1973 
1974 	assert(instance_in_transition(inst) == 0);
1975 
1976 	if (instance_is_wait_style(inst)) {
1977 		/*
1978 		 * We ignore all events; if they impact the
1979 		 * process we're monitoring, then the
1980 		 * wait_thread will stop the instance.
1981 		 */
1982 		log_framework(LOG_DEBUG,
1983 		    "%s: ignoring contract event on wait-style service\n",
1984 		    fmri);
1985 	} else {
1986 		/*
1987 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
1988 		 */
1989 		switch (type) {
1990 		case CT_PR_EV_EMPTY:
1991 			(void) stop_instance(h, inst, RSTOP_EXIT);
1992 			break;
1993 		case CT_PR_EV_CORE:
1994 			(void) stop_instance(h, inst, RSTOP_CORE);
1995 			break;
1996 		case CT_PR_EV_SIGNAL:
1997 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
1998 			break;
1999 		case CT_PR_EV_HWERR:
2000 			(void) stop_instance(h, inst, RSTOP_HWERR);
2001 			break;
2002 		}
2003 	}
2004 }
2005 
2006 /*
2007  * void *restarter_contract_event_thread(void *)
2008  *   Listens to the process contract bundle for critical events, taking action
2009  *   on events from contracts we know we are responsible for.
2010  */
2011 /*ARGSUSED*/
2012 static void *
2013 restarter_contracts_event_thread(void *unused)
2014 {
2015 	int fd, err;
2016 	scf_handle_t *local_handle;
2017 
2018 	/*
2019 	 * Await graph load completion.  That is, stop here, until we've scanned
2020 	 * the repository for contract - instance associations.
2021 	 */
2022 	MUTEX_LOCK(&st->st_load_lock);
2023 	while (!(st->st_load_complete && st->st_load_instances == 0))
2024 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2025 	MUTEX_UNLOCK(&st->st_load_lock);
2026 
2027 	/*
2028 	 * This is a new thread, and thus, gets its own handle
2029 	 * to the repository.
2030 	 */
2031 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2032 		uu_die("Unable to bind a new repository handle: %s\n",
2033 		    scf_strerror(scf_error()));
2034 
2035 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2036 	if (fd == -1)
2037 		uu_die("process bundle open failed");
2038 
2039 	/*
2040 	 * Make sure we get all events (including those generated by configd
2041 	 * before this thread was started).
2042 	 */
2043 	err = ct_event_reset(fd);
2044 	assert(err == 0);
2045 
2046 	for (;;) {
2047 		int efd, sfd;
2048 		ct_evthdl_t ev;
2049 		uint32_t type;
2050 		ctevid_t evid;
2051 		ct_stathdl_t status;
2052 		ctid_t ctid;
2053 		restarter_inst_t *inst;
2054 		uint64_t cookie;
2055 
2056 		if (err = ct_event_read_critical(fd, &ev)) {
2057 			log_error(LOG_WARNING,
2058 			    "Error reading next contract event: %s",
2059 			    strerror(err));
2060 			continue;
2061 		}
2062 
2063 		evid = ct_event_get_evid(ev);
2064 		ctid = ct_event_get_ctid(ev);
2065 		type = ct_event_get_type(ev);
2066 
2067 		/* Fetch cookie. */
2068 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2069 		    < 0) {
2070 			ct_event_free(ev);
2071 			continue;
2072 		}
2073 
2074 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2075 			log_framework(LOG_WARNING, "Could not get status for "
2076 			    "contract %ld: %s\n", ctid, strerror(err));
2077 
2078 			startd_close(sfd);
2079 			ct_event_free(ev);
2080 			continue;
2081 		}
2082 
2083 		cookie = ct_status_get_cookie(status);
2084 
2085 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2086 		    "cookie %lld\n", type, ctid, cookie);
2087 
2088 		ct_status_free(status);
2089 
2090 		startd_close(sfd);
2091 
2092 		/*
2093 		 * svc.configd(1M) restart handling performed by the
2094 		 * fork_configd_thread.  We don't acknowledge, as that thread
2095 		 * will do so.
2096 		 */
2097 		if (cookie == CONFIGD_COOKIE) {
2098 			ct_event_free(ev);
2099 			continue;
2100 		}
2101 
2102 		inst = NULL;
2103 		if (storing_contract != 0 &&
2104 		    (inst = contract_to_inst(ctid)) == NULL) {
2105 			/*
2106 			 * This can happen for two reasons:
2107 			 * - method_run() has not yet stored the
2108 			 *    the contract into the internal hash table.
2109 			 * - we receive an EMPTY event for an abandoned
2110 			 *    contract.
2111 			 * If there is any contract in the process of
2112 			 * being stored into the hash table then re-read
2113 			 * the event later.
2114 			 */
2115 			log_framework(LOG_DEBUG,
2116 			    "Reset event %d for unknown "
2117 			    "contract id %ld\n", type, ctid);
2118 
2119 			/* don't go too fast */
2120 			(void) poll(NULL, 0, 100);
2121 
2122 			(void) ct_event_reset(fd);
2123 			ct_event_free(ev);
2124 			continue;
2125 		}
2126 
2127 		/*
2128 		 * Do not call contract_to_inst() again if first
2129 		 * call succeeded.
2130 		 */
2131 		if (inst == NULL)
2132 			inst = contract_to_inst(ctid);
2133 		if (inst == NULL) {
2134 			/*
2135 			 * This can happen if we receive an EMPTY
2136 			 * event for an abandoned contract.
2137 			 */
2138 			log_framework(LOG_DEBUG,
2139 			    "Received event %d for unknown contract id "
2140 			    "%ld\n", type, ctid);
2141 		} else {
2142 			log_framework(LOG_DEBUG,
2143 			    "Received event %d for contract id "
2144 			    "%ld (%s)\n", type, ctid,
2145 			    inst->ri_i.i_fmri);
2146 
2147 			contract_action(local_handle, inst, ctid, type);
2148 
2149 			MUTEX_UNLOCK(&inst->ri_lock);
2150 		}
2151 
2152 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2153 		    O_WRONLY);
2154 		if (efd != -1) {
2155 			(void) ct_ctl_ack(efd, evid);
2156 			startd_close(efd);
2157 		}
2158 
2159 		ct_event_free(ev);
2160 
2161 	}
2162 
2163 	/*NOTREACHED*/
2164 	return (NULL);
2165 }
2166 
2167 /*
2168  * Timeout queue, processed by restarter_timeouts_event_thread().
2169  */
2170 timeout_queue_t *timeouts;
2171 static uu_list_pool_t *timeout_pool;
2172 
2173 typedef struct timeout_update {
2174 	pthread_mutex_t		tu_lock;
2175 	pthread_cond_t		tu_cv;
2176 	int			tu_wakeup;
2177 } timeout_update_t;
2178 
2179 timeout_update_t *tu;
2180 
/*
 * FMRIs for which is_timeout_ovr() reports a timeout override.  The list
 * is terminated by a NULL entry.
 */
static const char *timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2195 
2196 int
2197 is_timeout_ovr(restarter_inst_t *inst)
2198 {
2199 	int i;
2200 
2201 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2202 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2203 			log_instance(inst, B_TRUE, "Timeout override by "
2204 			    "svc.startd.  Using infinite timeout.");
2205 			return (1);
2206 		}
2207 	}
2208 
2209 	return (0);
2210 }
2211 
2212 /*ARGSUSED*/
2213 static int
2214 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2215 {
2216 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2217 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2218 
2219 	if (t1 > t2)
2220 		return (1);
2221 	else if (t1 < t2)
2222 		return (-1);
2223 	return (0);
2224 }
2225 
2226 void
2227 timeout_init()
2228 {
2229 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2230 
2231 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2232 
2233 	timeout_pool = startd_list_pool_create("timeouts",
2234 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2235 	    timeout_compare, UU_LIST_POOL_DEBUG);
2236 	assert(timeout_pool != NULL);
2237 
2238 	timeouts->tq_list = startd_list_create(timeout_pool,
2239 	    timeouts, UU_LIST_SORTED);
2240 	assert(timeouts->tq_list != NULL);
2241 
2242 	tu = startd_zalloc(sizeof (timeout_update_t));
2243 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2244 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2245 }
2246 
2247 void
2248 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2249 {
2250 	hrtime_t now, timeout;
2251 	timeout_entry_t *entry;
2252 	uu_list_index_t idx;
2253 
2254 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2255 
2256 	now = gethrtime();
2257 
2258 	/*
2259 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2260 	 * just return.
2261 	 */
2262 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2263 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2264 		    "treating as infinite.");
2265 		return;
2266 	}
2267 
2268 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2269 	timeout = now + (timeout_sec * 1000000000LL);
2270 
2271 	entry = startd_alloc(sizeof (timeout_entry_t));
2272 	entry->te_timeout = timeout;
2273 	entry->te_ctid = cid;
2274 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2275 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2276 	entry->te_fired = 0;
2277 	/* Insert the calculated timeout time onto the queue. */
2278 	MUTEX_LOCK(&timeouts->tq_lock);
2279 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2280 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2281 	uu_list_insert(timeouts->tq_list, entry, idx);
2282 	MUTEX_UNLOCK(&timeouts->tq_lock);
2283 
2284 	assert(inst->ri_timeout == NULL);
2285 	inst->ri_timeout = entry;
2286 
2287 	MUTEX_LOCK(&tu->tu_lock);
2288 	tu->tu_wakeup = 1;
2289 	(void) pthread_cond_broadcast(&tu->tu_cv);
2290 	MUTEX_UNLOCK(&tu->tu_lock);
2291 }
2292 
2293 
2294 void
2295 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2296 {
2297 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2298 
2299 	if (inst->ri_timeout == NULL)
2300 		return;
2301 
2302 	assert(inst->ri_timeout->te_ctid == cid);
2303 
2304 	MUTEX_LOCK(&timeouts->tq_lock);
2305 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2306 	MUTEX_UNLOCK(&timeouts->tq_lock);
2307 
2308 	free(inst->ri_timeout->te_fmri);
2309 	free(inst->ri_timeout->te_logstem);
2310 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2311 	inst->ri_timeout = NULL;
2312 }
2313 
2314 static int
2315 timeout_now()
2316 {
2317 	timeout_entry_t *e;
2318 	hrtime_t now;
2319 	int ret;
2320 
2321 	now = gethrtime();
2322 
2323 	/*
2324 	 * Walk through the (sorted) timeouts list.  While the timeout
2325 	 * at the head of the list is <= the current time, kill the
2326 	 * method.
2327 	 */
2328 	MUTEX_LOCK(&timeouts->tq_lock);
2329 
2330 	for (e = uu_list_first(timeouts->tq_list);
2331 	    e != NULL && e->te_timeout <= now;
2332 	    e = uu_list_next(timeouts->tq_list, e)) {
2333 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2334 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2335 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2336 		    "Method or service exit timed out.  Killing contract %ld.",
2337 		    e->te_ctid);
2338 		e->te_fired = 1;
2339 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2340 	}
2341 
2342 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2343 		ret = 0;
2344 	else
2345 		ret = -1;
2346 
2347 	MUTEX_UNLOCK(&timeouts->tq_lock);
2348 
2349 	return (ret);
2350 }
2351 
2352 /*
2353  * void *restarter_timeouts_event_thread(void *)
2354  *   Responsible for monitoring the method timeouts.  This thread must
2355  *   be started before any methods are called.
2356  */
2357 /*ARGSUSED*/
2358 static void *
2359 restarter_timeouts_event_thread(void *unused)
2360 {
2361 	/*
2362 	 * Timeouts are entered on a priority queue, which is processed by
2363 	 * this thread.  As timeouts are specified in seconds, we'll do
2364 	 * the necessary processing every second, as long as the queue
2365 	 * is not empty.
2366 	 */
2367 
2368 	/*CONSTCOND*/
2369 	while (1) {
2370 		/*
2371 		 * As long as the timeout list isn't empty, process it
2372 		 * every second.
2373 		 */
2374 		if (timeout_now() == 0) {
2375 			(void) sleep(1);
2376 			continue;
2377 		}
2378 
2379 		/* The list is empty, wait until we have more timeouts. */
2380 		MUTEX_LOCK(&tu->tu_lock);
2381 
2382 		while (tu->tu_wakeup == 0)
2383 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2384 
2385 		tu->tu_wakeup = 0;
2386 		MUTEX_UNLOCK(&tu->tu_lock);
2387 	}
2388 
2389 	return (NULL);
2390 }
2391 
2392 void
2393 restarter_start()
2394 {
2395 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2396 	(void) startd_thread_create(restarter_event_thread, NULL);
2397 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2398 	(void) startd_thread_create(wait_thread, NULL);
2399 }
2400 
2401 
2402 void
2403 restarter_init()
2404 {
2405 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2406 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2407 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2408 	(void) memset(&instance_list, 0, sizeof (instance_list));
2409 
2410 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2411 	instance_list.ril_instance_list = startd_list_create(
2412 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2413 
2414 	restarter_queue_pool = startd_list_pool_create(
2415 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2416 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2417 	    UU_LIST_POOL_DEBUG);
2418 
2419 	contract_list_pool = startd_list_pool_create(
2420 	    "contract_list", sizeof (contract_entry_t),
2421 	    offsetof(contract_entry_t,  ce_link), NULL,
2422 	    UU_LIST_POOL_DEBUG);
2423 	contract_hash_init();
2424 
2425 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2426 }
2427