xref: /titanic_51/usr/src/cmd/svc/startd/restarter.c (revision 11a2bb386c90df26ed2d2d6086a56cb503465e33)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * restarter.c - service manipulation
31  *
32  * This component manages services whose restarter is svc.startd, the standard
33  * restarter.  It translates restarter protocol events from the graph engine
34  * into actions on processes, as a delegated restarter would do.
35  *
36  * The master restarter manages a number of always-running threads:
37  *   - restarter event thread: events from the graph engine
38  *   - timeout thread: thread to fire queued timeouts
39  *   - contract thread: thread to handle contract events
40  *   - wait thread: thread to handle wait-based services
41  *
42  * The other threads are created as-needed:
43  *   - per-instance method threads
44  *   - per-instance event processing threads
45  *
46  * The interaction of all threads must result in the following conditions
47  * being satisfied (on a per-instance basis):
48  *   - restarter events must be processed in order
49  *   - method execution must be serialized
50  *   - instance delete must be held until outstanding methods are complete
51  *   - contract events shouldn't be processed while a method is running
52  *   - timeouts should fire even when a method is running
53  *
54  * Service instances are represented by restarter_inst_t's and are kept in the
55  * instance_list list.
56  *
57  * Service States
58  *   The current state of a service instance is kept in
59  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
60  *   some time, then before we effect the transition we set
61  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
62  *   rotate i_next_state to i_state and set i_next_state to
63  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
64  *   held.  The exception is when we launch methods, which are done with
65  *   a separate thread.  To keep any other threads from grabbing ri_lock before
66  *   method_thread() does, we set ri_method_thread to the thread id of the
67  *   method thread, and when it is nonzero any thread with a different thread id
68  *   waits on ri_method_cv.
69  *
70  * Method execution is serialized by blocking on ri_method_cv in
71  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
72  * also prevents the instance structure from being deleted until all
73  * outstanding operations such as method_thread() have finished.
74  *
75  * Lock ordering:
76  *
77  * dgraph_lock [can be held when taking:]
78  *   utmpx_lock
79  *   dictionary->dict_lock
80  *   st->st_load_lock
81  *   wait_info_lock
82  *   ru->restarter_update_lock
83  *     restarter_queue->rpeq_lock
84  *   instance_list.ril_lock
85  *     inst->ri_lock
86  *   st->st_configd_live_lock
87  *
88  * instance_list.ril_lock
89  *   graph_queue->gpeq_lock
90  *   gu->gu_lock
91  *   st->st_configd_live_lock
92  *   dictionary->dict_lock
93  *   inst->ri_lock
94  *     graph_queue->gpeq_lock
95  *     gu->gu_lock
96  *     tu->tu_lock
97  *     tq->tq_lock
98  *     inst->ri_queue_lock
99  *       wait_info_lock
100  *       bp->cb_lock
101  *     utmpx_lock
102  *
103  * single_user_thread_lock
104  *   wait_info_lock
105  *   utmpx_lock
106  *
107  * gu_freeze_lock
108  *
109  * logbuf_mutex nests inside pretty much everything.
110  */
111 
112 #include <sys/contract/process.h>
113 #include <sys/ctfs.h>
114 #include <sys/stat.h>
115 #include <sys/time.h>
116 #include <sys/types.h>
117 #include <sys/uio.h>
118 #include <sys/wait.h>
119 #include <assert.h>
120 #include <errno.h>
121 #include <fcntl.h>
122 #include <libcontract.h>
123 #include <libcontract_priv.h>
124 #include <libintl.h>
125 #include <librestart.h>
126 #include <librestart_priv.h>
127 #include <libuutil.h>
128 #include <limits.h>
129 #include <poll.h>
130 #include <port.h>
131 #include <pthread.h>
132 #include <stdarg.h>
133 #include <stdio.h>
134 #include <strings.h>
135 #include <unistd.h>
136 
137 #include "startd.h"
138 #include "protocol.h"
139 
140 static uu_list_pool_t *restarter_instance_pool;	/* pool used to init each instance's ri_link list node */
141 static restarter_instance_list_t instance_list;	/* all restarter_inst_t's; ril_lock is taken around ril_instance_list access */
142 
143 static uu_list_pool_t *restarter_queue_pool;	/* pool from which each instance's ri_queue list is created */
144 
145 /*ARGSUSED*/
146 static int
147 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
148     void *private)
149 {
150 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
151 	int rc_id = *(int *)rc_arg;
152 
153 	if (lc_id > rc_id)
154 		return (1);
155 	if (lc_id < rc_id)
156 		return (-1);
157 	return (0);
158 }
159 
160 static restarter_inst_t *
161 inst_lookup_by_name(const char *name)
162 {
163 	int id;
164 
165 	id = dict_lookup_byname(name);
166 	if (id == -1)
167 		return (NULL);
168 
169 	return (inst_lookup_by_id(id));
170 }
171 
172 restarter_inst_t *
173 inst_lookup_by_id(int id)
174 {
175 	restarter_inst_t *inst;
176 
177 	MUTEX_LOCK(&instance_list.ril_lock);
178 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
179 	if (inst != NULL)
180 		MUTEX_LOCK(&inst->ri_lock);
181 	MUTEX_UNLOCK(&instance_list.ril_lock);
182 
183 	if (inst != NULL) {
184 		while (inst->ri_method_thread != 0 &&
185 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
186 			++inst->ri_method_waiters;
187 			(void) pthread_cond_wait(&inst->ri_method_cv,
188 			    &inst->ri_lock);
189 			assert(inst->ri_method_waiters > 0);
190 			--inst->ri_method_waiters;
191 		}
192 	}
193 
194 	return (inst);
195 }
196 
197 static restarter_inst_t *
198 inst_lookup_queue(const char *name)
199 {
200 	int id;
201 	restarter_inst_t *inst;
202 
203 	id = dict_lookup_byname(name);
204 	if (id == -1)
205 		return (NULL);
206 
207 	MUTEX_LOCK(&instance_list.ril_lock);
208 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
209 	if (inst != NULL)
210 		MUTEX_LOCK(&inst->ri_queue_lock);
211 	MUTEX_UNLOCK(&instance_list.ril_lock);
212 
213 	return (inst);
214 }
215 
216 const char *
217 service_style(int flags)
218 {
219 	switch (flags & RINST_STYLE_MASK) {
220 	case RINST_CONTRACT:	return ("contract");
221 	case RINST_TRANSIENT:	return ("transient");
222 	case RINST_WAIT:	return ("wait");
223 
224 	default:
225 #ifndef NDEBUG
226 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
227 #endif
228 		abort();
229 		/* NOTREACHED */
230 	}
231 }
232 
233 /*
234  * Fails with ECONNABORTED or ECANCELED.
235  */
236 static int
237 check_contract(restarter_inst_t *inst, boolean_t primary,
238     scf_instance_t *scf_inst)
239 {
240 	ctid_t *ctidp;
241 	int fd, r;
242 
243 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
244 	    &inst->ri_i.i_transient_ctid;
245 
246 	assert(*ctidp >= 1);
247 
248 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
249 	if (fd >= 0) {
250 		r = close(fd);
251 		assert(r == 0);
252 		return (0);
253 	}
254 
255 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
256 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
257 	switch (r) {
258 	case 0:
259 	case ECONNABORTED:
260 	case ECANCELED:
261 		*ctidp = 0;
262 		return (r);
263 
264 	case ENOMEM:
265 		uu_die("Out of memory\n");
266 		/* NOTREACHED */
267 
268 	case EPERM:
269 		uu_die("Insufficient privilege.\n");
270 		/* NOTREACHED */
271 
272 	case EACCES:
273 		uu_die("Repository backend access denied.\n");
274 		/* NOTREACHED */
275 
276 	case EROFS:
277 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
278 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
279 		return (0);
280 
281 	case EINVAL:
282 	case EBADF:
283 	default:
284 		assert(0);
285 		abort();
286 		/* NOTREACHED */
287 	}
288 }
289 
290 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
291 
292 /*
293  * int restarter_insert_inst(scf_handle_t *, char *)
294  *   If the inst is already in the restarter list, return its id.  If the inst
295  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
296  *   states, insert it into the list, and return 0.
297  *
298  *   Fails with
299  *     ENOENT - name is not in the repository
300  */
301 static int
302 restarter_insert_inst(scf_handle_t *h, const char *name)
303 {
304 	int id, r;
305 	restarter_inst_t *inst;
306 	uu_list_index_t idx;
307 	scf_service_t *scf_svc;
308 	scf_instance_t *scf_inst;
309 	scf_snapshot_t *snap;
310 	scf_propertygroup_t *pg;
311 	char *svc_name, *inst_name;
312 	char logfilebuf[PATH_MAX];
313 	char *c;
314 	boolean_t do_commit_states;
315 	restarter_instance_state_t state, next_state;
316 	protocol_states_t *ps;
317 	pid_t start_pid;
318 
319 	MUTEX_LOCK(&instance_list.ril_lock);
320 
321 	/*
322 	 * We don't use inst_lookup_by_name() here because we want the lookup
323 	 * & insert to be atomic.
324 	 */
325 	id = dict_lookup_byname(name);
326 	if (id != -1) {
327 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
328 		    &idx);
329 		if (inst != NULL) {
330 			MUTEX_UNLOCK(&instance_list.ril_lock);
331 			return (0);
332 		}
333 	}
334 
335 	/* Allocate an instance */
336 	inst = startd_zalloc(sizeof (restarter_inst_t));
337 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
338 	inst->ri_utmpx_prefix[0] = '\0';
339 
340 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
341 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
342 
343 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
344 
345 	/*
346 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
347 	 * just in case.
348 	 */
349 	inst->ri_id = (id != -1 ? id : dict_insert(name));
350 
351 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
352 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
353 
354 	scf_svc = safe_scf_service_create(h);
355 	scf_inst = safe_scf_instance_create(h);
356 	pg = safe_scf_pg_create(h);
357 	svc_name = startd_alloc(max_scf_name_size);
358 	inst_name = startd_alloc(max_scf_name_size);
359 
360 rep_retry:
361 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
362 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
363 		switch (scf_error()) {
364 		case SCF_ERROR_CONNECTION_BROKEN:
365 			libscf_handle_rebind(h);
366 			goto rep_retry;
367 
368 		case SCF_ERROR_NOT_FOUND:
369 deleted:
370 			MUTEX_UNLOCK(&instance_list.ril_lock);
371 			startd_free(inst_name, max_scf_name_size);
372 			startd_free(svc_name, max_scf_name_size);
373 			scf_pg_destroy(pg);
374 			scf_instance_destroy(scf_inst);
375 			scf_service_destroy(scf_svc);
376 			startd_free((void *)inst->ri_i.i_fmri,
377 			    strlen(inst->ri_i.i_fmri) + 1);
378 			startd_free(inst, sizeof (restarter_inst_t));
379 			return (ENOENT);
380 		}
381 
382 		uu_die("Can't decode FMRI %s: %s\n", name,
383 		    scf_strerror(scf_error()));
384 	}
385 
386 	/*
387 	 * If there's no running snapshot, then we execute using the editing
388 	 * snapshot.  Pending snapshots will be taken later.
389 	 */
390 	snap = libscf_get_running_snapshot(scf_inst);
391 
392 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
393 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
394 	    0)) {
395 		switch (scf_error()) {
396 		case SCF_ERROR_NOT_SET:
397 			break;
398 
399 		case SCF_ERROR_CONNECTION_BROKEN:
400 			libscf_handle_rebind(h);
401 			goto rep_retry;
402 
403 		default:
404 			assert(0);
405 			abort();
406 		}
407 
408 		scf_snapshot_destroy(snap);
409 		goto deleted;
410 	}
411 
412 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
413 	for (c = logfilebuf; *c != '\0'; c++)
414 		if (*c == '/')
415 			*c = '-';
416 
417 	inst->ri_logstem = startd_alloc(PATH_MAX);
418 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
419 	    LOG_SUFFIX);
420 
421 	/*
422 	 * If the restarter group is missing, use uninit/none.  Otherwise,
423 	 * we're probably being restarted & don't want to mess up the states
424 	 * that are there.
425 	 */
426 	state = RESTARTER_STATE_UNINIT;
427 	next_state = RESTARTER_STATE_NONE;
428 
429 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
430 	if (r != 0) {
431 		switch (scf_error()) {
432 		case SCF_ERROR_CONNECTION_BROKEN:
433 			libscf_handle_rebind(h);
434 			goto rep_retry;
435 
436 		case SCF_ERROR_NOT_SET:
437 			scf_snapshot_destroy(snap);
438 			goto deleted;
439 
440 		case SCF_ERROR_NOT_FOUND:
441 			/*
442 			 * This shouldn't happen since the graph engine should
443 			 * have initialized the state to uninitialized/none if
444 			 * there was no restarter pg.  In case somebody
445 			 * deleted it, though....
446 			 */
447 			do_commit_states = B_TRUE;
448 			break;
449 
450 		default:
451 			assert(0);
452 			abort();
453 		}
454 	} else {
455 		r = libscf_read_states(pg, &state, &next_state);
456 		if (r != 0) {
457 			do_commit_states = B_TRUE;
458 		} else {
459 			if (next_state != RESTARTER_STATE_NONE) {
460 				/*
461 				 * Force next_state to _NONE since we
462 				 * don't look for method processes.
463 				 */
464 				next_state = RESTARTER_STATE_NONE;
465 				do_commit_states = B_TRUE;
466 			} else {
467 				/*
468 				 * Inform the restarter of our state without
469 				 * changing the STIME in the repository.
470 				 */
471 				ps = startd_alloc(sizeof (*ps));
472 				inst->ri_i.i_state = ps->ps_state = state;
473 				inst->ri_i.i_next_state = ps->ps_state_next =
474 				    next_state;
475 
476 				graph_protocol_send_event(inst->ri_i.i_fmri,
477 				    GRAPH_UPDATE_STATE_CHANGE, ps);
478 
479 				do_commit_states = B_FALSE;
480 			}
481 		}
482 	}
483 
484 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
485 	    &inst->ri_utmpx_prefix)) {
486 	case 0:
487 		break;
488 
489 	case ECONNABORTED:
490 		libscf_handle_rebind(h);
491 		goto rep_retry;
492 
493 	case ECANCELED:
494 		scf_snapshot_destroy(snap);
495 		startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
496 		goto deleted;
497 
498 	case ENOENT:
499 		/*
500 		 * This is odd, because the graph engine should have required
501 		 * the general property group.  So we'll just use default
502 		 * flags in anticipation of the graph engine sending us
503 		 * REMOVE_INSTANCE when it finds out that the general property
504 		 * group has been deleted.
505 		 */
506 		inst->ri_flags = RINST_CONTRACT;
507 		break;
508 
509 	default:
510 		assert(0);
511 		abort();
512 	}
513 
514 	switch (libscf_get_template_values(scf_inst, snap,
515 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
516 	case 0:
517 		break;
518 
519 	case ECONNABORTED:
520 		libscf_handle_rebind(h);
521 		goto rep_retry;
522 
523 	case ECANCELED:
524 		scf_snapshot_destroy(snap);
525 		startd_free(inst->ri_common_name, max_scf_value_size);
526 		inst->ri_common_name = NULL;
527 		goto deleted;
528 
529 	case ECHILD:
530 	case ENOENT:
531 		break;
532 
533 	default:
534 		assert(0);
535 		abort();
536 	}
537 
538 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
539 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
540 	    &start_pid)) {
541 	case 0:
542 		break;
543 
544 	case ECONNABORTED:
545 		libscf_handle_rebind(h);
546 		goto rep_retry;
547 
548 	case ECANCELED:
549 		scf_snapshot_destroy(snap);
550 		goto deleted;
551 
552 	default:
553 		assert(0);
554 		abort();
555 	}
556 
557 	if (inst->ri_i.i_primary_ctid >= 1) {
558 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
559 
560 		switch (check_contract(inst, B_TRUE, scf_inst)) {
561 		case 0:
562 			break;
563 
564 		case ECONNABORTED:
565 			libscf_handle_rebind(h);
566 			goto rep_retry;
567 
568 		case ECANCELED:
569 			scf_snapshot_destroy(snap);
570 			goto deleted;
571 
572 		default:
573 			assert(0);
574 			abort();
575 		}
576 	}
577 
578 	if (inst->ri_i.i_transient_ctid >= 1) {
579 		switch (check_contract(inst, B_FALSE, scf_inst)) {
580 		case 0:
581 			break;
582 
583 		case ECONNABORTED:
584 			libscf_handle_rebind(h);
585 			goto rep_retry;
586 
587 		case ECANCELED:
588 			scf_snapshot_destroy(snap);
589 			goto deleted;
590 
591 		default:
592 			assert(0);
593 			abort();
594 		}
595 	}
596 
597 	/* No more failures we live through, so add it to the list. */
598 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
599 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
600 	MUTEX_LOCK(&inst->ri_lock);
601 	MUTEX_LOCK(&inst->ri_queue_lock);
602 
603 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
604 
605 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
606 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
607 	MUTEX_UNLOCK(&instance_list.ril_lock);
608 
609 	if (start_pid != -1 &&
610 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
611 		int ret;
612 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
613 		if (ret == -1) {
614 			/*
615 			 * Implication:  if we can't reregister the
616 			 * instance, we will start another one.  Two
617 			 * instances may or may not result in a resource
618 			 * conflict.
619 			 */
620 			log_error(LOG_WARNING,
621 			    "%s: couldn't reregister %ld for wait\n",
622 			    inst->ri_i.i_fmri, start_pid);
623 		} else if (ret == 1) {
624 			/*
625 			 * Leading PID has exited.
626 			 */
627 			(void) stop_instance(h, inst, RSTOP_EXIT);
628 		}
629 	}
630 
631 
632 	scf_pg_destroy(pg);
633 
634 	if (do_commit_states)
635 		(void) restarter_instance_update_states(h, inst, state,
636 		    next_state, RERR_NONE, NULL);
637 
638 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
639 	    service_style(inst->ri_flags));
640 
641 	MUTEX_UNLOCK(&inst->ri_queue_lock);
642 	MUTEX_UNLOCK(&inst->ri_lock);
643 
644 	startd_free(svc_name, max_scf_name_size);
645 	startd_free(inst_name, max_scf_name_size);
646 	scf_snapshot_destroy(snap);
647 	scf_instance_destroy(scf_inst);
648 	scf_service_destroy(scf_svc);
649 
650 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
651 	    name);
652 
653 	return (0);
654 }
655 
656 static void
657 restarter_delete_inst(restarter_inst_t *ri)
658 {
659 	int id;
660 	restarter_inst_t *rip;
661 	void *cookie = NULL;
662 	restarter_instance_qentry_t *e;
663 
664 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
665 
666 	/*
667 	 * Must drop the instance lock so we can pick up the instance_list
668 	 * lock & remove the instance.
669 	 */
670 	id = ri->ri_id;
671 	MUTEX_UNLOCK(&ri->ri_lock);
672 
673 	MUTEX_LOCK(&instance_list.ril_lock);
674 
675 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
676 	if (rip == NULL) {
677 		MUTEX_UNLOCK(&instance_list.ril_lock);
678 		return;
679 	}
680 
681 	assert(ri == rip);
682 
683 	uu_list_remove(instance_list.ril_instance_list, ri);
684 
685 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
686 	    ri->ri_i.i_fmri);
687 
688 	MUTEX_UNLOCK(&instance_list.ril_lock);
689 
690 	/*
691 	 * We can lock the instance without holding the instance_list lock
692 	 * since we removed the instance from the list.
693 	 */
694 	MUTEX_LOCK(&ri->ri_lock);
695 	MUTEX_LOCK(&ri->ri_queue_lock);
696 
697 	if (ri->ri_i.i_primary_ctid >= 1)
698 		contract_hash_remove(ri->ri_i.i_primary_ctid);
699 
700 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
701 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
702 
703 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
704 		startd_free(e, sizeof (*e));
705 	uu_list_destroy(ri->ri_queue);
706 
707 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
708 	startd_free(ri->ri_logstem, PATH_MAX);
709 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
710 	(void) pthread_mutex_destroy(&ri->ri_lock);
711 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
712 	startd_free(ri, sizeof (restarter_inst_t));
713 }
714 
715 /*
716  * instance_is_wait_style()
717  *
718  *   Returns 1 if the given instance is a "wait-style" service instance.
719  */
720 int
721 instance_is_wait_style(restarter_inst_t *inst)
722 {
723 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
724 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
725 }
726 
727 /*
728  * instance_is_transient_style()
729  *
730  *   Returns 1 if the given instance is a transient service instance.
731  */
732 int
733 instance_is_transient_style(restarter_inst_t *inst)
734 {
735 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
736 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
737 }
738 
739 /*
740  * instance_in_transition()
741  * Returns 1 if instance is in transition, 0 if not
742  */
743 int
744 instance_in_transition(restarter_inst_t *inst)
745 {
746 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
747 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
748 		return (0);
749 	return (1);
750 }
751 
752 /*
753  * Returns
754  *   0 - success
755  *   ECONNRESET - success, but h was rebound
756  */
757 int
758 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
759     restarter_instance_state_t new_state,
760     restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
761 {
762 	protocol_states_t *states;
763 	int e;
764 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
765 	boolean_t rebound = B_FALSE;
766 
767 	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
768 
769 retry:
770 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
771 	    aux);
772 	switch (e) {
773 	case 0:
774 		break;
775 
776 	case ENOMEM:
777 		++retry_count;
778 		if (retry_count < ALLOC_RETRY) {
779 			(void) poll(NULL, 0, msecs);
780 			msecs *= ALLOC_DELAY_MULT;
781 			goto retry;
782 		}
783 
784 		/* Like startd_alloc(). */
785 		uu_die("Insufficient memory.\n");
786 		/* NOTREACHED */
787 
788 	case ECONNABORTED:
789 		libscf_handle_rebind(h);
790 		rebound = B_TRUE;
791 		goto retry;
792 
793 	case EPERM:
794 	case EACCES:
795 	case EROFS:
796 		log_error(LOG_NOTICE, "Could not commit state change for %s "
797 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
798 		/* FALLTHROUGH */
799 
800 	case ENOENT:
801 		ri->ri_i.i_state = new_state;
802 		ri->ri_i.i_next_state = new_state_next;
803 		break;
804 
805 	case EINVAL:
806 	default:
807 		bad_error("_restarter_commit_states", e);
808 	}
809 
810 	states = startd_alloc(sizeof (protocol_states_t));
811 	states->ps_state = new_state;
812 	states->ps_state_next = new_state_next;
813 	states->ps_err = err;
814 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
815 	    (void *)states);
816 
817 	if (new_state == RESTARTER_STATE_ONLINE)
818 		ri->ri_post_online_hook();
819 
820 	return (rebound ? ECONNRESET : 0);
821 }
822 
823 void
824 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
825 {
826 	restarter_inst_t *inst;
827 
828 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
829 
830 	inst = inst_lookup_by_name(fmri);
831 	if (inst == NULL)
832 		return;
833 
834 	inst->ri_flags |= flag;
835 
836 	MUTEX_UNLOCK(&inst->ri_lock);
837 }
838 
839 static void
840 restarter_take_pending_snapshots(scf_handle_t *h)
841 {
842 	restarter_inst_t *inst;
843 	int r;
844 
845 	MUTEX_LOCK(&instance_list.ril_lock);
846 
847 	for (inst = uu_list_first(instance_list.ril_instance_list);
848 	    inst != NULL;
849 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
850 		const char *fmri;
851 		scf_instance_t *sinst = NULL;
852 
853 		MUTEX_LOCK(&inst->ri_lock);
854 
855 		/*
856 		 * This is where we'd check inst->ri_method_thread and if it
857 		 * were nonzero we'd wait in anticipation of another thread
858 		 * executing a method for inst.  Doing so with the instance_list
859 		 * locked, though, leads to deadlock.  Since taking a snapshot
860 		 * during that window won't hurt anything, we'll just continue.
861 		 */
862 
863 		fmri = inst->ri_i.i_fmri;
864 
865 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
866 			scf_snapshot_t *rsnap;
867 
868 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
869 
870 			rsnap = libscf_get_or_make_running_snapshot(sinst,
871 			    fmri, B_FALSE);
872 
873 			scf_instance_destroy(sinst);
874 
875 			if (rsnap != NULL)
876 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
877 
878 			scf_snapshot_destroy(rsnap);
879 		}
880 
881 		if (inst->ri_flags & RINST_RETAKE_START) {
882 			switch (r = libscf_snapshots_poststart(h, fmri,
883 			    B_FALSE)) {
884 			case 0:
885 			case ENOENT:
886 				inst->ri_flags &= ~RINST_RETAKE_START;
887 				break;
888 
889 			case ECONNABORTED:
890 				break;
891 
892 			case EACCES:
893 			default:
894 				bad_error("libscf_snapshots_poststart", r);
895 			}
896 		}
897 
898 		MUTEX_UNLOCK(&inst->ri_lock);
899 	}
900 
901 	MUTEX_UNLOCK(&instance_list.ril_lock);
902 }
903 
904 /* ARGSUSED */
905 void *
906 restarter_post_fsminimal_thread(void *unused)
907 {
908 	scf_handle_t *h;
909 	int r;
910 
911 	h = libscf_handle_create_bound_loop();
912 
913 	for (;;) {
914 		r = libscf_create_self(h);
915 		if (r == 0)
916 			break;
917 
918 		assert(r == ECONNABORTED);
919 		libscf_handle_rebind(h);
920 	}
921 
922 	restarter_take_pending_snapshots(h);
923 
924 	(void) scf_handle_unbind(h);
925 	scf_handle_destroy(h);
926 
927 	return (NULL);
928 }
929 
930 /*
931  * returns 1 if instance is already started, 0 if not
932  */
933 static int
934 instance_started(restarter_inst_t *inst)
935 {
936 	int ret;
937 
938 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
939 
940 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
941 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
942 		ret = 1;
943 	else
944 		ret = 0;
945 
946 	return (ret);
947 }
948 
949 /*
950  * int stop_instance()
951  *
952  *   Stop the instance identified by the instance given as the second argument,
953  *   for the cause stated.
954  *
955  *   Returns
956  *     0 - success
957  *     -1 - inst is in transition
958  */
959 static int
960 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
961     stop_cause_t cause)
962 {
963 	fork_info_t *info;
964 	const char *cp;
965 	int err;
966 	restarter_error_t re;
967 
968 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
969 	assert(inst->ri_method_thread == 0);
970 
971 	switch (cause) {
972 	case RSTOP_EXIT:
973 		re = RERR_RESTART;
974 		cp = "all processes in service exited";
975 		break;
976 	case RSTOP_CORE:
977 		re = RERR_FAULT;
978 		cp = "process dumped core";
979 		break;
980 	case RSTOP_SIGNAL:
981 		re = RERR_FAULT;
982 		cp = "process received fatal signal from outside the service";
983 		break;
984 	case RSTOP_HWERR:
985 		re = RERR_FAULT;
986 		cp = "process killed due to uncorrectable hardware error";
987 		break;
988 	case RSTOP_DEPENDENCY:
989 		re = RERR_RESTART;
990 		cp = "dependency activity requires stop";
991 		break;
992 	case RSTOP_DISABLE:
993 		re = RERR_RESTART;
994 		cp = "service disabled";
995 		break;
996 	case RSTOP_RESTART:
997 		re = RERR_RESTART;
998 		cp = "service restarting";
999 		break;
1000 	default:
1001 #ifndef NDEBUG
1002 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1003 		    cause, __FILE__, __LINE__);
1004 #endif
1005 		abort();
1006 	}
1007 
1008 	/* Services in the disabled and maintenance state are ignored */
1009 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1010 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1011 		log_framework(LOG_DEBUG,
1012 		    "%s: stop_instance -> is maint/disabled\n",
1013 		    inst->ri_i.i_fmri);
1014 		return (0);
1015 	}
1016 
1017 	/* Already stopped instances are left alone */
1018 	if (instance_started(inst) == 0) {
1019 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1020 		    inst->ri_i.i_fmri);
1021 		return (0);
1022 	}
1023 
1024 	if (instance_in_transition(inst)) {
1025 		/* requeue event by returning -1 */
1026 		log_framework(LOG_DEBUG,
1027 		    "Restarter: Not stopping %s, in transition.\n",
1028 		    inst->ri_i.i_fmri);
1029 		return (-1);
1030 	}
1031 
1032 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1033 
1034 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1035 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1036 
1037 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1038 		/*
1039 		 * No need to stop instance, as child has exited; remove
1040 		 * contract and move the instance to the offline state.
1041 		 */
1042 		switch (err = restarter_instance_update_states(local_handle,
1043 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1044 		    NULL)) {
1045 		case 0:
1046 		case ECONNRESET:
1047 			break;
1048 
1049 		default:
1050 			bad_error("restarter_instance_update_states", err);
1051 		}
1052 
1053 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1054 
1055 		if (inst->ri_i.i_primary_ctid != 0) {
1056 			inst->ri_m_inst =
1057 			    safe_scf_instance_create(local_handle);
1058 			inst->ri_mi_deleted = B_FALSE;
1059 
1060 			libscf_reget_instance(inst);
1061 			method_remove_contract(inst, B_TRUE, B_TRUE);
1062 
1063 			scf_instance_destroy(inst->ri_m_inst);
1064 			inst->ri_m_inst = NULL;
1065 		}
1066 
1067 		switch (err = restarter_instance_update_states(local_handle,
1068 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1069 		    NULL)) {
1070 		case 0:
1071 		case ECONNRESET:
1072 			break;
1073 
1074 		default:
1075 			bad_error("restarter_instance_update_states", err);
1076 		}
1077 
1078 		return (0);
1079 	}
1080 
1081 	switch (err = restarter_instance_update_states(local_handle, inst,
1082 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1083 	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
1084 	case 0:
1085 	case ECONNRESET:
1086 		break;
1087 
1088 	default:
1089 		bad_error("restarter_instance_update_states", err);
1090 	}
1091 
1092 	info = startd_zalloc(sizeof (fork_info_t));
1093 
1094 	info->sf_id = inst->ri_id;
1095 	info->sf_method_type = METHOD_STOP;
1096 	info->sf_event_type = re;
1097 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1098 
1099 	return (0);
1100 }
1101 
1102 /*
1103  * Returns
1104  *   ENOENT - fmri is not in instance_list
1105  *   0 - success
1106  *   ECONNRESET - success, though handle was rebound
1107  *   -1 - instance is in transition
1108  */
1109 int
1110 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1111 {
1112 	restarter_inst_t *rip;
1113 	int r;
1114 
1115 	rip = inst_lookup_by_name(fmri);
1116 	if (rip == NULL)
1117 		return (ENOENT);
1118 
1119 	r = stop_instance(h, rip, flags);
1120 
1121 	MUTEX_UNLOCK(&rip->ri_lock);
1122 
1123 	return (r);
1124 }
1125 
1126 static void
1127 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1128     unmaint_cause_t cause)
1129 {
1130 	ctid_t ctid;
1131 	scf_instance_t *inst;
1132 	int r;
1133 	uint_t tries = 0, msecs = ALLOC_DELAY;
1134 	const char *cp;
1135 
1136 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1137 
1138 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1139 		log_error(LOG_DEBUG, "Restarter: "
1140 		    "Ignoring maintenance off command because %s is not in the "
1141 		    "maintenance state.\n", rip->ri_i.i_fmri);
1142 		return;
1143 	}
1144 
1145 	switch (cause) {
1146 	case RUNMAINT_CLEAR:
1147 		cp = "clear requested";
1148 		break;
1149 	case RUNMAINT_DISABLE:
1150 		cp = "disable requested";
1151 		break;
1152 	default:
1153 #ifndef NDEBUG
1154 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1155 		    cause, __FILE__, __LINE__);
1156 #endif
1157 		abort();
1158 	}
1159 
1160 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1161 	    cp);
1162 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1163 	    "%s.\n", rip->ri_i.i_fmri, cp);
1164 
1165 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1166 	    RESTARTER_STATE_NONE, RERR_RESTART, NULL);
1167 
1168 	/*
1169 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1170 	 * a primary contract.
1171 	 */
1172 	if (rip->ri_i.i_primary_ctid == 0)
1173 		return;
1174 
1175 	ctid = rip->ri_i.i_primary_ctid;
1176 	contract_abandon(ctid);
1177 	rip->ri_i.i_primary_ctid = 0;
1178 
1179 rep_retry:
1180 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1181 	case 0:
1182 		break;
1183 
1184 	case ECONNABORTED:
1185 		libscf_handle_rebind(h);
1186 		goto rep_retry;
1187 
1188 	case ENOENT:
1189 		/* Must have been deleted. */
1190 		return;
1191 
1192 	case EINVAL:
1193 	case ENOTSUP:
1194 	default:
1195 		bad_error("libscf_handle_rebind", r);
1196 	}
1197 
1198 again:
1199 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1200 	switch (r) {
1201 	case 0:
1202 		break;
1203 
1204 	case ENOMEM:
1205 		++tries;
1206 		if (tries < ALLOC_RETRY) {
1207 			(void) poll(NULL, 0, msecs);
1208 			msecs *= ALLOC_DELAY_MULT;
1209 			goto again;
1210 		}
1211 
1212 		uu_die("Insufficient memory.\n");
1213 		/* NOTREACHED */
1214 
1215 	case ECONNABORTED:
1216 		scf_instance_destroy(inst);
1217 		libscf_handle_rebind(h);
1218 		goto rep_retry;
1219 
1220 	case ECANCELED:
1221 		break;
1222 
1223 	case EPERM:
1224 	case EACCES:
1225 	case EROFS:
1226 		log_error(LOG_INFO,
1227 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1228 		    rip->ri_i.i_fmri, strerror(r));
1229 		break;
1230 
1231 	case EINVAL:
1232 	case EBADF:
1233 	default:
1234 		bad_error("restarter_remove_contract", r);
1235 	}
1236 
1237 	scf_instance_destroy(inst);
1238 }
1239 
1240 /*
1241  * enable_inst()
1242  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1243  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1244  *   disabled, move it to offline.  If the event is _DISABLE or
1245  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1246  *
1247  *   Returns
1248  *     0 - success
1249  *     ECONNRESET - h was rebound
1250  */
1251 static int
1252 enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
1253 {
1254 	restarter_instance_state_t state;
1255 	int r;
1256 
1257 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1258 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1259 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1260 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1261 	assert(instance_in_transition(inst) == 0);
1262 
1263 	state = inst->ri_i.i_state;
1264 
1265 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1266 		inst->ri_i.i_enabled = 1;
1267 
1268 		if (state == RESTARTER_STATE_UNINIT ||
1269 		    state == RESTARTER_STATE_DISABLED) {
1270 			/*
1271 			 * B_FALSE: Don't log an error if the log_instance()
1272 			 * fails because it will fail on the miniroot before
1273 			 * install-discovery runs.
1274 			 */
1275 			log_instance(inst, B_FALSE, "Enabled.");
1276 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1277 			    inst->ri_i.i_fmri);
1278 			(void) restarter_instance_update_states(h, inst,
1279 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1280 			    RERR_NONE, NULL);
1281 		} else {
1282 			log_framework(LOG_DEBUG, "Restarter: "
1283 			    "Not changing state of %s for enable command.\n",
1284 			    inst->ri_i.i_fmri);
1285 		}
1286 	} else {
1287 		inst->ri_i.i_enabled = 0;
1288 
1289 		switch (state) {
1290 		case RESTARTER_STATE_ONLINE:
1291 		case RESTARTER_STATE_DEGRADED:
1292 			r = stop_instance(h, inst, RSTOP_DISABLE);
1293 			return (r == ECONNRESET ? 0 : r);
1294 
1295 		case RESTARTER_STATE_OFFLINE:
1296 		case RESTARTER_STATE_UNINIT:
1297 			if (inst->ri_i.i_primary_ctid != 0) {
1298 				inst->ri_m_inst = safe_scf_instance_create(h);
1299 				inst->ri_mi_deleted = B_FALSE;
1300 
1301 				libscf_reget_instance(inst);
1302 				method_remove_contract(inst, B_TRUE, B_TRUE);
1303 
1304 				scf_instance_destroy(inst->ri_m_inst);
1305 			}
1306 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1307 			log_instance(inst, B_FALSE, "Disabled.");
1308 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1309 			    inst->ri_i.i_fmri);
1310 			(void) restarter_instance_update_states(h, inst,
1311 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1312 			    RERR_RESTART, NULL);
1313 			return (0);
1314 
1315 		case RESTARTER_STATE_DISABLED:
1316 			break;
1317 
1318 		case RESTARTER_STATE_MAINT:
1319 			/*
1320 			 * We only want to pull the instance out of maintenance
1321 			 * if the disable is on adminstrative request.  The
1322 			 * graph engine sends _DISABLE events whenever a
1323 			 * service isn't in the disabled state, and we don't
1324 			 * want to pull the service out of maintenance if,
1325 			 * for example, it is there due to a dependency cycle.
1326 			 */
1327 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1328 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1329 			break;
1330 
1331 		default:
1332 #ifndef NDEBUG
1333 			(void) fprintf(stderr, "Restarter instance %s has "
1334 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1335 #endif
1336 			abort();
1337 		}
1338 	}
1339 
1340 	return (0);
1341 }
1342 
1343 static void
1344 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
1345 {
1346 	fork_info_t *info;
1347 
1348 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1349 	assert(instance_in_transition(inst) == 0);
1350 	assert(inst->ri_method_thread == 0);
1351 
1352 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1353 	    inst->ri_i.i_fmri);
1354 
1355 	/* Services in the disabled and maintenance state are ignored */
1356 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1357 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1358 	    inst->ri_i.i_enabled == 0) {
1359 		log_framework(LOG_DEBUG,
1360 		    "%s: start_instance -> is maint/disabled\n",
1361 		    inst->ri_i.i_fmri);
1362 		return;
1363 	}
1364 
1365 	/* Already started instances are left alone */
1366 	if (instance_started(inst) == 1) {
1367 		log_framework(LOG_DEBUG,
1368 		    "%s: start_instance -> is already started\n",
1369 		    inst->ri_i.i_fmri);
1370 		return;
1371 	}
1372 
1373 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1374 
1375 	(void) restarter_instance_update_states(local_handle, inst,
1376 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, NULL);
1377 
1378 	info = startd_zalloc(sizeof (fork_info_t));
1379 
1380 	info->sf_id = inst->ri_id;
1381 	info->sf_method_type = METHOD_START;
1382 	info->sf_event_type = RERR_NONE;
1383 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1384 }
1385 
1386 static void
1387 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1388     const char *aux)
1389 {
1390 	fork_info_t *info;
1391 
1392 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1393 	assert(aux != NULL);
1394 	assert(rip->ri_method_thread == 0);
1395 
1396 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
1397 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1398 	    rip->ri_i.i_fmri, aux);
1399 
1400 	/* Services in the maintenance state are ignored */
1401 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1402 		log_framework(LOG_DEBUG,
1403 		    "%s: maintain_instance -> is already in maintenance\n",
1404 		    rip->ri_i.i_fmri);
1405 		return;
1406 	}
1407 
1408 	if (immediate || !instance_started(rip)) {
1409 		if (rip->ri_i.i_primary_ctid != 0) {
1410 			rip->ri_m_inst = safe_scf_instance_create(h);
1411 			rip->ri_mi_deleted = B_FALSE;
1412 
1413 			libscf_reget_instance(rip);
1414 			method_remove_contract(rip, B_TRUE, B_TRUE);
1415 
1416 			scf_instance_destroy(rip->ri_m_inst);
1417 		}
1418 
1419 		(void) restarter_instance_update_states(h, rip,
1420 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1421 		    (char *)aux);
1422 		return;
1423 	}
1424 
1425 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1426 	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
1427 
1428 	info = startd_zalloc(sizeof (*info));
1429 	info->sf_id = rip->ri_id;
1430 	info->sf_method_type = METHOD_STOP;
1431 	info->sf_event_type = RERR_RESTART;
1432 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1433 }
1434 
1435 static void
1436 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1437 {
1438 	scf_instance_t *inst;
1439 	scf_snapshot_t *snap;
1440 	fork_info_t *info;
1441 	int r;
1442 
1443 	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1444 
1445 	log_instance(rip, B_TRUE, "Rereading configuration.");
1446 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1447 	    rip->ri_i.i_fmri);
1448 
1449 rep_retry:
1450 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1451 	switch (r) {
1452 	case 0:
1453 		break;
1454 
1455 	case ECONNABORTED:
1456 		libscf_handle_rebind(h);
1457 		goto rep_retry;
1458 
1459 	case ENOENT:
1460 		/* Must have been deleted. */
1461 		return;
1462 
1463 	case EINVAL:
1464 	case ENOTSUP:
1465 	default:
1466 		bad_error("libscf_fmri_get_instance", r);
1467 	}
1468 
1469 	snap = libscf_get_running_snapshot(inst);
1470 
1471 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1472 	    &rip->ri_utmpx_prefix);
1473 	switch (r) {
1474 	case 0:
1475 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1476 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1477 		break;
1478 
1479 	case ECONNABORTED:
1480 		scf_instance_destroy(inst);
1481 		scf_snapshot_destroy(snap);
1482 		libscf_handle_rebind(h);
1483 		goto rep_retry;
1484 
1485 	case ECANCELED:
1486 	case ENOENT:
1487 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1488 		break;
1489 
1490 	default:
1491 		bad_error("libscf_get_startd_properties", r);
1492 	}
1493 
1494 	if (instance_started(rip)) {
1495 		/* Refresh does not change the state. */
1496 		(void) restarter_instance_update_states(h, rip,
1497 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
1498 
1499 		info = startd_zalloc(sizeof (*info));
1500 		info->sf_id = rip->ri_id;
1501 		info->sf_method_type = METHOD_REFRESH;
1502 		info->sf_event_type = RERR_REFRESH;
1503 
1504 		assert(rip->ri_method_thread == 0);
1505 		rip->ri_method_thread =
1506 		    startd_thread_create(method_thread, info);
1507 	}
1508 
1509 	scf_snapshot_destroy(snap);
1510 	scf_instance_destroy(inst);
1511 }
1512 
/*
 * Printable names for restarter events, indexed by event type value.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE"
};
1519 
1520 /*
1521  * void *restarter_process_events()
1522  *
1523  *   Called in a separate thread to process the events on an instance's
1524  *   queue.  Empties the queue completely, and tries to keep the thread
1525  *   around for a little while after the queue is empty to save on
1526  *   startup costs.
1527  */
1528 static void *
1529 restarter_process_events(void *arg)
1530 {
1531 	scf_handle_t *h;
1532 	restarter_instance_qentry_t *event;
1533 	restarter_inst_t *rip;
1534 	char *fmri = (char *)arg;
1535 	struct timespec to;
1536 
1537 	assert(fmri != NULL);
1538 
1539 	h = libscf_handle_create_bound_loop();
1540 
1541 	/* grab the queue lock */
1542 	rip = inst_lookup_queue(fmri);
1543 	if (rip == NULL)
1544 		goto out;
1545 
1546 again:
1547 
1548 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1549 		restarter_inst_t *inst;
1550 
1551 		/* drop the queue lock */
1552 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1553 
1554 		/*
1555 		 * Grab the inst lock -- this waits until any outstanding
1556 		 * method finishes running.
1557 		 */
1558 		inst = inst_lookup_by_name(fmri);
1559 		if (inst == NULL) {
1560 			/* Getting deleted in the middle isn't an error. */
1561 			goto cont;
1562 		}
1563 
1564 		assert(instance_in_transition(inst) == 0);
1565 
1566 		/* process the event */
1567 		switch (event->riq_type) {
1568 		case RESTARTER_EVENT_TYPE_ENABLE:
1569 		case RESTARTER_EVENT_TYPE_DISABLE:
1570 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1571 			(void) enable_inst(h, inst, event->riq_type);
1572 			break;
1573 
1574 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1575 			restarter_delete_inst(inst);
1576 			inst = NULL;
1577 			goto cont;
1578 
1579 		case RESTARTER_EVENT_TYPE_STOP:
1580 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1581 			break;
1582 
1583 		case RESTARTER_EVENT_TYPE_START:
1584 			start_instance(h, inst);
1585 			break;
1586 
1587 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1588 			maintain_instance(h, inst, 0, "dependency_cycle");
1589 			break;
1590 
1591 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1592 			maintain_instance(h, inst, 0, "invalid_dependency");
1593 			break;
1594 
1595 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1596 			maintain_instance(h, inst, 0, "administrative_request");
1597 			break;
1598 
1599 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1600 			maintain_instance(h, inst, 1, "administrative_request");
1601 			break;
1602 
1603 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1604 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1605 			break;
1606 
1607 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1608 			refresh_instance(h, inst);
1609 			break;
1610 
1611 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1612 			log_framework(LOG_WARNING, "Restarter: "
1613 			    "%s command (for %s) unimplemented.\n",
1614 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1615 			break;
1616 
1617 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1618 			if (!instance_started(inst)) {
1619 				log_framework(LOG_DEBUG, "Restarter: "
1620 				    "Not restarting %s; not running.\n",
1621 				    inst->ri_i.i_fmri);
1622 			} else {
1623 				/*
1624 				 * Stop the instance.  If it can be restarted,
1625 				 * the graph engine will send a new event.
1626 				 */
1627 				(void) stop_instance(h, inst, RSTOP_RESTART);
1628 			}
1629 			break;
1630 
1631 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1632 		default:
1633 #ifndef NDEBUG
1634 			uu_warn("%s:%d: Bad restarter event %d.  "
1635 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1636 #endif
1637 			abort();
1638 		}
1639 
1640 		assert(inst != NULL);
1641 		MUTEX_UNLOCK(&inst->ri_lock);
1642 
1643 cont:
1644 		/* grab the queue lock */
1645 		rip = inst_lookup_queue(fmri);
1646 		if (rip == NULL)
1647 			goto out;
1648 
1649 		/* delete the event */
1650 		uu_list_remove(rip->ri_queue, event);
1651 		startd_free(event, sizeof (restarter_instance_qentry_t));
1652 	}
1653 
1654 	assert(rip != NULL);
1655 
1656 	/*
1657 	 * Try to preserve the thread for a little while for future use.
1658 	 */
1659 	to.tv_sec = 3;
1660 	to.tv_nsec = 0;
1661 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1662 	    &rip->ri_queue_lock, &to);
1663 
1664 	if (uu_list_first(rip->ri_queue) != NULL)
1665 		goto again;
1666 
1667 	rip->ri_queue_thread = 0;
1668 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1669 out:
1670 	(void) scf_handle_unbind(h);
1671 	scf_handle_destroy(h);
1672 	free(fmri);
1673 	return (NULL);
1674 }
1675 
1676 static int
1677 is_admin_event(restarter_event_type_t t) {
1678 
1679 	switch (t) {
1680 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1681 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1682 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1683 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1684 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1685 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1686 		return (1);
1687 	default:
1688 		return (0);
1689 	}
1690 }
1691 
1692 static void
1693 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1694 {
1695 	restarter_instance_qentry_t *qe;
1696 	int r;
1697 
1698 	assert(PTHREAD_MUTEX_HELD(&ri->ri_queue_lock));
1699 	assert(!PTHREAD_MUTEX_HELD(&ri->ri_lock));
1700 
1701 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1702 	qe->riq_type = e->rpe_type;
1703 
1704 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1705 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1706 	assert(r == 0);
1707 }
1708 
1709 /*
1710  * void *restarter_event_thread()
1711  *
1712  *  Handle incoming graph events by placing them on a per-instance
1713  *  queue.  We can't lock the main part of the instance structure, so
1714  *  just modify the seprarately locked event queue portion.
1715  */
1716 /*ARGSUSED*/
1717 static void *
1718 restarter_event_thread(void *unused)
1719 {
1720 	scf_handle_t *h;
1721 
1722 	/*
1723 	 * This is a new thread, and thus, gets its own handle
1724 	 * to the repository.
1725 	 */
1726 	h = libscf_handle_create_bound_loop();
1727 
1728 	MUTEX_LOCK(&ru->restarter_update_lock);
1729 
1730 	/*CONSTCOND*/
1731 	while (1) {
1732 		restarter_protocol_event_t *e;
1733 
1734 		while (ru->restarter_update_wakeup == 0)
1735 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1736 			    &ru->restarter_update_lock);
1737 
1738 		ru->restarter_update_wakeup = 0;
1739 
1740 		while ((e = restarter_event_dequeue()) != NULL) {
1741 			restarter_inst_t *rip;
1742 			char *fmri;
1743 
1744 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1745 
1746 			/*
1747 			 * ADD_INSTANCE is special: there's likely no
1748 			 * instance structure yet, so we need to handle the
1749 			 * addition synchronously.
1750 			 */
1751 			switch (e->rpe_type) {
1752 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1753 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1754 					log_error(LOG_INFO, "Restarter: "
1755 					    "Could not add %s.\n", e->rpe_inst);
1756 
1757 				MUTEX_LOCK(&st->st_load_lock);
1758 				if (--st->st_load_instances == 0)
1759 					(void) pthread_cond_broadcast(
1760 					    &st->st_load_cv);
1761 				MUTEX_UNLOCK(&st->st_load_lock);
1762 
1763 				goto nolookup;
1764 			}
1765 
1766 			/*
1767 			 * Lookup the instance, locking only the event queue.
1768 			 * Can't grab ri_lock here because it might be held
1769 			 * by a long-running method.
1770 			 */
1771 			rip = inst_lookup_queue(e->rpe_inst);
1772 			if (rip == NULL) {
1773 				log_error(LOG_INFO, "Restarter: "
1774 				    "Ignoring %s command for unknown service "
1775 				    "%s.\n", event_names[e->rpe_type],
1776 				    e->rpe_inst);
1777 				goto nolookup;
1778 			}
1779 
1780 			/* Keep ADMIN events from filling up the queue. */
1781 			if (is_admin_event(e->rpe_type) &&
1782 			    uu_list_numnodes(rip->ri_queue) >
1783 			    RINST_QUEUE_THRESHOLD) {
1784 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1785 				log_instance(rip, B_TRUE, "Instance event "
1786 				    "queue overflow.  Dropping administrative "
1787 				    "request.");
1788 				log_framework(LOG_DEBUG, "%s: Instance event "
1789 				    "queue overflow.  Dropping administrative "
1790 				    "request.\n", rip->ri_i.i_fmri);
1791 				goto nolookup;
1792 			}
1793 
1794 			/* Now add the event to the instance queue. */
1795 			restarter_queue_event(rip, e);
1796 
1797 			if (rip->ri_queue_thread == 0) {
1798 				/*
1799 				 * Start a thread if one isn't already
1800 				 * running.
1801 				 */
1802 				fmri = safe_strdup(e->rpe_inst);
1803 				rip->ri_queue_thread =  startd_thread_create(
1804 				    restarter_process_events, (void *)fmri);
1805 			} else {
1806 				/*
1807 				 * Signal the existing thread that there's
1808 				 * a new event.
1809 				 */
1810 				(void) pthread_cond_broadcast(
1811 				    &rip->ri_queue_cv);
1812 			}
1813 
1814 			MUTEX_UNLOCK(&rip->ri_queue_lock);
1815 nolookup:
1816 			restarter_event_release(e);
1817 
1818 			MUTEX_LOCK(&ru->restarter_update_lock);
1819 		}
1820 	}
1821 
1822 	/*
1823 	 * Unreachable for now -- there's currently no graceful cleanup
1824 	 * called on exit().
1825 	 */
1826 	(void) scf_handle_unbind(h);
1827 	scf_handle_destroy(h);
1828 	return (NULL);
1829 }
1830 
1831 static restarter_inst_t *
1832 contract_to_inst(ctid_t ctid)
1833 {
1834 	restarter_inst_t *inst;
1835 	int id;
1836 
1837 	id = lookup_inst_by_contract(ctid);
1838 	if (id == -1)
1839 		return (NULL);
1840 
1841 	inst = inst_lookup_by_id(id);
1842 	if (inst != NULL) {
1843 		/*
1844 		 * Since ri_lock isn't held by the contract id lookup, this
1845 		 * instance may have been restarted and now be in a new
1846 		 * contract, making the old contract no longer valid for this
1847 		 * instance.
1848 		 */
1849 		if (ctid != inst->ri_i.i_primary_ctid) {
1850 			MUTEX_UNLOCK(&inst->ri_lock);
1851 			inst = NULL;
1852 		}
1853 	}
1854 	return (inst);
1855 }
1856 
1857 /*
1858  * void contract_action()
1859  *   Take action on contract events.
1860  */
1861 static void
1862 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
1863     uint32_t type)
1864 {
1865 	const char *fmri = inst->ri_i.i_fmri;
1866 
1867 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1868 
1869 	/*
1870 	 * If startd has stopped this contract, there is no need to
1871 	 * stop it again.
1872 	 */
1873 	if (inst->ri_i.i_primary_ctid > 0 &&
1874 	    inst->ri_i.i_primary_ctid_stopped)
1875 		return;
1876 
1877 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
1878 	    | CT_PR_EV_HWERR)) == 0) {
1879 		/*
1880 		 * There shouldn't be other events, since that's not how we set
1881 		 * the terms. Thus, just log an error and drive on.
1882 		 */
1883 		log_framework(LOG_NOTICE,
1884 		    "%s: contract %ld received unexpected critical event "
1885 		    "(%d)\n", fmri, id, type);
1886 		    return;
1887 	}
1888 
1889 	assert(instance_in_transition(inst) == 0);
1890 
1891 	if (instance_is_wait_style(inst)) {
1892 		/*
1893 		 * We ignore all events; if they impact the
1894 		 * process we're monitoring, then the
1895 		 * wait_thread will stop the instance.
1896 		 */
1897 		log_framework(LOG_DEBUG,
1898 		    "%s: ignoring contract event on wait-style service\n",
1899 		    fmri);
1900 	} else {
1901 		/*
1902 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
1903 		 */
1904 		switch (type) {
1905 		case CT_PR_EV_EMPTY:
1906 			(void) stop_instance(h, inst, RSTOP_EXIT);
1907 			break;
1908 		case CT_PR_EV_CORE:
1909 			(void) stop_instance(h, inst, RSTOP_CORE);
1910 			break;
1911 		case CT_PR_EV_SIGNAL:
1912 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
1913 			break;
1914 		case CT_PR_EV_HWERR:
1915 			(void) stop_instance(h, inst, RSTOP_HWERR);
1916 			break;
1917 		}
1918 	}
1919 }
1920 
1921 /*
1922  * void *restarter_contract_event_thread(void *)
1923  *   Listens to the process contract bundle for critical events, taking action
1924  *   on events from contracts we know we are responsible for.
1925  */
1926 /*ARGSUSED*/
1927 static void *
1928 restarter_contracts_event_thread(void *unused)
1929 {
1930 	int fd, err;
1931 	scf_handle_t *local_handle;
1932 
1933 	/*
1934 	 * Await graph load completion.  That is, stop here, until we've scanned
1935 	 * the repository for contract - instance associations.
1936 	 */
1937 	MUTEX_LOCK(&st->st_load_lock);
1938 	while (!(st->st_load_complete && st->st_load_instances == 0))
1939 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
1940 	MUTEX_UNLOCK(&st->st_load_lock);
1941 
1942 	/*
1943 	 * This is a new thread, and thus, gets its own handle
1944 	 * to the repository.
1945 	 */
1946 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
1947 		uu_die("Unable to bind a new repository handle: %s\n",
1948 		    scf_strerror(scf_error()));
1949 
1950 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
1951 	if (fd == -1)
1952 		uu_die("process bundle open failed");
1953 
1954 	/*
1955 	 * Make sure we get all events (including those generated by configd
1956 	 * before this thread was started).
1957 	 */
1958 	err = ct_event_reset(fd);
1959 	assert(err == 0);
1960 
1961 	for (;;) {
1962 		int efd, sfd;
1963 		ct_evthdl_t ev;
1964 		uint32_t type;
1965 		ctevid_t evid;
1966 		ct_stathdl_t status;
1967 		ctid_t ctid;
1968 		restarter_inst_t *inst;
1969 		uint64_t cookie;
1970 
1971 		if (err = ct_event_read_critical(fd, &ev)) {
1972 			log_error(LOG_WARNING,
1973 			    "Error reading next contract event: %s",
1974 			    strerror(err));
1975 			continue;
1976 		}
1977 
1978 		evid = ct_event_get_evid(ev);
1979 		ctid = ct_event_get_ctid(ev);
1980 		type = ct_event_get_type(ev);
1981 
1982 		/* Fetch cookie. */
1983 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
1984 		    < 0) {
1985 			ct_event_free(ev);
1986 			continue;
1987 		}
1988 
1989 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
1990 			log_framework(LOG_WARNING, "Could not get status for "
1991 			    "contract %ld: %s\n", ctid, strerror(err));
1992 
1993 			startd_close(sfd);
1994 			ct_event_free(ev);
1995 			continue;
1996 		}
1997 
1998 		cookie = ct_status_get_cookie(status);
1999 
2000 		ct_status_free(status);
2001 
2002 		startd_close(sfd);
2003 
2004 		/*
2005 		 * svc.configd(1M) restart handling performed by the
2006 		 * fork_configd_thread.  We don't acknowledge, as that thread
2007 		 * will do so.
2008 		 */
2009 		if (cookie == CONFIGD_COOKIE) {
2010 			ct_event_free(ev);
2011 			continue;
2012 		}
2013 
2014 		inst = contract_to_inst(ctid);
2015 		if (inst == NULL) {
2016 			/*
2017 			 * This can happen if we receive an EMPTY
2018 			 * event for an abandoned contract.
2019 			 */
2020 			log_framework(LOG_DEBUG,
2021 			    "Received event %d for unknown contract id "
2022 			    "%ld\n", type, ctid);
2023 		} else {
2024 			log_framework(LOG_DEBUG,
2025 			    "Received event %d for contract id "
2026 			    "%ld (%s)\n", type, ctid,
2027 			    inst->ri_i.i_fmri);
2028 
2029 			contract_action(local_handle, inst, ctid, type);
2030 
2031 			MUTEX_UNLOCK(&inst->ri_lock);
2032 		}
2033 
2034 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2035 		    O_WRONLY);
2036 		if (efd != -1) {
2037 			(void) ct_ctl_ack(efd, evid);
2038 			startd_close(efd);
2039 		}
2040 
2041 		ct_event_free(ev);
2042 
2043 	}
2044 
2045 	/*NOTREACHED*/
2046 	return (NULL);
2047 }
2048 
2049 /*
2050  * Timeout queue, processed by restarter_timeouts_event_thread().
2051  */
2052 timeout_queue_t *timeouts;
2053 static uu_list_pool_t *timeout_pool;
2054 
2055 typedef struct timeout_update {
2056 	pthread_mutex_t		tu_lock;
2057 	pthread_cond_t		tu_cv;
2058 	int			tu_wakeup;
2059 } timeout_update_t;
2060 
2061 timeout_update_t *tu;
2062 
/*
 * Instances for which is_timeout_ovr() reports a timeout override.  The
 * list is terminated by a NULL entry.
 */
static const char *timeout_ovr_svcs[] = {
	/* system */
	"svc:/system/manifest-import:default",
	/* network */
	"svc:/network/initial:default",
	"svc:/network/service:default",
	/* system */
	"svc:/system/rmtmpfiles:default",
	/* network */
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	/* system */
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL	/* terminator */
};
2077 
2078 int
2079 is_timeout_ovr(restarter_inst_t *inst)
2080 {
2081 	int i;
2082 
2083 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2084 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2085 			log_instance(inst, B_TRUE, "Timeout override by "
2086 			    "svc.startd.  Using infinite timeout");
2087 			return (1);
2088 		}
2089 	}
2090 
2091 	return (0);
2092 }
2093 
2094 /*ARGSUSED*/
2095 static int
2096 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2097 {
2098 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2099 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2100 
2101 	if (t1 > t2)
2102 		return (1);
2103 	else if (t1 < t2)
2104 		return (-1);
2105 	return (0);
2106 }
2107 
2108 void
2109 timeout_init()
2110 {
2111 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2112 
2113 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2114 
2115 	timeout_pool = startd_list_pool_create("timeouts",
2116 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2117 	    timeout_compare, UU_LIST_POOL_DEBUG);
2118 	assert(timeout_pool != NULL);
2119 
2120 	timeouts->tq_list = startd_list_create(timeout_pool,
2121 	    timeouts, UU_LIST_SORTED);
2122 	assert(timeouts->tq_list != NULL);
2123 
2124 	tu = startd_zalloc(sizeof (timeout_update_t));
2125 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2126 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2127 }
2128 
2129 void
2130 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2131 {
2132 	hrtime_t now, timeout;
2133 	timeout_entry_t *entry;
2134 	uu_list_index_t idx;
2135 
2136 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2137 
2138 	now = gethrtime();
2139 
2140 	/*
2141 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2142 	 * just return.
2143 	 */
2144 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2145 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2146 		    "treating as infinite.");
2147 		return;
2148 	}
2149 
2150 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2151 	timeout = now + (timeout_sec * 1000000000LL);
2152 
2153 	entry = startd_alloc(sizeof (timeout_entry_t));
2154 	entry->te_timeout = timeout;
2155 	entry->te_ctid = cid;
2156 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2157 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2158 	entry->te_fired = 0;
2159 	/* Insert the calculated timeout time onto the queue. */
2160 	MUTEX_LOCK(&timeouts->tq_lock);
2161 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2162 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2163 	uu_list_insert(timeouts->tq_list, entry, idx);
2164 	MUTEX_UNLOCK(&timeouts->tq_lock);
2165 
2166 	assert(inst->ri_timeout == NULL);
2167 	inst->ri_timeout = entry;
2168 
2169 	MUTEX_LOCK(&tu->tu_lock);
2170 	tu->tu_wakeup = 1;
2171 	(void) pthread_cond_broadcast(&tu->tu_cv);
2172 	MUTEX_UNLOCK(&tu->tu_lock);
2173 }
2174 
2175 
2176 void
2177 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2178 {
2179 	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2180 
2181 	if (inst->ri_timeout == NULL)
2182 		return;
2183 
2184 	assert(inst->ri_timeout->te_ctid == cid);
2185 
2186 	MUTEX_LOCK(&timeouts->tq_lock);
2187 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2188 	MUTEX_UNLOCK(&timeouts->tq_lock);
2189 
2190 	free(inst->ri_timeout->te_fmri);
2191 	free(inst->ri_timeout->te_logstem);
2192 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2193 	inst->ri_timeout = NULL;
2194 }
2195 
2196 static int
2197 timeout_now()
2198 {
2199 	timeout_entry_t *e;
2200 	hrtime_t now;
2201 	int ret;
2202 
2203 	now = gethrtime();
2204 
2205 	/*
2206 	 * Walk through the (sorted) timeouts list.  While the timeout
2207 	 * at the head of the list is <= the current time, kill the
2208 	 * method.
2209 	 */
2210 	MUTEX_LOCK(&timeouts->tq_lock);
2211 
2212 	for (e = uu_list_first(timeouts->tq_list);
2213 	    e != NULL && e->te_timeout <= now;
2214 	    e = uu_list_next(timeouts->tq_list, e)) {
2215 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2216 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2217 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2218 		    "Method or service exit timed out.  Killing contract %ld",
2219 		    e->te_ctid);
2220 		e->te_fired = 1;
2221 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2222 	}
2223 
2224 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2225 		ret = 0;
2226 	else
2227 		ret = -1;
2228 
2229 	MUTEX_UNLOCK(&timeouts->tq_lock);
2230 
2231 	return (ret);
2232 }
2233 
2234 /*
2235  * void *restarter_timeouts_event_thread(void *)
2236  *   Responsible for monitoring the method timeouts.  This thread must
2237  *   be started before any methods are called.
2238  */
2239 /*ARGSUSED*/
2240 static void *
2241 restarter_timeouts_event_thread(void *unused)
2242 {
2243 	/*
2244 	 * Timeouts are entered on a priority queue, which is processed by
2245 	 * this thread.  As timeouts are specified in seconds, we'll do
2246 	 * the necessary processing every second, as long as the queue
2247 	 * is not empty.
2248 	 */
2249 
2250 	/*CONSTCOND*/
2251 	while (1) {
2252 		/*
2253 		 * As long as the timeout list isn't empty, process it
2254 		 * every second.
2255 		 */
2256 		if (timeout_now() == 0) {
2257 			(void) sleep(1);
2258 			continue;
2259 		}
2260 
2261 		/* The list is empty, wait until we have more timeouts. */
2262 		MUTEX_LOCK(&tu->tu_lock);
2263 
2264 		while (tu->tu_wakeup == 0)
2265 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2266 
2267 		tu->tu_wakeup = 0;
2268 		MUTEX_UNLOCK(&tu->tu_lock);
2269 	}
2270 
2271 	return (NULL);
2272 }
2273 
2274 void
2275 restarter_start()
2276 {
2277 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2278 	(void) startd_thread_create(restarter_event_thread, NULL);
2279 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2280 	(void) startd_thread_create(wait_thread, NULL);
2281 }
2282 
2283 
2284 void
2285 restarter_init()
2286 {
2287 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2288 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2289 		ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2290 	(void) memset(&instance_list, 0, sizeof (instance_list));
2291 
2292 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2293 	instance_list.ril_instance_list = startd_list_create(
2294 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2295 
2296 	restarter_queue_pool = startd_list_pool_create(
2297 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2298 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2299 	    UU_LIST_POOL_DEBUG);
2300 
2301 	contract_list_pool = startd_list_pool_create(
2302 	    "contract_list", sizeof (contract_entry_t),
2303 	    offsetof(contract_entry_t,  ce_link), NULL,
2304 	    UU_LIST_POOL_DEBUG);
2305 	contract_hash_init();
2306 
2307 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2308 }
2309