xref: /illumos-gate/usr/src/cmd/svc/startd/restarter.c (revision d6beba26494f4877120c99b5931876f56ba5dee5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2019 Joyent, Inc.
25  */
26 
27 /*
28  * restarter.c - service manipulation
29  *
30  * This component manages services whose restarter is svc.startd, the standard
31  * restarter.  It translates restarter protocol events from the graph engine
32  * into actions on processes, as a delegated restarter would do.
33  *
34  * The master restarter manages a number of always-running threads:
35  *   - restarter event thread: events from the graph engine
36  *   - timeout thread: thread to fire queued timeouts
37  *   - contract thread: thread to handle contract events
38  *   - wait thread: thread to handle wait-based services
39  *
40  * The other threads are created as-needed:
41  *   - per-instance method threads
42  *   - per-instance event processing threads
43  *
44  * The interaction of all threads must result in the following conditions
45  * being satisfied (on a per-instance basis):
46  *   - restarter events must be processed in order
47  *   - method execution must be serialized
48  *   - instance delete must be held until outstanding methods are complete
49  *   - contract events shouldn't be processed while a method is running
50  *   - timeouts should fire even when a method is running
51  *
52  * Service instances are represented by restarter_inst_t's and are kept in the
53  * instance_list list.
54  *
55  * Service States
56  *   The current state of a service instance is kept in
57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
58  *   some time, then before we effect the transition we set
59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60  *   rotate i_next_state to i_state and set i_next_state to
61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
62  *   held.  The exception is when we launch methods, which are done with
63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
64  *   method_thread() does, we set ri_method_thread to the thread id of the
65  *   method thread, and when it is nonzero any thread with a different thread id
66  *   waits on ri_method_cv.
67  *
68  * Method execution is serialized by blocking on ri_method_cv in
69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
70  * also prevents the instance structure from being deleted until all
71  * outstanding operations such as method_thread() have finished.
72  *
73  * Lock ordering:
74  *
75  * dgraph_lock [can be held when taking:]
76  *   utmpx_lock
77  *   dictionary->dict_lock
78  *   st->st_load_lock
79  *   wait_info_lock
80  *   ru->restarter_update_lock
81  *     restarter_queue->rpeq_lock
82  *   instance_list.ril_lock
83  *     inst->ri_lock
84  *   st->st_configd_live_lock
85  *
86  * instance_list.ril_lock
87  *   graph_queue->gpeq_lock
88  *   gu->gu_lock
89  *   st->st_configd_live_lock
90  *   dictionary->dict_lock
91  *   inst->ri_lock
92  *     graph_queue->gpeq_lock
93  *     gu->gu_lock
94  *     tu->tu_lock
95  *     tq->tq_lock
96  *     inst->ri_queue_lock
97  *       wait_info_lock
98  *       bp->cb_lock
99  *     utmpx_lock
100  *
101  * single_user_thread_lock
102  *   wait_info_lock
103  *   utmpx_lock
104  *
105  * gu_freeze_lock
106  *
107  * logbuf_mutex nests inside pretty much everything.
108  */
109 
110 #include <sys/contract/process.h>
111 #include <sys/ctfs.h>
112 #include <sys/stat.h>
113 #include <sys/time.h>
114 #include <sys/types.h>
115 #include <sys/uio.h>
116 #include <sys/wait.h>
117 #include <assert.h>
118 #include <errno.h>
119 #include <fcntl.h>
120 #include <libcontract.h>
121 #include <libcontract_priv.h>
122 #include <libintl.h>
123 #include <librestart.h>
124 #include <librestart_priv.h>
125 #include <libuutil.h>
126 #include <limits.h>
127 #include <poll.h>
128 #include <port.h>
129 #include <pthread.h>
130 #include <stdarg.h>
131 #include <stdio.h>
132 #include <strings.h>
133 #include <unistd.h>
134 
135 #include "startd.h"
136 #include "protocol.h"
137 
/*
 * uu_list pool used to initialize the list node (ri_link) of each
 * restarter_inst_t placed on instance_list.
 */
static uu_list_pool_t *restarter_instance_pool;
/*
 * The restarter's set of instances.  ril_instance_list is accessed with
 * ril_lock held.
 */
static restarter_instance_list_t instance_list;

/* uu_list pool from which each instance's ri_queue is created. */
static uu_list_pool_t *restarter_queue_pool;

#define	WT_SVC_ERR_THROTTLE	1	/* 1 sec delay for erroring wait svc */
144 
145 /*
146  * Function used to reset the restart times for an instance, when
147  * an administrative task comes along and essentially makes the times
148  * in this array ineffective.
149  */
150 static void
151 reset_start_times(restarter_inst_t *inst)
152 {
153 	inst->ri_start_index = 0;
154 	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
155 }
156 
157 /*ARGSUSED*/
158 static int
159 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
160     void *private)
161 {
162 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
163 	int rc_id = *(int *)rc_arg;
164 
165 	if (lc_id > rc_id)
166 		return (1);
167 	if (lc_id < rc_id)
168 		return (-1);
169 	return (0);
170 }
171 
172 static restarter_inst_t *
173 inst_lookup_by_name(const char *name)
174 {
175 	int id;
176 
177 	id = dict_lookup_byname(name);
178 	if (id == -1)
179 		return (NULL);
180 
181 	return (inst_lookup_by_id(id));
182 }
183 
184 restarter_inst_t *
185 inst_lookup_by_id(int id)
186 {
187 	restarter_inst_t *inst;
188 
189 	MUTEX_LOCK(&instance_list.ril_lock);
190 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
191 	if (inst != NULL)
192 		MUTEX_LOCK(&inst->ri_lock);
193 	MUTEX_UNLOCK(&instance_list.ril_lock);
194 
195 	if (inst != NULL) {
196 		while (inst->ri_method_thread != 0 &&
197 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
198 			++inst->ri_method_waiters;
199 			(void) pthread_cond_wait(&inst->ri_method_cv,
200 			    &inst->ri_lock);
201 			assert(inst->ri_method_waiters > 0);
202 			--inst->ri_method_waiters;
203 		}
204 	}
205 
206 	return (inst);
207 }
208 
209 static restarter_inst_t *
210 inst_lookup_queue(const char *name)
211 {
212 	int id;
213 	restarter_inst_t *inst;
214 
215 	id = dict_lookup_byname(name);
216 	if (id == -1)
217 		return (NULL);
218 
219 	MUTEX_LOCK(&instance_list.ril_lock);
220 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
221 	if (inst != NULL)
222 		MUTEX_LOCK(&inst->ri_queue_lock);
223 	MUTEX_UNLOCK(&instance_list.ril_lock);
224 
225 	return (inst);
226 }
227 
228 const char *
229 service_style(int flags)
230 {
231 	switch (flags & RINST_STYLE_MASK) {
232 	case RINST_CONTRACT:	return ("contract");
233 	case RINST_TRANSIENT:	return ("transient");
234 	case RINST_WAIT:	return ("wait");
235 
236 	default:
237 #ifndef NDEBUG
238 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
239 #endif
240 		abort();
241 		/* NOTREACHED */
242 	}
243 }
244 
245 /*
246  * Fails with ECONNABORTED or ECANCELED.
247  */
248 static int
249 check_contract(restarter_inst_t *inst, boolean_t primary,
250     scf_instance_t *scf_inst)
251 {
252 	ctid_t *ctidp;
253 	int fd, r;
254 
255 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
256 	    &inst->ri_i.i_transient_ctid;
257 
258 	assert(*ctidp >= 1);
259 
260 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
261 	if (fd >= 0) {
262 		r = close(fd);
263 		assert(r == 0);
264 		return (0);
265 	}
266 
267 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
268 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
269 	switch (r) {
270 	case 0:
271 	case ECONNABORTED:
272 	case ECANCELED:
273 		*ctidp = 0;
274 		return (r);
275 
276 	case ENOMEM:
277 		uu_die("Out of memory\n");
278 		/* NOTREACHED */
279 
280 	case EPERM:
281 		uu_die("Insufficient privilege.\n");
282 		/* NOTREACHED */
283 
284 	case EACCES:
285 		uu_die("Repository backend access denied.\n");
286 		/* NOTREACHED */
287 
288 	case EROFS:
289 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
290 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
291 		return (0);
292 
293 	case EINVAL:
294 	case EBADF:
295 	default:
296 		assert(0);
297 		abort();
298 		/* NOTREACHED */
299 	}
300 }
301 
302 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
303 
304 /*
305  * int restarter_insert_inst(scf_handle_t *, char *)
306  *   If the inst is already in the restarter list, return its id.  If the inst
307  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
308  *   states, insert it into the list, and return 0.
309  *
310  *   Fails with
311  *     ENOENT - name is not in the repository
312  */
313 static int
314 restarter_insert_inst(scf_handle_t *h, const char *name)
315 {
316 	int id, r;
317 	restarter_inst_t *inst;
318 	uu_list_index_t idx;
319 	scf_service_t *scf_svc;
320 	scf_instance_t *scf_inst;
321 	scf_snapshot_t *snap = NULL;
322 	scf_propertygroup_t *pg;
323 	char *svc_name, *inst_name;
324 	char logfilebuf[PATH_MAX];
325 	char *c;
326 	boolean_t do_commit_states;
327 	restarter_instance_state_t state, next_state;
328 	protocol_states_t *ps;
329 	pid_t start_pid;
330 	restarter_str_t reason = restarter_str_insert_in_graph;
331 
332 	MUTEX_LOCK(&instance_list.ril_lock);
333 
334 	/*
335 	 * We don't use inst_lookup_by_name() here because we want the lookup
336 	 * & insert to be atomic.
337 	 */
338 	id = dict_lookup_byname(name);
339 	if (id != -1) {
340 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
341 		    &idx);
342 		if (inst != NULL) {
343 			MUTEX_UNLOCK(&instance_list.ril_lock);
344 			return (0);
345 		}
346 	}
347 
348 	/* Allocate an instance */
349 	inst = startd_zalloc(sizeof (restarter_inst_t));
350 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
351 	inst->ri_utmpx_prefix[0] = '\0';
352 
353 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
354 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
355 
356 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
357 
358 	/*
359 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
360 	 * just in case.
361 	 */
362 	inst->ri_id = (id != -1 ? id : dict_insert(name));
363 
364 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
365 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
366 
367 	scf_svc = safe_scf_service_create(h);
368 	scf_inst = safe_scf_instance_create(h);
369 	pg = safe_scf_pg_create(h);
370 	svc_name = startd_alloc(max_scf_name_size);
371 	inst_name = startd_alloc(max_scf_name_size);
372 
373 rep_retry:
374 	if (snap != NULL)
375 		scf_snapshot_destroy(snap);
376 	if (inst->ri_logstem != NULL)
377 		startd_free(inst->ri_logstem, PATH_MAX);
378 	if (inst->ri_common_name != NULL)
379 		free(inst->ri_common_name);
380 	if (inst->ri_C_common_name != NULL)
381 		free(inst->ri_C_common_name);
382 	snap = NULL;
383 	inst->ri_logstem = NULL;
384 	inst->ri_common_name = NULL;
385 	inst->ri_C_common_name = NULL;
386 
387 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
388 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
389 		switch (scf_error()) {
390 		case SCF_ERROR_CONNECTION_BROKEN:
391 			libscf_handle_rebind(h);
392 			goto rep_retry;
393 
394 		case SCF_ERROR_NOT_FOUND:
395 			goto deleted;
396 		}
397 
398 		uu_die("Can't decode FMRI %s: %s\n", name,
399 		    scf_strerror(scf_error()));
400 	}
401 
402 	/*
403 	 * If there's no running snapshot, then we execute using the editing
404 	 * snapshot.  Pending snapshots will be taken later.
405 	 */
406 	snap = libscf_get_running_snapshot(scf_inst);
407 
408 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
409 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
410 	    0)) {
411 		switch (scf_error()) {
412 		case SCF_ERROR_NOT_SET:
413 			break;
414 
415 		case SCF_ERROR_CONNECTION_BROKEN:
416 			libscf_handle_rebind(h);
417 			goto rep_retry;
418 
419 		default:
420 			assert(0);
421 			abort();
422 		}
423 
424 		goto deleted;
425 	}
426 
427 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
428 	for (c = logfilebuf; *c != '\0'; c++)
429 		if (*c == '/')
430 			*c = '-';
431 
432 	inst->ri_logstem = startd_alloc(PATH_MAX);
433 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
434 	    LOG_SUFFIX);
435 
436 	/*
437 	 * If the restarter group is missing, use uninit/none.  Otherwise,
438 	 * we're probably being restarted & don't want to mess up the states
439 	 * that are there.
440 	 */
441 	state = RESTARTER_STATE_UNINIT;
442 	next_state = RESTARTER_STATE_NONE;
443 
444 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
445 	if (r != 0) {
446 		switch (scf_error()) {
447 		case SCF_ERROR_CONNECTION_BROKEN:
448 			libscf_handle_rebind(h);
449 			goto rep_retry;
450 
451 		case SCF_ERROR_NOT_SET:
452 			goto deleted;
453 
454 		case SCF_ERROR_NOT_FOUND:
455 			/*
456 			 * This shouldn't happen since the graph engine should
457 			 * have initialized the state to uninitialized/none if
458 			 * there was no restarter pg.  In case somebody
459 			 * deleted it, though....
460 			 */
461 			do_commit_states = B_TRUE;
462 			break;
463 
464 		default:
465 			assert(0);
466 			abort();
467 		}
468 	} else {
469 		r = libscf_read_states(pg, &state, &next_state);
470 		if (r != 0) {
471 			do_commit_states = B_TRUE;
472 		} else {
473 			if (next_state != RESTARTER_STATE_NONE) {
474 				/*
475 				 * Force next_state to _NONE since we
476 				 * don't look for method processes.
477 				 */
478 				next_state = RESTARTER_STATE_NONE;
479 				do_commit_states = B_TRUE;
480 			} else {
481 				/*
482 				 * The reason for transition will depend on
483 				 * state.
484 				 */
485 				if (st->st_initial == 0)
486 					reason = restarter_str_startd_restart;
487 				else if (state == RESTARTER_STATE_MAINT)
488 					reason = restarter_str_bad_repo_state;
489 				/*
490 				 * Inform the restarter of our state without
491 				 * changing the STIME in the repository.
492 				 */
493 				ps = startd_alloc(sizeof (*ps));
494 				inst->ri_i.i_state = ps->ps_state = state;
495 				inst->ri_i.i_next_state = ps->ps_state_next =
496 				    next_state;
497 				ps->ps_reason = reason;
498 
499 				graph_protocol_send_event(inst->ri_i.i_fmri,
500 				    GRAPH_UPDATE_STATE_CHANGE, ps);
501 
502 				do_commit_states = B_FALSE;
503 			}
504 		}
505 	}
506 
507 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
508 	    &inst->ri_utmpx_prefix)) {
509 	case 0:
510 		break;
511 
512 	case ECONNABORTED:
513 		libscf_handle_rebind(h);
514 		goto rep_retry;
515 
516 	case ECANCELED:
517 		goto deleted;
518 
519 	case ENOENT:
520 		/*
521 		 * This is odd, because the graph engine should have required
522 		 * the general property group.  So we'll just use default
523 		 * flags in anticipation of the graph engine sending us
524 		 * REMOVE_INSTANCE when it finds out that the general property
525 		 * group has been deleted.
526 		 */
527 		inst->ri_flags = RINST_CONTRACT;
528 		break;
529 
530 	default:
531 		assert(0);
532 		abort();
533 	}
534 
535 	r = libscf_get_template_values(scf_inst, snap,
536 	    &inst->ri_common_name, &inst->ri_C_common_name);
537 
538 	/*
539 	 * Copy our names to smaller buffers to reduce our memory footprint.
540 	 */
541 	if (inst->ri_common_name != NULL) {
542 		char *tmp = safe_strdup(inst->ri_common_name);
543 		startd_free(inst->ri_common_name, max_scf_value_size);
544 		inst->ri_common_name = tmp;
545 	}
546 
547 	if (inst->ri_C_common_name != NULL) {
548 		char *tmp = safe_strdup(inst->ri_C_common_name);
549 		startd_free(inst->ri_C_common_name, max_scf_value_size);
550 		inst->ri_C_common_name = tmp;
551 	}
552 
553 	switch (r) {
554 	case 0:
555 		break;
556 
557 	case ECONNABORTED:
558 		libscf_handle_rebind(h);
559 		goto rep_retry;
560 
561 	case ECANCELED:
562 		goto deleted;
563 
564 	case ECHILD:
565 	case ENOENT:
566 		break;
567 
568 	default:
569 		assert(0);
570 		abort();
571 	}
572 
573 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
574 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
575 	    &start_pid)) {
576 	case 0:
577 		break;
578 
579 	case ECONNABORTED:
580 		libscf_handle_rebind(h);
581 		goto rep_retry;
582 
583 	case ECANCELED:
584 		goto deleted;
585 
586 	default:
587 		assert(0);
588 		abort();
589 	}
590 
591 	if (inst->ri_i.i_primary_ctid >= 1) {
592 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
593 
594 		switch (check_contract(inst, B_TRUE, scf_inst)) {
595 		case 0:
596 			break;
597 
598 		case ECONNABORTED:
599 			libscf_handle_rebind(h);
600 			goto rep_retry;
601 
602 		case ECANCELED:
603 			goto deleted;
604 
605 		default:
606 			assert(0);
607 			abort();
608 		}
609 	}
610 
611 	if (inst->ri_i.i_transient_ctid >= 1) {
612 		switch (check_contract(inst, B_FALSE, scf_inst)) {
613 		case 0:
614 			break;
615 
616 		case ECONNABORTED:
617 			libscf_handle_rebind(h);
618 			goto rep_retry;
619 
620 		case ECANCELED:
621 			goto deleted;
622 
623 		default:
624 			assert(0);
625 			abort();
626 		}
627 	}
628 
629 	/* No more failures we live through, so add it to the list. */
630 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
631 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
632 	MUTEX_LOCK(&inst->ri_lock);
633 	MUTEX_LOCK(&inst->ri_queue_lock);
634 
635 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
636 
637 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
638 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
639 	MUTEX_UNLOCK(&instance_list.ril_lock);
640 
641 	if (start_pid != -1 &&
642 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
643 		int ret;
644 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
645 		if (ret == -1) {
646 			/*
647 			 * Implication:  if we can't reregister the
648 			 * instance, we will start another one.  Two
649 			 * instances may or may not result in a resource
650 			 * conflict.
651 			 */
652 			log_error(LOG_WARNING,
653 			    "%s: couldn't reregister %ld for wait\n",
654 			    inst->ri_i.i_fmri, start_pid);
655 		} else if (ret == 1) {
656 			/*
657 			 * Leading PID has exited.
658 			 */
659 			(void) stop_instance(h, inst, RSTOP_EXIT);
660 		}
661 	}
662 
663 
664 	scf_pg_destroy(pg);
665 
666 	if (do_commit_states)
667 		(void) restarter_instance_update_states(h, inst, state,
668 		    next_state, RERR_NONE, reason);
669 
670 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
671 	    service_style(inst->ri_flags));
672 
673 	MUTEX_UNLOCK(&inst->ri_queue_lock);
674 	MUTEX_UNLOCK(&inst->ri_lock);
675 
676 	startd_free(svc_name, max_scf_name_size);
677 	startd_free(inst_name, max_scf_name_size);
678 	scf_snapshot_destroy(snap);
679 	scf_instance_destroy(scf_inst);
680 	scf_service_destroy(scf_svc);
681 
682 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
683 	    name);
684 
685 	return (0);
686 
687 deleted:
688 	MUTEX_UNLOCK(&instance_list.ril_lock);
689 	startd_free(inst_name, max_scf_name_size);
690 	startd_free(svc_name, max_scf_name_size);
691 	if (snap != NULL)
692 		scf_snapshot_destroy(snap);
693 	scf_pg_destroy(pg);
694 	scf_instance_destroy(scf_inst);
695 	scf_service_destroy(scf_svc);
696 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
697 	uu_list_destroy(inst->ri_queue);
698 	if (inst->ri_logstem != NULL)
699 		startd_free(inst->ri_logstem, PATH_MAX);
700 	if (inst->ri_common_name != NULL)
701 		free(inst->ri_common_name);
702 	if (inst->ri_C_common_name != NULL)
703 		free(inst->ri_C_common_name);
704 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
705 	startd_free(inst, sizeof (restarter_inst_t));
706 	return (ENOENT);
707 }
708 
709 static void
710 restarter_delete_inst(restarter_inst_t *ri)
711 {
712 	int id;
713 	restarter_inst_t *rip;
714 	void *cookie = NULL;
715 	restarter_instance_qentry_t *e;
716 
717 	assert(MUTEX_HELD(&ri->ri_lock));
718 
719 	/*
720 	 * Must drop the instance lock so we can pick up the instance_list
721 	 * lock & remove the instance.
722 	 */
723 	id = ri->ri_id;
724 	MUTEX_UNLOCK(&ri->ri_lock);
725 
726 	MUTEX_LOCK(&instance_list.ril_lock);
727 
728 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
729 	if (rip == NULL) {
730 		MUTEX_UNLOCK(&instance_list.ril_lock);
731 		return;
732 	}
733 
734 	assert(ri == rip);
735 
736 	uu_list_remove(instance_list.ril_instance_list, ri);
737 
738 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
739 	    ri->ri_i.i_fmri);
740 
741 	MUTEX_UNLOCK(&instance_list.ril_lock);
742 
743 	/*
744 	 * We can lock the instance without holding the instance_list lock
745 	 * since we removed the instance from the list.
746 	 */
747 	MUTEX_LOCK(&ri->ri_lock);
748 	MUTEX_LOCK(&ri->ri_queue_lock);
749 
750 	if (ri->ri_i.i_primary_ctid >= 1)
751 		contract_hash_remove(ri->ri_i.i_primary_ctid);
752 
753 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
754 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
755 
756 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
757 		startd_free(e, sizeof (*e));
758 	uu_list_destroy(ri->ri_queue);
759 
760 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
761 	startd_free(ri->ri_logstem, PATH_MAX);
762 	if (ri->ri_common_name != NULL)
763 		free(ri->ri_common_name);
764 	if (ri->ri_C_common_name != NULL)
765 		free(ri->ri_C_common_name);
766 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
767 	(void) pthread_mutex_destroy(&ri->ri_lock);
768 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
769 	startd_free(ri, sizeof (restarter_inst_t));
770 }
771 
772 /*
773  * instance_is_wait_style()
774  *
775  *   Returns 1 if the given instance is a "wait-style" service instance.
776  */
777 int
778 instance_is_wait_style(restarter_inst_t *inst)
779 {
780 	assert(MUTEX_HELD(&inst->ri_lock));
781 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
782 }
783 
784 /*
785  * instance_is_transient_style()
786  *
787  *   Returns 1 if the given instance is a transient service instance.
788  */
789 int
790 instance_is_transient_style(restarter_inst_t *inst)
791 {
792 	assert(MUTEX_HELD(&inst->ri_lock));
793 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
794 }
795 
796 /*
797  * instance_in_transition()
798  * Returns 1 if instance is in transition, 0 if not
799  */
800 int
801 instance_in_transition(restarter_inst_t *inst)
802 {
803 	assert(MUTEX_HELD(&inst->ri_lock));
804 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
805 		return (0);
806 	return (1);
807 }
808 
809 /*
810  * returns 1 if instance is already started, 0 if not
811  */
812 static int
813 instance_started(restarter_inst_t *inst)
814 {
815 	int ret;
816 
817 	assert(MUTEX_HELD(&inst->ri_lock));
818 
819 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
820 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
821 		ret = 1;
822 	else
823 		ret = 0;
824 
825 	return (ret);
826 }
827 
828 /*
829  * Returns
830  *   0 - success
831  *   ECONNRESET - success, but h was rebound
832  */
833 int
834 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
835     restarter_instance_state_t new_state,
836     restarter_instance_state_t new_state_next, restarter_error_t err,
837     restarter_str_t reason)
838 {
839 	protocol_states_t *states;
840 	int e;
841 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
842 	boolean_t rebound = B_FALSE;
843 	int prev_state_online;
844 	int state_online;
845 
846 	assert(MUTEX_HELD(&ri->ri_lock));
847 
848 	prev_state_online = instance_started(ri);
849 
850 retry:
851 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
852 	    restarter_get_str_short(reason));
853 	switch (e) {
854 	case 0:
855 		break;
856 
857 	case ENOMEM:
858 		++retry_count;
859 		if (retry_count < ALLOC_RETRY) {
860 			(void) poll(NULL, 0, msecs);
861 			msecs *= ALLOC_DELAY_MULT;
862 			goto retry;
863 		}
864 
865 		/* Like startd_alloc(). */
866 		uu_die("Insufficient memory.\n");
867 		/* NOTREACHED */
868 
869 	case ECONNABORTED:
870 		libscf_handle_rebind(h);
871 		rebound = B_TRUE;
872 		goto retry;
873 
874 	case EPERM:
875 	case EACCES:
876 	case EROFS:
877 		log_error(LOG_NOTICE, "Could not commit state change for %s "
878 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
879 		/* FALLTHROUGH */
880 
881 	case ENOENT:
882 		ri->ri_i.i_state = new_state;
883 		ri->ri_i.i_next_state = new_state_next;
884 		break;
885 
886 	case EINVAL:
887 	default:
888 		bad_error("_restarter_commit_states", e);
889 	}
890 
891 	states = startd_alloc(sizeof (protocol_states_t));
892 	states->ps_state = new_state;
893 	states->ps_state_next = new_state_next;
894 	states->ps_err = err;
895 	states->ps_reason = reason;
896 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
897 	    (void *)states);
898 
899 	state_online = instance_started(ri);
900 
901 	if (prev_state_online && !state_online)
902 		ri->ri_post_offline_hook();
903 	else if (!prev_state_online && state_online)
904 		ri->ri_post_online_hook();
905 
906 	return (rebound ? ECONNRESET : 0);
907 }
908 
909 void
910 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
911 {
912 	restarter_inst_t *inst;
913 
914 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
915 
916 	inst = inst_lookup_by_name(fmri);
917 	if (inst == NULL)
918 		return;
919 
920 	inst->ri_flags |= flag;
921 
922 	MUTEX_UNLOCK(&inst->ri_lock);
923 }
924 
925 static void
926 restarter_take_pending_snapshots(scf_handle_t *h)
927 {
928 	restarter_inst_t *inst;
929 	int r;
930 
931 	MUTEX_LOCK(&instance_list.ril_lock);
932 
933 	for (inst = uu_list_first(instance_list.ril_instance_list);
934 	    inst != NULL;
935 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
936 		const char *fmri;
937 		scf_instance_t *sinst = NULL;
938 
939 		MUTEX_LOCK(&inst->ri_lock);
940 
941 		/*
942 		 * This is where we'd check inst->ri_method_thread and if it
943 		 * were nonzero we'd wait in anticipation of another thread
944 		 * executing a method for inst.  Doing so with the instance_list
945 		 * locked, though, leads to deadlock.  Since taking a snapshot
946 		 * during that window won't hurt anything, we'll just continue.
947 		 */
948 
949 		fmri = inst->ri_i.i_fmri;
950 
951 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
952 			scf_snapshot_t *rsnap;
953 
954 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
955 
956 			rsnap = libscf_get_or_make_running_snapshot(sinst,
957 			    fmri, B_FALSE);
958 
959 			scf_instance_destroy(sinst);
960 
961 			if (rsnap != NULL)
962 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
963 
964 			scf_snapshot_destroy(rsnap);
965 		}
966 
967 		if (inst->ri_flags & RINST_RETAKE_START) {
968 			switch (r = libscf_snapshots_poststart(h, fmri,
969 			    B_FALSE)) {
970 			case 0:
971 			case ENOENT:
972 				inst->ri_flags &= ~RINST_RETAKE_START;
973 				break;
974 
975 			case ECONNABORTED:
976 				break;
977 
978 			case EACCES:
979 			default:
980 				bad_error("libscf_snapshots_poststart", r);
981 			}
982 		}
983 
984 		MUTEX_UNLOCK(&inst->ri_lock);
985 	}
986 
987 	MUTEX_UNLOCK(&instance_list.ril_lock);
988 }
989 
990 /* ARGSUSED */
991 void *
992 restarter_post_fsminimal_thread(void *unused)
993 {
994 	scf_handle_t *h;
995 	int r;
996 
997 	(void) pthread_setname_np(pthread_self(), "restarter_post_fsmin");
998 
999 	h = libscf_handle_create_bound_loop();
1000 
1001 	for (;;) {
1002 		r = libscf_create_self(h);
1003 		if (r == 0)
1004 			break;
1005 
1006 		assert(r == ECONNABORTED);
1007 		libscf_handle_rebind(h);
1008 	}
1009 
1010 	restarter_take_pending_snapshots(h);
1011 
1012 	(void) scf_handle_unbind(h);
1013 	scf_handle_destroy(h);
1014 
1015 	return (NULL);
1016 }
1017 
1018 /*
1019  * int stop_instance()
1020  *
1021  *   Stop the instance identified by the instance given as the second argument,
1022  *   for the cause stated.
1023  *
1024  *   Returns
1025  *     0 - success
1026  *     -1 - inst is in transition
1027  */
1028 static int
1029 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1030     stop_cause_t cause)
1031 {
1032 	fork_info_t *info;
1033 	const char *cp;
1034 	int err;
1035 	restarter_error_t re;
1036 	restarter_str_t	reason;
1037 	restarter_instance_state_t new_state;
1038 
1039 	assert(MUTEX_HELD(&inst->ri_lock));
1040 	assert(inst->ri_method_thread == 0);
1041 
1042 	switch (cause) {
1043 	case RSTOP_EXIT:
1044 		re = RERR_RESTART;
1045 		reason = restarter_str_ct_ev_exit;
1046 		cp = "all processes in service exited";
1047 		break;
1048 	case RSTOP_ERR_CFG:
1049 		re = RERR_FAULT;
1050 		reason = restarter_str_method_failed;
1051 		cp = "service exited with a configuration error";
1052 		break;
1053 	case RSTOP_ERR_EXIT:
1054 		re = RERR_RESTART;
1055 		reason = restarter_str_ct_ev_exit;
1056 		cp = "service exited with an error";
1057 		break;
1058 	case RSTOP_CORE:
1059 		re = RERR_FAULT;
1060 		reason = restarter_str_ct_ev_core;
1061 		cp = "process dumped core";
1062 		break;
1063 	case RSTOP_SIGNAL:
1064 		re = RERR_FAULT;
1065 		reason = restarter_str_ct_ev_signal;
1066 		cp = "process received fatal signal from outside the service";
1067 		break;
1068 	case RSTOP_HWERR:
1069 		re = RERR_FAULT;
1070 		reason = restarter_str_ct_ev_hwerr;
1071 		cp = "process killed due to uncorrectable hardware error";
1072 		break;
1073 	case RSTOP_DEPENDENCY:
1074 		re = RERR_RESTART;
1075 		reason = restarter_str_dependency_activity;
1076 		cp = "dependency activity requires stop";
1077 		break;
1078 	case RSTOP_DISABLE:
1079 		re = RERR_RESTART;
1080 		reason = restarter_str_disable_request;
1081 		cp = "service disabled";
1082 		break;
1083 	case RSTOP_RESTART:
1084 		re = RERR_RESTART;
1085 		reason = restarter_str_restart_request;
1086 		cp = "service restarting";
1087 		break;
1088 	default:
1089 #ifndef NDEBUG
1090 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1091 		    cause, __FILE__, __LINE__);
1092 #endif
1093 		abort();
1094 	}
1095 
1096 	/* Services in the disabled and maintenance state are ignored */
1097 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1098 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1099 		log_framework(LOG_DEBUG,
1100 		    "%s: stop_instance -> is maint/disabled\n",
1101 		    inst->ri_i.i_fmri);
1102 		return (0);
1103 	}
1104 
1105 	/* Already stopped instances are left alone */
1106 	if (instance_started(inst) == 0) {
1107 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1108 		    inst->ri_i.i_fmri);
1109 		return (0);
1110 	}
1111 
1112 	if (instance_in_transition(inst)) {
1113 		/* requeue event by returning -1 */
1114 		log_framework(LOG_DEBUG,
1115 		    "Restarter: Not stopping %s, in transition.\n",
1116 		    inst->ri_i.i_fmri);
1117 		return (-1);
1118 	}
1119 
1120 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1121 
1122 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1123 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1124 
1125 	if (instance_is_wait_style(inst) &&
1126 	    (cause == RSTOP_EXIT ||
1127 	    cause == RSTOP_ERR_CFG ||
1128 	    cause == RSTOP_ERR_EXIT)) {
1129 		/*
1130 		 * No need to stop instance, as child has exited; remove
1131 		 * contract and move the instance to the offline state.
1132 		 */
1133 		switch (err = restarter_instance_update_states(local_handle,
1134 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1135 		    reason)) {
1136 		case 0:
1137 		case ECONNRESET:
1138 			break;
1139 
1140 		default:
1141 			bad_error("restarter_instance_update_states", err);
1142 		}
1143 
1144 		if (cause == RSTOP_ERR_EXIT) {
1145 			/*
1146 			 * The RSTOP_ERR_EXIT cause is set via the
1147 			 * wait_thread -> wait_remove code path when we have
1148 			 * a "wait" style svc that exited with an error. If
1149 			 * the svc is failing too quickly, we throttle it so
1150 			 * that we don't restart it more than once/second.
1151 			 * Since we know we're running in the wait thread its
1152 			 * ok to throttle it right here.
1153 			 */
1154 			(void) update_fault_count(inst, FAULT_COUNT_INCR);
1155 			if (method_rate_critical(inst)) {
1156 				log_instance(inst, B_TRUE, "Failing too "
1157 				    "quickly, throttling.");
1158 				(void) sleep(WT_SVC_ERR_THROTTLE);
1159 			}
1160 		} else {
1161 			(void) update_fault_count(inst, FAULT_COUNT_RESET);
1162 			reset_start_times(inst);
1163 		}
1164 
1165 		if (inst->ri_i.i_primary_ctid != 0) {
1166 			inst->ri_m_inst =
1167 			    safe_scf_instance_create(local_handle);
1168 			inst->ri_mi_deleted = B_FALSE;
1169 
1170 			libscf_reget_instance(inst);
1171 			method_remove_contract(inst, B_TRUE, B_TRUE);
1172 
1173 			scf_instance_destroy(inst->ri_m_inst);
1174 			inst->ri_m_inst = NULL;
1175 		}
1176 
1177 		switch (err = restarter_instance_update_states(local_handle,
1178 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1179 		    reason)) {
1180 		case 0:
1181 		case ECONNRESET:
1182 			break;
1183 
1184 		default:
1185 			bad_error("restarter_instance_update_states", err);
1186 		}
1187 
1188 		if (cause != RSTOP_ERR_CFG)
1189 			return (0);
1190 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1191 		/*
1192 		 * Stopping a wait service through means other than the pid
1193 		 * exiting should keep wait_thread() from restarting the
1194 		 * service, by removing it from the wait list.
1195 		 * We cannot remove it right now otherwise the process will
1196 		 * end up <defunct> so mark it to be ignored.
1197 		 */
1198 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1199 	}
1200 
1201 	/*
1202 	 * There are some configuration errors which we cannot detect until we
1203 	 * try to run the method.  For example, see exec_method() where the
1204 	 * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1205 	 * in several cases. If this happens for a "wait-style" svc,
1206 	 * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1207 	 * the configuration error and go into maintenance, even though it is
1208 	 * a "wait-style" svc.
1209 	 */
1210 	if (cause == RSTOP_ERR_CFG)
1211 		new_state = RESTARTER_STATE_MAINT;
1212 	else
1213 		new_state = inst->ri_i.i_enabled ?
1214 		    RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1215 
1216 	switch (err = restarter_instance_update_states(local_handle, inst,
1217 	    inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1218 	case 0:
1219 	case ECONNRESET:
1220 		break;
1221 
1222 	default:
1223 		bad_error("restarter_instance_update_states", err);
1224 	}
1225 
1226 	info = startd_zalloc(sizeof (fork_info_t));
1227 
1228 	info->sf_id = inst->ri_id;
1229 	info->sf_method_type = METHOD_STOP;
1230 	info->sf_event_type = re;
1231 	info->sf_reason = reason;
1232 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1233 
1234 	return (0);
1235 }
1236 
1237 /*
1238  * Returns
1239  *   ENOENT - fmri is not in instance_list
1240  *   0 - success
1241  *   ECONNRESET - success, though handle was rebound
1242  *   -1 - instance is in transition
1243  */
1244 int
1245 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1246 {
1247 	restarter_inst_t *rip;
1248 	int r;
1249 
1250 	rip = inst_lookup_by_name(fmri);
1251 	if (rip == NULL)
1252 		return (ENOENT);
1253 
1254 	r = stop_instance(h, rip, flags);
1255 
1256 	MUTEX_UNLOCK(&rip->ri_lock);
1257 
1258 	return (r);
1259 }
1260 
1261 static void
1262 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1263     unmaint_cause_t cause)
1264 {
1265 	ctid_t ctid;
1266 	scf_instance_t *inst;
1267 	int r;
1268 	uint_t tries = 0, msecs = ALLOC_DELAY;
1269 	const char *cp;
1270 	restarter_str_t	reason;
1271 
1272 	assert(MUTEX_HELD(&rip->ri_lock));
1273 
1274 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1275 		log_error(LOG_DEBUG, "Restarter: "
1276 		    "Ignoring maintenance off command because %s is not in the "
1277 		    "maintenance state.\n", rip->ri_i.i_fmri);
1278 		return;
1279 	}
1280 
1281 	switch (cause) {
1282 	case RUNMAINT_CLEAR:
1283 		cp = "clear requested";
1284 		reason = restarter_str_clear_request;
1285 		break;
1286 	case RUNMAINT_DISABLE:
1287 		cp = "disable requested";
1288 		reason = restarter_str_disable_request;
1289 		break;
1290 	default:
1291 #ifndef NDEBUG
1292 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1293 		    cause, __FILE__, __LINE__);
1294 #endif
1295 		abort();
1296 	}
1297 
1298 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1299 	    cp);
1300 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1301 	    "%s.\n", rip->ri_i.i_fmri, cp);
1302 
1303 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1304 	    RESTARTER_STATE_NONE, RERR_RESTART, reason);
1305 
1306 	/*
1307 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1308 	 * a primary contract.
1309 	 */
1310 	if (rip->ri_i.i_primary_ctid == 0)
1311 		return;
1312 
1313 	ctid = rip->ri_i.i_primary_ctid;
1314 	contract_abandon(ctid);
1315 	rip->ri_i.i_primary_ctid = 0;
1316 
1317 rep_retry:
1318 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1319 	case 0:
1320 		break;
1321 
1322 	case ECONNABORTED:
1323 		libscf_handle_rebind(h);
1324 		goto rep_retry;
1325 
1326 	case ENOENT:
1327 		/* Must have been deleted. */
1328 		return;
1329 
1330 	case EINVAL:
1331 	case ENOTSUP:
1332 	default:
1333 		bad_error("libscf_handle_rebind", r);
1334 	}
1335 
1336 again:
1337 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1338 	switch (r) {
1339 	case 0:
1340 		break;
1341 
1342 	case ENOMEM:
1343 		++tries;
1344 		if (tries < ALLOC_RETRY) {
1345 			(void) poll(NULL, 0, msecs);
1346 			msecs *= ALLOC_DELAY_MULT;
1347 			goto again;
1348 		}
1349 
1350 		uu_die("Insufficient memory.\n");
1351 		/* NOTREACHED */
1352 
1353 	case ECONNABORTED:
1354 		scf_instance_destroy(inst);
1355 		libscf_handle_rebind(h);
1356 		goto rep_retry;
1357 
1358 	case ECANCELED:
1359 		break;
1360 
1361 	case EPERM:
1362 	case EACCES:
1363 	case EROFS:
1364 		log_error(LOG_INFO,
1365 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1366 		    rip->ri_i.i_fmri, strerror(r));
1367 		break;
1368 
1369 	case EINVAL:
1370 	case EBADF:
1371 	default:
1372 		bad_error("restarter_remove_contract", r);
1373 	}
1374 
1375 	scf_instance_destroy(inst);
1376 }
1377 
1378 /*
1379  * enable_inst()
1380  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1381  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1382  *   disabled, move it to offline.  If the event is _DISABLE or
1383  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1384  *
1385  *   Returns
1386  *     0 - success
1387  *     ECONNRESET - h was rebound
1388  */
1389 static int
1390 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1391     restarter_instance_qentry_t *riq)
1392 {
1393 	restarter_instance_state_t state;
1394 	restarter_event_type_t e = riq->riq_type;
1395 	restarter_str_t reason = restarter_str_per_configuration;
1396 	int r;
1397 
1398 	assert(MUTEX_HELD(&inst->ri_lock));
1399 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1400 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1401 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1402 	assert(instance_in_transition(inst) == 0);
1403 
1404 	state = inst->ri_i.i_state;
1405 
1406 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1407 		inst->ri_i.i_enabled = 1;
1408 
1409 		if (state == RESTARTER_STATE_UNINIT ||
1410 		    state == RESTARTER_STATE_DISABLED) {
1411 			/*
1412 			 * B_FALSE: Don't log an error if the log_instance()
1413 			 * fails because it will fail on the miniroot before
1414 			 * install-discovery runs.
1415 			 */
1416 			log_instance(inst, B_FALSE, "Enabled.");
1417 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1418 			    inst->ri_i.i_fmri);
1419 
1420 			/*
1421 			 * If we are coming from DISABLED, it was obviously an
1422 			 * enable request. If we are coming from UNINIT, it may
1423 			 * have been a sevice in MAINT that was cleared.
1424 			 */
1425 			if (riq->riq_reason == restarter_str_clear_request)
1426 				reason = restarter_str_clear_request;
1427 			else if (state == RESTARTER_STATE_DISABLED)
1428 				reason = restarter_str_enable_request;
1429 			(void) restarter_instance_update_states(h, inst,
1430 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1431 			    RERR_NONE, reason);
1432 		} else {
1433 			log_framework(LOG_DEBUG, "Restarter: "
1434 			    "Not changing state of %s for enable command.\n",
1435 			    inst->ri_i.i_fmri);
1436 		}
1437 	} else {
1438 		inst->ri_i.i_enabled = 0;
1439 
1440 		switch (state) {
1441 		case RESTARTER_STATE_ONLINE:
1442 		case RESTARTER_STATE_DEGRADED:
1443 			r = stop_instance(h, inst, RSTOP_DISABLE);
1444 			return (r == ECONNRESET ? 0 : r);
1445 
1446 		case RESTARTER_STATE_OFFLINE:
1447 		case RESTARTER_STATE_UNINIT:
1448 			if (inst->ri_i.i_primary_ctid != 0) {
1449 				inst->ri_m_inst = safe_scf_instance_create(h);
1450 				inst->ri_mi_deleted = B_FALSE;
1451 
1452 				libscf_reget_instance(inst);
1453 				method_remove_contract(inst, B_TRUE, B_TRUE);
1454 
1455 				scf_instance_destroy(inst->ri_m_inst);
1456 			}
1457 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1458 			log_instance(inst, B_FALSE, "Disabled.");
1459 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1460 			    inst->ri_i.i_fmri);
1461 
1462 			/*
1463 			 * If we are coming from OFFLINE, it was obviously a
1464 			 * disable request. But if we are coming from
1465 			 * UNINIT, it may have been a disable request for a
1466 			 * service in MAINT.
1467 			 */
1468 			if (riq->riq_reason == restarter_str_disable_request ||
1469 			    state == RESTARTER_STATE_OFFLINE)
1470 				reason = restarter_str_disable_request;
1471 			(void) restarter_instance_update_states(h, inst,
1472 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1473 			    RERR_RESTART, reason);
1474 			return (0);
1475 
1476 		case RESTARTER_STATE_DISABLED:
1477 			break;
1478 
1479 		case RESTARTER_STATE_MAINT:
1480 			/*
1481 			 * We only want to pull the instance out of maintenance
1482 			 * if the disable is on adminstrative request.  The
1483 			 * graph engine sends _DISABLE events whenever a
1484 			 * service isn't in the disabled state, and we don't
1485 			 * want to pull the service out of maintenance if,
1486 			 * for example, it is there due to a dependency cycle.
1487 			 */
1488 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1489 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1490 			break;
1491 
1492 		default:
1493 #ifndef NDEBUG
1494 			(void) fprintf(stderr, "Restarter instance %s has "
1495 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1496 #endif
1497 			abort();
1498 		}
1499 	}
1500 
1501 	return (0);
1502 }
1503 
1504 static void
1505 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1506     int32_t reason)
1507 {
1508 	fork_info_t *info;
1509 	restarter_str_t	new_reason;
1510 
1511 	assert(MUTEX_HELD(&inst->ri_lock));
1512 	assert(instance_in_transition(inst) == 0);
1513 	assert(inst->ri_method_thread == 0);
1514 
1515 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1516 	    inst->ri_i.i_fmri);
1517 
1518 	/*
1519 	 * We want to keep the original reason for restarts and clear actions
1520 	 */
1521 	switch (reason) {
1522 	case restarter_str_restart_request:
1523 	case restarter_str_clear_request:
1524 		new_reason = reason;
1525 		break;
1526 	default:
1527 		new_reason = restarter_str_dependencies_satisfied;
1528 	}
1529 
1530 	/* Services in the disabled and maintenance state are ignored */
1531 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1532 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1533 	    inst->ri_i.i_enabled == 0) {
1534 		log_framework(LOG_DEBUG,
1535 		    "%s: start_instance -> is maint/disabled\n",
1536 		    inst->ri_i.i_fmri);
1537 		return;
1538 	}
1539 
1540 	/* Already started instances are left alone */
1541 	if (instance_started(inst) == 1) {
1542 		log_framework(LOG_DEBUG,
1543 		    "%s: start_instance -> is already started\n",
1544 		    inst->ri_i.i_fmri);
1545 		return;
1546 	}
1547 
1548 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1549 
1550 	(void) restarter_instance_update_states(local_handle, inst,
1551 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1552 
1553 	info = startd_zalloc(sizeof (fork_info_t));
1554 
1555 	info->sf_id = inst->ri_id;
1556 	info->sf_method_type = METHOD_START;
1557 	info->sf_event_type = RERR_NONE;
1558 	info->sf_reason = new_reason;
1559 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1560 }
1561 
1562 static int
1563 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1564 {
1565 	scf_instance_t *inst;
1566 	int ret = 0;
1567 
1568 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1569 		return (-1);
1570 
1571 	ret = restarter_inst_ractions_from_tty(inst);
1572 
1573 	scf_instance_destroy(inst);
1574 	return (ret);
1575 }
1576 
1577 static boolean_t
1578 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1579 {
1580 	scf_instance_t *inst;
1581 	boolean_t ret = B_FALSE;
1582 
1583 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1584 		return (-1);
1585 
1586 	if (restarter_inst_dump(inst) == 1)
1587 		ret = B_TRUE;
1588 
1589 	scf_instance_destroy(inst);
1590 	return (ret);
1591 }
1592 
1593 static void
1594 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1595     restarter_str_t reason)
1596 {
1597 	fork_info_t *info;
1598 	scf_instance_t *scf_inst = NULL;
1599 
1600 	assert(MUTEX_HELD(&rip->ri_lock));
1601 	assert(reason != restarter_str_none);
1602 	assert(rip->ri_method_thread == 0);
1603 
1604 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1605 	    restarter_get_str_short(reason));
1606 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1607 	    rip->ri_i.i_fmri, restarter_get_str_short(reason));
1608 
1609 	/* Services in the maintenance state are ignored */
1610 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1611 		log_framework(LOG_DEBUG,
1612 		    "%s: maintain_instance -> is already in maintenance\n",
1613 		    rip->ri_i.i_fmri);
1614 		return;
1615 	}
1616 
1617 	/*
1618 	 * If reason state is restarter_str_service_request and
1619 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1620 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1621 	 */
1622 	if (reason == restarter_str_service_request &&
1623 	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1624 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1625 			if (restarter_inst_set_aux_fmri(scf_inst))
1626 				log_framework(LOG_DEBUG, "%s: "
1627 				    "restarter_inst_set_aux_fmri failed: ",
1628 				    rip->ri_i.i_fmri);
1629 		} else {
1630 			log_framework(LOG_DEBUG, "%s: "
1631 			    "restarter_inst_validate_ractions_aux_fmri "
1632 			    "failed: ", rip->ri_i.i_fmri);
1633 
1634 			if (restarter_inst_reset_aux_fmri(scf_inst))
1635 				log_framework(LOG_DEBUG, "%s: "
1636 				    "restarter_inst_reset_aux_fmri failed: ",
1637 				    rip->ri_i.i_fmri);
1638 		}
1639 		scf_instance_destroy(scf_inst);
1640 	}
1641 
1642 	if (immediate || !instance_started(rip)) {
1643 		if (rip->ri_i.i_primary_ctid != 0) {
1644 			rip->ri_m_inst = safe_scf_instance_create(h);
1645 			rip->ri_mi_deleted = B_FALSE;
1646 
1647 			libscf_reget_instance(rip);
1648 			method_remove_contract(rip, B_TRUE, B_TRUE);
1649 
1650 			scf_instance_destroy(rip->ri_m_inst);
1651 		}
1652 
1653 		(void) restarter_instance_update_states(h, rip,
1654 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1655 		    reason);
1656 		return;
1657 	}
1658 
1659 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1660 	    RESTARTER_STATE_MAINT, RERR_NONE, reason);
1661 
1662 	log_transition(rip, MAINT_REQUESTED);
1663 
1664 	info = startd_zalloc(sizeof (*info));
1665 	info->sf_id = rip->ri_id;
1666 	info->sf_method_type = METHOD_STOP;
1667 	info->sf_event_type = RERR_RESTART;
1668 	info->sf_reason = reason;
1669 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1670 }
1671 
1672 static void
1673 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1674 {
1675 	scf_instance_t *inst;
1676 	scf_snapshot_t *snap;
1677 	fork_info_t *info;
1678 	int r;
1679 
1680 	assert(MUTEX_HELD(&rip->ri_lock));
1681 
1682 	log_instance(rip, B_TRUE, "Rereading configuration.");
1683 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1684 	    rip->ri_i.i_fmri);
1685 
1686 rep_retry:
1687 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1688 	switch (r) {
1689 	case 0:
1690 		break;
1691 
1692 	case ECONNABORTED:
1693 		libscf_handle_rebind(h);
1694 		goto rep_retry;
1695 
1696 	case ENOENT:
1697 		/* Must have been deleted. */
1698 		return;
1699 
1700 	case EINVAL:
1701 	case ENOTSUP:
1702 	default:
1703 		bad_error("libscf_fmri_get_instance", r);
1704 	}
1705 
1706 	snap = libscf_get_running_snapshot(inst);
1707 
1708 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1709 	    &rip->ri_utmpx_prefix);
1710 	switch (r) {
1711 	case 0:
1712 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1713 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1714 		break;
1715 
1716 	case ECONNABORTED:
1717 		scf_instance_destroy(inst);
1718 		scf_snapshot_destroy(snap);
1719 		libscf_handle_rebind(h);
1720 		goto rep_retry;
1721 
1722 	case ECANCELED:
1723 	case ENOENT:
1724 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1725 		break;
1726 
1727 	default:
1728 		bad_error("libscf_get_startd_properties", r);
1729 	}
1730 
1731 	if (instance_started(rip)) {
1732 		/* Refresh does not change the state. */
1733 		(void) restarter_instance_update_states(h, rip,
1734 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1735 		    restarter_str_refresh);
1736 
1737 		info = startd_zalloc(sizeof (*info));
1738 		info->sf_id = rip->ri_id;
1739 		info->sf_method_type = METHOD_REFRESH;
1740 		info->sf_event_type = RERR_REFRESH;
1741 		info->sf_reason = 0;
1742 
1743 		assert(rip->ri_method_thread == 0);
1744 		rip->ri_method_thread =
1745 		    startd_thread_create(method_thread, info);
1746 	}
1747 
1748 	scf_snapshot_destroy(snap);
1749 	scf_instance_destroy(inst);
1750 }
1751 
/*
 * Printable names for restarter events.  The table is indexed by event type
 * (see the event_names[...->riq_type] and event_names[...->rpe_type] uses),
 * so the position of each entry is significant.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE",
	"STOP_RESET"
};
1758 
1759 /*
1760  * void *restarter_process_events()
1761  *
1762  *   Called in a separate thread to process the events on an instance's
1763  *   queue.  Empties the queue completely, and tries to keep the thread
1764  *   around for a little while after the queue is empty to save on
1765  *   startup costs.
1766  */
1767 static void *
1768 restarter_process_events(void *arg)
1769 {
1770 	scf_handle_t *h;
1771 	restarter_instance_qentry_t *event;
1772 	restarter_inst_t *rip;
1773 	char *fmri = (char *)arg;
1774 	struct timespec to;
1775 
1776 	(void) pthread_setname_np(pthread_self(), "restarter_process_events");
1777 
1778 	assert(fmri != NULL);
1779 
1780 	h = libscf_handle_create_bound_loop();
1781 
1782 	/* grab the queue lock */
1783 	rip = inst_lookup_queue(fmri);
1784 	if (rip == NULL)
1785 		goto out;
1786 
1787 again:
1788 
1789 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1790 		restarter_inst_t *inst;
1791 
1792 		/* drop the queue lock */
1793 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1794 
1795 		/*
1796 		 * Grab the inst lock -- this waits until any outstanding
1797 		 * method finishes running.
1798 		 */
1799 		inst = inst_lookup_by_name(fmri);
1800 		if (inst == NULL) {
1801 			/* Getting deleted in the middle isn't an error. */
1802 			goto cont;
1803 		}
1804 
1805 		assert(instance_in_transition(inst) == 0);
1806 
1807 		/* process the event */
1808 		switch (event->riq_type) {
1809 		case RESTARTER_EVENT_TYPE_ENABLE:
1810 		case RESTARTER_EVENT_TYPE_DISABLE:
1811 			(void) enable_inst(h, inst, event);
1812 			break;
1813 
1814 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1815 			if (enable_inst(h, inst, event) == 0)
1816 				reset_start_times(inst);
1817 			break;
1818 
1819 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1820 			restarter_delete_inst(inst);
1821 			inst = NULL;
1822 			goto cont;
1823 
1824 		case RESTARTER_EVENT_TYPE_STOP_RESET:
1825 			reset_start_times(inst);
1826 			/* FALLTHROUGH */
1827 		case RESTARTER_EVENT_TYPE_STOP:
1828 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1829 			break;
1830 
1831 		case RESTARTER_EVENT_TYPE_START:
1832 			start_instance(h, inst, event->riq_reason);
1833 			break;
1834 
1835 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1836 			maintain_instance(h, inst, 0,
1837 			    restarter_str_dependency_cycle);
1838 			break;
1839 
1840 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1841 			maintain_instance(h, inst, 0,
1842 			    restarter_str_invalid_dependency);
1843 			break;
1844 
1845 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1846 			if (event_from_tty(h, inst) == 0)
1847 				maintain_instance(h, inst, 0,
1848 				    restarter_str_service_request);
1849 			else
1850 				maintain_instance(h, inst, 0,
1851 				    restarter_str_administrative_request);
1852 			break;
1853 
1854 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1855 			if (event_from_tty(h, inst) == 0)
1856 				maintain_instance(h, inst, 1,
1857 				    restarter_str_service_request);
1858 			else
1859 				maintain_instance(h, inst, 1,
1860 				    restarter_str_administrative_request);
1861 			break;
1862 
1863 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1864 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1865 			reset_start_times(inst);
1866 			break;
1867 
1868 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1869 			refresh_instance(h, inst);
1870 			break;
1871 
1872 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1873 			log_framework(LOG_WARNING, "Restarter: "
1874 			    "%s command (for %s) unimplemented.\n",
1875 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1876 			break;
1877 
1878 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1879 			if (!instance_started(inst)) {
1880 				log_framework(LOG_DEBUG, "Restarter: "
1881 				    "Not restarting %s; not running.\n",
1882 				    inst->ri_i.i_fmri);
1883 			} else {
1884 				/*
1885 				 * Stop the instance.  If it can be restarted,
1886 				 * the graph engine will send a new event.
1887 				 */
1888 				if (restart_dump(h, inst)) {
1889 					(void) contract_kill(
1890 					    inst->ri_i.i_primary_ctid, SIGABRT,
1891 					    inst->ri_i.i_fmri);
1892 				} else if (stop_instance(h, inst,
1893 				    RSTOP_RESTART) == 0) {
1894 					reset_start_times(inst);
1895 				}
1896 			}
1897 			break;
1898 
1899 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1900 		default:
1901 #ifndef NDEBUG
1902 			uu_warn("%s:%d: Bad restarter event %d.  "
1903 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1904 #endif
1905 			abort();
1906 		}
1907 
1908 		assert(inst != NULL);
1909 		MUTEX_UNLOCK(&inst->ri_lock);
1910 
1911 cont:
1912 		/* grab the queue lock */
1913 		rip = inst_lookup_queue(fmri);
1914 		if (rip == NULL)
1915 			goto out;
1916 
1917 		/* delete the event */
1918 		uu_list_remove(rip->ri_queue, event);
1919 		startd_free(event, sizeof (restarter_instance_qentry_t));
1920 	}
1921 
1922 	assert(rip != NULL);
1923 
1924 	/*
1925 	 * Try to preserve the thread for a little while for future use.
1926 	 */
1927 	to.tv_sec = 3;
1928 	to.tv_nsec = 0;
1929 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1930 	    &rip->ri_queue_lock, &to);
1931 
1932 	if (uu_list_first(rip->ri_queue) != NULL)
1933 		goto again;
1934 
1935 	rip->ri_queue_thread = 0;
1936 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1937 
1938 out:
1939 	(void) scf_handle_unbind(h);
1940 	scf_handle_destroy(h);
1941 	free(fmri);
1942 	return (NULL);
1943 }
1944 
1945 static int
1946 is_admin_event(restarter_event_type_t t)
1947 {
1948 	switch (t) {
1949 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1950 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1951 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1952 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1953 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1954 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1955 		return (1);
1956 	default:
1957 		return (0);
1958 	}
1959 }
1960 
1961 static void
1962 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1963 {
1964 	restarter_instance_qentry_t *qe;
1965 	int r;
1966 
1967 	assert(MUTEX_HELD(&ri->ri_queue_lock));
1968 	assert(!MUTEX_HELD(&ri->ri_lock));
1969 
1970 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1971 	qe->riq_type = e->rpe_type;
1972 	qe->riq_reason = e->rpe_reason;
1973 
1974 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1975 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1976 	assert(r == 0);
1977 }
1978 
1979 /*
1980  * void *restarter_event_thread()
1981  *
1982  *  Handle incoming graph events by placing them on a per-instance
1983  *  queue.  We can't lock the main part of the instance structure, so
1984  *  just modify the seprarately locked event queue portion.
1985  */
1986 /*ARGSUSED*/
1987 static void *
1988 restarter_event_thread(void *unused)
1989 {
1990 	scf_handle_t *h;
1991 
1992 	(void) pthread_setname_np(pthread_self(), "restarter_event");
1993 
1994 	/*
1995 	 * This is a new thread, and thus, gets its own handle
1996 	 * to the repository.
1997 	 */
1998 	h = libscf_handle_create_bound_loop();
1999 
2000 	MUTEX_LOCK(&ru->restarter_update_lock);
2001 
2002 	/*CONSTCOND*/
2003 	while (1) {
2004 		restarter_protocol_event_t *e;
2005 
2006 		while (ru->restarter_update_wakeup == 0)
2007 			(void) pthread_cond_wait(&ru->restarter_update_cv,
2008 			    &ru->restarter_update_lock);
2009 
2010 		ru->restarter_update_wakeup = 0;
2011 
2012 		while ((e = restarter_event_dequeue()) != NULL) {
2013 			restarter_inst_t *rip;
2014 			char *fmri;
2015 
2016 			MUTEX_UNLOCK(&ru->restarter_update_lock);
2017 
2018 			/*
2019 			 * ADD_INSTANCE is special: there's likely no
2020 			 * instance structure yet, so we need to handle the
2021 			 * addition synchronously.
2022 			 */
2023 			switch (e->rpe_type) {
2024 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
2025 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
2026 					log_error(LOG_INFO, "Restarter: "
2027 					    "Could not add %s.\n", e->rpe_inst);
2028 
2029 				MUTEX_LOCK(&st->st_load_lock);
2030 				if (--st->st_load_instances == 0)
2031 					(void) pthread_cond_broadcast(
2032 					    &st->st_load_cv);
2033 				MUTEX_UNLOCK(&st->st_load_lock);
2034 
2035 				goto nolookup;
2036 			}
2037 
2038 			/*
2039 			 * Lookup the instance, locking only the event queue.
2040 			 * Can't grab ri_lock here because it might be held
2041 			 * by a long-running method.
2042 			 */
2043 			rip = inst_lookup_queue(e->rpe_inst);
2044 			if (rip == NULL) {
2045 				log_error(LOG_INFO, "Restarter: "
2046 				    "Ignoring %s command for unknown service "
2047 				    "%s.\n", event_names[e->rpe_type],
2048 				    e->rpe_inst);
2049 				goto nolookup;
2050 			}
2051 
2052 			/* Keep ADMIN events from filling up the queue. */
2053 			if (is_admin_event(e->rpe_type) &&
2054 			    uu_list_numnodes(rip->ri_queue) >
2055 			    RINST_QUEUE_THRESHOLD) {
2056 				MUTEX_UNLOCK(&rip->ri_queue_lock);
2057 				log_instance(rip, B_TRUE, "Instance event "
2058 				    "queue overflow.  Dropping administrative "
2059 				    "request.");
2060 				log_framework(LOG_DEBUG, "%s: Instance event "
2061 				    "queue overflow.  Dropping administrative "
2062 				    "request.\n", rip->ri_i.i_fmri);
2063 				goto nolookup;
2064 			}
2065 
2066 			/* Now add the event to the instance queue. */
2067 			restarter_queue_event(rip, e);
2068 
2069 			if (rip->ri_queue_thread == 0) {
2070 				/*
2071 				 * Start a thread if one isn't already
2072 				 * running.
2073 				 */
2074 				fmri = safe_strdup(e->rpe_inst);
2075 				rip->ri_queue_thread =  startd_thread_create(
2076 				    restarter_process_events, (void *)fmri);
2077 			} else {
2078 				/*
2079 				 * Signal the existing thread that there's
2080 				 * a new event.
2081 				 */
2082 				(void) pthread_cond_broadcast(
2083 				    &rip->ri_queue_cv);
2084 			}
2085 
2086 			MUTEX_UNLOCK(&rip->ri_queue_lock);
2087 nolookup:
2088 			restarter_event_release(e);
2089 
2090 			MUTEX_LOCK(&ru->restarter_update_lock);
2091 		}
2092 	}
2093 }
2094 
2095 static restarter_inst_t *
2096 contract_to_inst(ctid_t ctid)
2097 {
2098 	restarter_inst_t *inst;
2099 	int id;
2100 
2101 	id = lookup_inst_by_contract(ctid);
2102 	if (id == -1)
2103 		return (NULL);
2104 
2105 	inst = inst_lookup_by_id(id);
2106 	if (inst != NULL) {
2107 		/*
2108 		 * Since ri_lock isn't held by the contract id lookup, this
2109 		 * instance may have been restarted and now be in a new
2110 		 * contract, making the old contract no longer valid for this
2111 		 * instance.
2112 		 */
2113 		if (ctid != inst->ri_i.i_primary_ctid) {
2114 			MUTEX_UNLOCK(&inst->ri_lock);
2115 			inst = NULL;
2116 		}
2117 	}
2118 	return (inst);
2119 }
2120 
2121 /*
2122  * void contract_action()
2123  *   Take action on contract events.
2124  */
2125 static void
2126 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2127     uint32_t type)
2128 {
2129 	const char *fmri = inst->ri_i.i_fmri;
2130 
2131 	assert(MUTEX_HELD(&inst->ri_lock));
2132 
2133 	/*
2134 	 * If startd has stopped this contract, there is no need to
2135 	 * stop it again.
2136 	 */
2137 	if (inst->ri_i.i_primary_ctid > 0 &&
2138 	    inst->ri_i.i_primary_ctid_stopped)
2139 		return;
2140 
2141 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2142 	    | CT_PR_EV_HWERR)) == 0) {
2143 		/*
2144 		 * There shouldn't be other events, since that's not how we set
2145 		 * the terms. Thus, just log an error and drive on.
2146 		 */
2147 		log_framework(LOG_NOTICE,
2148 		    "%s: contract %ld received unexpected critical event "
2149 		    "(%d)\n", fmri, id, type);
2150 		return;
2151 	}
2152 
2153 	assert(instance_in_transition(inst) == 0);
2154 
2155 	if (instance_is_wait_style(inst)) {
2156 		/*
2157 		 * We ignore all events; if they impact the
2158 		 * process we're monitoring, then the
2159 		 * wait_thread will stop the instance.
2160 		 */
2161 		log_framework(LOG_DEBUG,
2162 		    "%s: ignoring contract event on wait-style service\n",
2163 		    fmri);
2164 	} else {
2165 		/*
2166 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2167 		 */
2168 		switch (type) {
2169 		case CT_PR_EV_EMPTY:
2170 			(void) stop_instance(h, inst, RSTOP_EXIT);
2171 			break;
2172 		case CT_PR_EV_CORE:
2173 			(void) stop_instance(h, inst, RSTOP_CORE);
2174 			break;
2175 		case CT_PR_EV_SIGNAL:
2176 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
2177 			break;
2178 		case CT_PR_EV_HWERR:
2179 			(void) stop_instance(h, inst, RSTOP_HWERR);
2180 			break;
2181 		}
2182 	}
2183 }
2184 
2185 /*
2186  * void *restarter_contract_event_thread(void *)
2187  *   Listens to the process contract bundle for critical events, taking action
2188  *   on events from contracts we know we are responsible for.
2189  */
2190 /*ARGSUSED*/
2191 static void *
2192 restarter_contracts_event_thread(void *unused)
2193 {
2194 	int fd, err;
2195 	scf_handle_t *local_handle;
2196 
2197 	(void) pthread_setname_np(pthread_self(), "restarter_contracts_event");
2198 
2199 	/*
2200 	 * Await graph load completion.  That is, stop here, until we've scanned
2201 	 * the repository for contract - instance associations.
2202 	 */
2203 	MUTEX_LOCK(&st->st_load_lock);
2204 	while (!(st->st_load_complete && st->st_load_instances == 0))
2205 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2206 	MUTEX_UNLOCK(&st->st_load_lock);
2207 
2208 	/*
2209 	 * This is a new thread, and thus, gets its own handle
2210 	 * to the repository.
2211 	 */
2212 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2213 		uu_die("Unable to bind a new repository handle: %s\n",
2214 		    scf_strerror(scf_error()));
2215 
2216 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2217 	if (fd == -1)
2218 		uu_die("process bundle open failed");
2219 
2220 	/*
2221 	 * Make sure we get all events (including those generated by configd
2222 	 * before this thread was started).
2223 	 */
2224 	err = ct_event_reset(fd);
2225 	assert(err == 0);
2226 
2227 	for (;;) {
2228 		int efd, sfd;
2229 		ct_evthdl_t ev;
2230 		uint32_t type;
2231 		ctevid_t evid;
2232 		ct_stathdl_t status;
2233 		ctid_t ctid;
2234 		restarter_inst_t *inst;
2235 		uint64_t cookie;
2236 
2237 		if (err = ct_event_read_critical(fd, &ev)) {
2238 			log_error(LOG_WARNING,
2239 			    "Error reading next contract event: %s",
2240 			    strerror(err));
2241 			continue;
2242 		}
2243 
2244 		evid = ct_event_get_evid(ev);
2245 		ctid = ct_event_get_ctid(ev);
2246 		type = ct_event_get_type(ev);
2247 
2248 		/* Fetch cookie. */
2249 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2250 		    < 0) {
2251 			ct_event_free(ev);
2252 			continue;
2253 		}
2254 
2255 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2256 			log_framework(LOG_WARNING, "Could not get status for "
2257 			    "contract %ld: %s\n", ctid, strerror(err));
2258 
2259 			startd_close(sfd);
2260 			ct_event_free(ev);
2261 			continue;
2262 		}
2263 
2264 		cookie = ct_status_get_cookie(status);
2265 
2266 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2267 		    "cookie %lld\n", type, ctid, cookie);
2268 
2269 		ct_status_free(status);
2270 
2271 		startd_close(sfd);
2272 
2273 		/*
2274 		 * svc.configd(1M) restart handling performed by the
2275 		 * fork_configd_thread.  We don't acknowledge, as that thread
2276 		 * will do so.
2277 		 */
2278 		if (cookie == CONFIGD_COOKIE) {
2279 			ct_event_free(ev);
2280 			continue;
2281 		}
2282 
2283 		inst = NULL;
2284 		if (storing_contract != 0 &&
2285 		    (inst = contract_to_inst(ctid)) == NULL) {
2286 			/*
2287 			 * This can happen for two reasons:
2288 			 * - method_run() has not yet stored the
2289 			 *    the contract into the internal hash table.
2290 			 * - we receive an EMPTY event for an abandoned
2291 			 *    contract.
2292 			 * If there is any contract in the process of
2293 			 * being stored into the hash table then re-read
2294 			 * the event later.
2295 			 */
2296 			log_framework(LOG_DEBUG,
2297 			    "Reset event %d for unknown "
2298 			    "contract id %ld\n", type, ctid);
2299 
2300 			/* don't go too fast */
2301 			(void) poll(NULL, 0, 100);
2302 
2303 			(void) ct_event_reset(fd);
2304 			ct_event_free(ev);
2305 			continue;
2306 		}
2307 
2308 		/*
2309 		 * Do not call contract_to_inst() again if first
2310 		 * call succeeded.
2311 		 */
2312 		if (inst == NULL)
2313 			inst = contract_to_inst(ctid);
2314 		if (inst == NULL) {
2315 			/*
2316 			 * This can happen if we receive an EMPTY
2317 			 * event for an abandoned contract.
2318 			 */
2319 			log_framework(LOG_DEBUG,
2320 			    "Received event %d for unknown contract id "
2321 			    "%ld\n", type, ctid);
2322 		} else {
2323 			log_framework(LOG_DEBUG,
2324 			    "Received event %d for contract id "
2325 			    "%ld (%s)\n", type, ctid,
2326 			    inst->ri_i.i_fmri);
2327 
2328 			contract_action(local_handle, inst, ctid, type);
2329 
2330 			MUTEX_UNLOCK(&inst->ri_lock);
2331 		}
2332 
2333 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2334 		    O_WRONLY);
2335 		if (efd != -1) {
2336 			(void) ct_ctl_ack(efd, evid);
2337 			startd_close(efd);
2338 		}
2339 
2340 		ct_event_free(ev);
2341 
2342 	}
2343 
2344 	/*NOTREACHED*/
2345 	return (NULL);
2346 }
2347 
/*
 * Timeout queue, processed by restarter_timeouts_event_thread().
 *
 * Both the queue and its list pool are set up by timeout_init().  The
 * queue's list is created sorted, ordered by timeout_compare().
 */
timeout_queue_t *timeouts;
static uu_list_pool_t *timeout_pool;

/*
 * Wakeup channel between timeout_insert() and
 * restarter_timeouts_event_thread(): the thread blocks on tu_cv while the
 * queue is empty, and timeout_insert() signals it after queueing an entry.
 */
typedef struct timeout_update {
	pthread_mutex_t		tu_lock;	/* protects tu_wakeup */
	pthread_cond_t		tu_cv;		/* broadcast on insert */
	int			tu_wakeup;	/* 1 when an insert is pending */
} timeout_update_t;

/* Allocated by timeout_init(). */
timeout_update_t *tu;
2361 
/*
 * NULL-terminated table of instance FMRIs whose method timeouts are
 * overridden by svc.startd.  is_timeout_ovr() compares an instance's FMRI
 * against each entry.  The table is never modified at run time, so both
 * the strings and the pointers to them are const.
 */
static const char * const timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2375 
2376 int
2377 is_timeout_ovr(restarter_inst_t *inst)
2378 {
2379 	int i;
2380 
2381 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2382 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2383 			log_instance(inst, B_TRUE, "Timeout override by "
2384 			    "svc.startd.  Using infinite timeout.");
2385 			return (1);
2386 		}
2387 	}
2388 
2389 	return (0);
2390 }
2391 
2392 /*ARGSUSED*/
2393 static int
2394 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2395 {
2396 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2397 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2398 
2399 	if (t1 > t2)
2400 		return (1);
2401 	else if (t1 < t2)
2402 		return (-1);
2403 	return (0);
2404 }
2405 
2406 void
2407 timeout_init()
2408 {
2409 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2410 
2411 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2412 
2413 	timeout_pool = startd_list_pool_create("timeouts",
2414 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2415 	    timeout_compare, UU_LIST_POOL_DEBUG);
2416 	assert(timeout_pool != NULL);
2417 
2418 	timeouts->tq_list = startd_list_create(timeout_pool,
2419 	    timeouts, UU_LIST_SORTED);
2420 	assert(timeouts->tq_list != NULL);
2421 
2422 	tu = startd_zalloc(sizeof (timeout_update_t));
2423 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2424 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2425 }
2426 
2427 void
2428 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2429 {
2430 	hrtime_t now, timeout;
2431 	timeout_entry_t *entry;
2432 	uu_list_index_t idx;
2433 
2434 	assert(MUTEX_HELD(&inst->ri_lock));
2435 
2436 	now = gethrtime();
2437 
2438 	/*
2439 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2440 	 * just return.
2441 	 */
2442 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2443 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2444 		    "treating as infinite.");
2445 		return;
2446 	}
2447 
2448 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2449 	timeout = now + (timeout_sec * 1000000000LL);
2450 
2451 	entry = startd_alloc(sizeof (timeout_entry_t));
2452 	entry->te_timeout = timeout;
2453 	entry->te_ctid = cid;
2454 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2455 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2456 	entry->te_fired = 0;
2457 	/* Insert the calculated timeout time onto the queue. */
2458 	MUTEX_LOCK(&timeouts->tq_lock);
2459 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2460 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2461 	uu_list_insert(timeouts->tq_list, entry, idx);
2462 	MUTEX_UNLOCK(&timeouts->tq_lock);
2463 
2464 	assert(inst->ri_timeout == NULL);
2465 	inst->ri_timeout = entry;
2466 
2467 	MUTEX_LOCK(&tu->tu_lock);
2468 	tu->tu_wakeup = 1;
2469 	(void) pthread_cond_broadcast(&tu->tu_cv);
2470 	MUTEX_UNLOCK(&tu->tu_lock);
2471 }
2472 
2473 
2474 void
2475 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2476 {
2477 	assert(MUTEX_HELD(&inst->ri_lock));
2478 
2479 	if (inst->ri_timeout == NULL)
2480 		return;
2481 
2482 	assert(inst->ri_timeout->te_ctid == cid);
2483 
2484 	MUTEX_LOCK(&timeouts->tq_lock);
2485 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2486 	MUTEX_UNLOCK(&timeouts->tq_lock);
2487 
2488 	free(inst->ri_timeout->te_fmri);
2489 	free(inst->ri_timeout->te_logstem);
2490 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2491 	inst->ri_timeout = NULL;
2492 }
2493 
2494 static int
2495 timeout_now()
2496 {
2497 	timeout_entry_t *e;
2498 	hrtime_t now;
2499 	int ret;
2500 
2501 	now = gethrtime();
2502 
2503 	/*
2504 	 * Walk through the (sorted) timeouts list.  While the timeout
2505 	 * at the head of the list is <= the current time, kill the
2506 	 * method.
2507 	 */
2508 	MUTEX_LOCK(&timeouts->tq_lock);
2509 
2510 	for (e = uu_list_first(timeouts->tq_list);
2511 	    e != NULL && e->te_timeout <= now;
2512 	    e = uu_list_next(timeouts->tq_list, e)) {
2513 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2514 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2515 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2516 		    "Method or service exit timed out.  Killing contract %ld.",
2517 		    e->te_ctid);
2518 		e->te_fired = 1;
2519 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2520 	}
2521 
2522 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2523 		ret = 0;
2524 	else
2525 		ret = -1;
2526 
2527 	MUTEX_UNLOCK(&timeouts->tq_lock);
2528 
2529 	return (ret);
2530 }
2531 
2532 /*
2533  * void *restarter_timeouts_event_thread(void *)
2534  *   Responsible for monitoring the method timeouts.  This thread must
2535  *   be started before any methods are called.
2536  */
2537 /*ARGSUSED*/
2538 static void *
2539 restarter_timeouts_event_thread(void *unused)
2540 {
2541 	/*
2542 	 * Timeouts are entered on a priority queue, which is processed by
2543 	 * this thread.  As timeouts are specified in seconds, we'll do
2544 	 * the necessary processing every second, as long as the queue
2545 	 * is not empty.
2546 	 */
2547 
2548 	(void) pthread_setname_np(pthread_self(), "restarter_timeouts_event");
2549 
2550 	/*CONSTCOND*/
2551 	while (1) {
2552 		/*
2553 		 * As long as the timeout list isn't empty, process it
2554 		 * every second.
2555 		 */
2556 		if (timeout_now() == 0) {
2557 			(void) sleep(1);
2558 			continue;
2559 		}
2560 
2561 		/* The list is empty, wait until we have more timeouts. */
2562 		MUTEX_LOCK(&tu->tu_lock);
2563 
2564 		while (tu->tu_wakeup == 0)
2565 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2566 
2567 		tu->tu_wakeup = 0;
2568 		MUTEX_UNLOCK(&tu->tu_lock);
2569 	}
2570 
2571 	return (NULL);
2572 }
2573 
2574 void
2575 restarter_start()
2576 {
2577 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2578 	(void) startd_thread_create(restarter_event_thread, NULL);
2579 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2580 	(void) startd_thread_create(wait_thread, NULL);
2581 }
2582 
2583 
2584 void
2585 restarter_init()
2586 {
2587 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2588 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2589 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2590 	(void) memset(&instance_list, 0, sizeof (instance_list));
2591 
2592 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2593 	instance_list.ril_instance_list = startd_list_create(
2594 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2595 
2596 	restarter_queue_pool = startd_list_pool_create(
2597 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2598 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2599 	    UU_LIST_POOL_DEBUG);
2600 
2601 	contract_list_pool = startd_list_pool_create(
2602 	    "contract_list", sizeof (contract_entry_t),
2603 	    offsetof(contract_entry_t,  ce_link), NULL,
2604 	    UU_LIST_POOL_DEBUG);
2605 	contract_hash_init();
2606 
2607 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2608 }
2609