xref: /titanic_52/usr/src/cmd/svc/startd/restarter.c (revision 654b400c387942fc00d4f6869539adbd7b25fbce)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * restarter.c - service manipulation
28  *
29  * This component manages services whose restarter is svc.startd, the standard
30  * restarter.  It translates restarter protocol events from the graph engine
31  * into actions on processes, as a delegated restarter would do.
32  *
33  * The master restarter manages a number of always-running threads:
34  *   - restarter event thread: events from the graph engine
35  *   - timeout thread: thread to fire queued timeouts
36  *   - contract thread: thread to handle contract events
37  *   - wait thread: thread to handle wait-based services
38  *
39  * The other threads are created as-needed:
40  *   - per-instance method threads
41  *   - per-instance event processing threads
42  *
43  * The interaction of all threads must result in the following conditions
44  * being satisfied (on a per-instance basis):
45  *   - restarter events must be processed in order
46  *   - method execution must be serialized
47  *   - instance delete must be held until outstanding methods are complete
48  *   - contract events shouldn't be processed while a method is running
49  *   - timeouts should fire even when a method is running
50  *
51  * Service instances are represented by restarter_inst_t's and are kept in the
52  * instance_list list.
53  *
54  * Service States
55  *   The current state of a service instance is kept in
56  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
57  *   some time, then before we effect the transition we set
58  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
59  *   rotate i_next_state to i_state and set i_next_state to
60  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
61  *   held.  The exception is when we launch methods, which are done with
62  *   a separate thread.  To keep any other threads from grabbing ri_lock before
63  *   method_thread() does, we set ri_method_thread to the thread id of the
64  *   method thread, and when it is nonzero any thread with a different thread id
65  *   waits on ri_method_cv.
66  *
67  * Method execution is serialized by blocking on ri_method_cv in
68  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
69  * also prevents the instance structure from being deleted until all
70  * outstanding operations such as method_thread() have finished.
71  *
72  * Lock ordering:
73  *
74  * dgraph_lock [can be held when taking:]
75  *   utmpx_lock
76  *   dictionary->dict_lock
77  *   st->st_load_lock
78  *   wait_info_lock
79  *   ru->restarter_update_lock
80  *     restarter_queue->rpeq_lock
81  *   instance_list.ril_lock
82  *     inst->ri_lock
83  *   st->st_configd_live_lock
84  *
85  * instance_list.ril_lock
86  *   graph_queue->gpeq_lock
87  *   gu->gu_lock
88  *   st->st_configd_live_lock
89  *   dictionary->dict_lock
90  *   inst->ri_lock
91  *     graph_queue->gpeq_lock
92  *     gu->gu_lock
93  *     tu->tu_lock
94  *     tq->tq_lock
95  *     inst->ri_queue_lock
96  *       wait_info_lock
97  *       bp->cb_lock
98  *     utmpx_lock
99  *
100  * single_user_thread_lock
101  *   wait_info_lock
102  *   utmpx_lock
103  *
104  * gu_freeze_lock
105  *
106  * logbuf_mutex nests inside pretty much everything.
107  */
108 
109 #include <sys/contract/process.h>
110 #include <sys/ctfs.h>
111 #include <sys/stat.h>
112 #include <sys/time.h>
113 #include <sys/types.h>
114 #include <sys/uio.h>
115 #include <sys/wait.h>
116 #include <assert.h>
117 #include <errno.h>
118 #include <fcntl.h>
119 #include <libcontract.h>
120 #include <libcontract_priv.h>
121 #include <libintl.h>
122 #include <librestart.h>
123 #include <librestart_priv.h>
124 #include <libuutil.h>
125 #include <limits.h>
126 #include <poll.h>
127 #include <port.h>
128 #include <pthread.h>
129 #include <stdarg.h>
130 #include <stdio.h>
131 #include <strings.h>
132 #include <unistd.h>
133 
134 #include "startd.h"
135 #include "protocol.h"
136 
/* uu_list pool that each restarter_inst_t's ri_link node is initialized in. */
static uu_list_pool_t *restarter_instance_pool;
/* The restarter's instances (ril_instance_list), guarded by ril_lock. */
static restarter_instance_list_t instance_list;

/* uu_list pool from which each instance's ri_queue list is created. */
static uu_list_pool_t *restarter_queue_pool;
141 
142 /*
143  * Function used to reset the restart times for an instance, when
144  * an administrative task comes along and essentially makes the times
145  * in this array ineffective.
146  */
147 static void
148 reset_start_times(restarter_inst_t *inst)
149 {
150 	inst->ri_start_index = 0;
151 	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
152 }
153 
154 /*ARGSUSED*/
155 static int
156 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
157     void *private)
158 {
159 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
160 	int rc_id = *(int *)rc_arg;
161 
162 	if (lc_id > rc_id)
163 		return (1);
164 	if (lc_id < rc_id)
165 		return (-1);
166 	return (0);
167 }
168 
169 static restarter_inst_t *
170 inst_lookup_by_name(const char *name)
171 {
172 	int id;
173 
174 	id = dict_lookup_byname(name);
175 	if (id == -1)
176 		return (NULL);
177 
178 	return (inst_lookup_by_id(id));
179 }
180 
181 restarter_inst_t *
182 inst_lookup_by_id(int id)
183 {
184 	restarter_inst_t *inst;
185 
186 	MUTEX_LOCK(&instance_list.ril_lock);
187 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
188 	if (inst != NULL)
189 		MUTEX_LOCK(&inst->ri_lock);
190 	MUTEX_UNLOCK(&instance_list.ril_lock);
191 
192 	if (inst != NULL) {
193 		while (inst->ri_method_thread != 0 &&
194 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
195 			++inst->ri_method_waiters;
196 			(void) pthread_cond_wait(&inst->ri_method_cv,
197 			    &inst->ri_lock);
198 			assert(inst->ri_method_waiters > 0);
199 			--inst->ri_method_waiters;
200 		}
201 	}
202 
203 	return (inst);
204 }
205 
206 static restarter_inst_t *
207 inst_lookup_queue(const char *name)
208 {
209 	int id;
210 	restarter_inst_t *inst;
211 
212 	id = dict_lookup_byname(name);
213 	if (id == -1)
214 		return (NULL);
215 
216 	MUTEX_LOCK(&instance_list.ril_lock);
217 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
218 	if (inst != NULL)
219 		MUTEX_LOCK(&inst->ri_queue_lock);
220 	MUTEX_UNLOCK(&instance_list.ril_lock);
221 
222 	return (inst);
223 }
224 
225 const char *
226 service_style(int flags)
227 {
228 	switch (flags & RINST_STYLE_MASK) {
229 	case RINST_CONTRACT:	return ("contract");
230 	case RINST_TRANSIENT:	return ("transient");
231 	case RINST_WAIT:	return ("wait");
232 
233 	default:
234 #ifndef NDEBUG
235 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
236 #endif
237 		abort();
238 		/* NOTREACHED */
239 	}
240 }
241 
242 /*
243  * Fails with ECONNABORTED or ECANCELED.
244  */
245 static int
246 check_contract(restarter_inst_t *inst, boolean_t primary,
247     scf_instance_t *scf_inst)
248 {
249 	ctid_t *ctidp;
250 	int fd, r;
251 
252 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
253 	    &inst->ri_i.i_transient_ctid;
254 
255 	assert(*ctidp >= 1);
256 
257 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
258 	if (fd >= 0) {
259 		r = close(fd);
260 		assert(r == 0);
261 		return (0);
262 	}
263 
264 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
265 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
266 	switch (r) {
267 	case 0:
268 	case ECONNABORTED:
269 	case ECANCELED:
270 		*ctidp = 0;
271 		return (r);
272 
273 	case ENOMEM:
274 		uu_die("Out of memory\n");
275 		/* NOTREACHED */
276 
277 	case EPERM:
278 		uu_die("Insufficient privilege.\n");
279 		/* NOTREACHED */
280 
281 	case EACCES:
282 		uu_die("Repository backend access denied.\n");
283 		/* NOTREACHED */
284 
285 	case EROFS:
286 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
287 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
288 		return (0);
289 
290 	case EINVAL:
291 	case EBADF:
292 	default:
293 		assert(0);
294 		abort();
295 		/* NOTREACHED */
296 	}
297 }
298 
/*
 * Forward declaration: stop_instance() is defined further down in this file
 * but is called from restarter_insert_inst().
 */
static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
300 
301 /*
302  * int restarter_insert_inst(scf_handle_t *, char *)
303  *   If the inst is already in the restarter list, return its id.  If the inst
304  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
305  *   states, insert it into the list, and return 0.
306  *
307  *   Fails with
308  *     ENOENT - name is not in the repository
309  */
310 static int
311 restarter_insert_inst(scf_handle_t *h, const char *name)
312 {
313 	int id, r;
314 	restarter_inst_t *inst;
315 	uu_list_index_t idx;
316 	scf_service_t *scf_svc;
317 	scf_instance_t *scf_inst;
318 	scf_snapshot_t *snap = NULL;
319 	scf_propertygroup_t *pg;
320 	char *svc_name, *inst_name;
321 	char logfilebuf[PATH_MAX];
322 	char *c;
323 	boolean_t do_commit_states;
324 	restarter_instance_state_t state, next_state;
325 	protocol_states_t *ps;
326 	pid_t start_pid;
327 	restarter_str_t reason = restarter_str_insert_in_graph;
328 
329 	MUTEX_LOCK(&instance_list.ril_lock);
330 
331 	/*
332 	 * We don't use inst_lookup_by_name() here because we want the lookup
333 	 * & insert to be atomic.
334 	 */
335 	id = dict_lookup_byname(name);
336 	if (id != -1) {
337 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
338 		    &idx);
339 		if (inst != NULL) {
340 			MUTEX_UNLOCK(&instance_list.ril_lock);
341 			return (0);
342 		}
343 	}
344 
345 	/* Allocate an instance */
346 	inst = startd_zalloc(sizeof (restarter_inst_t));
347 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
348 	inst->ri_utmpx_prefix[0] = '\0';
349 
350 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
351 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
352 
353 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
354 
355 	/*
356 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
357 	 * just in case.
358 	 */
359 	inst->ri_id = (id != -1 ? id : dict_insert(name));
360 
361 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
362 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
363 
364 	scf_svc = safe_scf_service_create(h);
365 	scf_inst = safe_scf_instance_create(h);
366 	pg = safe_scf_pg_create(h);
367 	svc_name = startd_alloc(max_scf_name_size);
368 	inst_name = startd_alloc(max_scf_name_size);
369 
370 rep_retry:
371 	if (snap != NULL)
372 		scf_snapshot_destroy(snap);
373 	if (inst->ri_logstem != NULL)
374 		startd_free(inst->ri_logstem, PATH_MAX);
375 	if (inst->ri_common_name != NULL)
376 		startd_free(inst->ri_common_name, max_scf_value_size);
377 	if (inst->ri_C_common_name != NULL)
378 		startd_free(inst->ri_C_common_name, max_scf_value_size);
379 	snap = NULL;
380 	inst->ri_logstem = NULL;
381 	inst->ri_common_name = NULL;
382 	inst->ri_C_common_name = NULL;
383 
384 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
385 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
386 		switch (scf_error()) {
387 		case SCF_ERROR_CONNECTION_BROKEN:
388 			libscf_handle_rebind(h);
389 			goto rep_retry;
390 
391 		case SCF_ERROR_NOT_FOUND:
392 			goto deleted;
393 		}
394 
395 		uu_die("Can't decode FMRI %s: %s\n", name,
396 		    scf_strerror(scf_error()));
397 	}
398 
399 	/*
400 	 * If there's no running snapshot, then we execute using the editing
401 	 * snapshot.  Pending snapshots will be taken later.
402 	 */
403 	snap = libscf_get_running_snapshot(scf_inst);
404 
405 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
406 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
407 	    0)) {
408 		switch (scf_error()) {
409 		case SCF_ERROR_NOT_SET:
410 			break;
411 
412 		case SCF_ERROR_CONNECTION_BROKEN:
413 			libscf_handle_rebind(h);
414 			goto rep_retry;
415 
416 		default:
417 			assert(0);
418 			abort();
419 		}
420 
421 		goto deleted;
422 	}
423 
424 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
425 	for (c = logfilebuf; *c != '\0'; c++)
426 		if (*c == '/')
427 			*c = '-';
428 
429 	inst->ri_logstem = startd_alloc(PATH_MAX);
430 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
431 	    LOG_SUFFIX);
432 
433 	/*
434 	 * If the restarter group is missing, use uninit/none.  Otherwise,
435 	 * we're probably being restarted & don't want to mess up the states
436 	 * that are there.
437 	 */
438 	state = RESTARTER_STATE_UNINIT;
439 	next_state = RESTARTER_STATE_NONE;
440 
441 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
442 	if (r != 0) {
443 		switch (scf_error()) {
444 		case SCF_ERROR_CONNECTION_BROKEN:
445 			libscf_handle_rebind(h);
446 			goto rep_retry;
447 
448 		case SCF_ERROR_NOT_SET:
449 			goto deleted;
450 
451 		case SCF_ERROR_NOT_FOUND:
452 			/*
453 			 * This shouldn't happen since the graph engine should
454 			 * have initialized the state to uninitialized/none if
455 			 * there was no restarter pg.  In case somebody
456 			 * deleted it, though....
457 			 */
458 			do_commit_states = B_TRUE;
459 			break;
460 
461 		default:
462 			assert(0);
463 			abort();
464 		}
465 	} else {
466 		r = libscf_read_states(pg, &state, &next_state);
467 		if (r != 0) {
468 			do_commit_states = B_TRUE;
469 		} else {
470 			if (next_state != RESTARTER_STATE_NONE) {
471 				/*
472 				 * Force next_state to _NONE since we
473 				 * don't look for method processes.
474 				 */
475 				next_state = RESTARTER_STATE_NONE;
476 				do_commit_states = B_TRUE;
477 			} else {
478 				/*
479 				 * The reason for transition will depend on
480 				 * state.
481 				 */
482 				if (st->st_initial == 0)
483 					reason = restarter_str_startd_restart;
484 				else if (state == RESTARTER_STATE_MAINT)
485 					reason = restarter_str_bad_repo_state;
486 				/*
487 				 * Inform the restarter of our state without
488 				 * changing the STIME in the repository.
489 				 */
490 				ps = startd_alloc(sizeof (*ps));
491 				inst->ri_i.i_state = ps->ps_state = state;
492 				inst->ri_i.i_next_state = ps->ps_state_next =
493 				    next_state;
494 				ps->ps_reason = reason;
495 
496 				graph_protocol_send_event(inst->ri_i.i_fmri,
497 				    GRAPH_UPDATE_STATE_CHANGE, ps);
498 
499 				do_commit_states = B_FALSE;
500 			}
501 		}
502 	}
503 
504 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
505 	    &inst->ri_utmpx_prefix)) {
506 	case 0:
507 		break;
508 
509 	case ECONNABORTED:
510 		libscf_handle_rebind(h);
511 		goto rep_retry;
512 
513 	case ECANCELED:
514 		goto deleted;
515 
516 	case ENOENT:
517 		/*
518 		 * This is odd, because the graph engine should have required
519 		 * the general property group.  So we'll just use default
520 		 * flags in anticipation of the graph engine sending us
521 		 * REMOVE_INSTANCE when it finds out that the general property
522 		 * group has been deleted.
523 		 */
524 		inst->ri_flags = RINST_CONTRACT;
525 		break;
526 
527 	default:
528 		assert(0);
529 		abort();
530 	}
531 
532 	switch (libscf_get_template_values(scf_inst, snap,
533 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
534 	case 0:
535 		break;
536 
537 	case ECONNABORTED:
538 		libscf_handle_rebind(h);
539 		goto rep_retry;
540 
541 	case ECANCELED:
542 		goto deleted;
543 
544 	case ECHILD:
545 	case ENOENT:
546 		break;
547 
548 	default:
549 		assert(0);
550 		abort();
551 	}
552 
553 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
554 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
555 	    &start_pid)) {
556 	case 0:
557 		break;
558 
559 	case ECONNABORTED:
560 		libscf_handle_rebind(h);
561 		goto rep_retry;
562 
563 	case ECANCELED:
564 		goto deleted;
565 
566 	default:
567 		assert(0);
568 		abort();
569 	}
570 
571 	if (inst->ri_i.i_primary_ctid >= 1) {
572 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
573 
574 		switch (check_contract(inst, B_TRUE, scf_inst)) {
575 		case 0:
576 			break;
577 
578 		case ECONNABORTED:
579 			libscf_handle_rebind(h);
580 			goto rep_retry;
581 
582 		case ECANCELED:
583 			goto deleted;
584 
585 		default:
586 			assert(0);
587 			abort();
588 		}
589 	}
590 
591 	if (inst->ri_i.i_transient_ctid >= 1) {
592 		switch (check_contract(inst, B_FALSE, scf_inst)) {
593 		case 0:
594 			break;
595 
596 		case ECONNABORTED:
597 			libscf_handle_rebind(h);
598 			goto rep_retry;
599 
600 		case ECANCELED:
601 			goto deleted;
602 
603 		default:
604 			assert(0);
605 			abort();
606 		}
607 	}
608 
609 	/* No more failures we live through, so add it to the list. */
610 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
611 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
612 	MUTEX_LOCK(&inst->ri_lock);
613 	MUTEX_LOCK(&inst->ri_queue_lock);
614 
615 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
616 
617 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
618 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
619 	MUTEX_UNLOCK(&instance_list.ril_lock);
620 
621 	if (start_pid != -1 &&
622 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
623 		int ret;
624 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
625 		if (ret == -1) {
626 			/*
627 			 * Implication:  if we can't reregister the
628 			 * instance, we will start another one.  Two
629 			 * instances may or may not result in a resource
630 			 * conflict.
631 			 */
632 			log_error(LOG_WARNING,
633 			    "%s: couldn't reregister %ld for wait\n",
634 			    inst->ri_i.i_fmri, start_pid);
635 		} else if (ret == 1) {
636 			/*
637 			 * Leading PID has exited.
638 			 */
639 			(void) stop_instance(h, inst, RSTOP_EXIT);
640 		}
641 	}
642 
643 
644 	scf_pg_destroy(pg);
645 
646 	if (do_commit_states)
647 		(void) restarter_instance_update_states(h, inst, state,
648 		    next_state, RERR_NONE, reason);
649 
650 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
651 	    service_style(inst->ri_flags));
652 
653 	MUTEX_UNLOCK(&inst->ri_queue_lock);
654 	MUTEX_UNLOCK(&inst->ri_lock);
655 
656 	startd_free(svc_name, max_scf_name_size);
657 	startd_free(inst_name, max_scf_name_size);
658 	scf_snapshot_destroy(snap);
659 	scf_instance_destroy(scf_inst);
660 	scf_service_destroy(scf_svc);
661 
662 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
663 	    name);
664 
665 	return (0);
666 
667 deleted:
668 	MUTEX_UNLOCK(&instance_list.ril_lock);
669 	startd_free(inst_name, max_scf_name_size);
670 	startd_free(svc_name, max_scf_name_size);
671 	if (snap != NULL)
672 		scf_snapshot_destroy(snap);
673 	scf_pg_destroy(pg);
674 	scf_instance_destroy(scf_inst);
675 	scf_service_destroy(scf_svc);
676 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
677 	uu_list_destroy(inst->ri_queue);
678 	if (inst->ri_logstem != NULL)
679 		startd_free(inst->ri_logstem, PATH_MAX);
680 	if (inst->ri_common_name != NULL)
681 		startd_free(inst->ri_common_name, max_scf_value_size);
682 	if (inst->ri_C_common_name != NULL)
683 		startd_free(inst->ri_C_common_name, max_scf_value_size);
684 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
685 	startd_free(inst, sizeof (restarter_inst_t));
686 	return (ENOENT);
687 }
688 
/*
 * Remove an instance from instance_list and free it.
 *
 * The caller must hold ri->ri_lock.  That lock is dropped so that
 * instance_list.ril_lock can be taken first, which means the instance may
 * already be gone from the list by the time we look; in that case the
 * function simply returns (with ri_lock no longer held).  Otherwise the
 * instance is unlinked, outstanding method threads and waiters are drained,
 * and the queue, strings, locks and the structure itself are released.
 */
static void
restarter_delete_inst(restarter_inst_t *ri)
{
	int id;
	restarter_inst_t *rip;
	void *cookie = NULL;
	restarter_instance_qentry_t *e;

	assert(MUTEX_HELD(&ri->ri_lock));

	/*
	 * Must drop the instance lock so we can pick up the instance_list
	 * lock & remove the instance.
	 */
	id = ri->ri_id;
	MUTEX_UNLOCK(&ri->ri_lock);

	MUTEX_LOCK(&instance_list.ril_lock);

	/* Look the id up again: nothing pinned ri while it was unlocked. */
	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
	if (rip == NULL) {
		MUTEX_UNLOCK(&instance_list.ril_lock);
		return;
	}

	assert(ri == rip);

	uu_list_remove(instance_list.ril_instance_list, ri);

	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
	    ri->ri_i.i_fmri);

	MUTEX_UNLOCK(&instance_list.ril_lock);

	/*
	 * We can lock the instance without holding the instance_list lock
	 * since we removed the instance from the list.
	 */
	MUTEX_LOCK(&ri->ri_lock);
	MUTEX_LOCK(&ri->ri_queue_lock);

	if (ri->ri_i.i_primary_ctid >= 1)
		contract_hash_remove(ri->ri_i.i_primary_ctid);

	/*
	 * Wait until no method thread is recorded and nobody is still blocked
	 * in inst_lookup_by_id() on this instance.
	 * NOTE(review): pthread_cond_wait() releases only ri_lock, so
	 * ri_queue_lock stays held for the whole wait -- confirm intended.
	 */
	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);

	/* Free every queued entry, then the (now empty) queue list. */
	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
		startd_free(e, sizeof (*e));
	uu_list_destroy(ri->ri_queue);

	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
	startd_free(ri->ri_logstem, PATH_MAX);
	if (ri->ri_common_name != NULL)
		startd_free(ri->ri_common_name, max_scf_value_size);
	if (ri->ri_C_common_name != NULL)
		startd_free(ri->ri_C_common_name, max_scf_value_size);
	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
	/*
	 * NOTE(review): both mutexes are still locked by this thread when they
	 * are destroyed, and ri_method_cv is never destroyed -- confirm this is
	 * acceptable on the target platform.
	 */
	(void) pthread_mutex_destroy(&ri->ri_lock);
	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
	startd_free(ri, sizeof (restarter_inst_t));
}
751 
752 /*
753  * instance_is_wait_style()
754  *
755  *   Returns 1 if the given instance is a "wait-style" service instance.
756  */
757 int
758 instance_is_wait_style(restarter_inst_t *inst)
759 {
760 	assert(MUTEX_HELD(&inst->ri_lock));
761 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
762 }
763 
764 /*
765  * instance_is_transient_style()
766  *
767  *   Returns 1 if the given instance is a transient service instance.
768  */
769 int
770 instance_is_transient_style(restarter_inst_t *inst)
771 {
772 	assert(MUTEX_HELD(&inst->ri_lock));
773 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
774 }
775 
776 /*
777  * instance_in_transition()
778  * Returns 1 if instance is in transition, 0 if not
779  */
780 int
781 instance_in_transition(restarter_inst_t *inst)
782 {
783 	assert(MUTEX_HELD(&inst->ri_lock));
784 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
785 		return (0);
786 	return (1);
787 }
788 
789 /*
790  * returns 1 if instance is already started, 0 if not
791  */
792 static int
793 instance_started(restarter_inst_t *inst)
794 {
795 	int ret;
796 
797 	assert(MUTEX_HELD(&inst->ri_lock));
798 
799 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
800 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
801 		ret = 1;
802 	else
803 		ret = 0;
804 
805 	return (ret);
806 }
807 
808 /*
809  * Returns
810  *   0 - success
811  *   ECONNRESET - success, but h was rebound
812  */
813 int
814 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
815     restarter_instance_state_t new_state,
816     restarter_instance_state_t new_state_next, restarter_error_t err,
817     restarter_str_t reason)
818 {
819 	protocol_states_t *states;
820 	int e;
821 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
822 	boolean_t rebound = B_FALSE;
823 	int prev_state_online;
824 	int state_online;
825 
826 	assert(MUTEX_HELD(&ri->ri_lock));
827 
828 	prev_state_online = instance_started(ri);
829 
830 retry:
831 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
832 	    restarter_get_str_short(reason));
833 	switch (e) {
834 	case 0:
835 		break;
836 
837 	case ENOMEM:
838 		++retry_count;
839 		if (retry_count < ALLOC_RETRY) {
840 			(void) poll(NULL, 0, msecs);
841 			msecs *= ALLOC_DELAY_MULT;
842 			goto retry;
843 		}
844 
845 		/* Like startd_alloc(). */
846 		uu_die("Insufficient memory.\n");
847 		/* NOTREACHED */
848 
849 	case ECONNABORTED:
850 		libscf_handle_rebind(h);
851 		rebound = B_TRUE;
852 		goto retry;
853 
854 	case EPERM:
855 	case EACCES:
856 	case EROFS:
857 		log_error(LOG_NOTICE, "Could not commit state change for %s "
858 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
859 		/* FALLTHROUGH */
860 
861 	case ENOENT:
862 		ri->ri_i.i_state = new_state;
863 		ri->ri_i.i_next_state = new_state_next;
864 		break;
865 
866 	case EINVAL:
867 	default:
868 		bad_error("_restarter_commit_states", e);
869 	}
870 
871 	states = startd_alloc(sizeof (protocol_states_t));
872 	states->ps_state = new_state;
873 	states->ps_state_next = new_state_next;
874 	states->ps_err = err;
875 	states->ps_reason = reason;
876 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
877 	    (void *)states);
878 
879 	state_online = instance_started(ri);
880 
881 	if (prev_state_online && !state_online)
882 		ri->ri_post_offline_hook();
883 	else if (!prev_state_online && state_online)
884 		ri->ri_post_online_hook();
885 
886 	return (rebound ? ECONNRESET : 0);
887 }
888 
889 void
890 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
891 {
892 	restarter_inst_t *inst;
893 
894 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
895 
896 	inst = inst_lookup_by_name(fmri);
897 	if (inst == NULL)
898 		return;
899 
900 	inst->ri_flags |= flag;
901 
902 	MUTEX_UNLOCK(&inst->ri_lock);
903 }
904 
905 static void
906 restarter_take_pending_snapshots(scf_handle_t *h)
907 {
908 	restarter_inst_t *inst;
909 	int r;
910 
911 	MUTEX_LOCK(&instance_list.ril_lock);
912 
913 	for (inst = uu_list_first(instance_list.ril_instance_list);
914 	    inst != NULL;
915 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
916 		const char *fmri;
917 		scf_instance_t *sinst = NULL;
918 
919 		MUTEX_LOCK(&inst->ri_lock);
920 
921 		/*
922 		 * This is where we'd check inst->ri_method_thread and if it
923 		 * were nonzero we'd wait in anticipation of another thread
924 		 * executing a method for inst.  Doing so with the instance_list
925 		 * locked, though, leads to deadlock.  Since taking a snapshot
926 		 * during that window won't hurt anything, we'll just continue.
927 		 */
928 
929 		fmri = inst->ri_i.i_fmri;
930 
931 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
932 			scf_snapshot_t *rsnap;
933 
934 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
935 
936 			rsnap = libscf_get_or_make_running_snapshot(sinst,
937 			    fmri, B_FALSE);
938 
939 			scf_instance_destroy(sinst);
940 
941 			if (rsnap != NULL)
942 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
943 
944 			scf_snapshot_destroy(rsnap);
945 		}
946 
947 		if (inst->ri_flags & RINST_RETAKE_START) {
948 			switch (r = libscf_snapshots_poststart(h, fmri,
949 			    B_FALSE)) {
950 			case 0:
951 			case ENOENT:
952 				inst->ri_flags &= ~RINST_RETAKE_START;
953 				break;
954 
955 			case ECONNABORTED:
956 				break;
957 
958 			case EACCES:
959 			default:
960 				bad_error("libscf_snapshots_poststart", r);
961 			}
962 		}
963 
964 		MUTEX_UNLOCK(&inst->ri_lock);
965 	}
966 
967 	MUTEX_UNLOCK(&instance_list.ril_lock);
968 }
969 
970 /* ARGSUSED */
971 void *
972 restarter_post_fsminimal_thread(void *unused)
973 {
974 	scf_handle_t *h;
975 	int r;
976 
977 	h = libscf_handle_create_bound_loop();
978 
979 	for (;;) {
980 		r = libscf_create_self(h);
981 		if (r == 0)
982 			break;
983 
984 		assert(r == ECONNABORTED);
985 		libscf_handle_rebind(h);
986 	}
987 
988 	restarter_take_pending_snapshots(h);
989 
990 	(void) scf_handle_unbind(h);
991 	scf_handle_destroy(h);
992 
993 	return (NULL);
994 }
995 
996 /*
997  * int stop_instance()
998  *
999  *   Stop the instance identified by the instance given as the second argument,
1000  *   for the cause stated.
1001  *
1002  *   Returns
1003  *     0 - success
1004  *     -1 - inst is in transition
1005  */
1006 static int
1007 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1008     stop_cause_t cause)
1009 {
1010 	fork_info_t *info;
1011 	const char *cp;
1012 	int err;
1013 	restarter_error_t re;
1014 	restarter_str_t	reason;
1015 
1016 	assert(MUTEX_HELD(&inst->ri_lock));
1017 	assert(inst->ri_method_thread == 0);
1018 
1019 	switch (cause) {
1020 	case RSTOP_EXIT:
1021 		re = RERR_RESTART;
1022 		reason = restarter_str_ct_ev_exit;
1023 		cp = "all processes in service exited";
1024 		break;
1025 	case RSTOP_CORE:
1026 		re = RERR_FAULT;
1027 		reason = restarter_str_ct_ev_core;
1028 		cp = "process dumped core";
1029 		break;
1030 	case RSTOP_SIGNAL:
1031 		re = RERR_FAULT;
1032 		reason = restarter_str_ct_ev_signal;
1033 		cp = "process received fatal signal from outside the service";
1034 		break;
1035 	case RSTOP_HWERR:
1036 		re = RERR_FAULT;
1037 		reason = restarter_str_ct_ev_hwerr;
1038 		cp = "process killed due to uncorrectable hardware error";
1039 		break;
1040 	case RSTOP_DEPENDENCY:
1041 		re = RERR_RESTART;
1042 		reason = restarter_str_dependency_activity;
1043 		cp = "dependency activity requires stop";
1044 		break;
1045 	case RSTOP_DISABLE:
1046 		re = RERR_RESTART;
1047 		reason = restarter_str_disable_request;
1048 		cp = "service disabled";
1049 		break;
1050 	case RSTOP_RESTART:
1051 		re = RERR_RESTART;
1052 		reason = restarter_str_restart_request;
1053 		cp = "service restarting";
1054 		break;
1055 	default:
1056 #ifndef NDEBUG
1057 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1058 		    cause, __FILE__, __LINE__);
1059 #endif
1060 		abort();
1061 	}
1062 
1063 	/* Services in the disabled and maintenance state are ignored */
1064 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1065 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1066 		log_framework(LOG_DEBUG,
1067 		    "%s: stop_instance -> is maint/disabled\n",
1068 		    inst->ri_i.i_fmri);
1069 		return (0);
1070 	}
1071 
1072 	/* Already stopped instances are left alone */
1073 	if (instance_started(inst) == 0) {
1074 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1075 		    inst->ri_i.i_fmri);
1076 		return (0);
1077 	}
1078 
1079 	if (instance_in_transition(inst)) {
1080 		/* requeue event by returning -1 */
1081 		log_framework(LOG_DEBUG,
1082 		    "Restarter: Not stopping %s, in transition.\n",
1083 		    inst->ri_i.i_fmri);
1084 		return (-1);
1085 	}
1086 
1087 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1088 
1089 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1090 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1091 
1092 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1093 		/*
1094 		 * No need to stop instance, as child has exited; remove
1095 		 * contract and move the instance to the offline state.
1096 		 */
1097 		switch (err = restarter_instance_update_states(local_handle,
1098 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1099 		    reason)) {
1100 		case 0:
1101 		case ECONNRESET:
1102 			break;
1103 
1104 		default:
1105 			bad_error("restarter_instance_update_states", err);
1106 		}
1107 
1108 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1109 		reset_start_times(inst);
1110 
1111 		if (inst->ri_i.i_primary_ctid != 0) {
1112 			inst->ri_m_inst =
1113 			    safe_scf_instance_create(local_handle);
1114 			inst->ri_mi_deleted = B_FALSE;
1115 
1116 			libscf_reget_instance(inst);
1117 			method_remove_contract(inst, B_TRUE, B_TRUE);
1118 
1119 			scf_instance_destroy(inst->ri_m_inst);
1120 			inst->ri_m_inst = NULL;
1121 		}
1122 
1123 		switch (err = restarter_instance_update_states(local_handle,
1124 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1125 		    reason)) {
1126 		case 0:
1127 		case ECONNRESET:
1128 			break;
1129 
1130 		default:
1131 			bad_error("restarter_instance_update_states", err);
1132 		}
1133 
1134 		return (0);
1135 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1136 		/*
1137 		 * Stopping a wait service through means other than the pid
1138 		 * exiting should keep wait_thread() from restarting the
1139 		 * service, by removing it from the wait list.
1140 		 * We cannot remove it right now otherwise the process will
1141 		 * end up <defunct> so mark it to be ignored.
1142 		 */
1143 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1144 	}
1145 
1146 	switch (err = restarter_instance_update_states(local_handle, inst,
1147 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1148 	    RESTARTER_STATE_DISABLED, RERR_NONE, reason)) {
1149 	case 0:
1150 	case ECONNRESET:
1151 		break;
1152 
1153 	default:
1154 		bad_error("restarter_instance_update_states", err);
1155 	}
1156 
1157 	info = startd_zalloc(sizeof (fork_info_t));
1158 
1159 	info->sf_id = inst->ri_id;
1160 	info->sf_method_type = METHOD_STOP;
1161 	info->sf_event_type = re;
1162 	info->sf_reason = reason;
1163 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1164 
1165 	return (0);
1166 }
1167 
1168 /*
1169  * Returns
1170  *   ENOENT - fmri is not in instance_list
1171  *   0 - success
1172  *   ECONNRESET - success, though handle was rebound
1173  *   -1 - instance is in transition
1174  */
1175 int
1176 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1177 {
1178 	restarter_inst_t *rip;
1179 	int r;
1180 
1181 	rip = inst_lookup_by_name(fmri);
1182 	if (rip == NULL)
1183 		return (ENOENT);
1184 
1185 	r = stop_instance(h, rip, flags);
1186 
1187 	MUTEX_UNLOCK(&rip->ri_lock);
1188 
1189 	return (r);
1190 }
1191 
1192 static void
1193 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1194     unmaint_cause_t cause)
1195 {
1196 	ctid_t ctid;
1197 	scf_instance_t *inst;
1198 	int r;
1199 	uint_t tries = 0, msecs = ALLOC_DELAY;
1200 	const char *cp;
1201 	restarter_str_t	reason;
1202 
1203 	assert(MUTEX_HELD(&rip->ri_lock));
1204 
1205 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1206 		log_error(LOG_DEBUG, "Restarter: "
1207 		    "Ignoring maintenance off command because %s is not in the "
1208 		    "maintenance state.\n", rip->ri_i.i_fmri);
1209 		return;
1210 	}
1211 
1212 	switch (cause) {
1213 	case RUNMAINT_CLEAR:
1214 		cp = "clear requested";
1215 		reason = restarter_str_clear_request;
1216 		break;
1217 	case RUNMAINT_DISABLE:
1218 		cp = "disable requested";
1219 		reason = restarter_str_disable_request;
1220 		break;
1221 	default:
1222 #ifndef NDEBUG
1223 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1224 		    cause, __FILE__, __LINE__);
1225 #endif
1226 		abort();
1227 	}
1228 
1229 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1230 	    cp);
1231 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1232 	    "%s.\n", rip->ri_i.i_fmri, cp);
1233 
1234 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1235 	    RESTARTER_STATE_NONE, RERR_RESTART, reason);
1236 
1237 	/*
1238 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1239 	 * a primary contract.
1240 	 */
1241 	if (rip->ri_i.i_primary_ctid == 0)
1242 		return;
1243 
1244 	ctid = rip->ri_i.i_primary_ctid;
1245 	contract_abandon(ctid);
1246 	rip->ri_i.i_primary_ctid = 0;
1247 
1248 rep_retry:
1249 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1250 	case 0:
1251 		break;
1252 
1253 	case ECONNABORTED:
1254 		libscf_handle_rebind(h);
1255 		goto rep_retry;
1256 
1257 	case ENOENT:
1258 		/* Must have been deleted. */
1259 		return;
1260 
1261 	case EINVAL:
1262 	case ENOTSUP:
1263 	default:
1264 		bad_error("libscf_handle_rebind", r);
1265 	}
1266 
1267 again:
1268 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1269 	switch (r) {
1270 	case 0:
1271 		break;
1272 
1273 	case ENOMEM:
1274 		++tries;
1275 		if (tries < ALLOC_RETRY) {
1276 			(void) poll(NULL, 0, msecs);
1277 			msecs *= ALLOC_DELAY_MULT;
1278 			goto again;
1279 		}
1280 
1281 		uu_die("Insufficient memory.\n");
1282 		/* NOTREACHED */
1283 
1284 	case ECONNABORTED:
1285 		scf_instance_destroy(inst);
1286 		libscf_handle_rebind(h);
1287 		goto rep_retry;
1288 
1289 	case ECANCELED:
1290 		break;
1291 
1292 	case EPERM:
1293 	case EACCES:
1294 	case EROFS:
1295 		log_error(LOG_INFO,
1296 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1297 		    rip->ri_i.i_fmri, strerror(r));
1298 		break;
1299 
1300 	case EINVAL:
1301 	case EBADF:
1302 	default:
1303 		bad_error("restarter_remove_contract", r);
1304 	}
1305 
1306 	scf_instance_destroy(inst);
1307 }
1308 
1309 /*
1310  * enable_inst()
1311  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1312  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1313  *   disabled, move it to offline.  If the event is _DISABLE or
1314  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1315  *
1316  *   Returns
1317  *     0 - success
1318  *     ECONNRESET - h was rebound
1319  */
1320 static int
1321 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1322     restarter_instance_qentry_t *riq)
1323 {
1324 	restarter_instance_state_t state;
1325 	restarter_event_type_t e = riq->riq_type;
1326 	restarter_str_t reason = restarter_str_per_configuration;
1327 	int r;
1328 
1329 	assert(MUTEX_HELD(&inst->ri_lock));
1330 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1331 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1332 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1333 	assert(instance_in_transition(inst) == 0);
1334 
1335 	state = inst->ri_i.i_state;
1336 
1337 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1338 		inst->ri_i.i_enabled = 1;
1339 
1340 		if (state == RESTARTER_STATE_UNINIT ||
1341 		    state == RESTARTER_STATE_DISABLED) {
1342 			/*
1343 			 * B_FALSE: Don't log an error if the log_instance()
1344 			 * fails because it will fail on the miniroot before
1345 			 * install-discovery runs.
1346 			 */
1347 			log_instance(inst, B_FALSE, "Enabled.");
1348 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1349 			    inst->ri_i.i_fmri);
1350 
1351 			/*
1352 			 * If we are coming from DISABLED, it was obviously an
1353 			 * enable request. If we are coming from UNINIT, it may
1354 			 * have been a sevice in MAINT that was cleared.
1355 			 */
1356 			if (riq->riq_reason == restarter_str_clear_request)
1357 				reason = restarter_str_clear_request;
1358 			else if (state == RESTARTER_STATE_DISABLED)
1359 				reason = restarter_str_enable_request;
1360 			(void) restarter_instance_update_states(h, inst,
1361 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1362 			    RERR_NONE, reason);
1363 		} else {
1364 			log_framework(LOG_DEBUG, "Restarter: "
1365 			    "Not changing state of %s for enable command.\n",
1366 			    inst->ri_i.i_fmri);
1367 		}
1368 	} else {
1369 		inst->ri_i.i_enabled = 0;
1370 
1371 		switch (state) {
1372 		case RESTARTER_STATE_ONLINE:
1373 		case RESTARTER_STATE_DEGRADED:
1374 			r = stop_instance(h, inst, RSTOP_DISABLE);
1375 			return (r == ECONNRESET ? 0 : r);
1376 
1377 		case RESTARTER_STATE_OFFLINE:
1378 		case RESTARTER_STATE_UNINIT:
1379 			if (inst->ri_i.i_primary_ctid != 0) {
1380 				inst->ri_m_inst = safe_scf_instance_create(h);
1381 				inst->ri_mi_deleted = B_FALSE;
1382 
1383 				libscf_reget_instance(inst);
1384 				method_remove_contract(inst, B_TRUE, B_TRUE);
1385 
1386 				scf_instance_destroy(inst->ri_m_inst);
1387 			}
1388 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1389 			log_instance(inst, B_FALSE, "Disabled.");
1390 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1391 			    inst->ri_i.i_fmri);
1392 
1393 			/*
1394 			 * If we are coming from OFFLINE, it was obviously a
1395 			 * disable request. But if we are coming from
1396 			 * UNINIT, it may have been a disable request for a
1397 			 * service in MAINT.
1398 			 */
1399 			if (riq->riq_reason == restarter_str_disable_request ||
1400 			    state == RESTARTER_STATE_OFFLINE)
1401 				reason = restarter_str_disable_request;
1402 			(void) restarter_instance_update_states(h, inst,
1403 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1404 			    RERR_RESTART, reason);
1405 			return (0);
1406 
1407 		case RESTARTER_STATE_DISABLED:
1408 			break;
1409 
1410 		case RESTARTER_STATE_MAINT:
1411 			/*
1412 			 * We only want to pull the instance out of maintenance
1413 			 * if the disable is on adminstrative request.  The
1414 			 * graph engine sends _DISABLE events whenever a
1415 			 * service isn't in the disabled state, and we don't
1416 			 * want to pull the service out of maintenance if,
1417 			 * for example, it is there due to a dependency cycle.
1418 			 */
1419 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1420 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1421 			break;
1422 
1423 		default:
1424 #ifndef NDEBUG
1425 			(void) fprintf(stderr, "Restarter instance %s has "
1426 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1427 #endif
1428 			abort();
1429 		}
1430 	}
1431 
1432 	return (0);
1433 }
1434 
1435 static void
1436 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1437     int32_t reason)
1438 {
1439 	fork_info_t *info;
1440 	restarter_str_t	new_reason;
1441 
1442 	assert(MUTEX_HELD(&inst->ri_lock));
1443 	assert(instance_in_transition(inst) == 0);
1444 	assert(inst->ri_method_thread == 0);
1445 
1446 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1447 	    inst->ri_i.i_fmri);
1448 
1449 	/*
1450 	 * We want to keep the original reason for restarts and clear actions
1451 	 */
1452 	switch (reason) {
1453 	case restarter_str_restart_request:
1454 	case restarter_str_clear_request:
1455 		new_reason = reason;
1456 		break;
1457 	default:
1458 		new_reason = restarter_str_dependencies_satisfied;
1459 	}
1460 
1461 	/* Services in the disabled and maintenance state are ignored */
1462 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1463 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1464 	    inst->ri_i.i_enabled == 0) {
1465 		log_framework(LOG_DEBUG,
1466 		    "%s: start_instance -> is maint/disabled\n",
1467 		    inst->ri_i.i_fmri);
1468 		return;
1469 	}
1470 
1471 	/* Already started instances are left alone */
1472 	if (instance_started(inst) == 1) {
1473 		log_framework(LOG_DEBUG,
1474 		    "%s: start_instance -> is already started\n",
1475 		    inst->ri_i.i_fmri);
1476 		return;
1477 	}
1478 
1479 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1480 
1481 	(void) restarter_instance_update_states(local_handle, inst,
1482 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1483 
1484 	info = startd_zalloc(sizeof (fork_info_t));
1485 
1486 	info->sf_id = inst->ri_id;
1487 	info->sf_method_type = METHOD_START;
1488 	info->sf_event_type = RERR_NONE;
1489 	info->sf_reason = new_reason;
1490 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1491 }
1492 
1493 static int
1494 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1495 {
1496 	scf_instance_t *inst;
1497 	int ret = 0;
1498 
1499 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1500 		return (-1);
1501 
1502 	ret = restarter_inst_ractions_from_tty(inst);
1503 
1504 	scf_instance_destroy(inst);
1505 	return (ret);
1506 }
1507 
1508 static void
1509 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1510     restarter_str_t reason)
1511 {
1512 	fork_info_t *info;
1513 	scf_instance_t *scf_inst = NULL;
1514 
1515 	assert(MUTEX_HELD(&rip->ri_lock));
1516 	assert(reason != restarter_str_none);
1517 	assert(rip->ri_method_thread == 0);
1518 
1519 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1520 	    restarter_get_str_short(reason));
1521 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1522 	    rip->ri_i.i_fmri, restarter_get_str_short(reason));
1523 
1524 	/* Services in the maintenance state are ignored */
1525 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1526 		log_framework(LOG_DEBUG,
1527 		    "%s: maintain_instance -> is already in maintenance\n",
1528 		    rip->ri_i.i_fmri);
1529 		return;
1530 	}
1531 
1532 	/*
1533 	 * If reason state is restarter_str_service_request and
1534 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1535 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1536 	 */
1537 	if (reason == restarter_str_service_request &&
1538 	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1539 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1540 			if (restarter_inst_set_aux_fmri(scf_inst))
1541 				log_framework(LOG_DEBUG, "%s: "
1542 				    "restarter_inst_set_aux_fmri failed: ",
1543 				    rip->ri_i.i_fmri);
1544 		} else {
1545 			log_framework(LOG_DEBUG, "%s: "
1546 			    "restarter_inst_validate_ractions_aux_fmri "
1547 			    "failed: ", rip->ri_i.i_fmri);
1548 
1549 			if (restarter_inst_reset_aux_fmri(scf_inst))
1550 				log_framework(LOG_DEBUG, "%s: "
1551 				    "restarter_inst_reset_aux_fmri failed: ",
1552 				    rip->ri_i.i_fmri);
1553 		}
1554 		scf_instance_destroy(scf_inst);
1555 	}
1556 
1557 	if (immediate || !instance_started(rip)) {
1558 		if (rip->ri_i.i_primary_ctid != 0) {
1559 			rip->ri_m_inst = safe_scf_instance_create(h);
1560 			rip->ri_mi_deleted = B_FALSE;
1561 
1562 			libscf_reget_instance(rip);
1563 			method_remove_contract(rip, B_TRUE, B_TRUE);
1564 
1565 			scf_instance_destroy(rip->ri_m_inst);
1566 		}
1567 
1568 		(void) restarter_instance_update_states(h, rip,
1569 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1570 		    reason);
1571 		return;
1572 	}
1573 
1574 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1575 	    RESTARTER_STATE_MAINT, RERR_NONE, reason);
1576 
1577 	log_transition(rip, MAINT_REQUESTED);
1578 
1579 	info = startd_zalloc(sizeof (*info));
1580 	info->sf_id = rip->ri_id;
1581 	info->sf_method_type = METHOD_STOP;
1582 	info->sf_event_type = RERR_RESTART;
1583 	info->sf_reason = reason;
1584 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1585 }
1586 
1587 static void
1588 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1589 {
1590 	scf_instance_t *inst;
1591 	scf_snapshot_t *snap;
1592 	fork_info_t *info;
1593 	int r;
1594 
1595 	assert(MUTEX_HELD(&rip->ri_lock));
1596 
1597 	log_instance(rip, B_TRUE, "Rereading configuration.");
1598 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1599 	    rip->ri_i.i_fmri);
1600 
1601 rep_retry:
1602 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1603 	switch (r) {
1604 	case 0:
1605 		break;
1606 
1607 	case ECONNABORTED:
1608 		libscf_handle_rebind(h);
1609 		goto rep_retry;
1610 
1611 	case ENOENT:
1612 		/* Must have been deleted. */
1613 		return;
1614 
1615 	case EINVAL:
1616 	case ENOTSUP:
1617 	default:
1618 		bad_error("libscf_fmri_get_instance", r);
1619 	}
1620 
1621 	snap = libscf_get_running_snapshot(inst);
1622 
1623 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1624 	    &rip->ri_utmpx_prefix);
1625 	switch (r) {
1626 	case 0:
1627 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1628 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1629 		break;
1630 
1631 	case ECONNABORTED:
1632 		scf_instance_destroy(inst);
1633 		scf_snapshot_destroy(snap);
1634 		libscf_handle_rebind(h);
1635 		goto rep_retry;
1636 
1637 	case ECANCELED:
1638 	case ENOENT:
1639 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1640 		break;
1641 
1642 	default:
1643 		bad_error("libscf_get_startd_properties", r);
1644 	}
1645 
1646 	if (instance_started(rip)) {
1647 		/* Refresh does not change the state. */
1648 		(void) restarter_instance_update_states(h, rip,
1649 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1650 		    restarter_str_refresh);
1651 
1652 		info = startd_zalloc(sizeof (*info));
1653 		info->sf_id = rip->ri_id;
1654 		info->sf_method_type = METHOD_REFRESH;
1655 		info->sf_event_type = RERR_REFRESH;
1656 		info->sf_reason = NULL;
1657 
1658 		assert(rip->ri_method_thread == 0);
1659 		rip->ri_method_thread =
1660 		    startd_thread_create(method_thread, info);
1661 	}
1662 
1663 	scf_snapshot_destroy(snap);
1664 	scf_instance_destroy(inst);
1665 }
1666 
/*
 * Printable names for restarter events.  The table is indexed by the event
 * type (riq_type / rpe_type) when log messages are built, so the entries
 * presumably have to stay in the same order as the event type values.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE",
	"STOP_RESET"
};
1673 
1674 /*
1675  * void *restarter_process_events()
1676  *
1677  *   Called in a separate thread to process the events on an instance's
1678  *   queue.  Empties the queue completely, and tries to keep the thread
1679  *   around for a little while after the queue is empty to save on
1680  *   startup costs.
1681  */
1682 static void *
1683 restarter_process_events(void *arg)
1684 {
1685 	scf_handle_t *h;
1686 	restarter_instance_qentry_t *event;
1687 	restarter_inst_t *rip;
1688 	char *fmri = (char *)arg;
1689 	struct timespec to;
1690 
1691 	assert(fmri != NULL);
1692 
1693 	h = libscf_handle_create_bound_loop();
1694 
1695 	/* grab the queue lock */
1696 	rip = inst_lookup_queue(fmri);
1697 	if (rip == NULL)
1698 		goto out;
1699 
1700 again:
1701 
1702 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1703 		restarter_inst_t *inst;
1704 
1705 		/* drop the queue lock */
1706 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1707 
1708 		/*
1709 		 * Grab the inst lock -- this waits until any outstanding
1710 		 * method finishes running.
1711 		 */
1712 		inst = inst_lookup_by_name(fmri);
1713 		if (inst == NULL) {
1714 			/* Getting deleted in the middle isn't an error. */
1715 			goto cont;
1716 		}
1717 
1718 		assert(instance_in_transition(inst) == 0);
1719 
1720 		/* process the event */
1721 		switch (event->riq_type) {
1722 		case RESTARTER_EVENT_TYPE_ENABLE:
1723 		case RESTARTER_EVENT_TYPE_DISABLE:
1724 			(void) enable_inst(h, inst, event);
1725 			break;
1726 
1727 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1728 			if (enable_inst(h, inst, event) == 0)
1729 				reset_start_times(inst);
1730 			break;
1731 
1732 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1733 			restarter_delete_inst(inst);
1734 			inst = NULL;
1735 			goto cont;
1736 
1737 		case RESTARTER_EVENT_TYPE_STOP_RESET:
1738 			reset_start_times(inst);
1739 			/* FALLTHROUGH */
1740 		case RESTARTER_EVENT_TYPE_STOP:
1741 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1742 			break;
1743 
1744 		case RESTARTER_EVENT_TYPE_START:
1745 			start_instance(h, inst, event->riq_reason);
1746 			break;
1747 
1748 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1749 			maintain_instance(h, inst, 0,
1750 			    restarter_str_dependency_cycle);
1751 			break;
1752 
1753 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1754 			maintain_instance(h, inst, 0,
1755 			    restarter_str_invalid_dependency);
1756 			break;
1757 
1758 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1759 			if (event_from_tty(h, inst) == 0)
1760 				maintain_instance(h, inst, 0,
1761 				    restarter_str_service_request);
1762 			else
1763 				maintain_instance(h, inst, 0,
1764 				    restarter_str_administrative_request);
1765 			break;
1766 
1767 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1768 			if (event_from_tty(h, inst) == 0)
1769 				maintain_instance(h, inst, 1,
1770 				    restarter_str_service_request);
1771 			else
1772 				maintain_instance(h, inst, 1,
1773 				    restarter_str_administrative_request);
1774 			break;
1775 
1776 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1777 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1778 			reset_start_times(inst);
1779 			break;
1780 
1781 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1782 			refresh_instance(h, inst);
1783 			break;
1784 
1785 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1786 			log_framework(LOG_WARNING, "Restarter: "
1787 			    "%s command (for %s) unimplemented.\n",
1788 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1789 			break;
1790 
1791 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1792 			if (!instance_started(inst)) {
1793 				log_framework(LOG_DEBUG, "Restarter: "
1794 				    "Not restarting %s; not running.\n",
1795 				    inst->ri_i.i_fmri);
1796 			} else {
1797 				/*
1798 				 * Stop the instance.  If it can be restarted,
1799 				 * the graph engine will send a new event.
1800 				 */
1801 				if (stop_instance(h, inst, RSTOP_RESTART) == 0)
1802 					reset_start_times(inst);
1803 			}
1804 			break;
1805 
1806 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1807 		default:
1808 #ifndef NDEBUG
1809 			uu_warn("%s:%d: Bad restarter event %d.  "
1810 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1811 #endif
1812 			abort();
1813 		}
1814 
1815 		assert(inst != NULL);
1816 		MUTEX_UNLOCK(&inst->ri_lock);
1817 
1818 cont:
1819 		/* grab the queue lock */
1820 		rip = inst_lookup_queue(fmri);
1821 		if (rip == NULL)
1822 			goto out;
1823 
1824 		/* delete the event */
1825 		uu_list_remove(rip->ri_queue, event);
1826 		startd_free(event, sizeof (restarter_instance_qentry_t));
1827 	}
1828 
1829 	assert(rip != NULL);
1830 
1831 	/*
1832 	 * Try to preserve the thread for a little while for future use.
1833 	 */
1834 	to.tv_sec = 3;
1835 	to.tv_nsec = 0;
1836 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1837 	    &rip->ri_queue_lock, &to);
1838 
1839 	if (uu_list_first(rip->ri_queue) != NULL)
1840 		goto again;
1841 
1842 	rip->ri_queue_thread = 0;
1843 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1844 out:
1845 	(void) scf_handle_unbind(h);
1846 	scf_handle_destroy(h);
1847 	free(fmri);
1848 	return (NULL);
1849 }
1850 
1851 static int
1852 is_admin_event(restarter_event_type_t t) {
1853 
1854 	switch (t) {
1855 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1856 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1857 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1858 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1859 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1860 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1861 		return (1);
1862 	default:
1863 		return (0);
1864 	}
1865 }
1866 
1867 static void
1868 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1869 {
1870 	restarter_instance_qentry_t *qe;
1871 	int r;
1872 
1873 	assert(MUTEX_HELD(&ri->ri_queue_lock));
1874 	assert(!MUTEX_HELD(&ri->ri_lock));
1875 
1876 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1877 	qe->riq_type = e->rpe_type;
1878 	qe->riq_reason = e->rpe_reason;
1879 
1880 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1881 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1882 	assert(r == 0);
1883 }
1884 
1885 /*
1886  * void *restarter_event_thread()
1887  *
1888  *  Handle incoming graph events by placing them on a per-instance
1889  *  queue.  We can't lock the main part of the instance structure, so
1890  *  just modify the seprarately locked event queue portion.
1891  */
1892 /*ARGSUSED*/
1893 static void *
1894 restarter_event_thread(void *unused)
1895 {
1896 	scf_handle_t *h;
1897 
1898 	/*
1899 	 * This is a new thread, and thus, gets its own handle
1900 	 * to the repository.
1901 	 */
1902 	h = libscf_handle_create_bound_loop();
1903 
1904 	MUTEX_LOCK(&ru->restarter_update_lock);
1905 
1906 	/*CONSTCOND*/
1907 	while (1) {
1908 		restarter_protocol_event_t *e;
1909 
1910 		while (ru->restarter_update_wakeup == 0)
1911 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1912 			    &ru->restarter_update_lock);
1913 
1914 		ru->restarter_update_wakeup = 0;
1915 
1916 		while ((e = restarter_event_dequeue()) != NULL) {
1917 			restarter_inst_t *rip;
1918 			char *fmri;
1919 
1920 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1921 
1922 			/*
1923 			 * ADD_INSTANCE is special: there's likely no
1924 			 * instance structure yet, so we need to handle the
1925 			 * addition synchronously.
1926 			 */
1927 			switch (e->rpe_type) {
1928 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1929 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1930 					log_error(LOG_INFO, "Restarter: "
1931 					    "Could not add %s.\n", e->rpe_inst);
1932 
1933 				MUTEX_LOCK(&st->st_load_lock);
1934 				if (--st->st_load_instances == 0)
1935 					(void) pthread_cond_broadcast(
1936 					    &st->st_load_cv);
1937 				MUTEX_UNLOCK(&st->st_load_lock);
1938 
1939 				goto nolookup;
1940 			}
1941 
1942 			/*
1943 			 * Lookup the instance, locking only the event queue.
1944 			 * Can't grab ri_lock here because it might be held
1945 			 * by a long-running method.
1946 			 */
1947 			rip = inst_lookup_queue(e->rpe_inst);
1948 			if (rip == NULL) {
1949 				log_error(LOG_INFO, "Restarter: "
1950 				    "Ignoring %s command for unknown service "
1951 				    "%s.\n", event_names[e->rpe_type],
1952 				    e->rpe_inst);
1953 				goto nolookup;
1954 			}
1955 
1956 			/* Keep ADMIN events from filling up the queue. */
1957 			if (is_admin_event(e->rpe_type) &&
1958 			    uu_list_numnodes(rip->ri_queue) >
1959 			    RINST_QUEUE_THRESHOLD) {
1960 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1961 				log_instance(rip, B_TRUE, "Instance event "
1962 				    "queue overflow.  Dropping administrative "
1963 				    "request.");
1964 				log_framework(LOG_DEBUG, "%s: Instance event "
1965 				    "queue overflow.  Dropping administrative "
1966 				    "request.\n", rip->ri_i.i_fmri);
1967 				goto nolookup;
1968 			}
1969 
1970 			/* Now add the event to the instance queue. */
1971 			restarter_queue_event(rip, e);
1972 
1973 			if (rip->ri_queue_thread == 0) {
1974 				/*
1975 				 * Start a thread if one isn't already
1976 				 * running.
1977 				 */
1978 				fmri = safe_strdup(e->rpe_inst);
1979 				rip->ri_queue_thread =  startd_thread_create(
1980 				    restarter_process_events, (void *)fmri);
1981 			} else {
1982 				/*
1983 				 * Signal the existing thread that there's
1984 				 * a new event.
1985 				 */
1986 				(void) pthread_cond_broadcast(
1987 				    &rip->ri_queue_cv);
1988 			}
1989 
1990 			MUTEX_UNLOCK(&rip->ri_queue_lock);
1991 nolookup:
1992 			restarter_event_release(e);
1993 
1994 			MUTEX_LOCK(&ru->restarter_update_lock);
1995 		}
1996 	}
1997 
1998 	/*
1999 	 * Unreachable for now -- there's currently no graceful cleanup
2000 	 * called on exit().
2001 	 */
2002 	(void) scf_handle_unbind(h);
2003 	scf_handle_destroy(h);
2004 	return (NULL);
2005 }
2006 
2007 static restarter_inst_t *
2008 contract_to_inst(ctid_t ctid)
2009 {
2010 	restarter_inst_t *inst;
2011 	int id;
2012 
2013 	id = lookup_inst_by_contract(ctid);
2014 	if (id == -1)
2015 		return (NULL);
2016 
2017 	inst = inst_lookup_by_id(id);
2018 	if (inst != NULL) {
2019 		/*
2020 		 * Since ri_lock isn't held by the contract id lookup, this
2021 		 * instance may have been restarted and now be in a new
2022 		 * contract, making the old contract no longer valid for this
2023 		 * instance.
2024 		 */
2025 		if (ctid != inst->ri_i.i_primary_ctid) {
2026 			MUTEX_UNLOCK(&inst->ri_lock);
2027 			inst = NULL;
2028 		}
2029 	}
2030 	return (inst);
2031 }
2032 
2033 /*
2034  * void contract_action()
2035  *   Take action on contract events.
2036  */
2037 static void
2038 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2039     uint32_t type)
2040 {
2041 	const char *fmri = inst->ri_i.i_fmri;
2042 
2043 	assert(MUTEX_HELD(&inst->ri_lock));
2044 
2045 	/*
2046 	 * If startd has stopped this contract, there is no need to
2047 	 * stop it again.
2048 	 */
2049 	if (inst->ri_i.i_primary_ctid > 0 &&
2050 	    inst->ri_i.i_primary_ctid_stopped)
2051 		return;
2052 
2053 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2054 	    | CT_PR_EV_HWERR)) == 0) {
2055 		/*
2056 		 * There shouldn't be other events, since that's not how we set
2057 		 * the terms. Thus, just log an error and drive on.
2058 		 */
2059 		log_framework(LOG_NOTICE,
2060 		    "%s: contract %ld received unexpected critical event "
2061 		    "(%d)\n", fmri, id, type);
2062 		return;
2063 	}
2064 
2065 	assert(instance_in_transition(inst) == 0);
2066 
2067 	if (instance_is_wait_style(inst)) {
2068 		/*
2069 		 * We ignore all events; if they impact the
2070 		 * process we're monitoring, then the
2071 		 * wait_thread will stop the instance.
2072 		 */
2073 		log_framework(LOG_DEBUG,
2074 		    "%s: ignoring contract event on wait-style service\n",
2075 		    fmri);
2076 	} else {
2077 		/*
2078 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2079 		 */
2080 		switch (type) {
2081 		case CT_PR_EV_EMPTY:
2082 			(void) stop_instance(h, inst, RSTOP_EXIT);
2083 			break;
2084 		case CT_PR_EV_CORE:
2085 			(void) stop_instance(h, inst, RSTOP_CORE);
2086 			break;
2087 		case CT_PR_EV_SIGNAL:
2088 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
2089 			break;
2090 		case CT_PR_EV_HWERR:
2091 			(void) stop_instance(h, inst, RSTOP_HWERR);
2092 			break;
2093 		}
2094 	}
2095 }
2096 
2097 /*
2098  * void *restarter_contracts_event_thread(void *)
2099  *   Listens to the process contract bundle for critical events, taking action
2100  *   on events from contracts we know we are responsible for.
2101  */
2102 /*ARGSUSED*/
2103 static void *
2104 restarter_contracts_event_thread(void *unused)
2105 {
2106 	int fd, err;
2107 	scf_handle_t *local_handle;
2108 
2109 	/*
2110 	 * Await graph load completion.  That is, stop here, until we've scanned
2111 	 * the repository for contract - instance associations.
2112 	 */
2113 	MUTEX_LOCK(&st->st_load_lock);
2114 	while (!(st->st_load_complete && st->st_load_instances == 0))
2115 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2116 	MUTEX_UNLOCK(&st->st_load_lock);
2117 
2118 	/*
2119 	 * This is a new thread, and thus, gets its own handle
2120 	 * to the repository.
2121 	 */
2122 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2123 		uu_die("Unable to bind a new repository handle: %s\n",
2124 		    scf_strerror(scf_error()));
2125 
2126 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2127 	if (fd == -1)
2128 		uu_die("process bundle open failed");
2129 
2130 	/*
2131 	 * Make sure we get all events (including those generated by configd
2132 	 * before this thread was started).
2133 	 */
2134 	err = ct_event_reset(fd);
2135 	assert(err == 0);
2136 
2137 	for (;;) {
2138 		int efd, sfd;
2139 		ct_evthdl_t ev;
2140 		uint32_t type;
2141 		ctevid_t evid;
2142 		ct_stathdl_t status;
2143 		ctid_t ctid;
2144 		restarter_inst_t *inst;
2145 		uint64_t cookie;
2146 
2147 		if (err = ct_event_read_critical(fd, &ev)) {
2148 			log_error(LOG_WARNING,
2149 			    "Error reading next contract event: %s",
2150 			    strerror(err));
2151 			continue;
2152 		}
2153 
2154 		evid = ct_event_get_evid(ev);
2155 		ctid = ct_event_get_ctid(ev);
2156 		type = ct_event_get_type(ev);
2157 
2158 		/* Fetch cookie. */
2159 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2160 		    < 0) {
2161 			ct_event_free(ev);
2162 			continue;
2163 		}
2164 
2165 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2166 			log_framework(LOG_WARNING, "Could not get status for "
2167 			    "contract %ld: %s\n", ctid, strerror(err));
2168 
2169 			startd_close(sfd);
2170 			ct_event_free(ev);
2171 			continue;
2172 		}
2173 
2174 		cookie = ct_status_get_cookie(status);
2175 
2176 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2177 		    "cookie %lld\n", type, ctid, cookie);
2178 
2179 		ct_status_free(status);
2180 
2181 		startd_close(sfd);
2182 
2183 		/*
2184 		 * svc.configd(1M) restart handling performed by the
2185 		 * fork_configd_thread.  We don't acknowledge, as that thread
2186 		 * will do so.
2187 		 */
2188 		if (cookie == CONFIGD_COOKIE) {
2189 			ct_event_free(ev);
2190 			continue;
2191 		}
2192 
2193 		inst = NULL;
2194 		if (storing_contract != 0 &&
2195 		    (inst = contract_to_inst(ctid)) == NULL) {
2196 			/*
2197 			 * This can happen for two reasons:
2198 			 * - method_run() has not yet stored the
2199 			 *    the contract into the internal hash table.
2200 			 * - we receive an EMPTY event for an abandoned
2201 			 *    contract.
2202 			 * If there is any contract in the process of
2203 			 * being stored into the hash table then re-read
2204 			 * the event later.
2205 			 */
2206 			log_framework(LOG_DEBUG,
2207 			    "Reset event %d for unknown "
2208 			    "contract id %ld\n", type, ctid);
2209 
2210 			/* don't go too fast */
2211 			(void) poll(NULL, 0, 100);
2212 
2213 			(void) ct_event_reset(fd);
2214 			ct_event_free(ev);
2215 			continue;
2216 		}
2217 
2218 		/*
2219 		 * Do not call contract_to_inst() again if first
2220 		 * call succeeded.
2221 		 */
2222 		if (inst == NULL)
2223 			inst = contract_to_inst(ctid);
2224 		if (inst == NULL) {
2225 			/*
2226 			 * This can happen if we receive an EMPTY
2227 			 * event for an abandoned contract.
2228 			 */
2229 			log_framework(LOG_DEBUG,
2230 			    "Received event %d for unknown contract id "
2231 			    "%ld\n", type, ctid);
2232 		} else {
2233 			log_framework(LOG_DEBUG,
2234 			    "Received event %d for contract id "
2235 			    "%ld (%s)\n", type, ctid,
2236 			    inst->ri_i.i_fmri);
2237 
2238 			contract_action(local_handle, inst, ctid, type);
2239 
2240 			MUTEX_UNLOCK(&inst->ri_lock);
2241 		}
2242 
2243 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2244 		    O_WRONLY);
2245 		if (efd != -1) {
2246 			(void) ct_ctl_ack(efd, evid);
2247 			startd_close(efd);
2248 		}
2249 
2250 		ct_event_free(ev);
2251 
2252 	}
2253 
2254 	/*NOTREACHED*/
2255 	return (NULL);
2256 }
2257 
2258 /*
2259  * Timeout queue, processed by restarter_timeouts_event_thread().
2260  */
2261 timeout_queue_t *timeouts;
2262 static uu_list_pool_t *timeout_pool;
2263 
2264 typedef struct timeout_update {
2265 	pthread_mutex_t		tu_lock;
2266 	pthread_cond_t		tu_cv;
2267 	int			tu_wakeup;
2268 } timeout_update_t;
2269 
2270 timeout_update_t *tu;
2271 
2272 static const char *timeout_ovr_svcs[] = {
2273 	"svc:/system/manifest-import:default",
2274 	"svc:/network/initial:default",
2275 	"svc:/network/service:default",
2276 	"svc:/system/rmtmpfiles:default",
2277 	"svc:/network/loopback:default",
2278 	"svc:/network/physical:default",
2279 	"svc:/system/device/local:default",
2280 	"svc:/system/metainit:default",
2281 	"svc:/system/filesystem/usr:default",
2282 	"svc:/system/filesystem/minimal:default",
2283 	"svc:/system/filesystem/local:default",
2284 	NULL
2285 };
2286 
2287 int
2288 is_timeout_ovr(restarter_inst_t *inst)
2289 {
2290 	int i;
2291 
2292 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2293 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2294 			log_instance(inst, B_TRUE, "Timeout override by "
2295 			    "svc.startd.  Using infinite timeout.");
2296 			return (1);
2297 		}
2298 	}
2299 
2300 	return (0);
2301 }
2302 
2303 /*ARGSUSED*/
2304 static int
2305 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2306 {
2307 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2308 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2309 
2310 	if (t1 > t2)
2311 		return (1);
2312 	else if (t1 < t2)
2313 		return (-1);
2314 	return (0);
2315 }
2316 
2317 void
2318 timeout_init()
2319 {
2320 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2321 
2322 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2323 
2324 	timeout_pool = startd_list_pool_create("timeouts",
2325 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2326 	    timeout_compare, UU_LIST_POOL_DEBUG);
2327 	assert(timeout_pool != NULL);
2328 
2329 	timeouts->tq_list = startd_list_create(timeout_pool,
2330 	    timeouts, UU_LIST_SORTED);
2331 	assert(timeouts->tq_list != NULL);
2332 
2333 	tu = startd_zalloc(sizeof (timeout_update_t));
2334 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2335 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2336 }
2337 
2338 void
2339 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2340 {
2341 	hrtime_t now, timeout;
2342 	timeout_entry_t *entry;
2343 	uu_list_index_t idx;
2344 
2345 	assert(MUTEX_HELD(&inst->ri_lock));
2346 
2347 	now = gethrtime();
2348 
2349 	/*
2350 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2351 	 * just return.
2352 	 */
2353 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2354 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2355 		    "treating as infinite.");
2356 		return;
2357 	}
2358 
2359 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2360 	timeout = now + (timeout_sec * 1000000000LL);
2361 
2362 	entry = startd_alloc(sizeof (timeout_entry_t));
2363 	entry->te_timeout = timeout;
2364 	entry->te_ctid = cid;
2365 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2366 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2367 	entry->te_fired = 0;
2368 	/* Insert the calculated timeout time onto the queue. */
2369 	MUTEX_LOCK(&timeouts->tq_lock);
2370 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2371 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2372 	uu_list_insert(timeouts->tq_list, entry, idx);
2373 	MUTEX_UNLOCK(&timeouts->tq_lock);
2374 
2375 	assert(inst->ri_timeout == NULL);
2376 	inst->ri_timeout = entry;
2377 
2378 	MUTEX_LOCK(&tu->tu_lock);
2379 	tu->tu_wakeup = 1;
2380 	(void) pthread_cond_broadcast(&tu->tu_cv);
2381 	MUTEX_UNLOCK(&tu->tu_lock);
2382 }
2383 
2384 
2385 void
2386 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2387 {
2388 	assert(MUTEX_HELD(&inst->ri_lock));
2389 
2390 	if (inst->ri_timeout == NULL)
2391 		return;
2392 
2393 	assert(inst->ri_timeout->te_ctid == cid);
2394 
2395 	MUTEX_LOCK(&timeouts->tq_lock);
2396 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2397 	MUTEX_UNLOCK(&timeouts->tq_lock);
2398 
2399 	free(inst->ri_timeout->te_fmri);
2400 	free(inst->ri_timeout->te_logstem);
2401 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2402 	inst->ri_timeout = NULL;
2403 }
2404 
2405 static int
2406 timeout_now()
2407 {
2408 	timeout_entry_t *e;
2409 	hrtime_t now;
2410 	int ret;
2411 
2412 	now = gethrtime();
2413 
2414 	/*
2415 	 * Walk through the (sorted) timeouts list.  While the timeout
2416 	 * at the head of the list is <= the current time, kill the
2417 	 * method.
2418 	 */
2419 	MUTEX_LOCK(&timeouts->tq_lock);
2420 
2421 	for (e = uu_list_first(timeouts->tq_list);
2422 	    e != NULL && e->te_timeout <= now;
2423 	    e = uu_list_next(timeouts->tq_list, e)) {
2424 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2425 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2426 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2427 		    "Method or service exit timed out.  Killing contract %ld.",
2428 		    e->te_ctid);
2429 		e->te_fired = 1;
2430 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2431 	}
2432 
2433 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2434 		ret = 0;
2435 	else
2436 		ret = -1;
2437 
2438 	MUTEX_UNLOCK(&timeouts->tq_lock);
2439 
2440 	return (ret);
2441 }
2442 
2443 /*
2444  * void *restarter_timeouts_event_thread(void *)
2445  *   Responsible for monitoring the method timeouts.  This thread must
2446  *   be started before any methods are called.
2447  */
2448 /*ARGSUSED*/
2449 static void *
2450 restarter_timeouts_event_thread(void *unused)
2451 {
2452 	/*
2453 	 * Timeouts are entered on a priority queue, which is processed by
2454 	 * this thread.  As timeouts are specified in seconds, we'll do
2455 	 * the necessary processing every second, as long as the queue
2456 	 * is not empty.
2457 	 */
2458 
2459 	/*CONSTCOND*/
2460 	while (1) {
2461 		/*
2462 		 * As long as the timeout list isn't empty, process it
2463 		 * every second.
2464 		 */
2465 		if (timeout_now() == 0) {
2466 			(void) sleep(1);
2467 			continue;
2468 		}
2469 
2470 		/* The list is empty, wait until we have more timeouts. */
2471 		MUTEX_LOCK(&tu->tu_lock);
2472 
2473 		while (tu->tu_wakeup == 0)
2474 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2475 
2476 		tu->tu_wakeup = 0;
2477 		MUTEX_UNLOCK(&tu->tu_lock);
2478 	}
2479 
2480 	return (NULL);
2481 }
2482 
2483 void
2484 restarter_start()
2485 {
2486 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2487 	(void) startd_thread_create(restarter_event_thread, NULL);
2488 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2489 	(void) startd_thread_create(wait_thread, NULL);
2490 }
2491 
2492 
2493 void
2494 restarter_init()
2495 {
2496 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2497 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2498 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2499 	(void) memset(&instance_list, 0, sizeof (instance_list));
2500 
2501 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2502 	instance_list.ril_instance_list = startd_list_create(
2503 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2504 
2505 	restarter_queue_pool = startd_list_pool_create(
2506 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2507 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2508 	    UU_LIST_POOL_DEBUG);
2509 
2510 	contract_list_pool = startd_list_pool_create(
2511 	    "contract_list", sizeof (contract_entry_t),
2512 	    offsetof(contract_entry_t,  ce_link), NULL,
2513 	    UU_LIST_POOL_DEBUG);
2514 	contract_hash_init();
2515 
2516 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2517 }
2518