1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25 */
26
27 /*
28 * restarter.c - service manipulation
29 *
30 * This component manages services whose restarter is svc.startd, the standard
31 * restarter. It translates restarter protocol events from the graph engine
32 * into actions on processes, as a delegated restarter would do.
33 *
34 * The master restarter manages a number of always-running threads:
35 * - restarter event thread: events from the graph engine
36 * - timeout thread: thread to fire queued timeouts
37 * - contract thread: thread to handle contract events
38 * - wait thread: thread to handle wait-based services
39 *
40 * The other threads are created as-needed:
41 * - per-instance method threads
42 * - per-instance event processing threads
43 *
44 * The interaction of all threads must result in the following conditions
45 * being satisfied (on a per-instance basis):
46 * - restarter events must be processed in order
47 * - method execution must be serialized
48 * - instance delete must be held until outstanding methods are complete
49 * - contract events shouldn't be processed while a method is running
50 * - timeouts should fire even when a method is running
51 *
52 * Service instances are represented by restarter_inst_t's and are kept in the
53 * instance_list list.
54 *
55 * Service States
56 * The current state of a service instance is kept in
57 * restarter_inst_t->ri_i.i_state. If transition to a new state could take
58 * some time, then before we effect the transition we set
59 * restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60 * rotate i_next_state to i_state and set i_next_state to
61 * RESTARTER_STATE_NONE. So usually i_next_state is _NONE when ri_lock is not
62 * held. The exception is when we launch methods, which are done with
63 * a separate thread. To keep any other threads from grabbing ri_lock before
64 * method_thread() does, we set ri_method_thread to the thread id of the
65 * method thread, and when it is nonzero any thread with a different thread id
66 * waits on ri_method_cv.
67 *
68 * Method execution is serialized by blocking on ri_method_cv in
69 * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread. This
70 * also prevents the instance structure from being deleted until all
71 * outstanding operations such as method_thread() have finished.
72 *
73 * Lock ordering:
74 *
75 * dgraph_lock [can be held when taking:]
76 * utmpx_lock
77 * dictionary->dict_lock
78 * st->st_load_lock
79 * wait_info_lock
80 * ru->restarter_update_lock
81 * restarter_queue->rpeq_lock
82 * instance_list.ril_lock
83 * inst->ri_lock
84 * st->st_configd_live_lock
85 *
86 * instance_list.ril_lock
87 * graph_queue->gpeq_lock
88 * gu->gu_lock
89 * st->st_configd_live_lock
90 * dictionary->dict_lock
91 * inst->ri_lock
92 * graph_queue->gpeq_lock
93 * gu->gu_lock
94 * tu->tu_lock
95 * tq->tq_lock
96 * inst->ri_queue_lock
97 * wait_info_lock
98 * bp->cb_lock
99 * utmpx_lock
100 *
101 * single_user_thread_lock
102 * wait_info_lock
103 * utmpx_lock
104 *
105 * gu_freeze_lock
106 *
107 * logbuf_mutex nests inside pretty much everything.
108 */
109
110 #include <sys/contract/process.h>
111 #include <sys/ctfs.h>
112 #include <sys/stat.h>
113 #include <sys/time.h>
114 #include <sys/types.h>
115 #include <sys/uio.h>
116 #include <sys/wait.h>
117 #include <assert.h>
118 #include <errno.h>
119 #include <fcntl.h>
120 #include <libcontract.h>
121 #include <libcontract_priv.h>
122 #include <libintl.h>
123 #include <librestart.h>
124 #include <librestart_priv.h>
125 #include <libuutil.h>
126 #include <limits.h>
127 #include <poll.h>
128 #include <port.h>
129 #include <pthread.h>
130 #include <stdarg.h>
131 #include <stdio.h>
132 #include <strings.h>
133 #include <unistd.h>
134
135 #include "startd.h"
136 #include "protocol.h"
137
/* Pool used to initialize each instance's ri_link node for instance_list. */
static uu_list_pool_t *restarter_instance_pool;
/* The list of restarter_inst_t's; accessed under instance_list.ril_lock. */
static restarter_instance_list_t instance_list;

/* Pool used to create each instance's ri_queue list. */
static uu_list_pool_t *restarter_queue_pool;

#define WT_SVC_ERR_THROTTLE 1 /* 1 sec delay for erroring wait svc */
144
145 /*
146 * Function used to reset the restart times for an instance, when
147 * an administrative task comes along and essentially makes the times
148 * in this array ineffective.
149 */
150 static void
reset_start_times(restarter_inst_t * inst)151 reset_start_times(restarter_inst_t *inst)
152 {
153 inst->ri_start_index = 0;
154 bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
155 }
156
157 /*ARGSUSED*/
158 static int
restarter_instance_compare(const void * lc_arg,const void * rc_arg,void * private)159 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
160 void *private)
161 {
162 int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
163 int rc_id = *(int *)rc_arg;
164
165 if (lc_id > rc_id)
166 return (1);
167 if (lc_id < rc_id)
168 return (-1);
169 return (0);
170 }
171
172 static restarter_inst_t *
inst_lookup_by_name(const char * name)173 inst_lookup_by_name(const char *name)
174 {
175 int id;
176
177 id = dict_lookup_byname(name);
178 if (id == -1)
179 return (NULL);
180
181 return (inst_lookup_by_id(id));
182 }
183
184 restarter_inst_t *
inst_lookup_by_id(int id)185 inst_lookup_by_id(int id)
186 {
187 restarter_inst_t *inst;
188
189 MUTEX_LOCK(&instance_list.ril_lock);
190 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
191 if (inst != NULL)
192 MUTEX_LOCK(&inst->ri_lock);
193 MUTEX_UNLOCK(&instance_list.ril_lock);
194
195 if (inst != NULL) {
196 while (inst->ri_method_thread != 0 &&
197 !pthread_equal(inst->ri_method_thread, pthread_self())) {
198 ++inst->ri_method_waiters;
199 (void) pthread_cond_wait(&inst->ri_method_cv,
200 &inst->ri_lock);
201 assert(inst->ri_method_waiters > 0);
202 --inst->ri_method_waiters;
203 }
204 }
205
206 return (inst);
207 }
208
209 static restarter_inst_t *
inst_lookup_queue(const char * name)210 inst_lookup_queue(const char *name)
211 {
212 int id;
213 restarter_inst_t *inst;
214
215 id = dict_lookup_byname(name);
216 if (id == -1)
217 return (NULL);
218
219 MUTEX_LOCK(&instance_list.ril_lock);
220 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
221 if (inst != NULL)
222 MUTEX_LOCK(&inst->ri_queue_lock);
223 MUTEX_UNLOCK(&instance_list.ril_lock);
224
225 return (inst);
226 }
227
228 const char *
service_style(int flags)229 service_style(int flags)
230 {
231 switch (flags & RINST_STYLE_MASK) {
232 case RINST_CONTRACT: return ("contract");
233 case RINST_TRANSIENT: return ("transient");
234 case RINST_WAIT: return ("wait");
235
236 default:
237 #ifndef NDEBUG
238 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
239 #endif
240 abort();
241 /* NOTREACHED */
242 }
243 }
244
245 /*
246 * Fails with ECONNABORTED or ECANCELED.
247 */
248 static int
check_contract(restarter_inst_t * inst,boolean_t primary,scf_instance_t * scf_inst)249 check_contract(restarter_inst_t *inst, boolean_t primary,
250 scf_instance_t *scf_inst)
251 {
252 ctid_t *ctidp;
253 int fd, r;
254
255 ctidp = primary ? &inst->ri_i.i_primary_ctid :
256 &inst->ri_i.i_transient_ctid;
257
258 assert(*ctidp >= 1);
259
260 fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
261 if (fd >= 0) {
262 r = close(fd);
263 assert(r == 0);
264 return (0);
265 }
266
267 r = restarter_remove_contract(scf_inst, *ctidp, primary ?
268 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
269 switch (r) {
270 case 0:
271 case ECONNABORTED:
272 case ECANCELED:
273 *ctidp = 0;
274 return (r);
275
276 case ENOMEM:
277 uu_die("Out of memory\n");
278 /* NOTREACHED */
279
280 case EPERM:
281 uu_die("Insufficient privilege.\n");
282 /* NOTREACHED */
283
284 case EACCES:
285 uu_die("Repository backend access denied.\n");
286 /* NOTREACHED */
287
288 case EROFS:
289 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
290 "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
291 return (0);
292
293 case EINVAL:
294 case EBADF:
295 default:
296 assert(0);
297 abort();
298 /* NOTREACHED */
299 }
300 }
301
/* Forward declaration: restarter_insert_inst() calls this before its definition. */
static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
303
304 /*
305 * int restarter_insert_inst(scf_handle_t *, char *)
306 * If the inst is already in the restarter list, return its id. If the inst
307 * is not in the restarter list, initialize a restarter_inst_t, initialize its
308 * states, insert it into the list, and return 0.
309 *
310 * Fails with
311 * ENOENT - name is not in the repository
312 */
313 static int
restarter_insert_inst(scf_handle_t * h,const char * name)314 restarter_insert_inst(scf_handle_t *h, const char *name)
315 {
316 int id, r;
317 restarter_inst_t *inst;
318 uu_list_index_t idx;
319 scf_service_t *scf_svc;
320 scf_instance_t *scf_inst;
321 scf_snapshot_t *snap = NULL;
322 scf_propertygroup_t *pg;
323 char *svc_name, *inst_name;
324 char logfilebuf[PATH_MAX];
325 char *c;
326 boolean_t do_commit_states;
327 restarter_instance_state_t state, next_state;
328 protocol_states_t *ps;
329 pid_t start_pid;
330 restarter_str_t reason = restarter_str_insert_in_graph;
331
332 MUTEX_LOCK(&instance_list.ril_lock);
333
334 /*
335 * We don't use inst_lookup_by_name() here because we want the lookup
336 * & insert to be atomic.
337 */
338 id = dict_lookup_byname(name);
339 if (id != -1) {
340 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
341 &idx);
342 if (inst != NULL) {
343 MUTEX_UNLOCK(&instance_list.ril_lock);
344 return (0);
345 }
346 }
347
348 /* Allocate an instance */
349 inst = startd_zalloc(sizeof (restarter_inst_t));
350 inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
351 inst->ri_utmpx_prefix[0] = '\0';
352
353 inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
354 (void) strcpy((char *)inst->ri_i.i_fmri, name);
355
356 inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
357
358 /*
359 * id shouldn't be -1 since we use the same dictionary as graph.c, but
360 * just in case.
361 */
362 inst->ri_id = (id != -1 ? id : dict_insert(name));
363
364 special_online_hooks_get(name, &inst->ri_pre_online_hook,
365 &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
366
367 scf_svc = safe_scf_service_create(h);
368 scf_inst = safe_scf_instance_create(h);
369 pg = safe_scf_pg_create(h);
370 svc_name = startd_alloc(max_scf_name_size);
371 inst_name = startd_alloc(max_scf_name_size);
372
373 rep_retry:
374 if (snap != NULL)
375 scf_snapshot_destroy(snap);
376 if (inst->ri_logstem != NULL)
377 startd_free(inst->ri_logstem, PATH_MAX);
378 if (inst->ri_common_name != NULL)
379 free(inst->ri_common_name);
380 if (inst->ri_C_common_name != NULL)
381 free(inst->ri_C_common_name);
382 snap = NULL;
383 inst->ri_logstem = NULL;
384 inst->ri_common_name = NULL;
385 inst->ri_C_common_name = NULL;
386
387 if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
388 NULL, SCF_DECODE_FMRI_EXACT) != 0) {
389 switch (scf_error()) {
390 case SCF_ERROR_CONNECTION_BROKEN:
391 libscf_handle_rebind(h);
392 goto rep_retry;
393
394 case SCF_ERROR_NOT_FOUND:
395 goto deleted;
396 }
397
398 uu_die("Can't decode FMRI %s: %s\n", name,
399 scf_strerror(scf_error()));
400 }
401
402 /*
403 * If there's no running snapshot, then we execute using the editing
404 * snapshot. Pending snapshots will be taken later.
405 */
406 snap = libscf_get_running_snapshot(scf_inst);
407
408 if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
409 (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
410 0)) {
411 switch (scf_error()) {
412 case SCF_ERROR_NOT_SET:
413 break;
414
415 case SCF_ERROR_CONNECTION_BROKEN:
416 libscf_handle_rebind(h);
417 goto rep_retry;
418
419 default:
420 assert(0);
421 abort();
422 }
423
424 goto deleted;
425 }
426
427 (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
428 for (c = logfilebuf; *c != '\0'; c++)
429 if (*c == '/')
430 *c = '-';
431
432 inst->ri_logstem = startd_alloc(PATH_MAX);
433 (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
434 LOG_SUFFIX);
435
436 /*
437 * If the restarter group is missing, use uninit/none. Otherwise,
438 * we're probably being restarted & don't want to mess up the states
439 * that are there.
440 */
441 state = RESTARTER_STATE_UNINIT;
442 next_state = RESTARTER_STATE_NONE;
443
444 r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
445 if (r != 0) {
446 switch (scf_error()) {
447 case SCF_ERROR_CONNECTION_BROKEN:
448 libscf_handle_rebind(h);
449 goto rep_retry;
450
451 case SCF_ERROR_NOT_SET:
452 goto deleted;
453
454 case SCF_ERROR_NOT_FOUND:
455 /*
456 * This shouldn't happen since the graph engine should
457 * have initialized the state to uninitialized/none if
458 * there was no restarter pg. In case somebody
459 * deleted it, though....
460 */
461 do_commit_states = B_TRUE;
462 break;
463
464 default:
465 assert(0);
466 abort();
467 }
468 } else {
469 r = libscf_read_states(pg, &state, &next_state);
470 if (r != 0) {
471 do_commit_states = B_TRUE;
472 } else {
473 if (next_state != RESTARTER_STATE_NONE) {
474 /*
475 * Force next_state to _NONE since we
476 * don't look for method processes.
477 */
478 next_state = RESTARTER_STATE_NONE;
479 do_commit_states = B_TRUE;
480 } else {
481 /*
482 * The reason for transition will depend on
483 * state.
484 */
485 if (st->st_initial == 0)
486 reason = restarter_str_startd_restart;
487 else if (state == RESTARTER_STATE_MAINT)
488 reason = restarter_str_bad_repo_state;
489 /*
490 * Inform the restarter of our state without
491 * changing the STIME in the repository.
492 */
493 ps = startd_alloc(sizeof (*ps));
494 inst->ri_i.i_state = ps->ps_state = state;
495 inst->ri_i.i_next_state = ps->ps_state_next =
496 next_state;
497 ps->ps_reason = reason;
498
499 graph_protocol_send_event(inst->ri_i.i_fmri,
500 GRAPH_UPDATE_STATE_CHANGE, ps);
501
502 do_commit_states = B_FALSE;
503 }
504 }
505 }
506
507 switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
508 &inst->ri_utmpx_prefix)) {
509 case 0:
510 break;
511
512 case ECONNABORTED:
513 libscf_handle_rebind(h);
514 goto rep_retry;
515
516 case ECANCELED:
517 goto deleted;
518
519 case ENOENT:
520 /*
521 * This is odd, because the graph engine should have required
522 * the general property group. So we'll just use default
523 * flags in anticipation of the graph engine sending us
524 * REMOVE_INSTANCE when it finds out that the general property
525 * group has been deleted.
526 */
527 inst->ri_flags = RINST_CONTRACT;
528 break;
529
530 default:
531 assert(0);
532 abort();
533 }
534
535 r = libscf_get_template_values(scf_inst, snap,
536 &inst->ri_common_name, &inst->ri_C_common_name);
537
538 /*
539 * Copy our names to smaller buffers to reduce our memory footprint.
540 */
541 if (inst->ri_common_name != NULL) {
542 char *tmp = safe_strdup(inst->ri_common_name);
543 startd_free(inst->ri_common_name, max_scf_value_size);
544 inst->ri_common_name = tmp;
545 }
546
547 if (inst->ri_C_common_name != NULL) {
548 char *tmp = safe_strdup(inst->ri_C_common_name);
549 startd_free(inst->ri_C_common_name, max_scf_value_size);
550 inst->ri_C_common_name = tmp;
551 }
552
553 switch (r) {
554 case 0:
555 break;
556
557 case ECONNABORTED:
558 libscf_handle_rebind(h);
559 goto rep_retry;
560
561 case ECANCELED:
562 goto deleted;
563
564 case ECHILD:
565 case ENOENT:
566 break;
567
568 default:
569 assert(0);
570 abort();
571 }
572
573 switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
574 &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
575 &start_pid)) {
576 case 0:
577 break;
578
579 case ECONNABORTED:
580 libscf_handle_rebind(h);
581 goto rep_retry;
582
583 case ECANCELED:
584 goto deleted;
585
586 default:
587 assert(0);
588 abort();
589 }
590
591 if (inst->ri_i.i_primary_ctid >= 1) {
592 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
593
594 switch (check_contract(inst, B_TRUE, scf_inst)) {
595 case 0:
596 break;
597
598 case ECONNABORTED:
599 libscf_handle_rebind(h);
600 goto rep_retry;
601
602 case ECANCELED:
603 goto deleted;
604
605 default:
606 assert(0);
607 abort();
608 }
609 }
610
611 if (inst->ri_i.i_transient_ctid >= 1) {
612 switch (check_contract(inst, B_FALSE, scf_inst)) {
613 case 0:
614 break;
615
616 case ECONNABORTED:
617 libscf_handle_rebind(h);
618 goto rep_retry;
619
620 case ECANCELED:
621 goto deleted;
622
623 default:
624 assert(0);
625 abort();
626 }
627 }
628
629 /* No more failures we live through, so add it to the list. */
630 (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
631 (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
632 MUTEX_LOCK(&inst->ri_lock);
633 MUTEX_LOCK(&inst->ri_queue_lock);
634
635 (void) pthread_cond_init(&inst->ri_method_cv, NULL);
636
637 uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
638 uu_list_insert(instance_list.ril_instance_list, inst, idx);
639 MUTEX_UNLOCK(&instance_list.ril_lock);
640
641 if (start_pid != -1 &&
642 (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
643 int ret;
644 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
645 if (ret == -1) {
646 /*
647 * Implication: if we can't reregister the
648 * instance, we will start another one. Two
649 * instances may or may not result in a resource
650 * conflict.
651 */
652 log_error(LOG_WARNING,
653 "%s: couldn't reregister %ld for wait\n",
654 inst->ri_i.i_fmri, start_pid);
655 } else if (ret == 1) {
656 /*
657 * Leading PID has exited.
658 */
659 (void) stop_instance(h, inst, RSTOP_EXIT);
660 }
661 }
662
663
664 scf_pg_destroy(pg);
665
666 if (do_commit_states)
667 (void) restarter_instance_update_states(h, inst, state,
668 next_state, RERR_NONE, reason);
669
670 log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
671 service_style(inst->ri_flags));
672
673 MUTEX_UNLOCK(&inst->ri_queue_lock);
674 MUTEX_UNLOCK(&inst->ri_lock);
675
676 startd_free(svc_name, max_scf_name_size);
677 startd_free(inst_name, max_scf_name_size);
678 scf_snapshot_destroy(snap);
679 scf_instance_destroy(scf_inst);
680 scf_service_destroy(scf_svc);
681
682 log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
683 name);
684
685 return (0);
686
687 deleted:
688 MUTEX_UNLOCK(&instance_list.ril_lock);
689 startd_free(inst_name, max_scf_name_size);
690 startd_free(svc_name, max_scf_name_size);
691 if (snap != NULL)
692 scf_snapshot_destroy(snap);
693 scf_pg_destroy(pg);
694 scf_instance_destroy(scf_inst);
695 scf_service_destroy(scf_svc);
696 startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
697 uu_list_destroy(inst->ri_queue);
698 if (inst->ri_logstem != NULL)
699 startd_free(inst->ri_logstem, PATH_MAX);
700 if (inst->ri_common_name != NULL)
701 free(inst->ri_common_name);
702 if (inst->ri_C_common_name != NULL)
703 free(inst->ri_C_common_name);
704 startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
705 startd_free(inst, sizeof (restarter_inst_t));
706 return (ENOENT);
707 }
708
709 static void
restarter_delete_inst(restarter_inst_t * ri)710 restarter_delete_inst(restarter_inst_t *ri)
711 {
712 int id;
713 restarter_inst_t *rip;
714 void *cookie = NULL;
715 restarter_instance_qentry_t *e;
716
717 assert(MUTEX_HELD(&ri->ri_lock));
718
719 /*
720 * Must drop the instance lock so we can pick up the instance_list
721 * lock & remove the instance.
722 */
723 id = ri->ri_id;
724 MUTEX_UNLOCK(&ri->ri_lock);
725
726 MUTEX_LOCK(&instance_list.ril_lock);
727
728 rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
729 if (rip == NULL) {
730 MUTEX_UNLOCK(&instance_list.ril_lock);
731 return;
732 }
733
734 assert(ri == rip);
735
736 uu_list_remove(instance_list.ril_instance_list, ri);
737
738 log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
739 ri->ri_i.i_fmri);
740
741 MUTEX_UNLOCK(&instance_list.ril_lock);
742
743 /*
744 * We can lock the instance without holding the instance_list lock
745 * since we removed the instance from the list.
746 */
747 MUTEX_LOCK(&ri->ri_lock);
748 MUTEX_LOCK(&ri->ri_queue_lock);
749
750 if (ri->ri_i.i_primary_ctid >= 1)
751 contract_hash_remove(ri->ri_i.i_primary_ctid);
752
753 while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
754 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
755
756 while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
757 startd_free(e, sizeof (*e));
758 uu_list_destroy(ri->ri_queue);
759
760 startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
761 startd_free(ri->ri_logstem, PATH_MAX);
762 if (ri->ri_common_name != NULL)
763 free(ri->ri_common_name);
764 if (ri->ri_C_common_name != NULL)
765 free(ri->ri_C_common_name);
766 startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
767 (void) pthread_mutex_destroy(&ri->ri_lock);
768 (void) pthread_mutex_destroy(&ri->ri_queue_lock);
769 startd_free(ri, sizeof (restarter_inst_t));
770 }
771
772 /*
773 * instance_is_wait_style()
774 *
775 * Returns 1 if the given instance is a "wait-style" service instance.
776 */
777 int
instance_is_wait_style(restarter_inst_t * inst)778 instance_is_wait_style(restarter_inst_t *inst)
779 {
780 assert(MUTEX_HELD(&inst->ri_lock));
781 return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
782 }
783
784 /*
785 * instance_is_transient_style()
786 *
787 * Returns 1 if the given instance is a transient service instance.
788 */
789 int
instance_is_transient_style(restarter_inst_t * inst)790 instance_is_transient_style(restarter_inst_t *inst)
791 {
792 assert(MUTEX_HELD(&inst->ri_lock));
793 return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
794 }
795
796 /*
797 * instance_in_transition()
798 * Returns 1 if instance is in transition, 0 if not
799 */
800 int
instance_in_transition(restarter_inst_t * inst)801 instance_in_transition(restarter_inst_t *inst)
802 {
803 assert(MUTEX_HELD(&inst->ri_lock));
804 if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
805 return (0);
806 return (1);
807 }
808
809 /*
810 * returns 1 if instance is already started, 0 if not
811 */
812 static int
instance_started(restarter_inst_t * inst)813 instance_started(restarter_inst_t *inst)
814 {
815 int ret;
816
817 assert(MUTEX_HELD(&inst->ri_lock));
818
819 if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
820 inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
821 ret = 1;
822 else
823 ret = 0;
824
825 return (ret);
826 }
827
828 /*
829 * Returns
830 * 0 - success
831 * ECONNRESET - success, but h was rebound
832 */
833 int
restarter_instance_update_states(scf_handle_t * h,restarter_inst_t * ri,restarter_instance_state_t new_state,restarter_instance_state_t new_state_next,restarter_error_t err,restarter_str_t reason)834 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
835 restarter_instance_state_t new_state,
836 restarter_instance_state_t new_state_next, restarter_error_t err,
837 restarter_str_t reason)
838 {
839 protocol_states_t *states;
840 int e;
841 uint_t retry_count = 0, msecs = ALLOC_DELAY;
842 boolean_t rebound = B_FALSE;
843 int prev_state_online;
844 int state_online;
845
846 assert(MUTEX_HELD(&ri->ri_lock));
847
848 prev_state_online = instance_started(ri);
849
850 retry:
851 e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
852 restarter_get_str_short(reason));
853 switch (e) {
854 case 0:
855 break;
856
857 case ENOMEM:
858 ++retry_count;
859 if (retry_count < ALLOC_RETRY) {
860 (void) poll(NULL, 0, msecs);
861 msecs *= ALLOC_DELAY_MULT;
862 goto retry;
863 }
864
865 /* Like startd_alloc(). */
866 uu_die("Insufficient memory.\n");
867 /* NOTREACHED */
868
869 case ECONNABORTED:
870 libscf_handle_rebind(h);
871 rebound = B_TRUE;
872 goto retry;
873
874 case EPERM:
875 case EACCES:
876 case EROFS:
877 log_error(LOG_NOTICE, "Could not commit state change for %s "
878 "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
879 /* FALLTHROUGH */
880
881 case ENOENT:
882 ri->ri_i.i_state = new_state;
883 ri->ri_i.i_next_state = new_state_next;
884 break;
885
886 case EINVAL:
887 default:
888 bad_error("_restarter_commit_states", e);
889 }
890
891 states = startd_alloc(sizeof (protocol_states_t));
892 states->ps_state = new_state;
893 states->ps_state_next = new_state_next;
894 states->ps_err = err;
895 states->ps_reason = reason;
896 graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
897 (void *)states);
898
899 state_online = instance_started(ri);
900
901 if (prev_state_online && !state_online)
902 ri->ri_post_offline_hook();
903 else if (!prev_state_online && state_online)
904 ri->ri_post_online_hook();
905
906 return (rebound ? ECONNRESET : 0);
907 }
908
909 void
restarter_mark_pending_snapshot(const char * fmri,uint_t flag)910 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
911 {
912 restarter_inst_t *inst;
913
914 assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
915
916 inst = inst_lookup_by_name(fmri);
917 if (inst == NULL)
918 return;
919
920 inst->ri_flags |= flag;
921
922 MUTEX_UNLOCK(&inst->ri_lock);
923 }
924
925 static void
restarter_take_pending_snapshots(scf_handle_t * h)926 restarter_take_pending_snapshots(scf_handle_t *h)
927 {
928 restarter_inst_t *inst;
929 int r;
930
931 MUTEX_LOCK(&instance_list.ril_lock);
932
933 for (inst = uu_list_first(instance_list.ril_instance_list);
934 inst != NULL;
935 inst = uu_list_next(instance_list.ril_instance_list, inst)) {
936 const char *fmri;
937 scf_instance_t *sinst = NULL;
938
939 MUTEX_LOCK(&inst->ri_lock);
940
941 /*
942 * This is where we'd check inst->ri_method_thread and if it
943 * were nonzero we'd wait in anticipation of another thread
944 * executing a method for inst. Doing so with the instance_list
945 * locked, though, leads to deadlock. Since taking a snapshot
946 * during that window won't hurt anything, we'll just continue.
947 */
948
949 fmri = inst->ri_i.i_fmri;
950
951 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
952 scf_snapshot_t *rsnap;
953
954 (void) libscf_fmri_get_instance(h, fmri, &sinst);
955
956 rsnap = libscf_get_or_make_running_snapshot(sinst,
957 fmri, B_FALSE);
958
959 scf_instance_destroy(sinst);
960
961 if (rsnap != NULL)
962 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
963
964 scf_snapshot_destroy(rsnap);
965 }
966
967 if (inst->ri_flags & RINST_RETAKE_START) {
968 switch (r = libscf_snapshots_poststart(h, fmri,
969 B_FALSE)) {
970 case 0:
971 case ENOENT:
972 inst->ri_flags &= ~RINST_RETAKE_START;
973 break;
974
975 case ECONNABORTED:
976 break;
977
978 case EACCES:
979 default:
980 bad_error("libscf_snapshots_poststart", r);
981 }
982 }
983
984 MUTEX_UNLOCK(&inst->ri_lock);
985 }
986
987 MUTEX_UNLOCK(&instance_list.ril_lock);
988 }
989
990 /* ARGSUSED */
991 void *
restarter_post_fsminimal_thread(void * unused)992 restarter_post_fsminimal_thread(void *unused)
993 {
994 scf_handle_t *h;
995 int r;
996
997 h = libscf_handle_create_bound_loop();
998
999 for (;;) {
1000 r = libscf_create_self(h);
1001 if (r == 0)
1002 break;
1003
1004 assert(r == ECONNABORTED);
1005 libscf_handle_rebind(h);
1006 }
1007
1008 restarter_take_pending_snapshots(h);
1009
1010 (void) scf_handle_unbind(h);
1011 scf_handle_destroy(h);
1012
1013 return (NULL);
1014 }
1015
1016 /*
1017 * int stop_instance()
1018 *
1019 * Stop the instance identified by the instance given as the second argument,
1020 * for the cause stated.
1021 *
1022 * Returns
1023 * 0 - success
1024 * -1 - inst is in transition
1025 */
1026 static int
stop_instance(scf_handle_t * local_handle,restarter_inst_t * inst,stop_cause_t cause)1027 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1028 stop_cause_t cause)
1029 {
1030 fork_info_t *info;
1031 const char *cp;
1032 int err;
1033 restarter_error_t re;
1034 restarter_str_t reason;
1035 restarter_instance_state_t new_state;
1036
1037 assert(MUTEX_HELD(&inst->ri_lock));
1038 assert(inst->ri_method_thread == 0);
1039
1040 switch (cause) {
1041 case RSTOP_EXIT:
1042 re = RERR_RESTART;
1043 reason = restarter_str_ct_ev_exit;
1044 cp = "all processes in service exited";
1045 break;
1046 case RSTOP_ERR_CFG:
1047 re = RERR_FAULT;
1048 reason = restarter_str_method_failed;
1049 cp = "service exited with a configuration error";
1050 break;
1051 case RSTOP_ERR_EXIT:
1052 re = RERR_RESTART;
1053 reason = restarter_str_ct_ev_exit;
1054 cp = "service exited with an error";
1055 break;
1056 case RSTOP_CORE:
1057 re = RERR_FAULT;
1058 reason = restarter_str_ct_ev_core;
1059 cp = "process dumped core";
1060 break;
1061 case RSTOP_SIGNAL:
1062 re = RERR_FAULT;
1063 reason = restarter_str_ct_ev_signal;
1064 cp = "process received fatal signal from outside the service";
1065 break;
1066 case RSTOP_HWERR:
1067 re = RERR_FAULT;
1068 reason = restarter_str_ct_ev_hwerr;
1069 cp = "process killed due to uncorrectable hardware error";
1070 break;
1071 case RSTOP_DEPENDENCY:
1072 re = RERR_RESTART;
1073 reason = restarter_str_dependency_activity;
1074 cp = "dependency activity requires stop";
1075 break;
1076 case RSTOP_DISABLE:
1077 re = RERR_RESTART;
1078 reason = restarter_str_disable_request;
1079 cp = "service disabled";
1080 break;
1081 case RSTOP_RESTART:
1082 re = RERR_RESTART;
1083 reason = restarter_str_restart_request;
1084 cp = "service restarting";
1085 break;
1086 default:
1087 #ifndef NDEBUG
1088 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1089 cause, __FILE__, __LINE__);
1090 #endif
1091 abort();
1092 }
1093
1094 /* Services in the disabled and maintenance state are ignored */
1095 if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1096 inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1097 log_framework(LOG_DEBUG,
1098 "%s: stop_instance -> is maint/disabled\n",
1099 inst->ri_i.i_fmri);
1100 return (0);
1101 }
1102
1103 /* Already stopped instances are left alone */
1104 if (instance_started(inst) == 0) {
1105 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1106 inst->ri_i.i_fmri);
1107 return (0);
1108 }
1109
1110 if (instance_in_transition(inst)) {
1111 /* requeue event by returning -1 */
1112 log_framework(LOG_DEBUG,
1113 "Restarter: Not stopping %s, in transition.\n",
1114 inst->ri_i.i_fmri);
1115 return (-1);
1116 }
1117
1118 log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1119
1120 log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1121 "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1122
1123 if (instance_is_wait_style(inst) &&
1124 (cause == RSTOP_EXIT ||
1125 cause == RSTOP_ERR_CFG ||
1126 cause == RSTOP_ERR_EXIT)) {
1127 /*
1128 * No need to stop instance, as child has exited; remove
1129 * contract and move the instance to the offline state.
1130 */
1131 switch (err = restarter_instance_update_states(local_handle,
1132 inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1133 reason)) {
1134 case 0:
1135 case ECONNRESET:
1136 break;
1137
1138 default:
1139 bad_error("restarter_instance_update_states", err);
1140 }
1141
1142 if (cause == RSTOP_ERR_EXIT) {
1143 /*
1144 * The RSTOP_ERR_EXIT cause is set via the
1145 * wait_thread -> wait_remove code path when we have
1146 * a "wait" style svc that exited with an error. If
1147 * the svc is failing too quickly, we throttle it so
1148 * that we don't restart it more than once/second.
1149 * Since we know we're running in the wait thread its
1150 * ok to throttle it right here.
1151 */
1152 (void) update_fault_count(inst, FAULT_COUNT_INCR);
1153 if (method_rate_critical(inst)) {
1154 log_instance(inst, B_TRUE, "Failing too "
1155 "quickly, throttling.");
1156 (void) sleep(WT_SVC_ERR_THROTTLE);
1157 }
1158 } else {
1159 (void) update_fault_count(inst, FAULT_COUNT_RESET);
1160 reset_start_times(inst);
1161 }
1162
1163 if (inst->ri_i.i_primary_ctid != 0) {
1164 inst->ri_m_inst =
1165 safe_scf_instance_create(local_handle);
1166 inst->ri_mi_deleted = B_FALSE;
1167
1168 libscf_reget_instance(inst);
1169 method_remove_contract(inst, B_TRUE, B_TRUE);
1170
1171 scf_instance_destroy(inst->ri_m_inst);
1172 inst->ri_m_inst = NULL;
1173 }
1174
1175 switch (err = restarter_instance_update_states(local_handle,
1176 inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1177 reason)) {
1178 case 0:
1179 case ECONNRESET:
1180 break;
1181
1182 default:
1183 bad_error("restarter_instance_update_states", err);
1184 }
1185
1186 if (cause != RSTOP_ERR_CFG)
1187 return (0);
1188 } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1189 /*
1190 * Stopping a wait service through means other than the pid
1191 * exiting should keep wait_thread() from restarting the
1192 * service, by removing it from the wait list.
1193 * We cannot remove it right now otherwise the process will
1194 * end up <defunct> so mark it to be ignored.
1195 */
1196 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1197 }
1198
1199 /*
1200 * There are some configuration errors which we cannot detect until we
1201 * try to run the method. For example, see exec_method() where the
1202 * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1203 * in several cases. If this happens for a "wait-style" svc,
1204 * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1205 * the configuration error and go into maintenance, even though it is
1206 * a "wait-style" svc.
1207 */
1208 if (cause == RSTOP_ERR_CFG)
1209 new_state = RESTARTER_STATE_MAINT;
1210 else
1211 new_state = inst->ri_i.i_enabled ?
1212 RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1213
1214 switch (err = restarter_instance_update_states(local_handle, inst,
1215 inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1216 case 0:
1217 case ECONNRESET:
1218 break;
1219
1220 default:
1221 bad_error("restarter_instance_update_states", err);
1222 }
1223
1224 info = startd_zalloc(sizeof (fork_info_t));
1225
1226 info->sf_id = inst->ri_id;
1227 info->sf_method_type = METHOD_STOP;
1228 info->sf_event_type = re;
1229 info->sf_reason = reason;
1230 inst->ri_method_thread = startd_thread_create(method_thread, info);
1231
1232 return (0);
1233 }
1234
1235 /*
1236 * Returns
1237 * ENOENT - fmri is not in instance_list
1238 * 0 - success
1239 * ECONNRESET - success, though handle was rebound
1240 * -1 - instance is in transition
1241 */
1242 int
stop_instance_fmri(scf_handle_t * h,const char * fmri,uint_t flags)1243 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1244 {
1245 restarter_inst_t *rip;
1246 int r;
1247
1248 rip = inst_lookup_by_name(fmri);
1249 if (rip == NULL)
1250 return (ENOENT);
1251
1252 r = stop_instance(h, rip, flags);
1253
1254 MUTEX_UNLOCK(&rip->ri_lock);
1255
1256 return (r);
1257 }
1258
1259 static void
unmaintain_instance(scf_handle_t * h,restarter_inst_t * rip,unmaint_cause_t cause)1260 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1261 unmaint_cause_t cause)
1262 {
1263 ctid_t ctid;
1264 scf_instance_t *inst;
1265 int r;
1266 uint_t tries = 0, msecs = ALLOC_DELAY;
1267 const char *cp;
1268 restarter_str_t reason;
1269
1270 assert(MUTEX_HELD(&rip->ri_lock));
1271
1272 if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1273 log_error(LOG_DEBUG, "Restarter: "
1274 "Ignoring maintenance off command because %s is not in the "
1275 "maintenance state.\n", rip->ri_i.i_fmri);
1276 return;
1277 }
1278
1279 switch (cause) {
1280 case RUNMAINT_CLEAR:
1281 cp = "clear requested";
1282 reason = restarter_str_clear_request;
1283 break;
1284 case RUNMAINT_DISABLE:
1285 cp = "disable requested";
1286 reason = restarter_str_disable_request;
1287 break;
1288 default:
1289 #ifndef NDEBUG
1290 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1291 cause, __FILE__, __LINE__);
1292 #endif
1293 abort();
1294 }
1295
1296 log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1297 cp);
1298 log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1299 "%s.\n", rip->ri_i.i_fmri, cp);
1300
1301 (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1302 RESTARTER_STATE_NONE, RERR_RESTART, reason);
1303
1304 /*
1305 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1306 * a primary contract.
1307 */
1308 if (rip->ri_i.i_primary_ctid == 0)
1309 return;
1310
1311 ctid = rip->ri_i.i_primary_ctid;
1312 contract_abandon(ctid);
1313 rip->ri_i.i_primary_ctid = 0;
1314
1315 rep_retry:
1316 switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1317 case 0:
1318 break;
1319
1320 case ECONNABORTED:
1321 libscf_handle_rebind(h);
1322 goto rep_retry;
1323
1324 case ENOENT:
1325 /* Must have been deleted. */
1326 return;
1327
1328 case EINVAL:
1329 case ENOTSUP:
1330 default:
1331 bad_error("libscf_handle_rebind", r);
1332 }
1333
1334 again:
1335 r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1336 switch (r) {
1337 case 0:
1338 break;
1339
1340 case ENOMEM:
1341 ++tries;
1342 if (tries < ALLOC_RETRY) {
1343 (void) poll(NULL, 0, msecs);
1344 msecs *= ALLOC_DELAY_MULT;
1345 goto again;
1346 }
1347
1348 uu_die("Insufficient memory.\n");
1349 /* NOTREACHED */
1350
1351 case ECONNABORTED:
1352 scf_instance_destroy(inst);
1353 libscf_handle_rebind(h);
1354 goto rep_retry;
1355
1356 case ECANCELED:
1357 break;
1358
1359 case EPERM:
1360 case EACCES:
1361 case EROFS:
1362 log_error(LOG_INFO,
1363 "Could not remove contract id %lu for %s (%s).\n", ctid,
1364 rip->ri_i.i_fmri, strerror(r));
1365 break;
1366
1367 case EINVAL:
1368 case EBADF:
1369 default:
1370 bad_error("restarter_remove_contract", r);
1371 }
1372
1373 scf_instance_destroy(inst);
1374 }
1375
1376 /*
1377 * enable_inst()
1378 * Set inst->ri_i.i_enabled. Expects 'e' to be _ENABLE, _DISABLE, or
1379 * _ADMIN_DISABLE. If the event is _ENABLE and inst is uninitialized or
1380 * disabled, move it to offline. If the event is _DISABLE or
1381 * _ADMIN_DISABLE, make sure inst will move to disabled.
1382 *
1383 * Returns
1384 * 0 - success
1385 * ECONNRESET - h was rebound
1386 */
1387 static int
enable_inst(scf_handle_t * h,restarter_inst_t * inst,restarter_instance_qentry_t * riq)1388 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1389 restarter_instance_qentry_t *riq)
1390 {
1391 restarter_instance_state_t state;
1392 restarter_event_type_t e = riq->riq_type;
1393 restarter_str_t reason = restarter_str_per_configuration;
1394 int r;
1395
1396 assert(MUTEX_HELD(&inst->ri_lock));
1397 assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1398 e == RESTARTER_EVENT_TYPE_DISABLE ||
1399 e == RESTARTER_EVENT_TYPE_ENABLE);
1400 assert(instance_in_transition(inst) == 0);
1401
1402 state = inst->ri_i.i_state;
1403
1404 if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1405 inst->ri_i.i_enabled = 1;
1406
1407 if (state == RESTARTER_STATE_UNINIT ||
1408 state == RESTARTER_STATE_DISABLED) {
1409 /*
1410 * B_FALSE: Don't log an error if the log_instance()
1411 * fails because it will fail on the miniroot before
1412 * install-discovery runs.
1413 */
1414 log_instance(inst, B_FALSE, "Enabled.");
1415 log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1416 inst->ri_i.i_fmri);
1417
1418 /*
1419 * If we are coming from DISABLED, it was obviously an
1420 * enable request. If we are coming from UNINIT, it may
1421 * have been a sevice in MAINT that was cleared.
1422 */
1423 if (riq->riq_reason == restarter_str_clear_request)
1424 reason = restarter_str_clear_request;
1425 else if (state == RESTARTER_STATE_DISABLED)
1426 reason = restarter_str_enable_request;
1427 (void) restarter_instance_update_states(h, inst,
1428 RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1429 RERR_NONE, reason);
1430 } else {
1431 log_framework(LOG_DEBUG, "Restarter: "
1432 "Not changing state of %s for enable command.\n",
1433 inst->ri_i.i_fmri);
1434 }
1435 } else {
1436 inst->ri_i.i_enabled = 0;
1437
1438 switch (state) {
1439 case RESTARTER_STATE_ONLINE:
1440 case RESTARTER_STATE_DEGRADED:
1441 r = stop_instance(h, inst, RSTOP_DISABLE);
1442 return (r == ECONNRESET ? 0 : r);
1443
1444 case RESTARTER_STATE_OFFLINE:
1445 case RESTARTER_STATE_UNINIT:
1446 if (inst->ri_i.i_primary_ctid != 0) {
1447 inst->ri_m_inst = safe_scf_instance_create(h);
1448 inst->ri_mi_deleted = B_FALSE;
1449
1450 libscf_reget_instance(inst);
1451 method_remove_contract(inst, B_TRUE, B_TRUE);
1452
1453 scf_instance_destroy(inst->ri_m_inst);
1454 }
1455 /* B_FALSE: See log_instance(..., "Enabled."); above */
1456 log_instance(inst, B_FALSE, "Disabled.");
1457 log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1458 inst->ri_i.i_fmri);
1459
1460 /*
1461 * If we are coming from OFFLINE, it was obviously a
1462 * disable request. But if we are coming from
1463 * UNINIT, it may have been a disable request for a
1464 * service in MAINT.
1465 */
1466 if (riq->riq_reason == restarter_str_disable_request ||
1467 state == RESTARTER_STATE_OFFLINE)
1468 reason = restarter_str_disable_request;
1469 (void) restarter_instance_update_states(h, inst,
1470 RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1471 RERR_RESTART, reason);
1472 return (0);
1473
1474 case RESTARTER_STATE_DISABLED:
1475 break;
1476
1477 case RESTARTER_STATE_MAINT:
1478 /*
1479 * We only want to pull the instance out of maintenance
1480 * if the disable is on adminstrative request. The
1481 * graph engine sends _DISABLE events whenever a
1482 * service isn't in the disabled state, and we don't
1483 * want to pull the service out of maintenance if,
1484 * for example, it is there due to a dependency cycle.
1485 */
1486 if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1487 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1488 break;
1489
1490 default:
1491 #ifndef NDEBUG
1492 (void) fprintf(stderr, "Restarter instance %s has "
1493 "unknown state %d.\n", inst->ri_i.i_fmri, state);
1494 #endif
1495 abort();
1496 }
1497 }
1498
1499 return (0);
1500 }
1501
1502 static void
start_instance(scf_handle_t * local_handle,restarter_inst_t * inst,int32_t reason)1503 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1504 int32_t reason)
1505 {
1506 fork_info_t *info;
1507 restarter_str_t new_reason;
1508
1509 assert(MUTEX_HELD(&inst->ri_lock));
1510 assert(instance_in_transition(inst) == 0);
1511 assert(inst->ri_method_thread == 0);
1512
1513 log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1514 inst->ri_i.i_fmri);
1515
1516 /*
1517 * We want to keep the original reason for restarts and clear actions
1518 */
1519 switch (reason) {
1520 case restarter_str_restart_request:
1521 case restarter_str_clear_request:
1522 new_reason = reason;
1523 break;
1524 default:
1525 new_reason = restarter_str_dependencies_satisfied;
1526 }
1527
1528 /* Services in the disabled and maintenance state are ignored */
1529 if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1530 inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1531 inst->ri_i.i_enabled == 0) {
1532 log_framework(LOG_DEBUG,
1533 "%s: start_instance -> is maint/disabled\n",
1534 inst->ri_i.i_fmri);
1535 return;
1536 }
1537
1538 /* Already started instances are left alone */
1539 if (instance_started(inst) == 1) {
1540 log_framework(LOG_DEBUG,
1541 "%s: start_instance -> is already started\n",
1542 inst->ri_i.i_fmri);
1543 return;
1544 }
1545
1546 log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1547
1548 (void) restarter_instance_update_states(local_handle, inst,
1549 inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1550
1551 info = startd_zalloc(sizeof (fork_info_t));
1552
1553 info->sf_id = inst->ri_id;
1554 info->sf_method_type = METHOD_START;
1555 info->sf_event_type = RERR_NONE;
1556 info->sf_reason = new_reason;
1557 inst->ri_method_thread = startd_thread_create(method_thread, info);
1558 }
1559
1560 static int
event_from_tty(scf_handle_t * h,restarter_inst_t * rip)1561 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1562 {
1563 scf_instance_t *inst;
1564 int ret = 0;
1565
1566 if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1567 return (-1);
1568
1569 ret = restarter_inst_ractions_from_tty(inst);
1570
1571 scf_instance_destroy(inst);
1572 return (ret);
1573 }
1574
1575 static boolean_t
restart_dump(scf_handle_t * h,restarter_inst_t * rip)1576 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1577 {
1578 scf_instance_t *inst;
1579 boolean_t ret = B_FALSE;
1580
1581 if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1582 return (-1);
1583
1584 if (restarter_inst_dump(inst) == 1)
1585 ret = B_TRUE;
1586
1587 scf_instance_destroy(inst);
1588 return (ret);
1589 }
1590
1591 static void
maintain_instance(scf_handle_t * h,restarter_inst_t * rip,int immediate,restarter_str_t reason)1592 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1593 restarter_str_t reason)
1594 {
1595 fork_info_t *info;
1596 scf_instance_t *scf_inst = NULL;
1597
1598 assert(MUTEX_HELD(&rip->ri_lock));
1599 assert(reason != restarter_str_none);
1600 assert(rip->ri_method_thread == 0);
1601
1602 log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1603 restarter_get_str_short(reason));
1604 log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1605 rip->ri_i.i_fmri, restarter_get_str_short(reason));
1606
1607 /* Services in the maintenance state are ignored */
1608 if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1609 log_framework(LOG_DEBUG,
1610 "%s: maintain_instance -> is already in maintenance\n",
1611 rip->ri_i.i_fmri);
1612 return;
1613 }
1614
1615 /*
1616 * If reason state is restarter_str_service_request and
1617 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1618 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1619 */
1620 if (reason == restarter_str_service_request &&
1621 libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1622 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1623 if (restarter_inst_set_aux_fmri(scf_inst))
1624 log_framework(LOG_DEBUG, "%s: "
1625 "restarter_inst_set_aux_fmri failed: ",
1626 rip->ri_i.i_fmri);
1627 } else {
1628 log_framework(LOG_DEBUG, "%s: "
1629 "restarter_inst_validate_ractions_aux_fmri "
1630 "failed: ", rip->ri_i.i_fmri);
1631
1632 if (restarter_inst_reset_aux_fmri(scf_inst))
1633 log_framework(LOG_DEBUG, "%s: "
1634 "restarter_inst_reset_aux_fmri failed: ",
1635 rip->ri_i.i_fmri);
1636 }
1637 scf_instance_destroy(scf_inst);
1638 }
1639
1640 if (immediate || !instance_started(rip)) {
1641 if (rip->ri_i.i_primary_ctid != 0) {
1642 rip->ri_m_inst = safe_scf_instance_create(h);
1643 rip->ri_mi_deleted = B_FALSE;
1644
1645 libscf_reget_instance(rip);
1646 method_remove_contract(rip, B_TRUE, B_TRUE);
1647
1648 scf_instance_destroy(rip->ri_m_inst);
1649 }
1650
1651 (void) restarter_instance_update_states(h, rip,
1652 RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1653 reason);
1654 return;
1655 }
1656
1657 (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1658 RESTARTER_STATE_MAINT, RERR_NONE, reason);
1659
1660 log_transition(rip, MAINT_REQUESTED);
1661
1662 info = startd_zalloc(sizeof (*info));
1663 info->sf_id = rip->ri_id;
1664 info->sf_method_type = METHOD_STOP;
1665 info->sf_event_type = RERR_RESTART;
1666 info->sf_reason = reason;
1667 rip->ri_method_thread = startd_thread_create(method_thread, info);
1668 }
1669
1670 static void
refresh_instance(scf_handle_t * h,restarter_inst_t * rip)1671 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1672 {
1673 scf_instance_t *inst;
1674 scf_snapshot_t *snap;
1675 fork_info_t *info;
1676 int r;
1677
1678 assert(MUTEX_HELD(&rip->ri_lock));
1679
1680 log_instance(rip, B_TRUE, "Rereading configuration.");
1681 log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1682 rip->ri_i.i_fmri);
1683
1684 rep_retry:
1685 r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1686 switch (r) {
1687 case 0:
1688 break;
1689
1690 case ECONNABORTED:
1691 libscf_handle_rebind(h);
1692 goto rep_retry;
1693
1694 case ENOENT:
1695 /* Must have been deleted. */
1696 return;
1697
1698 case EINVAL:
1699 case ENOTSUP:
1700 default:
1701 bad_error("libscf_fmri_get_instance", r);
1702 }
1703
1704 snap = libscf_get_running_snapshot(inst);
1705
1706 r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1707 &rip->ri_utmpx_prefix);
1708 switch (r) {
1709 case 0:
1710 log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1711 rip->ri_i.i_fmri, service_style(rip->ri_flags));
1712 break;
1713
1714 case ECONNABORTED:
1715 scf_instance_destroy(inst);
1716 scf_snapshot_destroy(snap);
1717 libscf_handle_rebind(h);
1718 goto rep_retry;
1719
1720 case ECANCELED:
1721 case ENOENT:
1722 /* Succeed in anticipation of REMOVE_INSTANCE. */
1723 break;
1724
1725 default:
1726 bad_error("libscf_get_startd_properties", r);
1727 }
1728
1729 if (instance_started(rip)) {
1730 /* Refresh does not change the state. */
1731 (void) restarter_instance_update_states(h, rip,
1732 rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1733 restarter_str_refresh);
1734
1735 info = startd_zalloc(sizeof (*info));
1736 info->sf_id = rip->ri_id;
1737 info->sf_method_type = METHOD_REFRESH;
1738 info->sf_event_type = RERR_REFRESH;
1739 info->sf_reason = NULL;
1740
1741 assert(rip->ri_method_thread == 0);
1742 rip->ri_method_thread =
1743 startd_thread_create(method_thread, info);
1744 }
1745
1746 scf_snapshot_destroy(snap);
1747 scf_instance_destroy(inst);
1748 }
1749
/*
 * Printable names for restarter events, indexed by event type value when
 * logging.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE",
	"STOP_RESET"
};
1756
/*
 * void *restarter_process_events()
 *
 *  Called in a separate thread to process the events on an instance's
 *  queue.  Empties the queue completely, and tries to keep the thread
 *  around for a little while after the queue is empty to save on
 *  startup costs.
 *
 *  arg is the FMRI string of the instance whose queue is to be drained;
 *  this thread frees it before exiting.  The thread uses its own
 *  repository handle, which is unbound and destroyed on the way out.
 *
 *  Locking: the queue lock is held while looking at the queue and dropped
 *  while an event is being handled; the instance lock is held only while
 *  an event is being handled.  An event stays at the head of the queue
 *  until it has been handled, and is only then removed and freed.
 *
 *  Always returns NULL.
 */
static void *
restarter_process_events(void *arg)
{
	scf_handle_t *h;
	restarter_instance_qentry_t *event;
	restarter_inst_t *rip;
	char *fmri = (char *)arg;
	struct timespec to;

	assert(fmri != NULL);

	h = libscf_handle_create_bound_loop();

	/* grab the queue lock */
	rip = inst_lookup_queue(fmri);
	if (rip == NULL)
		goto out;

again:

	/* Peek at the head; the entry is removed only after it is handled. */
	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
		restarter_inst_t *inst;

		/* drop the queue lock */
		MUTEX_UNLOCK(&rip->ri_queue_lock);

		/*
		 * Grab the inst lock -- this waits until any outstanding
		 * method finishes running.
		 */
		inst = inst_lookup_by_name(fmri);
		if (inst == NULL) {
			/* Getting deleted in the middle isn't an error. */
			goto cont;
		}

		assert(instance_in_transition(inst) == 0);

		/* process the event */
		switch (event->riq_type) {
		case RESTARTER_EVENT_TYPE_ENABLE:
		case RESTARTER_EVENT_TYPE_DISABLE:
			(void) enable_inst(h, inst, event);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
			/* Only forget the start history if the disable took. */
			if (enable_inst(h, inst, event) == 0)
				reset_start_times(inst);
			break;

		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
			/*
			 * The instance is gone after this call, so skip the
			 * unlock below (presumably restarter_delete_inst()
			 * disposes of the lock along with the instance).
			 */
			restarter_delete_inst(inst);
			inst = NULL;
			goto cont;

		case RESTARTER_EVENT_TYPE_STOP_RESET:
			reset_start_times(inst);
			/* FALLTHROUGH */
		case RESTARTER_EVENT_TYPE_STOP:
			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
			break;

		case RESTARTER_EVENT_TYPE_START:
			start_instance(h, inst, event->riq_reason);
			break;

		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
			maintain_instance(h, inst, 0,
			    restarter_str_dependency_cycle);
			break;

		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			maintain_instance(h, inst, 0,
			    restarter_str_invalid_dependency);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
			/*
			 * A zero return from event_from_tty() is recorded as
			 * a service request; anything else (including a
			 * failed lookup) as an administrative request.
			 */
			if (event_from_tty(h, inst) == 0)
				maintain_instance(h, inst, 0,
				    restarter_str_service_request);
			else
				maintain_instance(h, inst, 0,
				    restarter_str_administrative_request);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
			/* As above, but without running the stop method. */
			if (event_from_tty(h, inst) == 0)
				maintain_instance(h, inst, 1,
				    restarter_str_service_request);
			else
				maintain_instance(h, inst, 1,
				    restarter_str_administrative_request);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
			reset_start_times(inst);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
			refresh_instance(h, inst);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
			log_framework(LOG_WARNING, "Restarter: "
			    "%s command (for %s) unimplemented.\n",
			    event_names[event->riq_type], inst->ri_i.i_fmri);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
			if (!instance_started(inst)) {
				log_framework(LOG_DEBUG, "Restarter: "
				    "Not restarting %s; not running.\n",
				    inst->ri_i.i_fmri);
			} else {
				/*
				 * Stop the instance.  If it can be restarted,
				 * the graph engine will send a new event.
				 */
				if (restart_dump(h, inst)) {
					(void) contract_kill(
					    inst->ri_i.i_primary_ctid, SIGABRT,
					    inst->ri_i.i_fmri);
				} else if (stop_instance(h, inst,
				    RSTOP_RESTART) == 0) {
					reset_start_times(inst);
				}
			}
			break;

		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
		default:
			/*
			 * ADD_INSTANCE is handled synchronously by the event
			 * thread and should never be queued.
			 */
#ifndef NDEBUG
			uu_warn("%s:%d: Bad restarter event %d.  "
			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
#endif
			abort();
		}

		assert(inst != NULL);
		MUTEX_UNLOCK(&inst->ri_lock);

cont:
		/* grab the queue lock */
		rip = inst_lookup_queue(fmri);
		if (rip == NULL)
			goto out;

		/* delete the event */
		uu_list_remove(rip->ri_queue, event);
		startd_free(event, sizeof (restarter_instance_qentry_t));
	}

	assert(rip != NULL);

	/*
	 * Try to preserve the thread for a little while for future use.
	 * The wait is for up to three seconds on the queue's condition
	 * variable, with the queue lock as the associated mutex.
	 */
	to.tv_sec = 3;
	to.tv_nsec = 0;
	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
	    &rip->ri_queue_lock, &to);

	/* Something arrived while we were waiting: go drain it. */
	if (uu_list_first(rip->ri_queue) != NULL)
		goto again;

	/* Clear the thread id while still holding the queue lock. */
	rip->ri_queue_thread = 0;
	MUTEX_UNLOCK(&rip->ri_queue_lock);

out:
	(void) scf_handle_unbind(h);
	scf_handle_destroy(h);
	free(fmri);
	return (NULL);
}
1940
1941 static int
is_admin_event(restarter_event_type_t t)1942 is_admin_event(restarter_event_type_t t) {
1943
1944 switch (t) {
1945 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1946 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1947 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1948 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1949 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1950 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1951 return (1);
1952 default:
1953 return (0);
1954 }
1955 }
1956
1957 static void
restarter_queue_event(restarter_inst_t * ri,restarter_protocol_event_t * e)1958 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1959 {
1960 restarter_instance_qentry_t *qe;
1961 int r;
1962
1963 assert(MUTEX_HELD(&ri->ri_queue_lock));
1964 assert(!MUTEX_HELD(&ri->ri_lock));
1965
1966 qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1967 qe->riq_type = e->rpe_type;
1968 qe->riq_reason = e->rpe_reason;
1969
1970 uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1971 r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1972 assert(r == 0);
1973 }
1974
/*
 * void *restarter_event_thread()
 *
 *  Handle incoming graph events by placing them on a per-instance
 *  queue.  We can't lock the main part of the instance structure, so
 *  just modify the separately locked event queue portion.
 *
 *  The thread sleeps on ru->restarter_update_cv until woken, then drains
 *  every pending protocol event.  ADD_INSTANCE events are handled here
 *  directly; every other event is appended to the target instance's queue,
 *  and a per-instance processing thread is created or signalled to consume
 *  it.  Administrative events are dropped once the instance queue grows
 *  beyond RINST_QUEUE_THRESHOLD entries.
 *
 *  The loop never terminates; the argument is unused.
 */
/*ARGSUSED*/
static void *
restarter_event_thread(void *unused)
{
	scf_handle_t *h;

	/*
	 * This is a new thread, and thus, gets its own handle
	 * to the repository.
	 */
	h = libscf_handle_create_bound_loop();

	MUTEX_LOCK(&ru->restarter_update_lock);

	/*CONSTCOND*/
	while (1) {
		restarter_protocol_event_t *e;

		/* Loop on the predicate to cope with spurious wakeups. */
		while (ru->restarter_update_wakeup == 0)
			(void) pthread_cond_wait(&ru->restarter_update_cv,
			    &ru->restarter_update_lock);

		ru->restarter_update_wakeup = 0;

		/* The update lock is held each time the dequeue is called. */
		while ((e = restarter_event_dequeue()) != NULL) {
			restarter_inst_t *rip;
			char *fmri;

			/* Don't hold the update lock while handling e. */
			MUTEX_UNLOCK(&ru->restarter_update_lock);

			/*
			 * ADD_INSTANCE is special: there's likely no
			 * instance structure yet, so we need to handle the
			 * addition synchronously.
			 */
			switch (e->rpe_type) {
			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
				if (restarter_insert_inst(h, e->rpe_inst) != 0)
					log_error(LOG_INFO, "Restarter: "
					    "Could not add %s.\n", e->rpe_inst);

				/*
				 * One fewer instance left to load; wake
				 * anyone waiting for the count to reach zero.
				 */
				MUTEX_LOCK(&st->st_load_lock);
				if (--st->st_load_instances == 0)
					(void) pthread_cond_broadcast(
					    &st->st_load_cv);
				MUTEX_UNLOCK(&st->st_load_lock);

				goto nolookup;
			}

			/*
			 * Lookup the instance, locking only the event queue.
			 * Can't grab ri_lock here because it might be held
			 * by a long-running method.
			 */
			rip = inst_lookup_queue(e->rpe_inst);
			if (rip == NULL) {
				log_error(LOG_INFO, "Restarter: "
				    "Ignoring %s command for unknown service "
				    "%s.\n", event_names[e->rpe_type],
				    e->rpe_inst);
				goto nolookup;
			}

			/* Keep ADMIN events from filling up the queue. */
			if (is_admin_event(e->rpe_type) &&
			    uu_list_numnodes(rip->ri_queue) >
			    RINST_QUEUE_THRESHOLD) {
				MUTEX_UNLOCK(&rip->ri_queue_lock);
				log_instance(rip, B_TRUE, "Instance event "
				    "queue overflow.  Dropping administrative "
				    "request.");
				log_framework(LOG_DEBUG, "%s: Instance event "
				    "queue overflow.  Dropping administrative "
				    "request.\n", rip->ri_i.i_fmri);
				goto nolookup;
			}

			/* Now add the event to the instance queue. */
			restarter_queue_event(rip, e);

			if (rip->ri_queue_thread == 0) {
				/*
				 * Start a thread if one isn't already
				 * running.  The new thread owns the copied
				 * FMRI string and frees it when it exits.
				 */
				fmri = safe_strdup(e->rpe_inst);
				rip->ri_queue_thread = startd_thread_create(
				    restarter_process_events, (void *)fmri);
			} else {
				/*
				 * Signal the existing thread that there's
				 * a new event.
				 */
				(void) pthread_cond_broadcast(
				    &rip->ri_queue_cv);
			}

			MUTEX_UNLOCK(&rip->ri_queue_lock);
nolookup:
			restarter_event_release(e);

			/* Retake the update lock before the next dequeue. */
			MUTEX_LOCK(&ru->restarter_update_lock);
		}
	}

	/*
	 * Unreachable for now -- there's currently no graceful cleanup
	 * called on exit().
	 */
	(void) scf_handle_unbind(h);
	scf_handle_destroy(h);
	return (NULL);
}
2096
2097 static restarter_inst_t *
contract_to_inst(ctid_t ctid)2098 contract_to_inst(ctid_t ctid)
2099 {
2100 restarter_inst_t *inst;
2101 int id;
2102
2103 id = lookup_inst_by_contract(ctid);
2104 if (id == -1)
2105 return (NULL);
2106
2107 inst = inst_lookup_by_id(id);
2108 if (inst != NULL) {
2109 /*
2110 * Since ri_lock isn't held by the contract id lookup, this
2111 * instance may have been restarted and now be in a new
2112 * contract, making the old contract no longer valid for this
2113 * instance.
2114 */
2115 if (ctid != inst->ri_i.i_primary_ctid) {
2116 MUTEX_UNLOCK(&inst->ri_lock);
2117 inst = NULL;
2118 }
2119 }
2120 return (inst);
2121 }
2122
2123 /*
2124 * void contract_action()
2125 * Take action on contract events.
2126 */
2127 static void
contract_action(scf_handle_t * h,restarter_inst_t * inst,ctid_t id,uint32_t type)2128 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2129 uint32_t type)
2130 {
2131 const char *fmri = inst->ri_i.i_fmri;
2132
2133 assert(MUTEX_HELD(&inst->ri_lock));
2134
2135 /*
2136 * If startd has stopped this contract, there is no need to
2137 * stop it again.
2138 */
2139 if (inst->ri_i.i_primary_ctid > 0 &&
2140 inst->ri_i.i_primary_ctid_stopped)
2141 return;
2142
2143 if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2144 | CT_PR_EV_HWERR)) == 0) {
2145 /*
2146 * There shouldn't be other events, since that's not how we set
2147 * the terms. Thus, just log an error and drive on.
2148 */
2149 log_framework(LOG_NOTICE,
2150 "%s: contract %ld received unexpected critical event "
2151 "(%d)\n", fmri, id, type);
2152 return;
2153 }
2154
2155 assert(instance_in_transition(inst) == 0);
2156
2157 if (instance_is_wait_style(inst)) {
2158 /*
2159 * We ignore all events; if they impact the
2160 * process we're monitoring, then the
2161 * wait_thread will stop the instance.
2162 */
2163 log_framework(LOG_DEBUG,
2164 "%s: ignoring contract event on wait-style service\n",
2165 fmri);
2166 } else {
2167 /*
2168 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2169 */
2170 switch (type) {
2171 case CT_PR_EV_EMPTY:
2172 (void) stop_instance(h, inst, RSTOP_EXIT);
2173 break;
2174 case CT_PR_EV_CORE:
2175 (void) stop_instance(h, inst, RSTOP_CORE);
2176 break;
2177 case CT_PR_EV_SIGNAL:
2178 (void) stop_instance(h, inst, RSTOP_SIGNAL);
2179 break;
2180 case CT_PR_EV_HWERR:
2181 (void) stop_instance(h, inst, RSTOP_HWERR);
2182 break;
2183 }
2184 }
2185 }
2186
2187 /*
 * void *restarter_contracts_event_thread(void *)
2189 * Listens to the process contract bundle for critical events, taking action
2190 * on events from contracts we know we are responsible for.
2191 */
2192 /*ARGSUSED*/
2193 static void *
restarter_contracts_event_thread(void * unused)2194 restarter_contracts_event_thread(void *unused)
2195 {
2196 int fd, err;
2197 scf_handle_t *local_handle;
2198
2199 /*
2200 * Await graph load completion. That is, stop here, until we've scanned
2201 * the repository for contract - instance associations.
2202 */
2203 MUTEX_LOCK(&st->st_load_lock);
2204 while (!(st->st_load_complete && st->st_load_instances == 0))
2205 (void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2206 MUTEX_UNLOCK(&st->st_load_lock);
2207
2208 /*
2209 * This is a new thread, and thus, gets its own handle
2210 * to the repository.
2211 */
2212 if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2213 uu_die("Unable to bind a new repository handle: %s\n",
2214 scf_strerror(scf_error()));
2215
2216 fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2217 if (fd == -1)
2218 uu_die("process bundle open failed");
2219
2220 /*
2221 * Make sure we get all events (including those generated by configd
2222 * before this thread was started).
2223 */
2224 err = ct_event_reset(fd);
2225 assert(err == 0);
2226
2227 for (;;) {
2228 int efd, sfd;
2229 ct_evthdl_t ev;
2230 uint32_t type;
2231 ctevid_t evid;
2232 ct_stathdl_t status;
2233 ctid_t ctid;
2234 restarter_inst_t *inst;
2235 uint64_t cookie;
2236
2237 if (err = ct_event_read_critical(fd, &ev)) {
2238 log_error(LOG_WARNING,
2239 "Error reading next contract event: %s",
2240 strerror(err));
2241 continue;
2242 }
2243
2244 evid = ct_event_get_evid(ev);
2245 ctid = ct_event_get_ctid(ev);
2246 type = ct_event_get_type(ev);
2247
2248 /* Fetch cookie. */
2249 if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2250 < 0) {
2251 ct_event_free(ev);
2252 continue;
2253 }
2254
2255 if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2256 log_framework(LOG_WARNING, "Could not get status for "
2257 "contract %ld: %s\n", ctid, strerror(err));
2258
2259 startd_close(sfd);
2260 ct_event_free(ev);
2261 continue;
2262 }
2263
2264 cookie = ct_status_get_cookie(status);
2265
2266 log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2267 "cookie %lld\n", type, ctid, cookie);
2268
2269 ct_status_free(status);
2270
2271 startd_close(sfd);
2272
2273 /*
2274 * svc.configd(1M) restart handling performed by the
2275 * fork_configd_thread. We don't acknowledge, as that thread
2276 * will do so.
2277 */
2278 if (cookie == CONFIGD_COOKIE) {
2279 ct_event_free(ev);
2280 continue;
2281 }
2282
2283 inst = NULL;
2284 if (storing_contract != 0 &&
2285 (inst = contract_to_inst(ctid)) == NULL) {
2286 /*
2287 * This can happen for two reasons:
2288 * - method_run() has not yet stored the
2289 * the contract into the internal hash table.
2290 * - we receive an EMPTY event for an abandoned
2291 * contract.
2292 * If there is any contract in the process of
2293 * being stored into the hash table then re-read
2294 * the event later.
2295 */
2296 log_framework(LOG_DEBUG,
2297 "Reset event %d for unknown "
2298 "contract id %ld\n", type, ctid);
2299
2300 /* don't go too fast */
2301 (void) poll(NULL, 0, 100);
2302
2303 (void) ct_event_reset(fd);
2304 ct_event_free(ev);
2305 continue;
2306 }
2307
2308 /*
2309 * Do not call contract_to_inst() again if first
2310 * call succeeded.
2311 */
2312 if (inst == NULL)
2313 inst = contract_to_inst(ctid);
2314 if (inst == NULL) {
2315 /*
2316 * This can happen if we receive an EMPTY
2317 * event for an abandoned contract.
2318 */
2319 log_framework(LOG_DEBUG,
2320 "Received event %d for unknown contract id "
2321 "%ld\n", type, ctid);
2322 } else {
2323 log_framework(LOG_DEBUG,
2324 "Received event %d for contract id "
2325 "%ld (%s)\n", type, ctid,
2326 inst->ri_i.i_fmri);
2327
2328 contract_action(local_handle, inst, ctid, type);
2329
2330 MUTEX_UNLOCK(&inst->ri_lock);
2331 }
2332
2333 efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2334 O_WRONLY);
2335 if (efd != -1) {
2336 (void) ct_ctl_ack(efd, evid);
2337 startd_close(efd);
2338 }
2339
2340 ct_event_free(ev);
2341
2342 }
2343
2344 /*NOTREACHED*/
2345 return (NULL);
2346 }
2347
2348 /*
2349 * Timeout queue, processed by restarter_timeouts_event_thread().
2350 */
2351 timeout_queue_t *timeouts;
2352 static uu_list_pool_t *timeout_pool;
2353
/*
 * Wakeup channel for the timeout thread.  timeout_insert() sets tu_wakeup
 * and broadcasts tu_cv; restarter_timeouts_event_thread() sleeps on tu_cv
 * while the timeout queue is empty and clears tu_wakeup when it wakes.
 */
struct timeout_update {
	pthread_mutex_t	tu_lock;	/* held around tu_wakeup accesses */
	pthread_cond_t	tu_cv;		/* broadcast when tu_wakeup is set */
	int		tu_wakeup;	/* nonzero: a timeout was queued */
};
typedef struct timeout_update timeout_update_t;

/* The single instance, allocated by timeout_init(). */
timeout_update_t *tu;
2361
2362 static const char *timeout_ovr_svcs[] = {
2363 "svc:/system/manifest-import:default",
2364 "svc:/network/initial:default",
2365 "svc:/network/service:default",
2366 "svc:/system/rmtmpfiles:default",
2367 "svc:/network/loopback:default",
2368 "svc:/network/physical:default",
2369 "svc:/system/device/local:default",
2370 "svc:/system/metainit:default",
2371 "svc:/system/filesystem/usr:default",
2372 "svc:/system/filesystem/minimal:default",
2373 "svc:/system/filesystem/local:default",
2374 NULL
2375 };
2376
2377 int
is_timeout_ovr(restarter_inst_t * inst)2378 is_timeout_ovr(restarter_inst_t *inst)
2379 {
2380 int i;
2381
2382 for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2383 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2384 log_instance(inst, B_TRUE, "Timeout override by "
2385 "svc.startd. Using infinite timeout.");
2386 return (1);
2387 }
2388 }
2389
2390 return (0);
2391 }
2392
2393 /*ARGSUSED*/
2394 static int
timeout_compare(const void * lc_arg,const void * rc_arg,void * private)2395 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2396 {
2397 hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2398 hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2399
2400 if (t1 > t2)
2401 return (1);
2402 else if (t1 < t2)
2403 return (-1);
2404 return (0);
2405 }
2406
2407 void
timeout_init()2408 timeout_init()
2409 {
2410 timeouts = startd_zalloc(sizeof (timeout_queue_t));
2411
2412 (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2413
2414 timeout_pool = startd_list_pool_create("timeouts",
2415 sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2416 timeout_compare, UU_LIST_POOL_DEBUG);
2417 assert(timeout_pool != NULL);
2418
2419 timeouts->tq_list = startd_list_create(timeout_pool,
2420 timeouts, UU_LIST_SORTED);
2421 assert(timeouts->tq_list != NULL);
2422
2423 tu = startd_zalloc(sizeof (timeout_update_t));
2424 (void) pthread_cond_init(&tu->tu_cv, NULL);
2425 (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2426 }
2427
2428 void
timeout_insert(restarter_inst_t * inst,ctid_t cid,uint64_t timeout_sec)2429 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2430 {
2431 hrtime_t now, timeout;
2432 timeout_entry_t *entry;
2433 uu_list_index_t idx;
2434
2435 assert(MUTEX_HELD(&inst->ri_lock));
2436
2437 now = gethrtime();
2438
2439 /*
2440 * If we overflow LLONG_MAX, we're never timing out anyways, so
2441 * just return.
2442 */
2443 if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2444 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2445 "treating as infinite.");
2446 return;
2447 }
2448
2449 /* hrtime is in nanoseconds. Convert timeout_sec. */
2450 timeout = now + (timeout_sec * 1000000000LL);
2451
2452 entry = startd_alloc(sizeof (timeout_entry_t));
2453 entry->te_timeout = timeout;
2454 entry->te_ctid = cid;
2455 entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2456 entry->te_logstem = safe_strdup(inst->ri_logstem);
2457 entry->te_fired = 0;
2458 /* Insert the calculated timeout time onto the queue. */
2459 MUTEX_LOCK(&timeouts->tq_lock);
2460 (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2461 uu_list_node_init(entry, &entry->te_link, timeout_pool);
2462 uu_list_insert(timeouts->tq_list, entry, idx);
2463 MUTEX_UNLOCK(&timeouts->tq_lock);
2464
2465 assert(inst->ri_timeout == NULL);
2466 inst->ri_timeout = entry;
2467
2468 MUTEX_LOCK(&tu->tu_lock);
2469 tu->tu_wakeup = 1;
2470 (void) pthread_cond_broadcast(&tu->tu_cv);
2471 MUTEX_UNLOCK(&tu->tu_lock);
2472 }
2473
2474
2475 void
timeout_remove(restarter_inst_t * inst,ctid_t cid)2476 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2477 {
2478 assert(MUTEX_HELD(&inst->ri_lock));
2479
2480 if (inst->ri_timeout == NULL)
2481 return;
2482
2483 assert(inst->ri_timeout->te_ctid == cid);
2484
2485 MUTEX_LOCK(&timeouts->tq_lock);
2486 uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2487 MUTEX_UNLOCK(&timeouts->tq_lock);
2488
2489 free(inst->ri_timeout->te_fmri);
2490 free(inst->ri_timeout->te_logstem);
2491 startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2492 inst->ri_timeout = NULL;
2493 }
2494
2495 static int
timeout_now()2496 timeout_now()
2497 {
2498 timeout_entry_t *e;
2499 hrtime_t now;
2500 int ret;
2501
2502 now = gethrtime();
2503
2504 /*
2505 * Walk through the (sorted) timeouts list. While the timeout
2506 * at the head of the list is <= the current time, kill the
2507 * method.
2508 */
2509 MUTEX_LOCK(&timeouts->tq_lock);
2510
2511 for (e = uu_list_first(timeouts->tq_list);
2512 e != NULL && e->te_timeout <= now;
2513 e = uu_list_next(timeouts->tq_list, e)) {
2514 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2515 "out. Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2516 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2517 "Method or service exit timed out. Killing contract %ld.",
2518 e->te_ctid);
2519 e->te_fired = 1;
2520 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2521 }
2522
2523 if (uu_list_numnodes(timeouts->tq_list) > 0)
2524 ret = 0;
2525 else
2526 ret = -1;
2527
2528 MUTEX_UNLOCK(&timeouts->tq_lock);
2529
2530 return (ret);
2531 }
2532
2533 /*
2534 * void *restarter_timeouts_event_thread(void *)
2535 * Responsible for monitoring the method timeouts. This thread must
2536 * be started before any methods are called.
2537 */
2538 /*ARGSUSED*/
2539 static void *
restarter_timeouts_event_thread(void * unused)2540 restarter_timeouts_event_thread(void *unused)
2541 {
2542 /*
2543 * Timeouts are entered on a priority queue, which is processed by
2544 * this thread. As timeouts are specified in seconds, we'll do
2545 * the necessary processing every second, as long as the queue
2546 * is not empty.
2547 */
2548
2549 /*CONSTCOND*/
2550 while (1) {
2551 /*
2552 * As long as the timeout list isn't empty, process it
2553 * every second.
2554 */
2555 if (timeout_now() == 0) {
2556 (void) sleep(1);
2557 continue;
2558 }
2559
2560 /* The list is empty, wait until we have more timeouts. */
2561 MUTEX_LOCK(&tu->tu_lock);
2562
2563 while (tu->tu_wakeup == 0)
2564 (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2565
2566 tu->tu_wakeup = 0;
2567 MUTEX_UNLOCK(&tu->tu_lock);
2568 }
2569
2570 return (NULL);
2571 }
2572
2573 void
restarter_start()2574 restarter_start()
2575 {
2576 (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2577 (void) startd_thread_create(restarter_event_thread, NULL);
2578 (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2579 (void) startd_thread_create(wait_thread, NULL);
2580 }
2581
2582
2583 void
restarter_init()2584 restarter_init()
2585 {
2586 restarter_instance_pool = startd_list_pool_create("restarter_instances",
2587 sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2588 ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2589 (void) memset(&instance_list, 0, sizeof (instance_list));
2590
2591 (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2592 instance_list.ril_instance_list = startd_list_create(
2593 restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2594
2595 restarter_queue_pool = startd_list_pool_create(
2596 "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2597 offsetof(restarter_instance_qentry_t, riq_link), NULL,
2598 UU_LIST_POOL_DEBUG);
2599
2600 contract_list_pool = startd_list_pool_create(
2601 "contract_list", sizeof (contract_entry_t),
2602 offsetof(contract_entry_t, ce_link), NULL,
2603 UU_LIST_POOL_DEBUG);
2604 contract_hash_init();
2605
2606 log_framework(LOG_DEBUG, "Initialized restarter\n");
2607 }
2608