1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2017 RackTop Systems.
25 * Copyright 2019 Joyent, Inc.
26 */
27
28 /*
29 * restarter.c - service manipulation
30 *
31 * This component manages services whose restarter is svc.startd, the standard
32 * restarter. It translates restarter protocol events from the graph engine
33 * into actions on processes, as a delegated restarter would do.
34 *
35 * The master restarter manages a number of always-running threads:
36 * - restarter event thread: events from the graph engine
37 * - timeout thread: thread to fire queued timeouts
38 * - contract thread: thread to handle contract events
39 * - wait thread: thread to handle wait-based services
40 *
41 * The other threads are created as-needed:
42 * - per-instance method threads
43 * - per-instance event processing threads
44 *
45 * The interaction of all threads must result in the following conditions
46 * being satisfied (on a per-instance basis):
47 * - restarter events must be processed in order
48 * - method execution must be serialized
49 * - instance delete must be held until outstanding methods are complete
50 * - contract events shouldn't be processed while a method is running
51 * - timeouts should fire even when a method is running
52 *
53 * Service instances are represented by restarter_inst_t's and are kept in the
54 * instance_list list.
55 *
56 * Service States
57 * The current state of a service instance is kept in
58 * restarter_inst_t->ri_i.i_state. If transition to a new state could take
59 * some time, then before we effect the transition we set
60 * restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
61 * rotate i_next_state to i_state and set i_next_state to
62 * RESTARTER_STATE_NONE. So usually i_next_state is _NONE when ri_lock is not
63 * held. The exception is when we launch methods, which are done with
64 * a separate thread. To keep any other threads from grabbing ri_lock before
65 * method_thread() does, we set ri_method_thread to the thread id of the
66 * method thread, and when it is nonzero any thread with a different thread id
67 * waits on ri_method_cv.
68 *
69 * Method execution is serialized by blocking on ri_method_cv in
70 * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread. This
71 * also prevents the instance structure from being deleted until all
72 * outstanding operations such as method_thread() have finished.
73 *
74 * Lock ordering:
75 *
76 * dgraph_lock [can be held when taking:]
77 * utmpx_lock
78 * dictionary->dict_lock
79 * st->st_load_lock
80 * wait_info_lock
81 * ru->restarter_update_lock
82 * restarter_queue->rpeq_lock
83 * instance_list.ril_lock
84 * inst->ri_lock
85 * st->st_configd_live_lock
86 *
87 * instance_list.ril_lock
88 * graph_queue->gpeq_lock
89 * gu->gu_lock
90 * st->st_configd_live_lock
91 * dictionary->dict_lock
92 * inst->ri_lock
93 * graph_queue->gpeq_lock
94 * gu->gu_lock
95 * tu->tu_lock
96 * tq->tq_lock
97 * inst->ri_queue_lock
98 * wait_info_lock
99 * bp->cb_lock
100 * utmpx_lock
101 *
102 * single_user_thread_lock
103 * wait_info_lock
104 * utmpx_lock
105 *
106 * gu_freeze_lock
107 *
108 * logbuf_mutex nests inside pretty much everything.
109 */
110
111 #include <sys/contract/process.h>
112 #include <sys/ctfs.h>
113 #include <sys/stat.h>
114 #include <sys/time.h>
115 #include <sys/types.h>
116 #include <sys/uio.h>
117 #include <sys/wait.h>
118 #include <assert.h>
119 #include <errno.h>
120 #include <fcntl.h>
121 #include <libcontract.h>
122 #include <libcontract_priv.h>
123 #include <libintl.h>
124 #include <librestart.h>
125 #include <librestart_priv.h>
126 #include <libuutil.h>
127 #include <limits.h>
128 #include <poll.h>
129 #include <port.h>
130 #include <pthread.h>
131 #include <stdarg.h>
132 #include <stdio.h>
133 #include <strings.h>
134 #include <unistd.h>
135
136 #include "startd.h"
137 #include "protocol.h"
138
139 uu_list_pool_t *contract_list_pool;
140 static uu_list_pool_t *restarter_instance_pool;
141 static restarter_instance_list_t instance_list;
142
143 static uu_list_pool_t *restarter_queue_pool;
144
145 #define WT_SVC_ERR_THROTTLE 1 /* 1 sec delay for erroring wait svc */
146
147 /*
148 * Function used to reset the restart times for an instance, when
149 * an administrative task comes along and essentially makes the times
150 * in this array ineffective.
151 */
152 static void
reset_start_times(restarter_inst_t * inst)153 reset_start_times(restarter_inst_t *inst)
154 {
155 inst->ri_start_index = 0;
156 bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
157 }
158
159 /*ARGSUSED*/
160 static int
restarter_instance_compare(const void * lc_arg,const void * rc_arg,void * private)161 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
162 void *private)
163 {
164 int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
165 int rc_id = *(int *)rc_arg;
166
167 if (lc_id > rc_id)
168 return (1);
169 if (lc_id < rc_id)
170 return (-1);
171 return (0);
172 }
173
174 static restarter_inst_t *
inst_lookup_by_name(const char * name)175 inst_lookup_by_name(const char *name)
176 {
177 int id;
178
179 id = dict_lookup_byname(name);
180 if (id == -1)
181 return (NULL);
182
183 return (inst_lookup_by_id(id));
184 }
185
186 restarter_inst_t *
inst_lookup_by_id(int id)187 inst_lookup_by_id(int id)
188 {
189 restarter_inst_t *inst;
190
191 MUTEX_LOCK(&instance_list.ril_lock);
192 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
193 if (inst != NULL)
194 MUTEX_LOCK(&inst->ri_lock);
195 MUTEX_UNLOCK(&instance_list.ril_lock);
196
197 if (inst != NULL) {
198 while (inst->ri_method_thread != 0 &&
199 !pthread_equal(inst->ri_method_thread, pthread_self())) {
200 ++inst->ri_method_waiters;
201 (void) pthread_cond_wait(&inst->ri_method_cv,
202 &inst->ri_lock);
203 assert(inst->ri_method_waiters > 0);
204 --inst->ri_method_waiters;
205 }
206 }
207
208 return (inst);
209 }
210
211 static restarter_inst_t *
inst_lookup_queue(const char * name)212 inst_lookup_queue(const char *name)
213 {
214 int id;
215 restarter_inst_t *inst;
216
217 id = dict_lookup_byname(name);
218 if (id == -1)
219 return (NULL);
220
221 MUTEX_LOCK(&instance_list.ril_lock);
222 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
223 if (inst != NULL)
224 MUTEX_LOCK(&inst->ri_queue_lock);
225 MUTEX_UNLOCK(&instance_list.ril_lock);
226
227 return (inst);
228 }
229
230 const char *
service_style(int flags)231 service_style(int flags)
232 {
233 switch (flags & RINST_STYLE_MASK) {
234 case RINST_CONTRACT: return ("contract");
235 case RINST_TRANSIENT: return ("transient");
236 case RINST_WAIT: return ("wait");
237
238 default:
239 #ifndef NDEBUG
240 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
241 #endif
242 abort();
243 /* NOTREACHED */
244 }
245 }
246
247 /*
248 * Fails with ECONNABORTED or ECANCELED.
249 */
250 static int
check_contract(restarter_inst_t * inst,boolean_t primary,scf_instance_t * scf_inst)251 check_contract(restarter_inst_t *inst, boolean_t primary,
252 scf_instance_t *scf_inst)
253 {
254 ctid_t *ctidp;
255 int fd, r;
256
257 ctidp = primary ? &inst->ri_i.i_primary_ctid :
258 &inst->ri_i.i_transient_ctid;
259
260 assert(*ctidp >= 1);
261
262 fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
263 if (fd >= 0) {
264 r = close(fd);
265 assert(r == 0);
266 return (0);
267 }
268
269 r = restarter_remove_contract(scf_inst, *ctidp, primary ?
270 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
271 switch (r) {
272 case 0:
273 case ECONNABORTED:
274 case ECANCELED:
275 *ctidp = 0;
276 return (r);
277
278 case ENOMEM:
279 uu_die("Out of memory\n");
280 /* NOTREACHED */
281
282 case EPERM:
283 uu_die("Insufficient privilege.\n");
284 /* NOTREACHED */
285
286 case EACCES:
287 uu_die("Repository backend access denied.\n");
288 /* NOTREACHED */
289
290 case EROFS:
291 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
292 "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
293 return (0);
294
295 case EINVAL:
296 case EBADF:
297 default:
298 assert(0);
299 abort();
300 /* NOTREACHED */
301 }
302 }
303
304 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
305
306 /*
307 * int restarter_insert_inst(scf_handle_t *, char *)
308 * If the inst is already in the restarter list, return its id. If the inst
309 * is not in the restarter list, initialize a restarter_inst_t, initialize its
310 * states, insert it into the list, and return 0.
311 *
312 * Fails with
313 * ENOENT - name is not in the repository
314 */
315 static int
restarter_insert_inst(scf_handle_t * h,const char * name)316 restarter_insert_inst(scf_handle_t *h, const char *name)
317 {
318 int id, r;
319 restarter_inst_t *inst;
320 uu_list_index_t idx;
321 scf_service_t *scf_svc;
322 scf_instance_t *scf_inst;
323 scf_snapshot_t *snap = NULL;
324 scf_propertygroup_t *pg;
325 char *svc_name, *inst_name;
326 char logfilebuf[PATH_MAX];
327 char *c;
328 boolean_t do_commit_states;
329 restarter_instance_state_t state, next_state;
330 protocol_states_t *ps;
331 pid_t start_pid;
332 restarter_str_t reason = restarter_str_insert_in_graph;
333
334 MUTEX_LOCK(&instance_list.ril_lock);
335
336 /*
337 * We don't use inst_lookup_by_name() here because we want the lookup
338 * & insert to be atomic.
339 */
340 id = dict_lookup_byname(name);
341 if (id != -1) {
342 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
343 &idx);
344 if (inst != NULL) {
345 MUTEX_UNLOCK(&instance_list.ril_lock);
346 return (0);
347 }
348 }
349
350 /* Allocate an instance */
351 inst = startd_zalloc(sizeof (restarter_inst_t));
352 inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
353 inst->ri_utmpx_prefix[0] = '\0';
354
355 inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
356 (void) strcpy((char *)inst->ri_i.i_fmri, name);
357
358 inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
359
360 /*
361 * id shouldn't be -1 since we use the same dictionary as graph.c, but
362 * just in case.
363 */
364 inst->ri_id = (id != -1 ? id : dict_insert(name));
365
366 special_online_hooks_get(name, &inst->ri_pre_online_hook,
367 &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
368
369 scf_svc = safe_scf_service_create(h);
370 scf_inst = safe_scf_instance_create(h);
371 pg = safe_scf_pg_create(h);
372 svc_name = startd_alloc(max_scf_name_size);
373 inst_name = startd_alloc(max_scf_name_size);
374
375 rep_retry:
376 if (snap != NULL)
377 scf_snapshot_destroy(snap);
378 if (inst->ri_logstem != NULL)
379 startd_free(inst->ri_logstem, PATH_MAX);
380 if (inst->ri_common_name != NULL)
381 free(inst->ri_common_name);
382 if (inst->ri_C_common_name != NULL)
383 free(inst->ri_C_common_name);
384 snap = NULL;
385 inst->ri_logstem = NULL;
386 inst->ri_common_name = NULL;
387 inst->ri_C_common_name = NULL;
388
389 if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
390 NULL, SCF_DECODE_FMRI_EXACT) != 0) {
391 switch (scf_error()) {
392 case SCF_ERROR_CONNECTION_BROKEN:
393 libscf_handle_rebind(h);
394 goto rep_retry;
395
396 case SCF_ERROR_NOT_FOUND:
397 goto deleted;
398 }
399
400 uu_die("Can't decode FMRI %s: %s\n", name,
401 scf_strerror(scf_error()));
402 }
403
404 /*
405 * If there's no running snapshot, then we execute using the editing
406 * snapshot. Pending snapshots will be taken later.
407 */
408 snap = libscf_get_running_snapshot(scf_inst);
409
410 if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
411 (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
412 0)) {
413 switch (scf_error()) {
414 case SCF_ERROR_NOT_SET:
415 break;
416
417 case SCF_ERROR_CONNECTION_BROKEN:
418 libscf_handle_rebind(h);
419 goto rep_retry;
420
421 default:
422 assert(0);
423 abort();
424 }
425
426 goto deleted;
427 }
428
429 (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
430 for (c = logfilebuf; *c != '\0'; c++)
431 if (*c == '/')
432 *c = '-';
433
434 inst->ri_logstem = startd_alloc(PATH_MAX);
435 (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
436 LOG_SUFFIX);
437
438 /*
439 * If the restarter group is missing, use uninit/none. Otherwise,
440 * we're probably being restarted & don't want to mess up the states
441 * that are there.
442 */
443 state = RESTARTER_STATE_UNINIT;
444 next_state = RESTARTER_STATE_NONE;
445
446 r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
447 if (r != 0) {
448 switch (scf_error()) {
449 case SCF_ERROR_CONNECTION_BROKEN:
450 libscf_handle_rebind(h);
451 goto rep_retry;
452
453 case SCF_ERROR_NOT_SET:
454 goto deleted;
455
456 case SCF_ERROR_NOT_FOUND:
457 /*
458 * This shouldn't happen since the graph engine should
459 * have initialized the state to uninitialized/none if
460 * there was no restarter pg. In case somebody
461 * deleted it, though....
462 */
463 do_commit_states = B_TRUE;
464 break;
465
466 default:
467 assert(0);
468 abort();
469 }
470 } else {
471 r = libscf_read_states(pg, &state, &next_state);
472 if (r != 0) {
473 do_commit_states = B_TRUE;
474 } else {
475 if (next_state != RESTARTER_STATE_NONE) {
476 /*
477 * Force next_state to _NONE since we
478 * don't look for method processes.
479 */
480 next_state = RESTARTER_STATE_NONE;
481 do_commit_states = B_TRUE;
482 } else {
483 /*
484 * The reason for transition will depend on
485 * state.
486 */
487 if (st->st_initial == 0)
488 reason = restarter_str_startd_restart;
489 else if (state == RESTARTER_STATE_MAINT)
490 reason = restarter_str_bad_repo_state;
491 /*
492 * Inform the restarter of our state without
493 * changing the STIME in the repository.
494 */
495 ps = startd_alloc(sizeof (*ps));
496 inst->ri_i.i_state = ps->ps_state = state;
497 inst->ri_i.i_next_state = ps->ps_state_next =
498 next_state;
499 ps->ps_reason = reason;
500
501 graph_protocol_send_event(inst->ri_i.i_fmri,
502 GRAPH_UPDATE_STATE_CHANGE, ps);
503
504 do_commit_states = B_FALSE;
505 }
506 }
507 }
508
509 switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
510 &inst->ri_utmpx_prefix)) {
511 case 0:
512 break;
513
514 case ECONNABORTED:
515 libscf_handle_rebind(h);
516 goto rep_retry;
517
518 case ECANCELED:
519 goto deleted;
520
521 case ENOENT:
522 /*
523 * This is odd, because the graph engine should have required
524 * the general property group. So we'll just use default
525 * flags in anticipation of the graph engine sending us
526 * REMOVE_INSTANCE when it finds out that the general property
527 * group has been deleted.
528 */
529 inst->ri_flags = RINST_CONTRACT;
530 break;
531
532 default:
533 assert(0);
534 abort();
535 }
536
537 r = libscf_get_template_values(scf_inst, snap,
538 &inst->ri_common_name, &inst->ri_C_common_name);
539
540 /*
541 * Copy our names to smaller buffers to reduce our memory footprint.
542 */
543 if (inst->ri_common_name != NULL) {
544 char *tmp = safe_strdup(inst->ri_common_name);
545 startd_free(inst->ri_common_name, max_scf_value_size);
546 inst->ri_common_name = tmp;
547 }
548
549 if (inst->ri_C_common_name != NULL) {
550 char *tmp = safe_strdup(inst->ri_C_common_name);
551 startd_free(inst->ri_C_common_name, max_scf_value_size);
552 inst->ri_C_common_name = tmp;
553 }
554
555 switch (r) {
556 case 0:
557 break;
558
559 case ECONNABORTED:
560 libscf_handle_rebind(h);
561 goto rep_retry;
562
563 case ECANCELED:
564 goto deleted;
565
566 case ECHILD:
567 case ENOENT:
568 break;
569
570 default:
571 assert(0);
572 abort();
573 }
574
575 switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
576 &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
577 &start_pid)) {
578 case 0:
579 break;
580
581 case ECONNABORTED:
582 libscf_handle_rebind(h);
583 goto rep_retry;
584
585 case ECANCELED:
586 goto deleted;
587
588 default:
589 assert(0);
590 abort();
591 }
592
593 if (inst->ri_i.i_primary_ctid >= 1) {
594 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
595
596 switch (check_contract(inst, B_TRUE, scf_inst)) {
597 case 0:
598 break;
599
600 case ECONNABORTED:
601 libscf_handle_rebind(h);
602 goto rep_retry;
603
604 case ECANCELED:
605 goto deleted;
606
607 default:
608 assert(0);
609 abort();
610 }
611 }
612
613 if (inst->ri_i.i_transient_ctid >= 1) {
614 switch (check_contract(inst, B_FALSE, scf_inst)) {
615 case 0:
616 break;
617
618 case ECONNABORTED:
619 libscf_handle_rebind(h);
620 goto rep_retry;
621
622 case ECANCELED:
623 goto deleted;
624
625 default:
626 assert(0);
627 abort();
628 }
629 }
630
631 /* No more failures we live through, so add it to the list. */
632 (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
633 (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
634 MUTEX_LOCK(&inst->ri_lock);
635 MUTEX_LOCK(&inst->ri_queue_lock);
636
637 (void) pthread_cond_init(&inst->ri_method_cv, NULL);
638
639 uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
640 uu_list_insert(instance_list.ril_instance_list, inst, idx);
641 MUTEX_UNLOCK(&instance_list.ril_lock);
642
643 if (start_pid != -1 &&
644 (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
645 int ret;
646 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
647 if (ret == -1) {
648 /*
649 * Implication: if we can't reregister the
650 * instance, we will start another one. Two
651 * instances may or may not result in a resource
652 * conflict.
653 */
654 log_error(LOG_WARNING,
655 "%s: couldn't reregister %ld for wait\n",
656 inst->ri_i.i_fmri, start_pid);
657 } else if (ret == 1) {
658 /*
659 * Leading PID has exited.
660 */
661 (void) stop_instance(h, inst, RSTOP_EXIT);
662 }
663 }
664
665
666 scf_pg_destroy(pg);
667
668 if (do_commit_states)
669 (void) restarter_instance_update_states(h, inst, state,
670 next_state, RERR_NONE, reason);
671
672 log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
673 service_style(inst->ri_flags));
674
675 MUTEX_UNLOCK(&inst->ri_queue_lock);
676 MUTEX_UNLOCK(&inst->ri_lock);
677
678 startd_free(svc_name, max_scf_name_size);
679 startd_free(inst_name, max_scf_name_size);
680 scf_snapshot_destroy(snap);
681 scf_instance_destroy(scf_inst);
682 scf_service_destroy(scf_svc);
683
684 log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
685 name);
686
687 return (0);
688
689 deleted:
690 MUTEX_UNLOCK(&instance_list.ril_lock);
691 startd_free(inst_name, max_scf_name_size);
692 startd_free(svc_name, max_scf_name_size);
693 if (snap != NULL)
694 scf_snapshot_destroy(snap);
695 scf_pg_destroy(pg);
696 scf_instance_destroy(scf_inst);
697 scf_service_destroy(scf_svc);
698 startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
699 uu_list_destroy(inst->ri_queue);
700 if (inst->ri_logstem != NULL)
701 startd_free(inst->ri_logstem, PATH_MAX);
702 if (inst->ri_common_name != NULL)
703 free(inst->ri_common_name);
704 if (inst->ri_C_common_name != NULL)
705 free(inst->ri_C_common_name);
706 startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
707 startd_free(inst, sizeof (restarter_inst_t));
708 return (ENOENT);
709 }
710
711 static void
restarter_delete_inst(restarter_inst_t * ri)712 restarter_delete_inst(restarter_inst_t *ri)
713 {
714 int id;
715 restarter_inst_t *rip;
716 void *cookie = NULL;
717 restarter_instance_qentry_t *e;
718
719 assert(MUTEX_HELD(&ri->ri_lock));
720
721 /*
722 * Must drop the instance lock so we can pick up the instance_list
723 * lock & remove the instance.
724 */
725 id = ri->ri_id;
726 MUTEX_UNLOCK(&ri->ri_lock);
727
728 MUTEX_LOCK(&instance_list.ril_lock);
729
730 rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
731 if (rip == NULL) {
732 MUTEX_UNLOCK(&instance_list.ril_lock);
733 return;
734 }
735
736 assert(ri == rip);
737
738 uu_list_remove(instance_list.ril_instance_list, ri);
739
740 log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
741 ri->ri_i.i_fmri);
742
743 MUTEX_UNLOCK(&instance_list.ril_lock);
744
745 /*
746 * We can lock the instance without holding the instance_list lock
747 * since we removed the instance from the list.
748 */
749 MUTEX_LOCK(&ri->ri_lock);
750 MUTEX_LOCK(&ri->ri_queue_lock);
751
752 if (ri->ri_i.i_primary_ctid >= 1)
753 contract_hash_remove(ri->ri_i.i_primary_ctid);
754
755 while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
756 (void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
757
758 while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
759 startd_free(e, sizeof (*e));
760 uu_list_destroy(ri->ri_queue);
761
762 startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
763 startd_free(ri->ri_logstem, PATH_MAX);
764 if (ri->ri_common_name != NULL)
765 free(ri->ri_common_name);
766 if (ri->ri_C_common_name != NULL)
767 free(ri->ri_C_common_name);
768 startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
769 (void) pthread_mutex_destroy(&ri->ri_lock);
770 (void) pthread_mutex_destroy(&ri->ri_queue_lock);
771 startd_free(ri, sizeof (restarter_inst_t));
772 }
773
774 /*
775 * instance_is_wait_style()
776 *
777 * Returns 1 if the given instance is a "wait-style" service instance.
778 */
779 int
instance_is_wait_style(restarter_inst_t * inst)780 instance_is_wait_style(restarter_inst_t *inst)
781 {
782 assert(MUTEX_HELD(&inst->ri_lock));
783 return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
784 }
785
786 /*
787 * instance_is_transient_style()
788 *
789 * Returns 1 if the given instance is a transient service instance.
790 */
791 int
instance_is_transient_style(restarter_inst_t * inst)792 instance_is_transient_style(restarter_inst_t *inst)
793 {
794 assert(MUTEX_HELD(&inst->ri_lock));
795 return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
796 }
797
798 /*
799 * instance_in_transition()
800 * Returns 1 if instance is in transition, 0 if not
801 */
802 int
instance_in_transition(restarter_inst_t * inst)803 instance_in_transition(restarter_inst_t *inst)
804 {
805 assert(MUTEX_HELD(&inst->ri_lock));
806 if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
807 return (0);
808 return (1);
809 }
810
811 /*
812 * returns 1 if instance is already started, 0 if not
813 */
814 static int
instance_started(restarter_inst_t * inst)815 instance_started(restarter_inst_t *inst)
816 {
817 int ret;
818
819 assert(MUTEX_HELD(&inst->ri_lock));
820
821 if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
822 inst->ri_i.i_state == RESTARTER_STATE_DEGRADED) {
823 ret = 1;
824 } else {
825 ret = 0;
826 }
827
828 return (ret);
829 }
830
831 /*
832 * Returns
833 * 0 - success
834 * ECONNRESET - success, but h was rebound
835 */
836 int
restarter_instance_update_states(scf_handle_t * h,restarter_inst_t * ri,restarter_instance_state_t new_state,restarter_instance_state_t new_state_next,restarter_error_t err,restarter_str_t reason)837 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
838 restarter_instance_state_t new_state,
839 restarter_instance_state_t new_state_next, restarter_error_t err,
840 restarter_str_t reason)
841 {
842 protocol_states_t *states;
843 int e;
844 uint_t retry_count = 0, msecs = ALLOC_DELAY;
845 boolean_t rebound = B_FALSE;
846 int prev_state_online;
847 int state_online;
848
849 assert(MUTEX_HELD(&ri->ri_lock));
850
851 prev_state_online = instance_started(ri);
852
853 retry:
854 e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
855 restarter_get_str_short(reason));
856 switch (e) {
857 case 0:
858 break;
859
860 case ENOMEM:
861 ++retry_count;
862 if (retry_count < ALLOC_RETRY) {
863 (void) poll(NULL, 0, msecs);
864 msecs *= ALLOC_DELAY_MULT;
865 goto retry;
866 }
867
868 /* Like startd_alloc(). */
869 uu_die("Insufficient memory.\n");
870 /* NOTREACHED */
871
872 case ECONNABORTED:
873 libscf_handle_rebind(h);
874 rebound = B_TRUE;
875 goto retry;
876
877 case EPERM:
878 case EACCES:
879 case EROFS:
880 log_error(LOG_NOTICE, "Could not commit state change for %s "
881 "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
882 /* FALLTHROUGH */
883
884 case ENOENT:
885 ri->ri_i.i_state = new_state;
886 ri->ri_i.i_next_state = new_state_next;
887 break;
888
889 case EINVAL:
890 default:
891 bad_error("_restarter_commit_states", e);
892 }
893
894 states = startd_alloc(sizeof (protocol_states_t));
895 states->ps_state = new_state;
896 states->ps_state_next = new_state_next;
897 states->ps_err = err;
898 states->ps_reason = reason;
899 graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
900 (void *)states);
901
902 state_online = instance_started(ri);
903
904 if (prev_state_online && !state_online)
905 ri->ri_post_offline_hook();
906 else if (!prev_state_online && state_online)
907 ri->ri_post_online_hook();
908
909 return (rebound ? ECONNRESET : 0);
910 }
911
912 void
restarter_mark_pending_snapshot(const char * fmri,uint_t flag)913 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
914 {
915 restarter_inst_t *inst;
916
917 assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
918
919 inst = inst_lookup_by_name(fmri);
920 if (inst == NULL)
921 return;
922
923 inst->ri_flags |= flag;
924
925 MUTEX_UNLOCK(&inst->ri_lock);
926 }
927
928 static void
restarter_take_pending_snapshots(scf_handle_t * h)929 restarter_take_pending_snapshots(scf_handle_t *h)
930 {
931 restarter_inst_t *inst;
932 int r;
933
934 MUTEX_LOCK(&instance_list.ril_lock);
935
936 for (inst = uu_list_first(instance_list.ril_instance_list);
937 inst != NULL;
938 inst = uu_list_next(instance_list.ril_instance_list, inst)) {
939 const char *fmri;
940 scf_instance_t *sinst = NULL;
941
942 MUTEX_LOCK(&inst->ri_lock);
943
944 /*
945 * This is where we'd check inst->ri_method_thread and if it
946 * were nonzero we'd wait in anticipation of another thread
947 * executing a method for inst. Doing so with the instance_list
948 * locked, though, leads to deadlock. Since taking a snapshot
949 * during that window won't hurt anything, we'll just continue.
950 */
951
952 fmri = inst->ri_i.i_fmri;
953
954 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
955 scf_snapshot_t *rsnap;
956
957 (void) libscf_fmri_get_instance(h, fmri, &sinst);
958
959 rsnap = libscf_get_or_make_running_snapshot(sinst,
960 fmri, B_FALSE);
961
962 scf_instance_destroy(sinst);
963
964 if (rsnap != NULL)
965 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
966
967 scf_snapshot_destroy(rsnap);
968 }
969
970 if (inst->ri_flags & RINST_RETAKE_START) {
971 switch (r = libscf_snapshots_poststart(h, fmri,
972 B_FALSE)) {
973 case 0:
974 case ENOENT:
975 inst->ri_flags &= ~RINST_RETAKE_START;
976 break;
977
978 case ECONNABORTED:
979 break;
980
981 case EACCES:
982 default:
983 bad_error("libscf_snapshots_poststart", r);
984 }
985 }
986
987 MUTEX_UNLOCK(&inst->ri_lock);
988 }
989
990 MUTEX_UNLOCK(&instance_list.ril_lock);
991 }
992
993 /* ARGSUSED */
994 void *
restarter_post_fsminimal_thread(void * unused)995 restarter_post_fsminimal_thread(void *unused)
996 {
997 scf_handle_t *h;
998 int r;
999
1000 (void) pthread_setname_np(pthread_self(), "restarter_post_fsmin");
1001
1002 h = libscf_handle_create_bound_loop();
1003
1004 for (;;) {
1005 r = libscf_create_self(h);
1006 if (r == 0)
1007 break;
1008
1009 assert(r == ECONNABORTED);
1010 libscf_handle_rebind(h);
1011 }
1012
1013 restarter_take_pending_snapshots(h);
1014
1015 (void) scf_handle_unbind(h);
1016 scf_handle_destroy(h);
1017
1018 return (NULL);
1019 }
1020
1021 /*
1022 * int stop_instance()
1023 *
1024 * Stop the instance identified by the instance given as the second argument,
1025 * for the cause stated.
1026 *
1027 * Returns
1028 * 0 - success
1029 * -1 - inst is in transition
1030 */
1031 static int
stop_instance(scf_handle_t * local_handle,restarter_inst_t * inst,stop_cause_t cause)1032 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1033 stop_cause_t cause)
1034 {
1035 fork_info_t *info;
1036 const char *cp;
1037 int err;
1038 restarter_error_t re;
1039 restarter_str_t reason;
1040 restarter_instance_state_t new_state;
1041
1042 assert(MUTEX_HELD(&inst->ri_lock));
1043 assert(inst->ri_method_thread == 0);
1044
1045 switch (cause) {
1046 case RSTOP_EXIT:
1047 re = RERR_RESTART;
1048 reason = restarter_str_ct_ev_exit;
1049 cp = "all processes in service exited";
1050 break;
1051 case RSTOP_ERR_CFG:
1052 re = RERR_FAULT;
1053 reason = restarter_str_method_failed;
1054 cp = "service exited with a configuration error";
1055 break;
1056 case RSTOP_ERR_EXIT:
1057 re = RERR_RESTART;
1058 reason = restarter_str_ct_ev_exit;
1059 cp = "service exited with an error";
1060 break;
1061 case RSTOP_CORE:
1062 re = RERR_FAULT;
1063 reason = restarter_str_ct_ev_core;
1064 cp = "process dumped core";
1065 break;
1066 case RSTOP_SIGNAL:
1067 re = RERR_FAULT;
1068 reason = restarter_str_ct_ev_signal;
1069 cp = "process received fatal signal from outside the service";
1070 break;
1071 case RSTOP_HWERR:
1072 re = RERR_FAULT;
1073 reason = restarter_str_ct_ev_hwerr;
1074 cp = "process killed due to uncorrectable hardware error";
1075 break;
1076 case RSTOP_DEPENDENCY:
1077 re = RERR_RESTART;
1078 reason = restarter_str_dependency_activity;
1079 cp = "dependency activity requires stop";
1080 break;
1081 case RSTOP_DISABLE:
1082 re = RERR_RESTART;
1083 reason = restarter_str_disable_request;
1084 cp = "service disabled";
1085 break;
1086 case RSTOP_RESTART:
1087 re = RERR_RESTART;
1088 reason = restarter_str_restart_request;
1089 cp = "service restarting";
1090 break;
1091 default:
1092 #ifndef NDEBUG
1093 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1094 cause, __FILE__, __LINE__);
1095 #endif
1096 abort();
1097 }
1098
1099 /* Services in the disabled and maintenance state are ignored */
1100 if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1101 inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1102 log_framework(LOG_DEBUG,
1103 "%s: stop_instance -> is maint/disabled\n",
1104 inst->ri_i.i_fmri);
1105 return (0);
1106 }
1107
1108 /* Already stopped instances are left alone */
1109 if (instance_started(inst) == 0) {
1110 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1111 inst->ri_i.i_fmri);
1112 return (0);
1113 }
1114
1115 if (instance_in_transition(inst)) {
1116 /* requeue event by returning -1 */
1117 log_framework(LOG_DEBUG,
1118 "Restarter: Not stopping %s, in transition.\n",
1119 inst->ri_i.i_fmri);
1120 return (-1);
1121 }
1122
1123 log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1124
1125 log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1126 "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1127
1128 if (instance_is_wait_style(inst) &&
1129 (cause == RSTOP_EXIT ||
1130 cause == RSTOP_ERR_CFG ||
1131 cause == RSTOP_ERR_EXIT)) {
1132 /*
1133 * No need to stop instance, as child has exited; remove
1134 * contract and move the instance to the offline state.
1135 */
1136 switch (err = restarter_instance_update_states(local_handle,
1137 inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1138 reason)) {
1139 case 0:
1140 case ECONNRESET:
1141 break;
1142
1143 default:
1144 bad_error("restarter_instance_update_states", err);
1145 }
1146
1147 if (cause == RSTOP_ERR_EXIT) {
1148 /*
1149 * The RSTOP_ERR_EXIT cause is set via the
1150 * wait_thread -> wait_remove code path when we have
1151 * a "wait" style svc that exited with an error. If
1152 * the svc is failing too quickly, we throttle it so
1153 * that we don't restart it more than once/second.
1154 * Since we know we're running in the wait thread its
1155 * ok to throttle it right here.
1156 */
1157 (void) update_fault_count(inst, FAULT_COUNT_INCR);
1158 if (method_rate_critical(inst)) {
1159 log_instance(inst, B_TRUE, "Failing too "
1160 "quickly, throttling.");
1161 (void) sleep(WT_SVC_ERR_THROTTLE);
1162 }
1163 } else {
1164 (void) update_fault_count(inst, FAULT_COUNT_RESET);
1165 reset_start_times(inst);
1166 }
1167
1168 if (inst->ri_i.i_primary_ctid != 0) {
1169 inst->ri_m_inst =
1170 safe_scf_instance_create(local_handle);
1171 inst->ri_mi_deleted = B_FALSE;
1172
1173 libscf_reget_instance(inst);
1174 method_remove_contract(inst, B_TRUE, B_TRUE);
1175
1176 scf_instance_destroy(inst->ri_m_inst);
1177 inst->ri_m_inst = NULL;
1178 }
1179
1180 switch (err = restarter_instance_update_states(local_handle,
1181 inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1182 reason)) {
1183 case 0:
1184 case ECONNRESET:
1185 break;
1186
1187 default:
1188 bad_error("restarter_instance_update_states", err);
1189 }
1190
1191 if (cause != RSTOP_ERR_CFG)
1192 return (0);
1193 } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1194 /*
1195 * Stopping a wait service through means other than the pid
1196 * exiting should keep wait_thread() from restarting the
1197 * service, by removing it from the wait list.
1198 * We cannot remove it right now otherwise the process will
1199 * end up <defunct> so mark it to be ignored.
1200 */
1201 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1202 }
1203
1204 /*
1205 * There are some configuration errors which we cannot detect until we
1206 * try to run the method. For example, see exec_method() where the
1207 * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1208 * in several cases. If this happens for a "wait-style" svc,
1209 * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1210 * the configuration error and go into maintenance, even though it is
1211 * a "wait-style" svc.
1212 */
1213 if (cause == RSTOP_ERR_CFG)
1214 new_state = RESTARTER_STATE_MAINT;
1215 else
1216 new_state = inst->ri_i.i_enabled ?
1217 RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1218
1219 switch (err = restarter_instance_update_states(local_handle, inst,
1220 inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1221 case 0:
1222 case ECONNRESET:
1223 break;
1224
1225 default:
1226 bad_error("restarter_instance_update_states", err);
1227 }
1228
1229 info = startd_zalloc(sizeof (fork_info_t));
1230
1231 info->sf_id = inst->ri_id;
1232 info->sf_method_type = METHOD_STOP;
1233 info->sf_event_type = re;
1234 info->sf_reason = reason;
1235 inst->ri_method_thread = startd_thread_create(method_thread, info);
1236
1237 return (0);
1238 }
1239
1240 /*
1241 * Returns
1242 * ENOENT - fmri is not in instance_list
1243 * 0 - success
1244 * ECONNRESET - success, though handle was rebound
1245 * -1 - instance is in transition
1246 */
1247 int
stop_instance_fmri(scf_handle_t * h,const char * fmri,uint_t flags)1248 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1249 {
1250 restarter_inst_t *rip;
1251 int r;
1252
1253 rip = inst_lookup_by_name(fmri);
1254 if (rip == NULL)
1255 return (ENOENT);
1256
1257 r = stop_instance(h, rip, flags);
1258
1259 MUTEX_UNLOCK(&rip->ri_lock);
1260
1261 return (r);
1262 }
1263
1264 static void
unmaintain_instance(scf_handle_t * h,restarter_inst_t * rip,unmaint_cause_t cause)1265 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1266 unmaint_cause_t cause)
1267 {
1268 ctid_t ctid;
1269 scf_instance_t *inst;
1270 int r;
1271 uint_t tries = 0, msecs = ALLOC_DELAY;
1272 const char *cp;
1273 restarter_str_t reason;
1274
1275 assert(MUTEX_HELD(&rip->ri_lock));
1276
1277 if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1278 log_error(LOG_DEBUG, "Restarter: "
1279 "Ignoring maintenance off command because %s is not in the "
1280 "maintenance state.\n", rip->ri_i.i_fmri);
1281 return;
1282 }
1283
1284 switch (cause) {
1285 case RUNMAINT_CLEAR:
1286 cp = "clear requested";
1287 reason = restarter_str_clear_request;
1288 break;
1289 case RUNMAINT_DISABLE:
1290 cp = "disable requested";
1291 reason = restarter_str_disable_request;
1292 break;
1293 default:
1294 #ifndef NDEBUG
1295 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1296 cause, __FILE__, __LINE__);
1297 #endif
1298 abort();
1299 }
1300
1301 log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1302 cp);
1303 log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1304 "%s.\n", rip->ri_i.i_fmri, cp);
1305
1306 (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1307 RESTARTER_STATE_NONE, RERR_RESTART, reason);
1308
1309 /*
1310 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1311 * a primary contract.
1312 */
1313 if (rip->ri_i.i_primary_ctid == 0)
1314 return;
1315
1316 ctid = rip->ri_i.i_primary_ctid;
1317 contract_abandon(ctid);
1318 rip->ri_i.i_primary_ctid = 0;
1319
1320 rep_retry:
1321 switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1322 case 0:
1323 break;
1324
1325 case ECONNABORTED:
1326 libscf_handle_rebind(h);
1327 goto rep_retry;
1328
1329 case ENOENT:
1330 /* Must have been deleted. */
1331 return;
1332
1333 case EINVAL:
1334 case ENOTSUP:
1335 default:
1336 bad_error("libscf_handle_rebind", r);
1337 }
1338
1339 again:
1340 r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1341 switch (r) {
1342 case 0:
1343 break;
1344
1345 case ENOMEM:
1346 ++tries;
1347 if (tries < ALLOC_RETRY) {
1348 (void) poll(NULL, 0, msecs);
1349 msecs *= ALLOC_DELAY_MULT;
1350 goto again;
1351 }
1352
1353 uu_die("Insufficient memory.\n");
1354 /* NOTREACHED */
1355
1356 case ECONNABORTED:
1357 scf_instance_destroy(inst);
1358 libscf_handle_rebind(h);
1359 goto rep_retry;
1360
1361 case ECANCELED:
1362 break;
1363
1364 case EPERM:
1365 case EACCES:
1366 case EROFS:
1367 log_error(LOG_INFO,
1368 "Could not remove contract id %lu for %s (%s).\n", ctid,
1369 rip->ri_i.i_fmri, strerror(r));
1370 break;
1371
1372 case EINVAL:
1373 case EBADF:
1374 default:
1375 bad_error("restarter_remove_contract", r);
1376 }
1377
1378 scf_instance_destroy(inst);
1379 }
1380
1381 /*
1382 * enable_inst()
1383 * Set inst->ri_i.i_enabled. Expects 'e' to be _ENABLE, _DISABLE, or
1384 * _ADMIN_DISABLE. If the event is _ENABLE and inst is uninitialized or
1385 * disabled, move it to offline. If the event is _DISABLE or
1386 * _ADMIN_DISABLE, make sure inst will move to disabled.
1387 *
1388 * Returns
1389 * 0 - success
1390 * ECONNRESET - h was rebound
1391 */
1392 static int
enable_inst(scf_handle_t * h,restarter_inst_t * inst,restarter_instance_qentry_t * riq)1393 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1394 restarter_instance_qentry_t *riq)
1395 {
1396 restarter_instance_state_t state;
1397 restarter_event_type_t e = riq->riq_type;
1398 restarter_str_t reason = restarter_str_per_configuration;
1399 int r;
1400
1401 assert(MUTEX_HELD(&inst->ri_lock));
1402 assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1403 e == RESTARTER_EVENT_TYPE_DISABLE ||
1404 e == RESTARTER_EVENT_TYPE_ENABLE);
1405 assert(instance_in_transition(inst) == 0);
1406
1407 state = inst->ri_i.i_state;
1408
1409 if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1410 inst->ri_i.i_enabled = 1;
1411
1412 if (state == RESTARTER_STATE_UNINIT ||
1413 state == RESTARTER_STATE_DISABLED) {
1414 /*
1415 * B_FALSE: Don't log an error if the log_instance()
1416 * fails because it will fail on the miniroot before
1417 * install-discovery runs.
1418 */
1419 log_instance(inst, B_FALSE, "Enabled.");
1420 log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1421 inst->ri_i.i_fmri);
1422
1423 /*
1424 * If we are coming from DISABLED, it was obviously an
1425 * enable request. If we are coming from UNINIT, it may
1426 * have been a sevice in MAINT that was cleared.
1427 */
1428 if (riq->riq_reason == restarter_str_clear_request)
1429 reason = restarter_str_clear_request;
1430 else if (state == RESTARTER_STATE_DISABLED)
1431 reason = restarter_str_enable_request;
1432 (void) restarter_instance_update_states(h, inst,
1433 RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1434 RERR_NONE, reason);
1435 } else {
1436 log_framework(LOG_DEBUG, "Restarter: "
1437 "Not changing state of %s for enable command.\n",
1438 inst->ri_i.i_fmri);
1439 }
1440 } else {
1441 inst->ri_i.i_enabled = 0;
1442
1443 switch (state) {
1444 case RESTARTER_STATE_ONLINE:
1445 case RESTARTER_STATE_DEGRADED:
1446 r = stop_instance(h, inst, RSTOP_DISABLE);
1447 return (r == ECONNRESET ? 0 : r);
1448
1449 case RESTARTER_STATE_OFFLINE:
1450 case RESTARTER_STATE_UNINIT:
1451 if (inst->ri_i.i_primary_ctid != 0) {
1452 inst->ri_m_inst = safe_scf_instance_create(h);
1453 inst->ri_mi_deleted = B_FALSE;
1454
1455 libscf_reget_instance(inst);
1456 method_remove_contract(inst, B_TRUE, B_TRUE);
1457
1458 scf_instance_destroy(inst->ri_m_inst);
1459 }
1460 /* B_FALSE: See log_instance(..., "Enabled."); above */
1461 log_instance(inst, B_FALSE, "Disabled.");
1462 log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1463 inst->ri_i.i_fmri);
1464
1465 /*
1466 * If we are coming from OFFLINE, it was obviously a
1467 * disable request. But if we are coming from
1468 * UNINIT, it may have been a disable request for a
1469 * service in MAINT.
1470 */
1471 if (riq->riq_reason == restarter_str_disable_request ||
1472 state == RESTARTER_STATE_OFFLINE)
1473 reason = restarter_str_disable_request;
1474 (void) restarter_instance_update_states(h, inst,
1475 RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1476 RERR_RESTART, reason);
1477 return (0);
1478
1479 case RESTARTER_STATE_DISABLED:
1480 break;
1481
1482 case RESTARTER_STATE_MAINT:
1483 /*
1484 * We only want to pull the instance out of maintenance
1485 * if the disable is on adminstrative request. The
1486 * graph engine sends _DISABLE events whenever a
1487 * service isn't in the disabled state, and we don't
1488 * want to pull the service out of maintenance if,
1489 * for example, it is there due to a dependency cycle.
1490 */
1491 if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1492 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1493 break;
1494
1495 default:
1496 #ifndef NDEBUG
1497 (void) fprintf(stderr, "Restarter instance %s has "
1498 "unknown state %d.\n", inst->ri_i.i_fmri, state);
1499 #endif
1500 abort();
1501 }
1502 }
1503
1504 return (0);
1505 }
1506
1507 static void
start_instance(scf_handle_t * local_handle,restarter_inst_t * inst,int32_t reason)1508 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1509 int32_t reason)
1510 {
1511 fork_info_t *info;
1512 restarter_str_t new_reason;
1513
1514 assert(MUTEX_HELD(&inst->ri_lock));
1515 assert(instance_in_transition(inst) == 0);
1516 assert(inst->ri_method_thread == 0);
1517
1518 log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1519 inst->ri_i.i_fmri);
1520
1521 /*
1522 * We want to keep the original reason for restarts and clear actions
1523 */
1524 switch (reason) {
1525 case restarter_str_restart_request:
1526 case restarter_str_clear_request:
1527 new_reason = reason;
1528 break;
1529 default:
1530 new_reason = restarter_str_dependencies_satisfied;
1531 }
1532
1533 /* Services in the disabled and maintenance state are ignored */
1534 if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1535 inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1536 inst->ri_i.i_enabled == 0) {
1537 log_framework(LOG_DEBUG,
1538 "%s: start_instance -> is maint/disabled\n",
1539 inst->ri_i.i_fmri);
1540 return;
1541 }
1542
1543 /* Already started instances are left alone */
1544 if (instance_started(inst) == 1) {
1545 log_framework(LOG_DEBUG,
1546 "%s: start_instance -> is already started\n",
1547 inst->ri_i.i_fmri);
1548 return;
1549 }
1550
1551 log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1552
1553 (void) restarter_instance_update_states(local_handle, inst,
1554 inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1555
1556 info = startd_zalloc(sizeof (fork_info_t));
1557
1558 info->sf_id = inst->ri_id;
1559 info->sf_method_type = METHOD_START;
1560 info->sf_event_type = RERR_NONE;
1561 info->sf_reason = new_reason;
1562 inst->ri_method_thread = startd_thread_create(method_thread, info);
1563 }
1564
1565 static int
event_from_tty(scf_handle_t * h,restarter_inst_t * rip)1566 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1567 {
1568 scf_instance_t *inst;
1569 int ret = 0;
1570
1571 if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1572 return (-1);
1573
1574 ret = restarter_inst_ractions_from_tty(inst);
1575
1576 scf_instance_destroy(inst);
1577 return (ret);
1578 }
1579
1580 static boolean_t
restart_dump(scf_handle_t * h,restarter_inst_t * rip)1581 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1582 {
1583 scf_instance_t *inst;
1584 boolean_t ret = B_FALSE;
1585
1586 if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1587 return (-1);
1588
1589 if (restarter_inst_dump(inst) == 1)
1590 ret = B_TRUE;
1591
1592 scf_instance_destroy(inst);
1593 return (ret);
1594 }
1595
1596 static void
maintain_instance(scf_handle_t * h,restarter_inst_t * rip,int immediate,restarter_str_t reason)1597 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1598 restarter_str_t reason)
1599 {
1600 fork_info_t *info;
1601 scf_instance_t *scf_inst = NULL;
1602
1603 assert(MUTEX_HELD(&rip->ri_lock));
1604 assert(reason != restarter_str_none);
1605 assert(rip->ri_method_thread == 0);
1606
1607 log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1608 restarter_get_str_short(reason));
1609 log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1610 rip->ri_i.i_fmri, restarter_get_str_short(reason));
1611
1612 /* Services in the maintenance state are ignored */
1613 if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1614 log_framework(LOG_DEBUG,
1615 "%s: maintain_instance -> is already in maintenance\n",
1616 rip->ri_i.i_fmri);
1617 return;
1618 }
1619
1620 /*
1621 * If reason state is restarter_str_service_request and
1622 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1623 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1624 */
1625 if (reason == restarter_str_service_request &&
1626 libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1627 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1628 if (restarter_inst_set_aux_fmri(scf_inst))
1629 log_framework(LOG_DEBUG, "%s: "
1630 "restarter_inst_set_aux_fmri failed: ",
1631 rip->ri_i.i_fmri);
1632 } else {
1633 log_framework(LOG_DEBUG, "%s: "
1634 "restarter_inst_validate_ractions_aux_fmri "
1635 "failed: ", rip->ri_i.i_fmri);
1636
1637 if (restarter_inst_reset_aux_fmri(scf_inst))
1638 log_framework(LOG_DEBUG, "%s: "
1639 "restarter_inst_reset_aux_fmri failed: ",
1640 rip->ri_i.i_fmri);
1641 }
1642 scf_instance_destroy(scf_inst);
1643 }
1644
1645 if (immediate || !instance_started(rip)) {
1646 if (rip->ri_i.i_primary_ctid != 0) {
1647 rip->ri_m_inst = safe_scf_instance_create(h);
1648 rip->ri_mi_deleted = B_FALSE;
1649
1650 libscf_reget_instance(rip);
1651 method_remove_contract(rip, B_TRUE, B_TRUE);
1652
1653 scf_instance_destroy(rip->ri_m_inst);
1654 }
1655
1656 (void) restarter_instance_update_states(h, rip,
1657 RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1658 reason);
1659 return;
1660 }
1661
1662 (void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1663 RESTARTER_STATE_MAINT, RERR_NONE, reason);
1664
1665 log_transition(rip, MAINT_REQUESTED);
1666
1667 info = startd_zalloc(sizeof (*info));
1668 info->sf_id = rip->ri_id;
1669 info->sf_method_type = METHOD_STOP;
1670 info->sf_event_type = RERR_RESTART;
1671 info->sf_reason = reason;
1672 rip->ri_method_thread = startd_thread_create(method_thread, info);
1673 }
1674
/*
 * Handle a refresh request for an instance.
 *
 * Re-reads the startd properties (rip->ri_flags and rip->ri_utmpx_prefix)
 * using the snapshot returned by libscf_get_running_snapshot() and, if the
 * instance is currently started, launches a method thread with
 * METHOD_REFRESH.  If the instance can no longer be found in the repository
 * (ENOENT), nothing further is done.
 *
 * The caller must hold rip->ri_lock.
 */
static void
refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
{
	scf_instance_t *inst;
	scf_snapshot_t *snap;
	fork_info_t *info;
	int r;

	assert(MUTEX_HELD(&rip->ri_lock));

	log_instance(rip, B_TRUE, "Rereading configuration.");
	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
	    rip->ri_i.i_fmri);

	/* Restart point after the repository handle has been rebound. */
rep_retry:
	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
	switch (r) {
	case 0:
		break;

	case ECONNABORTED:
		libscf_handle_rebind(h);
		goto rep_retry;

	case ENOENT:
		/* Must have been deleted. */
		return;

	case EINVAL:
	case ENOTSUP:
	default:
		bad_error("libscf_fmri_get_instance", r);
	}

	snap = libscf_get_running_snapshot(inst);

	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
	    &rip->ri_utmpx_prefix);
	switch (r) {
	case 0:
		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
		break;

	case ECONNABORTED:
		/* Release both objects before rebinding and starting over. */
		scf_instance_destroy(inst);
		scf_snapshot_destroy(snap);
		libscf_handle_rebind(h);
		goto rep_retry;

	case ECANCELED:
	case ENOENT:
		/* Succeed in anticipation of REMOVE_INSTANCE. */
		break;

	default:
		bad_error("libscf_get_startd_properties", r);
	}

	if (instance_started(rip)) {
		/* Refresh does not change the state. */
		(void) restarter_instance_update_states(h, rip,
		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
		    restarter_str_refresh);

		/* Describe the refresh for the method thread and launch it. */
		info = startd_zalloc(sizeof (*info));
		info->sf_id = rip->ri_id;
		info->sf_method_type = METHOD_REFRESH;
		info->sf_event_type = RERR_REFRESH;
		info->sf_reason = 0;

		assert(rip->ri_method_thread == 0);
		rip->ri_method_thread =
		    startd_thread_create(method_thread, info);
	}

	scf_snapshot_destroy(snap);
	scf_instance_destroy(inst);
}
1754
1755 static void
degrade_instance(scf_handle_t * h,restarter_inst_t * rip,restarter_str_t reason)1756 degrade_instance(scf_handle_t *h, restarter_inst_t *rip, restarter_str_t reason)
1757 {
1758 scf_instance_t *scf_inst = NULL;
1759
1760 assert(MUTEX_HELD(&rip->ri_lock));
1761
1762 log_instance(rip, B_TRUE, "Marking degraded due to %s.",
1763 restarter_get_str_short(reason));
1764 log_framework(LOG_DEBUG, "%s: marking degraded due to %s.\n",
1765 rip->ri_i.i_fmri, restarter_get_str_short(reason));
1766
1767 /* Services that aren't online are ignored */
1768 if (rip->ri_i.i_state != RESTARTER_STATE_ONLINE) {
1769 log_framework(LOG_DEBUG,
1770 "%s: degrade_instance -> is not online\n",
1771 rip->ri_i.i_fmri);
1772 return;
1773 }
1774
1775 /*
1776 * If reason state is restarter_str_service_request and
1777 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1778 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1779 */
1780 if (reason == restarter_str_service_request &&
1781 libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1782 if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1783 if (restarter_inst_set_aux_fmri(scf_inst)) {
1784 log_framework(LOG_DEBUG, "%s: "
1785 "restarter_inst_set_aux_fmri failed: ",
1786 rip->ri_i.i_fmri);
1787 }
1788 } else {
1789 log_framework(LOG_DEBUG, "%s: "
1790 "restarter_inst_validate_ractions_aux_fmri "
1791 "failed: ", rip->ri_i.i_fmri);
1792
1793 if (restarter_inst_reset_aux_fmri(scf_inst)) {
1794 log_framework(LOG_DEBUG, "%s: "
1795 "restarter_inst_reset_aux_fmri failed: ",
1796 rip->ri_i.i_fmri);
1797 }
1798 }
1799 scf_instance_destroy(scf_inst);
1800 }
1801
1802 (void) restarter_instance_update_states(h, rip,
1803 RESTARTER_STATE_DEGRADED, RESTARTER_STATE_NONE, RERR_NONE, reason);
1804
1805 log_transition(rip, DEGRADE_REQUESTED);
1806 }
1807
/*
 * Printable names for restarter events, indexed by event type.
 *
 * Note that the ordering of these must match the restarter event types defined
 * in librestart.h
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE",
	"STOP_RESET",
	"ADMIN_DEGRADE_IMMEDIATE",
	"ADMIN_RESTORE"
};
1819
/*
 * void *restarter_process_events()
 *
 *   Called in a separate thread to process the events on an instance's
 *   queue.  Empties the queue completely, and tries to keep the thread
 *   around for a little while after the queue is empty to save on
 *   startup costs.
 *
 *   arg is the instance's FMRI as a heap-allocated string; it is freed
 *   before the thread returns.  Always returns NULL.
 */
static void *
restarter_process_events(void *arg)
{
	scf_handle_t *h;
	restarter_instance_qentry_t *event;
	restarter_inst_t *rip;
	char *fmri = (char *)arg;
	struct timespec to;

	(void) pthread_setname_np(pthread_self(), "restarter_process_events");

	assert(fmri != NULL);

	h = libscf_handle_create_bound_loop();

	/* grab the queue lock */
	rip = inst_lookup_queue(fmri);
	if (rip == NULL)
		goto out;

again:

	/*
	 * The event stays on the queue while it is processed; it is only
	 * removed (at "cont") once the queue lock has been reacquired.
	 */
	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
		restarter_inst_t *inst;

		/* drop the queue lock */
		MUTEX_UNLOCK(&rip->ri_queue_lock);

		/*
		 * Grab the inst lock -- this waits until any outstanding
		 * method finishes running.
		 */
		inst = inst_lookup_by_name(fmri);
		if (inst == NULL) {
			/* Getting deleted in the middle isn't an error. */
			goto cont;
		}

		assert(instance_in_transition(inst) == 0);

		/* process the event */
		switch (event->riq_type) {
		case RESTARTER_EVENT_TYPE_ENABLE:
		case RESTARTER_EVENT_TYPE_DISABLE:
			(void) enable_inst(h, inst, event);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
			if (enable_inst(h, inst, event) == 0)
				reset_start_times(inst);
			break;

		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
			/*
			 * inst is not unlocked here: the jump to "cont"
			 * bypasses the MUTEX_UNLOCK() below.  Presumably
			 * restarter_delete_inst() disposes of the instance
			 * and its lock -- confirm against its definition.
			 */
			restarter_delete_inst(inst);
			inst = NULL;
			goto cont;

		case RESTARTER_EVENT_TYPE_STOP_RESET:
			reset_start_times(inst);
			/* FALLTHROUGH */
		case RESTARTER_EVENT_TYPE_STOP:
			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
			break;

		case RESTARTER_EVENT_TYPE_START:
			start_instance(h, inst, event->riq_reason);
			break;

		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
			maintain_instance(h, inst, 0,
			    restarter_str_dependency_cycle);
			break;

		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			maintain_instance(h, inst, 0,
			    restarter_str_invalid_dependency);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
			/* 0 from event_from_tty() selects "service request". */
			if (event_from_tty(h, inst) == 0)
				maintain_instance(h, inst, 0,
				    restarter_str_service_request);
			else
				maintain_instance(h, inst, 0,
				    restarter_str_administrative_request);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
			if (event_from_tty(h, inst) == 0)
				maintain_instance(h, inst, 1,
				    restarter_str_service_request);
			else
				maintain_instance(h, inst, 1,
				    restarter_str_administrative_request);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
			reset_start_times(inst);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
			refresh_instance(h, inst);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADE_IMMEDIATE:
			if (event_from_tty(h, inst) == 0) {
				degrade_instance(h, inst,
				    restarter_str_service_request);
			} else {
				degrade_instance(h, inst,
				    restarter_str_administrative_request);
			}
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_RESTORE:
		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
			if (!instance_started(inst)) {
				log_framework(LOG_DEBUG, "Restarter: "
				    "Not restarting %s; not running.\n",
				    inst->ri_i.i_fmri);
			} else {
				/*
				 * Stop the instance.  If it can be restarted,
				 * the graph engine will send a new event.
				 */
				if (restart_dump(h, inst)) {
					(void) contract_kill(
					    inst->ri_i.i_primary_ctid, SIGABRT,
					    inst->ri_i.i_fmri);
				} else if (stop_instance(h, inst,
				    RSTOP_RESTART) == 0) {
					reset_start_times(inst);
				}
			}
			break;

		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
		default:
			/* ADD_INSTANCE is never expected on this queue. */
#ifndef NDEBUG
			uu_warn("%s:%d: Bad restarter event %d.  "
			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
#endif
			abort();
		}

		assert(inst != NULL);
		MUTEX_UNLOCK(&inst->ri_lock);

cont:
		/* grab the queue lock */
		rip = inst_lookup_queue(fmri);
		if (rip == NULL)
			goto out;

		/* delete the event */
		uu_list_remove(rip->ri_queue, event);
		startd_free(event, sizeof (restarter_instance_qentry_t));
	}

	assert(rip != NULL);

	/*
	 * Try to preserve the thread for a little while for future use.
	 */
	to.tv_sec = 3;
	to.tv_nsec = 0;
	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
	    &rip->ri_queue_lock, &to);

	/* Whatever ended the wait, resume if an event has been queued. */
	if (uu_list_first(rip->ri_queue) != NULL)
		goto again;

	/* Clear the thread id so that a later event starts a new thread. */
	rip->ri_queue_thread = 0;
	MUTEX_UNLOCK(&rip->ri_queue_lock);

out:
	(void) scf_handle_unbind(h);
	scf_handle_destroy(h);
	free(fmri);
	return (NULL);
}
2011
2012 static int
is_admin_event(restarter_event_type_t t)2013 is_admin_event(restarter_event_type_t t)
2014 {
2015 switch (t) {
2016 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
2017 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
2018 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
2019 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
2020 case RESTARTER_EVENT_TYPE_ADMIN_RESTORE:
2021 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
2022 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADE_IMMEDIATE:
2023 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
2024 return (1);
2025 default:
2026 return (0);
2027 }
2028 }
2029
2030 static void
restarter_queue_event(restarter_inst_t * ri,restarter_protocol_event_t * e)2031 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
2032 {
2033 restarter_instance_qentry_t *qe;
2034 int r;
2035
2036 assert(MUTEX_HELD(&ri->ri_queue_lock));
2037 assert(!MUTEX_HELD(&ri->ri_lock));
2038
2039 qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
2040 qe->riq_type = e->rpe_type;
2041 qe->riq_reason = e->rpe_reason;
2042
2043 uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
2044 r = uu_list_insert_before(ri->ri_queue, NULL, qe);
2045 assert(r == 0);
2046 }
2047
/*
 * void *restarter_event_thread()
 *
 *  Handle incoming graph events by placing them on a per-instance
 *  queue.  We can't lock the main part of the instance structure, so
 *  just modify the separately locked event queue portion.
 *
 *  The thread loops forever: it sleeps on ru->restarter_update_cv until
 *  woken, then drains every pending protocol event.  The argument is unused.
 */
/*ARGSUSED*/
static void *
restarter_event_thread(void *unused)
{
	scf_handle_t *h;

	(void) pthread_setname_np(pthread_self(), "restarter_event");

	/*
	 * This is a new thread, and thus, gets its own handle
	 * to the repository.
	 */
	h = libscf_handle_create_bound_loop();

	MUTEX_LOCK(&ru->restarter_update_lock);

	/*CONSTCOND*/
	while (1) {
		restarter_protocol_event_t *e;

		/* Sleep until woken; the loop guards against stray wakeups. */
		while (ru->restarter_update_wakeup == 0)
			(void) pthread_cond_wait(&ru->restarter_update_cv,
			    &ru->restarter_update_lock);

		ru->restarter_update_wakeup = 0;

		/*
		 * restarter_update_lock is held for each dequeue and dropped
		 * while the individual event is handled.
		 */
		while ((e = restarter_event_dequeue()) != NULL) {
			restarter_inst_t *rip;
			char *fmri;

			MUTEX_UNLOCK(&ru->restarter_update_lock);

			/*
			 * ADD_INSTANCE is special: there's likely no
			 * instance structure yet, so we need to handle the
			 * addition synchronously.
			 */
			switch (e->rpe_type) {
			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
				if (restarter_insert_inst(h, e->rpe_inst) != 0)
					log_error(LOG_INFO, "Restarter: "
					    "Could not add %s.\n", e->rpe_inst);

				/*
				 * One fewer instance left to load; wake the
				 * waiters on st_load_cv when the count
				 * reaches zero.
				 */
				MUTEX_LOCK(&st->st_load_lock);
				if (--st->st_load_instances == 0)
					(void) pthread_cond_broadcast(
					    &st->st_load_cv);
				MUTEX_UNLOCK(&st->st_load_lock);

				goto nolookup;
			}

			/*
			 * Lookup the instance, locking only the event queue.
			 * Can't grab ri_lock here because it might be held
			 * by a long-running method.
			 */
			rip = inst_lookup_queue(e->rpe_inst);
			if (rip == NULL) {
				/*
				 * NOTE(review): event_names[] is indexed by
				 * rpe_type without a bounds check; this
				 * assumes the type is always a valid event
				 * type -- confirm against the producers.
				 */
				log_error(LOG_INFO, "Restarter: "
				    "Ignoring %s command for unknown service "
				    "%s.\n", event_names[e->rpe_type],
				    e->rpe_inst);
				goto nolookup;
			}

			/* Keep ADMIN events from filling up the queue. */
			if (is_admin_event(e->rpe_type) &&
			    uu_list_numnodes(rip->ri_queue) >
			    RINST_QUEUE_THRESHOLD) {
				/*
				 * NOTE(review): rip is used for logging after
				 * the queue lock has been dropped -- confirm
				 * that this is safe against concurrent
				 * removal of the instance.
				 */
				MUTEX_UNLOCK(&rip->ri_queue_lock);
				log_instance(rip, B_TRUE, "Instance event "
				    "queue overflow.  Dropping administrative "
				    "request.");
				log_framework(LOG_DEBUG, "%s: Instance event "
				    "queue overflow.  Dropping administrative "
				    "request.\n", rip->ri_i.i_fmri);
				goto nolookup;
			}

			/* Now add the event to the instance queue. */
			restarter_queue_event(rip, e);

			if (rip->ri_queue_thread == 0) {
				/*
				 * Start a thread if one isn't already
				 * running.  The thread receives its own copy
				 * of the FMRI and frees it when it exits.
				 */
				fmri = safe_strdup(e->rpe_inst);
				rip->ri_queue_thread = startd_thread_create(
				    restarter_process_events, (void *)fmri);
			} else {
				/*
				 * Signal the existing thread that there's
				 * a new event.
				 */
				(void) pthread_cond_broadcast(
				    &rip->ri_queue_cv);
			}

			MUTEX_UNLOCK(&rip->ri_queue_lock);
nolookup:
			restarter_event_release(e);

			/* Retake the update lock before the next dequeue. */
			MUTEX_LOCK(&ru->restarter_update_lock);
		}
	}
}
2163
2164 static restarter_inst_t *
contract_to_inst(ctid_t ctid)2165 contract_to_inst(ctid_t ctid)
2166 {
2167 restarter_inst_t *inst;
2168 int id;
2169
2170 id = lookup_inst_by_contract(ctid);
2171 if (id == -1)
2172 return (NULL);
2173
2174 inst = inst_lookup_by_id(id);
2175 if (inst != NULL) {
2176 /*
2177 * Since ri_lock isn't held by the contract id lookup, this
2178 * instance may have been restarted and now be in a new
2179 * contract, making the old contract no longer valid for this
2180 * instance.
2181 */
2182 if (ctid != inst->ri_i.i_primary_ctid) {
2183 MUTEX_UNLOCK(&inst->ri_lock);
2184 inst = NULL;
2185 }
2186 }
2187 return (inst);
2188 }
2189
2190 /*
2191 * void contract_action()
2192 * Take action on contract events.
2193 */
2194 static void
contract_action(scf_handle_t * h,restarter_inst_t * inst,ctid_t id,uint32_t type)2195 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2196 uint32_t type)
2197 {
2198 const char *fmri = inst->ri_i.i_fmri;
2199
2200 assert(MUTEX_HELD(&inst->ri_lock));
2201
2202 /*
2203 * If startd has stopped this contract, there is no need to
2204 * stop it again.
2205 */
2206 if (inst->ri_i.i_primary_ctid > 0 &&
2207 inst->ri_i.i_primary_ctid_stopped)
2208 return;
2209
2210 if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2211 | CT_PR_EV_HWERR)) == 0) {
2212 /*
2213 * There shouldn't be other events, since that's not how we set
2214 * the terms. Thus, just log an error and drive on.
2215 */
2216 log_framework(LOG_NOTICE,
2217 "%s: contract %ld received unexpected critical event "
2218 "(%d)\n", fmri, id, type);
2219 return;
2220 }
2221
2222 assert(instance_in_transition(inst) == 0);
2223
2224 if (instance_is_wait_style(inst)) {
2225 /*
2226 * We ignore all events; if they impact the
2227 * process we're monitoring, then the
2228 * wait_thread will stop the instance.
2229 */
2230 log_framework(LOG_DEBUG,
2231 "%s: ignoring contract event on wait-style service\n",
2232 fmri);
2233 } else {
2234 /*
2235 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2236 */
2237 switch (type) {
2238 case CT_PR_EV_EMPTY:
2239 (void) stop_instance(h, inst, RSTOP_EXIT);
2240 break;
2241 case CT_PR_EV_CORE:
2242 (void) stop_instance(h, inst, RSTOP_CORE);
2243 break;
2244 case CT_PR_EV_SIGNAL:
2245 (void) stop_instance(h, inst, RSTOP_SIGNAL);
2246 break;
2247 case CT_PR_EV_HWERR:
2248 (void) stop_instance(h, inst, RSTOP_HWERR);
2249 break;
2250 }
2251 }
2252 }
2253
/*
 * void *restarter_contracts_event_thread(void *)
 *   Listens to the process contract bundle for critical events, taking action
 *   on events from contracts we know we are responsible for.
 *
 *   The thread first waits for the graph load to complete, then binds its
 *   own repository handle and opens the process contract bundle.  For each
 *   critical event it:
 *     - reads the contract's status to obtain its cookie;
 *     - skips (without acknowledging) events carrying CONFIGD_COOKIE;
 *     - maps the contract id to an instance via contract_to_inst() and, if
 *       one is found, calls contract_action() with ri_lock held;
 *     - acknowledges the event through the contract's ctl file.
 *   Events whose status file cannot be opened or read are dropped without
 *   being acknowledged.  The loop never exits.
 */
/*ARGSUSED*/
static void *
restarter_contracts_event_thread(void *unused)
{
	int fd, err;
	scf_handle_t *local_handle;

	(void) pthread_setname_np(pthread_self(), "restarter_contracts_event");

	/*
	 * Await graph load completion.  That is, stop here, until we've scanned
	 * the repository for contract - instance associations.
	 */
	MUTEX_LOCK(&st->st_load_lock);
	while (!(st->st_load_complete && st->st_load_instances == 0))
		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
	MUTEX_UNLOCK(&st->st_load_lock);

	/*
	 * This is a new thread, and thus, gets its own handle
	 * to the repository.
	 */
	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
		uu_die("Unable to bind a new repository handle: %s\n",
		    scf_strerror(scf_error()));

	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
	if (fd == -1)
		uu_die("process bundle open failed");

	/*
	 * Make sure we get all events (including those generated by configd
	 * before this thread was started).
	 */
	err = ct_event_reset(fd);
	assert(err == 0);

	for (;;) {
		int efd, sfd;
		ct_evthdl_t ev;
		uint32_t type;
		ctevid_t evid;
		ct_stathdl_t status;
		ctid_t ctid;
		restarter_inst_t *inst;
		uint64_t cookie;

		/*
		 * A non-zero return is an errno value; log it and try the
		 * read again.  No event handle is freed on this path.
		 */
		if (err = ct_event_read_critical(fd, &ev)) {
			log_error(LOG_WARNING,
			    "Error reading next contract event: %s",
			    strerror(err));
			continue;
		}

		evid = ct_event_get_evid(ev);
		ctid = ct_event_get_ctid(ev);
		type = ct_event_get_type(ev);

		/*
		 * Fetch cookie.  If the status file cannot be opened, the
		 * event is dropped without being acknowledged.
		 */
		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
		    < 0) {
			ct_event_free(ev);
			continue;
		}

		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
			log_framework(LOG_WARNING, "Could not get status for "
			    "contract %ld: %s\n", ctid, strerror(err));

			startd_close(sfd);
			ct_event_free(ev);
			continue;
		}

		cookie = ct_status_get_cookie(status);

		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
		    "cookie %lld\n", type, ctid, cookie);

		/* The cookie has been copied out; release the status. */
		ct_status_free(status);

		startd_close(sfd);

		/*
		 * svc.configd(8) restart handling is performed by the
		 * fork_configd_thread.  We don't acknowledge, as that thread
		 * will do so.
		 */
		if (cookie == CONFIGD_COOKIE) {
			ct_event_free(ev);
			continue;
		}

		inst = NULL;
		if (storing_contract != 0 &&
		    (inst = contract_to_inst(ctid)) == NULL) {
			/*
			 * This can happen for two reasons:
			 * - method_run() has not yet stored the
			 *   contract into the internal hash table.
			 * - we receive an EMPTY event for an abandoned
			 *   contract.
			 * If there is any contract in the process of
			 * being stored into the hash table then re-read
			 * the event later.
			 */
			log_framework(LOG_DEBUG,
			    "Reset event %d for unknown "
			    "contract id %ld\n", type, ctid);

			/* don't go too fast */
			(void) poll(NULL, 0, 100);

			/*
			 * Reset the bundle's event queue so this
			 * unacknowledged event is delivered again.
			 */
			(void) ct_event_reset(fd);
			ct_event_free(ev);
			continue;
		}

		/*
		 * Do not call contract_to_inst() again if first
		 * call succeeded.  A non-NULL inst is held with ri_lock
		 * locked until the unlock below.
		 */
		if (inst == NULL)
			inst = contract_to_inst(ctid);
		if (inst == NULL) {
			/*
			 * This can happen if we receive an EMPTY
			 * event for an abandoned contract.
			 */
			log_framework(LOG_DEBUG,
			    "Received event %d for unknown contract id "
			    "%ld\n", type, ctid);
		} else {
			log_framework(LOG_DEBUG,
			    "Received event %d for contract id "
			    "%ld (%s)\n", type, ctid,
			    inst->ri_i.i_fmri);

			contract_action(local_handle, inst, ctid, type);

			MUTEX_UNLOCK(&inst->ri_lock);
		}

		/*
		 * Acknowledge the event whether or not an instance was
		 * found.  If the ctl file cannot be opened, the event is
		 * left unacknowledged.
		 */
		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
		    O_WRONLY);
		if (efd != -1) {
			(void) ct_ctl_ack(efd, evid);
			startd_close(efd);
		}

		ct_event_free(ev);

	}

	/*NOTREACHED*/
	return (NULL);
}
2416
/*
 * Timeout queue, processed by restarter_timeouts_event_thread().
 *
 * timeouts holds the queue itself (a list sorted by expiry time, guarded by
 * tq_lock); timeout_pool is the list pool its entries are drawn from.  Both
 * are set up by timeout_init().
 */
timeout_queue_t *timeouts;
static uu_list_pool_t *timeout_pool;

/*
 * Wakeup channel between timeout_insert() and the timeouts thread.
 * timeout_insert() sets tu_wakeup and broadcasts tu_cv after queueing an
 * entry; the timeouts thread sleeps on tu_cv while the queue is empty and
 * clears tu_wakeup when it wakes.
 */
typedef struct timeout_update {
	pthread_mutex_t tu_lock;	/* held around every tu_wakeup access */
	pthread_cond_t tu_cv;		/* signalled when tu_wakeup is set */
	int tu_wakeup;			/* non-zero: a timeout was queued */
} timeout_update_t;

/* The single wakeup channel, allocated by timeout_init(). */
timeout_update_t *tu;
2430
/*
 * FMRIs of the instances for which is_timeout_ovr() reports a timeout
 * override ("Using infinite timeout").  The list is NULL-terminated and is
 * matched by exact string comparison against the instance FMRI.
 */
static const char *timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2444
2445 int
is_timeout_ovr(restarter_inst_t * inst)2446 is_timeout_ovr(restarter_inst_t *inst)
2447 {
2448 int i;
2449
2450 for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2451 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2452 log_instance(inst, B_TRUE, "Timeout override by "
2453 "svc.startd. Using infinite timeout.");
2454 return (1);
2455 }
2456 }
2457
2458 return (0);
2459 }
2460
2461 /*ARGSUSED*/
2462 static int
timeout_compare(const void * lc_arg,const void * rc_arg,void * private)2463 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2464 {
2465 hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2466 hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2467
2468 if (t1 > t2)
2469 return (1);
2470 else if (t1 < t2)
2471 return (-1);
2472 return (0);
2473 }
2474
2475 void
timeout_init()2476 timeout_init()
2477 {
2478 timeouts = startd_zalloc(sizeof (timeout_queue_t));
2479
2480 (void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2481
2482 timeout_pool = startd_list_pool_create("timeouts",
2483 sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2484 timeout_compare, UU_LIST_POOL_DEBUG);
2485 assert(timeout_pool != NULL);
2486
2487 timeouts->tq_list = startd_list_create(timeout_pool,
2488 timeouts, UU_LIST_SORTED);
2489 assert(timeouts->tq_list != NULL);
2490
2491 tu = startd_zalloc(sizeof (timeout_update_t));
2492 (void) pthread_cond_init(&tu->tu_cv, NULL);
2493 (void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2494 }
2495
2496 void
timeout_insert(restarter_inst_t * inst,ctid_t cid,uint64_t timeout_sec)2497 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2498 {
2499 hrtime_t now, timeout;
2500 timeout_entry_t *entry;
2501 uu_list_index_t idx;
2502
2503 assert(MUTEX_HELD(&inst->ri_lock));
2504
2505 now = gethrtime();
2506
2507 /*
2508 * If we overflow LLONG_MAX, we're never timing out anyways, so
2509 * just return.
2510 */
2511 if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2512 log_instance(inst, B_TRUE, "timeout_seconds too large, "
2513 "treating as infinite.");
2514 return;
2515 }
2516
2517 /* hrtime is in nanoseconds. Convert timeout_sec. */
2518 timeout = now + (timeout_sec * 1000000000LL);
2519
2520 entry = startd_alloc(sizeof (timeout_entry_t));
2521 entry->te_timeout = timeout;
2522 entry->te_ctid = cid;
2523 entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2524 entry->te_logstem = safe_strdup(inst->ri_logstem);
2525 entry->te_fired = 0;
2526 /* Insert the calculated timeout time onto the queue. */
2527 MUTEX_LOCK(&timeouts->tq_lock);
2528 (void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2529 uu_list_node_init(entry, &entry->te_link, timeout_pool);
2530 uu_list_insert(timeouts->tq_list, entry, idx);
2531 MUTEX_UNLOCK(&timeouts->tq_lock);
2532
2533 assert(inst->ri_timeout == NULL);
2534 inst->ri_timeout = entry;
2535
2536 MUTEX_LOCK(&tu->tu_lock);
2537 tu->tu_wakeup = 1;
2538 (void) pthread_cond_broadcast(&tu->tu_cv);
2539 MUTEX_UNLOCK(&tu->tu_lock);
2540 }
2541
2542
2543 void
timeout_remove(restarter_inst_t * inst,ctid_t cid)2544 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2545 {
2546 assert(MUTEX_HELD(&inst->ri_lock));
2547
2548 if (inst->ri_timeout == NULL)
2549 return;
2550
2551 assert(inst->ri_timeout->te_ctid == cid);
2552
2553 MUTEX_LOCK(&timeouts->tq_lock);
2554 uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2555 MUTEX_UNLOCK(&timeouts->tq_lock);
2556
2557 free(inst->ri_timeout->te_fmri);
2558 free(inst->ri_timeout->te_logstem);
2559 startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2560 inst->ri_timeout = NULL;
2561 }
2562
2563 static int
timeout_now()2564 timeout_now()
2565 {
2566 timeout_entry_t *e;
2567 hrtime_t now;
2568 int ret;
2569
2570 now = gethrtime();
2571
2572 /*
2573 * Walk through the (sorted) timeouts list. While the timeout
2574 * at the head of the list is <= the current time, kill the
2575 * method.
2576 */
2577 MUTEX_LOCK(&timeouts->tq_lock);
2578
2579 for (e = uu_list_first(timeouts->tq_list);
2580 e != NULL && e->te_timeout <= now;
2581 e = uu_list_next(timeouts->tq_list, e)) {
2582 log_framework(LOG_WARNING, "%s: Method or service exit timed "
2583 "out. Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2584 log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2585 "Method or service exit timed out. Killing contract %ld.",
2586 e->te_ctid);
2587 e->te_fired = 1;
2588 (void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2589 }
2590
2591 if (uu_list_numnodes(timeouts->tq_list) > 0)
2592 ret = 0;
2593 else
2594 ret = -1;
2595
2596 MUTEX_UNLOCK(&timeouts->tq_lock);
2597
2598 return (ret);
2599 }
2600
2601 /*
2602 * void *restarter_timeouts_event_thread(void *)
2603 * Responsible for monitoring the method timeouts. This thread must
2604 * be started before any methods are called.
2605 */
2606 /*ARGSUSED*/
2607 static void *
restarter_timeouts_event_thread(void * unused)2608 restarter_timeouts_event_thread(void *unused)
2609 {
2610 /*
2611 * Timeouts are entered on a priority queue, which is processed by
2612 * this thread. As timeouts are specified in seconds, we'll do
2613 * the necessary processing every second, as long as the queue
2614 * is not empty.
2615 */
2616
2617 (void) pthread_setname_np(pthread_self(), "restarter_timeouts_event");
2618
2619 /*CONSTCOND*/
2620 while (1) {
2621 /*
2622 * As long as the timeout list isn't empty, process it
2623 * every second.
2624 */
2625 if (timeout_now() == 0) {
2626 (void) sleep(1);
2627 continue;
2628 }
2629
2630 /* The list is empty, wait until we have more timeouts. */
2631 MUTEX_LOCK(&tu->tu_lock);
2632
2633 while (tu->tu_wakeup == 0)
2634 (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2635
2636 tu->tu_wakeup = 0;
2637 MUTEX_UNLOCK(&tu->tu_lock);
2638 }
2639
2640 return (NULL);
2641 }
2642
2643 void
restarter_start()2644 restarter_start()
2645 {
2646 (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2647 (void) startd_thread_create(restarter_event_thread, NULL);
2648 (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2649 (void) startd_thread_create(wait_thread, NULL);
2650 }
2651
2652
2653 void
restarter_init()2654 restarter_init()
2655 {
2656 restarter_instance_pool = startd_list_pool_create("restarter_instances",
2657 sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2658 ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2659 (void) memset(&instance_list, 0, sizeof (instance_list));
2660
2661 (void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2662 instance_list.ril_instance_list = startd_list_create(
2663 restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2664
2665 restarter_queue_pool = startd_list_pool_create(
2666 "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2667 offsetof(restarter_instance_qentry_t, riq_link), NULL,
2668 UU_LIST_POOL_DEBUG);
2669
2670 contract_list_pool = startd_list_pool_create(
2671 "contract_list", sizeof (contract_entry_t),
2672 offsetof(contract_entry_t, ce_link), NULL,
2673 UU_LIST_POOL_DEBUG);
2674 contract_hash_init();
2675
2676 log_framework(LOG_DEBUG, "Initialized restarter\n");
2677 }
2678