1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2019 Joyent, Inc.
25 */
26
27 /*
28 * restarter.c - service manipulation
29 *
30 * This component manages services whose restarter is svc.startd, the standard
31 * restarter. It translates restarter protocol events from the graph engine
32 * into actions on processes, as a delegated restarter would do.
33 *
34 * The master restarter manages a number of always-running threads:
35 * - restarter event thread: events from the graph engine
36 * - timeout thread: thread to fire queued timeouts
37 * - contract thread: thread to handle contract events
38 * - wait thread: thread to handle wait-based services
39 *
40 * The other threads are created as-needed:
41 * - per-instance method threads
42 * - per-instance event processing threads
43 *
44 * The interaction of all threads must result in the following conditions
45 * being satisfied (on a per-instance basis):
46 * - restarter events must be processed in order
47 * - method execution must be serialized
48 * - instance delete must be held until outstanding methods are complete
49 * - contract events shouldn't be processed while a method is running
50 * - timeouts should fire even when a method is running
51 *
52 * Service instances are represented by restarter_inst_t's and are kept in the
53 * instance_list list.
54 *
55 * Service States
56 * The current state of a service instance is kept in
57 * restarter_inst_t->ri_i.i_state. If transition to a new state could take
58 * some time, then before we effect the transition we set
59 * restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60 * rotate i_next_state to i_state and set i_next_state to
61 * RESTARTER_STATE_NONE. So usually i_next_state is _NONE when ri_lock is not
62 * held. The exception is when we launch methods, which are done with
63 * a separate thread. To keep any other threads from grabbing ri_lock before
64 * method_thread() does, we set ri_method_thread to the thread id of the
65 * method thread, and when it is nonzero any thread with a different thread id
66 * waits on ri_method_cv.
67 *
68 * Method execution is serialized by blocking on ri_method_cv in
69 * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread. This
70 * also prevents the instance structure from being deleted until all
71 * outstanding operations such as method_thread() have finished.
72 *
73 * Lock ordering:
74 *
75 * dgraph_lock [can be held when taking:]
76 * utmpx_lock
77 * dictionary->dict_lock
78 * st->st_load_lock
79 * wait_info_lock
80 * ru->restarter_update_lock
81 * restarter_queue->rpeq_lock
82 * instance_list.ril_lock
83 * inst->ri_lock
84 * st->st_configd_live_lock
85 *
86 * instance_list.ril_lock
87 * graph_queue->gpeq_lock
88 * gu->gu_lock
89 * st->st_configd_live_lock
90 * dictionary->dict_lock
91 * inst->ri_lock
92 * graph_queue->gpeq_lock
93 * gu->gu_lock
94 * tu->tu_lock
95 * tq->tq_lock
96 * inst->ri_queue_lock
97 * wait_info_lock
98 * bp->cb_lock
99 * utmpx_lock
100 *
101 * single_user_thread_lock
102 * wait_info_lock
103 * utmpx_lock
104 *
105 * gu_freeze_lock
106 *
107 * logbuf_mutex nests inside pretty much everything.
108 */
109
110 #include <sys/contract/process.h>
111 #include <sys/ctfs.h>
112 #include <sys/stat.h>
113 #include <sys/time.h>
114 #include <sys/types.h>
115 #include <sys/uio.h>
116 #include <sys/wait.h>
117 #include <assert.h>
118 #include <errno.h>
119 #include <fcntl.h>
120 #include <libcontract.h>
121 #include <libcontract_priv.h>
122 #include <libintl.h>
123 #include <librestart.h>
124 #include <librestart_priv.h>
125 #include <libuutil.h>
126 #include <limits.h>
127 #include <poll.h>
128 #include <port.h>
129 #include <pthread.h>
130 #include <stdarg.h>
131 #include <stdio.h>
132 #include <strings.h>
133 #include <unistd.h>
134
135 #include "startd.h"
136 #include "protocol.h"
137
138 uu_list_pool_t *contract_list_pool;
139 static uu_list_pool_t *restarter_instance_pool;
140 static restarter_instance_list_t instance_list;
141
142 static uu_list_pool_t *restarter_queue_pool;
143
144 #define WT_SVC_ERR_THROTTLE 1 /* 1 sec delay for erroring wait svc */
145
146 /*
147 * Function used to reset the restart times for an instance, when
148 * an administrative task comes along and essentially makes the times
149 * in this array ineffective.
150 */
151 static void
reset_start_times(restarter_inst_t * inst)152 reset_start_times(restarter_inst_t *inst)
153 {
154 inst->ri_start_index = 0;
155 bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
156 }
157
158 /*ARGSUSED*/
159 static int
restarter_instance_compare(const void * lc_arg,const void * rc_arg,void * private)160 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
161 void *private)
162 {
163 int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
164 int rc_id = *(int *)rc_arg;
165
166 if (lc_id > rc_id)
167 return (1);
168 if (lc_id < rc_id)
169 return (-1);
170 return (0);
171 }
172
173 static restarter_inst_t *
inst_lookup_by_name(const char * name)174 inst_lookup_by_name(const char *name)
175 {
176 int id;
177
178 id = dict_lookup_byname(name);
179 if (id == -1)
180 return (NULL);
181
182 return (inst_lookup_by_id(id));
183 }
184
185 restarter_inst_t *
inst_lookup_by_id(int id)186 inst_lookup_by_id(int id)
187 {
188 restarter_inst_t *inst;
189
190 MUTEX_LOCK(&instance_list.ril_lock);
191 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
192 if (inst != NULL)
193 MUTEX_LOCK(&inst->ri_lock);
194 MUTEX_UNLOCK(&instance_list.ril_lock);
195
196 if (inst != NULL) {
197 while (inst->ri_method_thread != 0 &&
198 !pthread_equal(inst->ri_method_thread, pthread_self())) {
199 ++inst->ri_method_waiters;
200 (void) pthread_cond_wait(&inst->ri_method_cv,
201 &inst->ri_lock);
202 assert(inst->ri_method_waiters > 0);
203 --inst->ri_method_waiters;
204 }
205 }
206
207 return (inst);
208 }
209
210 static restarter_inst_t *
inst_lookup_queue(const char * name)211 inst_lookup_queue(const char *name)
212 {
213 int id;
214 restarter_inst_t *inst;
215
216 id = dict_lookup_byname(name);
217 if (id == -1)
218 return (NULL);
219
220 MUTEX_LOCK(&instance_list.ril_lock);
221 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
222 if (inst != NULL)
223 MUTEX_LOCK(&inst->ri_queue_lock);
224 MUTEX_UNLOCK(&instance_list.ril_lock);
225
226 return (inst);
227 }
228
229 const char *
service_style(int flags)230 service_style(int flags)
231 {
232 switch (flags & RINST_STYLE_MASK) {
233 case RINST_CONTRACT: return ("contract");
234 case RINST_TRANSIENT: return ("transient");
235 case RINST_WAIT: return ("wait");
236
237 default:
238 #ifndef NDEBUG
239 uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
240 #endif
241 abort();
242 /* NOTREACHED */
243 }
244 }
245
246 /*
247 * Fails with ECONNABORTED or ECANCELED.
248 */
249 static int
check_contract(restarter_inst_t * inst,boolean_t primary,scf_instance_t * scf_inst)250 check_contract(restarter_inst_t *inst, boolean_t primary,
251 scf_instance_t *scf_inst)
252 {
253 ctid_t *ctidp;
254 int fd, r;
255
256 ctidp = primary ? &inst->ri_i.i_primary_ctid :
257 &inst->ri_i.i_transient_ctid;
258
259 assert(*ctidp >= 1);
260
261 fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
262 if (fd >= 0) {
263 r = close(fd);
264 assert(r == 0);
265 return (0);
266 }
267
268 r = restarter_remove_contract(scf_inst, *ctidp, primary ?
269 RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
270 switch (r) {
271 case 0:
272 case ECONNABORTED:
273 case ECANCELED:
274 *ctidp = 0;
275 return (r);
276
277 case ENOMEM:
278 uu_die("Out of memory\n");
279 /* NOTREACHED */
280
281 case EPERM:
282 uu_die("Insufficient privilege.\n");
283 /* NOTREACHED */
284
285 case EACCES:
286 uu_die("Repository backend access denied.\n");
287 /* NOTREACHED */
288
289 case EROFS:
290 log_error(LOG_INFO, "Could not remove unusable contract id %ld "
291 "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
292 return (0);
293
294 case EINVAL:
295 case EBADF:
296 default:
297 assert(0);
298 abort();
299 /* NOTREACHED */
300 }
301 }
302
303 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
304
305 /*
306 * int restarter_insert_inst(scf_handle_t *, char *)
307 * If the inst is already in the restarter list, return its id. If the inst
308 * is not in the restarter list, initialize a restarter_inst_t, initialize its
309 * states, insert it into the list, and return 0.
310 *
311 * Fails with
312 * ENOENT - name is not in the repository
313 */
314 static int
restarter_insert_inst(scf_handle_t * h,const char * name)315 restarter_insert_inst(scf_handle_t *h, const char *name)
316 {
317 int id, r;
318 restarter_inst_t *inst;
319 uu_list_index_t idx;
320 scf_service_t *scf_svc;
321 scf_instance_t *scf_inst;
322 scf_snapshot_t *snap = NULL;
323 scf_propertygroup_t *pg;
324 char *svc_name, *inst_name;
325 char logfilebuf[PATH_MAX];
326 char *c;
327 boolean_t do_commit_states;
328 restarter_instance_state_t state, next_state;
329 protocol_states_t *ps;
330 pid_t start_pid;
331 restarter_str_t reason = restarter_str_insert_in_graph;
332
333 MUTEX_LOCK(&instance_list.ril_lock);
334
335 /*
336 * We don't use inst_lookup_by_name() here because we want the lookup
337 * & insert to be atomic.
338 */
339 id = dict_lookup_byname(name);
340 if (id != -1) {
341 inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
342 &idx);
343 if (inst != NULL) {
344 MUTEX_UNLOCK(&instance_list.ril_lock);
345 return (0);
346 }
347 }
348
349 /* Allocate an instance */
350 inst = startd_zalloc(sizeof (restarter_inst_t));
351 inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
352 inst->ri_utmpx_prefix[0] = '\0';
353
354 inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
355 (void) strcpy((char *)inst->ri_i.i_fmri, name);
356
357 inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
358
359 /*
360 * id shouldn't be -1 since we use the same dictionary as graph.c, but
361 * just in case.
362 */
363 inst->ri_id = (id != -1 ? id : dict_insert(name));
364
365 special_online_hooks_get(name, &inst->ri_pre_online_hook,
366 &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
367
368 scf_svc = safe_scf_service_create(h);
369 scf_inst = safe_scf_instance_create(h);
370 pg = safe_scf_pg_create(h);
371 svc_name = startd_alloc(max_scf_name_size);
372 inst_name = startd_alloc(max_scf_name_size);
373
374 rep_retry:
375 if (snap != NULL)
376 scf_snapshot_destroy(snap);
377 if (inst->ri_logstem != NULL)
378 startd_free(inst->ri_logstem, PATH_MAX);
379 if (inst->ri_common_name != NULL)
380 free(inst->ri_common_name);
381 if (inst->ri_C_common_name != NULL)
382 free(inst->ri_C_common_name);
383 snap = NULL;
384 inst->ri_logstem = NULL;
385 inst->ri_common_name = NULL;
386 inst->ri_C_common_name = NULL;
387
388 if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
389 NULL, SCF_DECODE_FMRI_EXACT) != 0) {
390 switch (scf_error()) {
391 case SCF_ERROR_CONNECTION_BROKEN:
392 libscf_handle_rebind(h);
393 goto rep_retry;
394
395 case SCF_ERROR_NOT_FOUND:
396 goto deleted;
397 }
398
399 uu_die("Can't decode FMRI %s: %s\n", name,
400 scf_strerror(scf_error()));
401 }
402
403 /*
404 * If there's no running snapshot, then we execute using the editing
405 * snapshot. Pending snapshots will be taken later.
406 */
407 snap = libscf_get_running_snapshot(scf_inst);
408
409 if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
410 (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
411 0)) {
412 switch (scf_error()) {
413 case SCF_ERROR_NOT_SET:
414 break;
415
416 case SCF_ERROR_CONNECTION_BROKEN:
417 libscf_handle_rebind(h);
418 goto rep_retry;
419
420 default:
421 assert(0);
422 abort();
423 }
424
425 goto deleted;
426 }
427
428 (void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
429 for (c = logfilebuf; *c != '\0'; c++)
430 if (*c == '/')
431 *c = '-';
432
433 inst->ri_logstem = startd_alloc(PATH_MAX);
434 (void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
435 LOG_SUFFIX);
436
437 /*
438 * If the restarter group is missing, use uninit/none. Otherwise,
439 * we're probably being restarted & don't want to mess up the states
440 * that are there.
441 */
442 state = RESTARTER_STATE_UNINIT;
443 next_state = RESTARTER_STATE_NONE;
444
445 r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
446 if (r != 0) {
447 switch (scf_error()) {
448 case SCF_ERROR_CONNECTION_BROKEN:
449 libscf_handle_rebind(h);
450 goto rep_retry;
451
452 case SCF_ERROR_NOT_SET:
453 goto deleted;
454
455 case SCF_ERROR_NOT_FOUND:
456 /*
457 * This shouldn't happen since the graph engine should
458 * have initialized the state to uninitialized/none if
459 * there was no restarter pg. In case somebody
460 * deleted it, though....
461 */
462 do_commit_states = B_TRUE;
463 break;
464
465 default:
466 assert(0);
467 abort();
468 }
469 } else {
470 r = libscf_read_states(pg, &state, &next_state);
471 if (r != 0) {
472 do_commit_states = B_TRUE;
473 } else {
474 if (next_state != RESTARTER_STATE_NONE) {
475 /*
476 * Force next_state to _NONE since we
477 * don't look for method processes.
478 */
479 next_state = RESTARTER_STATE_NONE;
480 do_commit_states = B_TRUE;
481 } else {
482 /*
483 * The reason for transition will depend on
484 * state.
485 */
486 if (st->st_initial == 0)
487 reason = restarter_str_startd_restart;
488 else if (state == RESTARTER_STATE_MAINT)
489 reason = restarter_str_bad_repo_state;
490 /*
491 * Inform the restarter of our state without
492 * changing the STIME in the repository.
493 */
494 ps = startd_alloc(sizeof (*ps));
495 inst->ri_i.i_state = ps->ps_state = state;
496 inst->ri_i.i_next_state = ps->ps_state_next =
497 next_state;
498 ps->ps_reason = reason;
499
500 graph_protocol_send_event(inst->ri_i.i_fmri,
501 GRAPH_UPDATE_STATE_CHANGE, ps);
502
503 do_commit_states = B_FALSE;
504 }
505 }
506 }
507
508 switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
509 &inst->ri_utmpx_prefix)) {
510 case 0:
511 break;
512
513 case ECONNABORTED:
514 libscf_handle_rebind(h);
515 goto rep_retry;
516
517 case ECANCELED:
518 goto deleted;
519
520 case ENOENT:
521 /*
522 * This is odd, because the graph engine should have required
523 * the general property group. So we'll just use default
524 * flags in anticipation of the graph engine sending us
525 * REMOVE_INSTANCE when it finds out that the general property
526 * group has been deleted.
527 */
528 inst->ri_flags = RINST_CONTRACT;
529 break;
530
531 default:
532 assert(0);
533 abort();
534 }
535
536 r = libscf_get_template_values(scf_inst, snap,
537 &inst->ri_common_name, &inst->ri_C_common_name);
538
539 /*
540 * Copy our names to smaller buffers to reduce our memory footprint.
541 */
542 if (inst->ri_common_name != NULL) {
543 char *tmp = safe_strdup(inst->ri_common_name);
544 startd_free(inst->ri_common_name, max_scf_value_size);
545 inst->ri_common_name = tmp;
546 }
547
548 if (inst->ri_C_common_name != NULL) {
549 char *tmp = safe_strdup(inst->ri_C_common_name);
550 startd_free(inst->ri_C_common_name, max_scf_value_size);
551 inst->ri_C_common_name = tmp;
552 }
553
554 switch (r) {
555 case 0:
556 break;
557
558 case ECONNABORTED:
559 libscf_handle_rebind(h);
560 goto rep_retry;
561
562 case ECANCELED:
563 goto deleted;
564
565 case ECHILD:
566 case ENOENT:
567 break;
568
569 default:
570 assert(0);
571 abort();
572 }
573
574 switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
575 &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
576 &start_pid)) {
577 case 0:
578 break;
579
580 case ECONNABORTED:
581 libscf_handle_rebind(h);
582 goto rep_retry;
583
584 case ECANCELED:
585 goto deleted;
586
587 default:
588 assert(0);
589 abort();
590 }
591
592 if (inst->ri_i.i_primary_ctid >= 1) {
593 contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
594
595 switch (check_contract(inst, B_TRUE, scf_inst)) {
596 case 0:
597 break;
598
599 case ECONNABORTED:
600 libscf_handle_rebind(h);
601 goto rep_retry;
602
603 case ECANCELED:
604 goto deleted;
605
606 default:
607 assert(0);
608 abort();
609 }
610 }
611
612 if (inst->ri_i.i_transient_ctid >= 1) {
613 switch (check_contract(inst, B_FALSE, scf_inst)) {
614 case 0:
615 break;
616
617 case ECONNABORTED:
618 libscf_handle_rebind(h);
619 goto rep_retry;
620
621 case ECANCELED:
622 goto deleted;
623
624 default:
625 assert(0);
626 abort();
627 }
628 }
629
630 /* No more failures we live through, so add it to the list. */
631 (void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
632 (void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
633 MUTEX_LOCK(&inst->ri_lock);
634 MUTEX_LOCK(&inst->ri_queue_lock);
635
636 (void) pthread_cond_init(&inst->ri_method_cv, NULL);
637
638 uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
639 uu_list_insert(instance_list.ril_instance_list, inst, idx);
640 MUTEX_UNLOCK(&instance_list.ril_lock);
641
642 if (start_pid != -1 &&
643 (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
644 int ret;
645 ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
646 if (ret == -1) {
647 /*
648 * Implication: if we can't reregister the
649 * instance, we will start another one. Two
650 * instances may or may not result in a resource
651 * conflict.
652 */
653 log_error(LOG_WARNING,
654 "%s: couldn't reregister %ld for wait\n",
655 inst->ri_i.i_fmri, start_pid);
656 } else if (ret == 1) {
657 /*
658 * Leading PID has exited.
659 */
660 (void) stop_instance(h, inst, RSTOP_EXIT);
661 }
662 }
663
664
665 scf_pg_destroy(pg);
666
667 if (do_commit_states)
668 (void) restarter_instance_update_states(h, inst, state,
669 next_state, RERR_NONE, reason);
670
671 log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
672 service_style(inst->ri_flags));
673
674 MUTEX_UNLOCK(&inst->ri_queue_lock);
675 MUTEX_UNLOCK(&inst->ri_lock);
676
677 startd_free(svc_name, max_scf_name_size);
678 startd_free(inst_name, max_scf_name_size);
679 scf_snapshot_destroy(snap);
680 scf_instance_destroy(scf_inst);
681 scf_service_destroy(scf_svc);
682
683 log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
684 name);
685
686 return (0);
687
688 deleted:
689 MUTEX_UNLOCK(&instance_list.ril_lock);
690 startd_free(inst_name, max_scf_name_size);
691 startd_free(svc_name, max_scf_name_size);
692 if (snap != NULL)
693 scf_snapshot_destroy(snap);
694 scf_pg_destroy(pg);
695 scf_instance_destroy(scf_inst);
696 scf_service_destroy(scf_svc);
697 startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
698 uu_list_destroy(inst->ri_queue);
699 if (inst->ri_logstem != NULL)
700 startd_free(inst->ri_logstem, PATH_MAX);
701 if (inst->ri_common_name != NULL)
702 free(inst->ri_common_name);
703 if (inst->ri_C_common_name != NULL)
704 free(inst->ri_C_common_name);
705 startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
706 startd_free(inst, sizeof (restarter_inst_t));
707 return (ENOENT);
708 }
709
/*
 * restarter_delete_inst()
 *   Remove an instance from instance_list and free it.  Entered with
 *   ri->ri_lock held; the lock must be dropped so ril_lock can be taken
 *   first (see the lock-ordering comment at the top of the file), which
 *   opens a race window — if another thread deletes the instance in that
 *   window, the re-lookup below fails and we simply return.
 */
static void
restarter_delete_inst(restarter_inst_t *ri)
{
	int id;
	restarter_inst_t *rip;
	void *cookie = NULL;
	restarter_instance_qentry_t *e;

	assert(MUTEX_HELD(&ri->ri_lock));

	/*
	 * Must drop the instance lock so we can pick up the instance_list
	 * lock & remove the instance.
	 */
	id = ri->ri_id;
	MUTEX_UNLOCK(&ri->ri_lock);

	MUTEX_LOCK(&instance_list.ril_lock);

	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
	if (rip == NULL) {
		/* Someone else deleted it while we held no locks. */
		MUTEX_UNLOCK(&instance_list.ril_lock);
		return;
	}

	assert(ri == rip);

	uu_list_remove(instance_list.ril_instance_list, ri);

	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
	    ri->ri_i.i_fmri);

	MUTEX_UNLOCK(&instance_list.ril_lock);

	/*
	 * We can lock the instance without holding the instance_list lock
	 * since we removed the instance from the list.
	 */
	MUTEX_LOCK(&ri->ri_lock);
	MUTEX_LOCK(&ri->ri_queue_lock);

	if (ri->ri_i.i_primary_ctid >= 1)
		contract_hash_remove(ri->ri_i.i_primary_ctid);

	/*
	 * Wait for any outstanding method thread, and for all threads
	 * blocked in inst_lookup_by_id(), to drain before freeing.
	 */
	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);

	/* Drain and destroy the pending restarter-event queue. */
	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
		startd_free(e, sizeof (*e));
	uu_list_destroy(ri->ri_queue);

	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
	startd_free(ri->ri_logstem, PATH_MAX);
	if (ri->ri_common_name != NULL)
		free(ri->ri_common_name);
	if (ri->ri_C_common_name != NULL)
		free(ri->ri_C_common_name);
	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
	(void) pthread_mutex_destroy(&ri->ri_lock);
	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
	startd_free(ri, sizeof (restarter_inst_t));
}
772
773 /*
774 * instance_is_wait_style()
775 *
776 * Returns 1 if the given instance is a "wait-style" service instance.
777 */
778 int
instance_is_wait_style(restarter_inst_t * inst)779 instance_is_wait_style(restarter_inst_t *inst)
780 {
781 assert(MUTEX_HELD(&inst->ri_lock));
782 return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
783 }
784
785 /*
786 * instance_is_transient_style()
787 *
788 * Returns 1 if the given instance is a transient service instance.
789 */
790 int
instance_is_transient_style(restarter_inst_t * inst)791 instance_is_transient_style(restarter_inst_t *inst)
792 {
793 assert(MUTEX_HELD(&inst->ri_lock));
794 return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
795 }
796
797 /*
798 * instance_in_transition()
799 * Returns 1 if instance is in transition, 0 if not
800 */
801 int
instance_in_transition(restarter_inst_t * inst)802 instance_in_transition(restarter_inst_t *inst)
803 {
804 assert(MUTEX_HELD(&inst->ri_lock));
805 if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
806 return (0);
807 return (1);
808 }
809
810 /*
811 * returns 1 if instance is already started, 0 if not
812 */
813 static int
instance_started(restarter_inst_t * inst)814 instance_started(restarter_inst_t *inst)
815 {
816 int ret;
817
818 assert(MUTEX_HELD(&inst->ri_lock));
819
820 if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
821 inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
822 ret = 1;
823 else
824 ret = 0;
825
826 return (ret);
827 }
828
/*
 * restarter_instance_update_states()
 *   Commit a state transition for the instance to the repository, update
 *   the in-memory copy, notify the graph engine, and run the instance's
 *   post-online/post-offline hooks if the transition crosses the
 *   started/stopped boundary.  Caller must hold ri_lock.
 *
 * Returns
 *   0 - success
 *   ECONNRESET - success, but h was rebound
 */
int
restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
    restarter_instance_state_t new_state,
    restarter_instance_state_t new_state_next, restarter_error_t err,
    restarter_str_t reason)
{
	protocol_states_t *states;
	int e;
	uint_t retry_count = 0, msecs = ALLOC_DELAY;
	boolean_t rebound = B_FALSE;
	int prev_state_online;
	int state_online;

	assert(MUTEX_HELD(&ri->ri_lock));

	/* Capture started-ness before the commit so we can detect edges. */
	prev_state_online = instance_started(ri);

retry:
	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
	    restarter_get_str_short(reason));
	switch (e) {
	case 0:
		break;

	case ENOMEM:
		/* Retry with exponential backoff before giving up. */
		++retry_count;
		if (retry_count < ALLOC_RETRY) {
			(void) poll(NULL, 0, msecs);
			msecs *= ALLOC_DELAY_MULT;
			goto retry;
		}

		/* Like startd_alloc(). */
		uu_die("Insufficient memory.\n");
		/* NOTREACHED */

	case ECONNABORTED:
		libscf_handle_rebind(h);
		rebound = B_TRUE;
		goto retry;

	case EPERM:
	case EACCES:
	case EROFS:
		log_error(LOG_NOTICE, "Could not commit state change for %s "
		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
		/* FALLTHROUGH */

	case ENOENT:
		/*
		 * Keep the in-memory view current even though the repository
		 * update failed (or the instance was deleted).
		 */
		ri->ri_i.i_state = new_state;
		ri->ri_i.i_next_state = new_state_next;
		break;

	case EINVAL:
	default:
		bad_error("_restarter_commit_states", e);
	}

	/* Tell the graph engine about the transition (it frees states). */
	states = startd_alloc(sizeof (protocol_states_t));
	states->ps_state = new_state;
	states->ps_state_next = new_state_next;
	states->ps_err = err;
	states->ps_reason = reason;
	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
	    (void *)states);

	state_online = instance_started(ri);

	/* Fire hooks only when crossing the started/stopped boundary. */
	if (prev_state_online && !state_online)
		ri->ri_post_offline_hook();
	else if (!prev_state_online && state_online)
		ri->ri_post_online_hook();

	return (rebound ? ECONNRESET : 0);
}
909
910 void
restarter_mark_pending_snapshot(const char * fmri,uint_t flag)911 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
912 {
913 restarter_inst_t *inst;
914
915 assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
916
917 inst = inst_lookup_by_name(fmri);
918 if (inst == NULL)
919 return;
920
921 inst->ri_flags |= flag;
922
923 MUTEX_UNLOCK(&inst->ri_lock);
924 }
925
926 static void
restarter_take_pending_snapshots(scf_handle_t * h)927 restarter_take_pending_snapshots(scf_handle_t *h)
928 {
929 restarter_inst_t *inst;
930 int r;
931
932 MUTEX_LOCK(&instance_list.ril_lock);
933
934 for (inst = uu_list_first(instance_list.ril_instance_list);
935 inst != NULL;
936 inst = uu_list_next(instance_list.ril_instance_list, inst)) {
937 const char *fmri;
938 scf_instance_t *sinst = NULL;
939
940 MUTEX_LOCK(&inst->ri_lock);
941
942 /*
943 * This is where we'd check inst->ri_method_thread and if it
944 * were nonzero we'd wait in anticipation of another thread
945 * executing a method for inst. Doing so with the instance_list
946 * locked, though, leads to deadlock. Since taking a snapshot
947 * during that window won't hurt anything, we'll just continue.
948 */
949
950 fmri = inst->ri_i.i_fmri;
951
952 if (inst->ri_flags & RINST_RETAKE_RUNNING) {
953 scf_snapshot_t *rsnap;
954
955 (void) libscf_fmri_get_instance(h, fmri, &sinst);
956
957 rsnap = libscf_get_or_make_running_snapshot(sinst,
958 fmri, B_FALSE);
959
960 scf_instance_destroy(sinst);
961
962 if (rsnap != NULL)
963 inst->ri_flags &= ~RINST_RETAKE_RUNNING;
964
965 scf_snapshot_destroy(rsnap);
966 }
967
968 if (inst->ri_flags & RINST_RETAKE_START) {
969 switch (r = libscf_snapshots_poststart(h, fmri,
970 B_FALSE)) {
971 case 0:
972 case ENOENT:
973 inst->ri_flags &= ~RINST_RETAKE_START;
974 break;
975
976 case ECONNABORTED:
977 break;
978
979 case EACCES:
980 default:
981 bad_error("libscf_snapshots_poststart", r);
982 }
983 }
984
985 MUTEX_UNLOCK(&inst->ri_lock);
986 }
987
988 MUTEX_UNLOCK(&instance_list.ril_lock);
989 }
990
991 /* ARGSUSED */
992 void *
restarter_post_fsminimal_thread(void * unused)993 restarter_post_fsminimal_thread(void *unused)
994 {
995 scf_handle_t *h;
996 int r;
997
998 (void) pthread_setname_np(pthread_self(), "restarter_post_fsmin");
999
1000 h = libscf_handle_create_bound_loop();
1001
1002 for (;;) {
1003 r = libscf_create_self(h);
1004 if (r == 0)
1005 break;
1006
1007 assert(r == ECONNABORTED);
1008 libscf_handle_rebind(h);
1009 }
1010
1011 restarter_take_pending_snapshots(h);
1012
1013 (void) scf_handle_unbind(h);
1014 scf_handle_destroy(h);
1015
1016 return (NULL);
1017 }
1018
1019 /*
1020 * int stop_instance()
1021 *
1022 * Stop the instance identified by the instance given as the second argument,
1023 * for the cause stated.
1024 *
1025 * Returns
1026 * 0 - success
1027 * -1 - inst is in transition
1028 */
1029 static int
stop_instance(scf_handle_t * local_handle,restarter_inst_t * inst,stop_cause_t cause)1030 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1031 stop_cause_t cause)
1032 {
1033 fork_info_t *info;
1034 const char *cp;
1035 int err;
1036 restarter_error_t re;
1037 restarter_str_t reason;
1038 restarter_instance_state_t new_state;
1039
1040 assert(MUTEX_HELD(&inst->ri_lock));
1041 assert(inst->ri_method_thread == 0);
1042
1043 switch (cause) {
1044 case RSTOP_EXIT:
1045 re = RERR_RESTART;
1046 reason = restarter_str_ct_ev_exit;
1047 cp = "all processes in service exited";
1048 break;
1049 case RSTOP_ERR_CFG:
1050 re = RERR_FAULT;
1051 reason = restarter_str_method_failed;
1052 cp = "service exited with a configuration error";
1053 break;
1054 case RSTOP_ERR_EXIT:
1055 re = RERR_RESTART;
1056 reason = restarter_str_ct_ev_exit;
1057 cp = "service exited with an error";
1058 break;
1059 case RSTOP_CORE:
1060 re = RERR_FAULT;
1061 reason = restarter_str_ct_ev_core;
1062 cp = "process dumped core";
1063 break;
1064 case RSTOP_SIGNAL:
1065 re = RERR_FAULT;
1066 reason = restarter_str_ct_ev_signal;
1067 cp = "process received fatal signal from outside the service";
1068 break;
1069 case RSTOP_HWERR:
1070 re = RERR_FAULT;
1071 reason = restarter_str_ct_ev_hwerr;
1072 cp = "process killed due to uncorrectable hardware error";
1073 break;
1074 case RSTOP_DEPENDENCY:
1075 re = RERR_RESTART;
1076 reason = restarter_str_dependency_activity;
1077 cp = "dependency activity requires stop";
1078 break;
1079 case RSTOP_DISABLE:
1080 re = RERR_RESTART;
1081 reason = restarter_str_disable_request;
1082 cp = "service disabled";
1083 break;
1084 case RSTOP_RESTART:
1085 re = RERR_RESTART;
1086 reason = restarter_str_restart_request;
1087 cp = "service restarting";
1088 break;
1089 default:
1090 #ifndef NDEBUG
1091 (void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1092 cause, __FILE__, __LINE__);
1093 #endif
1094 abort();
1095 }
1096
1097 /* Services in the disabled and maintenance state are ignored */
1098 if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1099 inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1100 log_framework(LOG_DEBUG,
1101 "%s: stop_instance -> is maint/disabled\n",
1102 inst->ri_i.i_fmri);
1103 return (0);
1104 }
1105
1106 /* Already stopped instances are left alone */
1107 if (instance_started(inst) == 0) {
1108 log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1109 inst->ri_i.i_fmri);
1110 return (0);
1111 }
1112
1113 if (instance_in_transition(inst)) {
1114 /* requeue event by returning -1 */
1115 log_framework(LOG_DEBUG,
1116 "Restarter: Not stopping %s, in transition.\n",
1117 inst->ri_i.i_fmri);
1118 return (-1);
1119 }
1120
1121 log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1122
1123 log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1124 "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1125
1126 if (instance_is_wait_style(inst) &&
1127 (cause == RSTOP_EXIT ||
1128 cause == RSTOP_ERR_CFG ||
1129 cause == RSTOP_ERR_EXIT)) {
1130 /*
1131 * No need to stop instance, as child has exited; remove
1132 * contract and move the instance to the offline state.
1133 */
1134 switch (err = restarter_instance_update_states(local_handle,
1135 inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1136 reason)) {
1137 case 0:
1138 case ECONNRESET:
1139 break;
1140
1141 default:
1142 bad_error("restarter_instance_update_states", err);
1143 }
1144
1145 if (cause == RSTOP_ERR_EXIT) {
1146 /*
1147 * The RSTOP_ERR_EXIT cause is set via the
1148 * wait_thread -> wait_remove code path when we have
1149 * a "wait" style svc that exited with an error. If
1150 * the svc is failing too quickly, we throttle it so
1151 * that we don't restart it more than once/second.
1152 * Since we know we're running in the wait thread its
1153 * ok to throttle it right here.
1154 */
1155 (void) update_fault_count(inst, FAULT_COUNT_INCR);
1156 if (method_rate_critical(inst)) {
1157 log_instance(inst, B_TRUE, "Failing too "
1158 "quickly, throttling.");
1159 (void) sleep(WT_SVC_ERR_THROTTLE);
1160 }
1161 } else {
1162 (void) update_fault_count(inst, FAULT_COUNT_RESET);
1163 reset_start_times(inst);
1164 }
1165
1166 if (inst->ri_i.i_primary_ctid != 0) {
1167 inst->ri_m_inst =
1168 safe_scf_instance_create(local_handle);
1169 inst->ri_mi_deleted = B_FALSE;
1170
1171 libscf_reget_instance(inst);
1172 method_remove_contract(inst, B_TRUE, B_TRUE);
1173
1174 scf_instance_destroy(inst->ri_m_inst);
1175 inst->ri_m_inst = NULL;
1176 }
1177
1178 switch (err = restarter_instance_update_states(local_handle,
1179 inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1180 reason)) {
1181 case 0:
1182 case ECONNRESET:
1183 break;
1184
1185 default:
1186 bad_error("restarter_instance_update_states", err);
1187 }
1188
1189 if (cause != RSTOP_ERR_CFG)
1190 return (0);
1191 } else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1192 /*
1193 * Stopping a wait service through means other than the pid
1194 * exiting should keep wait_thread() from restarting the
1195 * service, by removing it from the wait list.
1196 * We cannot remove it right now otherwise the process will
1197 * end up <defunct> so mark it to be ignored.
1198 */
1199 wait_ignore_by_fmri(inst->ri_i.i_fmri);
1200 }
1201
1202 /*
1203 * There are some configuration errors which we cannot detect until we
1204 * try to run the method. For example, see exec_method() where the
1205 * restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
1206 * in several cases. If this happens for a "wait-style" svc,
1207 * wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
1208 * the configuration error and go into maintenance, even though it is
1209 * a "wait-style" svc.
1210 */
1211 if (cause == RSTOP_ERR_CFG)
1212 new_state = RESTARTER_STATE_MAINT;
1213 else
1214 new_state = inst->ri_i.i_enabled ?
1215 RESTARTER_STATE_OFFLINE : RESTARTER_STATE_DISABLED;
1216
1217 switch (err = restarter_instance_update_states(local_handle, inst,
1218 inst->ri_i.i_state, new_state, RERR_NONE, reason)) {
1219 case 0:
1220 case ECONNRESET:
1221 break;
1222
1223 default:
1224 bad_error("restarter_instance_update_states", err);
1225 }
1226
1227 info = startd_zalloc(sizeof (fork_info_t));
1228
1229 info->sf_id = inst->ri_id;
1230 info->sf_method_type = METHOD_STOP;
1231 info->sf_event_type = re;
1232 info->sf_reason = reason;
1233 inst->ri_method_thread = startd_thread_create(method_thread, info);
1234
1235 return (0);
1236 }
1237
1238 /*
1239 * Returns
1240 * ENOENT - fmri is not in instance_list
1241 * 0 - success
1242 * ECONNRESET - success, though handle was rebound
1243 * -1 - instance is in transition
1244 */
1245 int
stop_instance_fmri(scf_handle_t * h,const char * fmri,uint_t flags)1246 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1247 {
1248 restarter_inst_t *rip;
1249 int r;
1250
1251 rip = inst_lookup_by_name(fmri);
1252 if (rip == NULL)
1253 return (ENOENT);
1254
1255 r = stop_instance(h, rip, flags);
1256
1257 MUTEX_UNLOCK(&rip->ri_lock);
1258
1259 return (r);
1260 }
1261
1262 static void
unmaintain_instance(scf_handle_t * h,restarter_inst_t * rip,unmaint_cause_t cause)1263 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1264 unmaint_cause_t cause)
1265 {
1266 ctid_t ctid;
1267 scf_instance_t *inst;
1268 int r;
1269 uint_t tries = 0, msecs = ALLOC_DELAY;
1270 const char *cp;
1271 restarter_str_t reason;
1272
1273 assert(MUTEX_HELD(&rip->ri_lock));
1274
1275 if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1276 log_error(LOG_DEBUG, "Restarter: "
1277 "Ignoring maintenance off command because %s is not in the "
1278 "maintenance state.\n", rip->ri_i.i_fmri);
1279 return;
1280 }
1281
1282 switch (cause) {
1283 case RUNMAINT_CLEAR:
1284 cp = "clear requested";
1285 reason = restarter_str_clear_request;
1286 break;
1287 case RUNMAINT_DISABLE:
1288 cp = "disable requested";
1289 reason = restarter_str_disable_request;
1290 break;
1291 default:
1292 #ifndef NDEBUG
1293 (void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1294 cause, __FILE__, __LINE__);
1295 #endif
1296 abort();
1297 }
1298
1299 log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1300 cp);
1301 log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1302 "%s.\n", rip->ri_i.i_fmri, cp);
1303
1304 (void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1305 RESTARTER_STATE_NONE, RERR_RESTART, reason);
1306
1307 /*
1308 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1309 * a primary contract.
1310 */
1311 if (rip->ri_i.i_primary_ctid == 0)
1312 return;
1313
1314 ctid = rip->ri_i.i_primary_ctid;
1315 contract_abandon(ctid);
1316 rip->ri_i.i_primary_ctid = 0;
1317
1318 rep_retry:
1319 switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1320 case 0:
1321 break;
1322
1323 case ECONNABORTED:
1324 libscf_handle_rebind(h);
1325 goto rep_retry;
1326
1327 case ENOENT:
1328 /* Must have been deleted. */
1329 return;
1330
1331 case EINVAL:
1332 case ENOTSUP:
1333 default:
1334 bad_error("libscf_handle_rebind", r);
1335 }
1336
1337 again:
1338 r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1339 switch (r) {
1340 case 0:
1341 break;
1342
1343 case ENOMEM:
1344 ++tries;
1345 if (tries < ALLOC_RETRY) {
1346 (void) poll(NULL, 0, msecs);
1347 msecs *= ALLOC_DELAY_MULT;
1348 goto again;
1349 }
1350
1351 uu_die("Insufficient memory.\n");
1352 /* NOTREACHED */
1353
1354 case ECONNABORTED:
1355 scf_instance_destroy(inst);
1356 libscf_handle_rebind(h);
1357 goto rep_retry;
1358
1359 case ECANCELED:
1360 break;
1361
1362 case EPERM:
1363 case EACCES:
1364 case EROFS:
1365 log_error(LOG_INFO,
1366 "Could not remove contract id %lu for %s (%s).\n", ctid,
1367 rip->ri_i.i_fmri, strerror(r));
1368 break;
1369
1370 case EINVAL:
1371 case EBADF:
1372 default:
1373 bad_error("restarter_remove_contract", r);
1374 }
1375
1376 scf_instance_destroy(inst);
1377 }
1378
1379 /*
1380 * enable_inst()
1381 * Set inst->ri_i.i_enabled. Expects 'e' to be _ENABLE, _DISABLE, or
1382 * _ADMIN_DISABLE. If the event is _ENABLE and inst is uninitialized or
1383 * disabled, move it to offline. If the event is _DISABLE or
1384 * _ADMIN_DISABLE, make sure inst will move to disabled.
1385 *
1386 * Returns
1387 * 0 - success
1388 * ECONNRESET - h was rebound
1389 */
1390 static int
enable_inst(scf_handle_t * h,restarter_inst_t * inst,restarter_instance_qentry_t * riq)1391 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1392 restarter_instance_qentry_t *riq)
1393 {
1394 restarter_instance_state_t state;
1395 restarter_event_type_t e = riq->riq_type;
1396 restarter_str_t reason = restarter_str_per_configuration;
1397 int r;
1398
1399 assert(MUTEX_HELD(&inst->ri_lock));
1400 assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1401 e == RESTARTER_EVENT_TYPE_DISABLE ||
1402 e == RESTARTER_EVENT_TYPE_ENABLE);
1403 assert(instance_in_transition(inst) == 0);
1404
1405 state = inst->ri_i.i_state;
1406
1407 if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1408 inst->ri_i.i_enabled = 1;
1409
1410 if (state == RESTARTER_STATE_UNINIT ||
1411 state == RESTARTER_STATE_DISABLED) {
1412 /*
1413 * B_FALSE: Don't log an error if the log_instance()
1414 * fails because it will fail on the miniroot before
1415 * install-discovery runs.
1416 */
1417 log_instance(inst, B_FALSE, "Enabled.");
1418 log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1419 inst->ri_i.i_fmri);
1420
1421 /*
1422 * If we are coming from DISABLED, it was obviously an
1423 * enable request. If we are coming from UNINIT, it may
1424 * have been a sevice in MAINT that was cleared.
1425 */
1426 if (riq->riq_reason == restarter_str_clear_request)
1427 reason = restarter_str_clear_request;
1428 else if (state == RESTARTER_STATE_DISABLED)
1429 reason = restarter_str_enable_request;
1430 (void) restarter_instance_update_states(h, inst,
1431 RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1432 RERR_NONE, reason);
1433 } else {
1434 log_framework(LOG_DEBUG, "Restarter: "
1435 "Not changing state of %s for enable command.\n",
1436 inst->ri_i.i_fmri);
1437 }
1438 } else {
1439 inst->ri_i.i_enabled = 0;
1440
1441 switch (state) {
1442 case RESTARTER_STATE_ONLINE:
1443 case RESTARTER_STATE_DEGRADED:
1444 r = stop_instance(h, inst, RSTOP_DISABLE);
1445 return (r == ECONNRESET ? 0 : r);
1446
1447 case RESTARTER_STATE_OFFLINE:
1448 case RESTARTER_STATE_UNINIT:
1449 if (inst->ri_i.i_primary_ctid != 0) {
1450 inst->ri_m_inst = safe_scf_instance_create(h);
1451 inst->ri_mi_deleted = B_FALSE;
1452
1453 libscf_reget_instance(inst);
1454 method_remove_contract(inst, B_TRUE, B_TRUE);
1455
1456 scf_instance_destroy(inst->ri_m_inst);
1457 }
1458 /* B_FALSE: See log_instance(..., "Enabled."); above */
1459 log_instance(inst, B_FALSE, "Disabled.");
1460 log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1461 inst->ri_i.i_fmri);
1462
1463 /*
1464 * If we are coming from OFFLINE, it was obviously a
1465 * disable request. But if we are coming from
1466 * UNINIT, it may have been a disable request for a
1467 * service in MAINT.
1468 */
1469 if (riq->riq_reason == restarter_str_disable_request ||
1470 state == RESTARTER_STATE_OFFLINE)
1471 reason = restarter_str_disable_request;
1472 (void) restarter_instance_update_states(h, inst,
1473 RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1474 RERR_RESTART, reason);
1475 return (0);
1476
1477 case RESTARTER_STATE_DISABLED:
1478 break;
1479
1480 case RESTARTER_STATE_MAINT:
1481 /*
1482 * We only want to pull the instance out of maintenance
1483 * if the disable is on adminstrative request. The
1484 * graph engine sends _DISABLE events whenever a
1485 * service isn't in the disabled state, and we don't
1486 * want to pull the service out of maintenance if,
1487 * for example, it is there due to a dependency cycle.
1488 */
1489 if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1490 unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1491 break;
1492
1493 default:
1494 #ifndef NDEBUG
1495 (void) fprintf(stderr, "Restarter instance %s has "
1496 "unknown state %d.\n", inst->ri_i.i_fmri, state);
1497 #endif
1498 abort();
1499 }
1500 }
1501
1502 return (0);
1503 }
1504
1505 static void
start_instance(scf_handle_t * local_handle,restarter_inst_t * inst,int32_t reason)1506 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1507 int32_t reason)
1508 {
1509 fork_info_t *info;
1510 restarter_str_t new_reason;
1511
1512 assert(MUTEX_HELD(&inst->ri_lock));
1513 assert(instance_in_transition(inst) == 0);
1514 assert(inst->ri_method_thread == 0);
1515
1516 log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1517 inst->ri_i.i_fmri);
1518
1519 /*
1520 * We want to keep the original reason for restarts and clear actions
1521 */
1522 switch (reason) {
1523 case restarter_str_restart_request:
1524 case restarter_str_clear_request:
1525 new_reason = reason;
1526 break;
1527 default:
1528 new_reason = restarter_str_dependencies_satisfied;
1529 }
1530
1531 /* Services in the disabled and maintenance state are ignored */
1532 if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1533 inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1534 inst->ri_i.i_enabled == 0) {
1535 log_framework(LOG_DEBUG,
1536 "%s: start_instance -> is maint/disabled\n",
1537 inst->ri_i.i_fmri);
1538 return;
1539 }
1540
1541 /* Already started instances are left alone */
1542 if (instance_started(inst) == 1) {
1543 log_framework(LOG_DEBUG,
1544 "%s: start_instance -> is already started\n",
1545 inst->ri_i.i_fmri);
1546 return;
1547 }
1548
1549 log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1550
1551 (void) restarter_instance_update_states(local_handle, inst,
1552 inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1553
1554 info = startd_zalloc(sizeof (fork_info_t));
1555
1556 info->sf_id = inst->ri_id;
1557 info->sf_method_type = METHOD_START;
1558 info->sf_event_type = RERR_NONE;
1559 info->sf_reason = new_reason;
1560 inst->ri_method_thread = startd_thread_create(method_thread, info);
1561 }
1562
1563 static int
event_from_tty(scf_handle_t * h,restarter_inst_t * rip)1564 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1565 {
1566 scf_instance_t *inst;
1567 int ret = 0;
1568
1569 if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1570 return (-1);
1571
1572 ret = restarter_inst_ractions_from_tty(inst);
1573
1574 scf_instance_destroy(inst);
1575 return (ret);
1576 }
1577
1578 static boolean_t
restart_dump(scf_handle_t * h,restarter_inst_t * rip)1579 restart_dump(scf_handle_t *h, restarter_inst_t *rip)
1580 {
1581 scf_instance_t *inst;
1582 boolean_t ret = B_FALSE;
1583
1584 if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1585 return (-1);
1586
1587 if (restarter_inst_dump(inst) == 1)
1588 ret = B_TRUE;
1589
1590 scf_instance_destroy(inst);
1591 return (ret);
1592 }
1593
/*
 * maintain_instance()
 *   Transition rip to the maintenance state because of 'reason'.  When
 *   'immediate' is nonzero, or when the instance isn't running, the state
 *   change is made directly and any leftover primary contract is removed.
 *   Otherwise the instance is marked as transitioning to maintenance and a
 *   stop method thread is launched to bring it down first.
 *
 *   Caller must hold rip->ri_lock; no method thread may be running.
 */
static void
maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
    restarter_str_t reason)
{
	fork_info_t *info;
	scf_instance_t *scf_inst = NULL;

	assert(MUTEX_HELD(&rip->ri_lock));
	assert(reason != restarter_str_none);
	assert(rip->ri_method_thread == 0);

	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
	    restarter_get_str_short(reason));
	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
	    rip->ri_i.i_fmri, restarter_get_str_short(reason));

	/* Services in the maintenance state are ignored */
	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
		log_framework(LOG_DEBUG,
		    "%s: maintain_instance -> is already in maintenance\n",
		    rip->ri_i.i_fmri);
		return;
	}

	/*
	 * If reason state is restarter_str_service_request and
	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
	 */
	if (reason == restarter_str_service_request &&
	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
			if (restarter_inst_set_aux_fmri(scf_inst))
				log_framework(LOG_DEBUG, "%s: "
				    "restarter_inst_set_aux_fmri failed: ",
				    rip->ri_i.i_fmri);
		} else {
			log_framework(LOG_DEBUG, "%s: "
			    "restarter_inst_validate_ractions_aux_fmri "
			    "failed: ", rip->ri_i.i_fmri);

			/* Invalid aux fmri: clear any stale value. */
			if (restarter_inst_reset_aux_fmri(scf_inst))
				log_framework(LOG_DEBUG, "%s: "
				    "restarter_inst_reset_aux_fmri failed: ",
				    rip->ri_i.i_fmri);
		}
		scf_instance_destroy(scf_inst);
	}

	if (immediate || !instance_started(rip)) {
		/*
		 * Going to maintenance directly: remove any leftover
		 * primary contract first.
		 */
		if (rip->ri_i.i_primary_ctid != 0) {
			rip->ri_m_inst = safe_scf_instance_create(h);
			rip->ri_mi_deleted = B_FALSE;

			libscf_reget_instance(rip);
			method_remove_contract(rip, B_TRUE, B_TRUE);

			scf_instance_destroy(rip->ri_m_inst);
		}

		(void) restarter_instance_update_states(h, rip,
		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
		    reason);
		return;
	}

	/* Mark as heading to maintenance; the stop method runs first. */
	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
	    RESTARTER_STATE_MAINT, RERR_NONE, reason);

	log_transition(rip, MAINT_REQUESTED);

	info = startd_zalloc(sizeof (*info));
	info->sf_id = rip->ri_id;
	info->sf_method_type = METHOD_STOP;
	info->sf_event_type = RERR_RESTART;
	info->sf_reason = reason;
	rip->ri_method_thread = startd_thread_create(method_thread, info);
}
1672
/*
 * refresh_instance()
 *   Reread rip's configuration (the startd properties from the running
 *   snapshot) and, if the instance is currently running, launch a refresh
 *   method thread.  A refresh does not change the instance's state.
 *   Repository connection aborts are handled by rebinding and retrying.
 *
 *   Caller must hold rip->ri_lock.
 */
static void
refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
{
	scf_instance_t *inst;
	scf_snapshot_t *snap;
	fork_info_t *info;
	int r;

	assert(MUTEX_HELD(&rip->ri_lock));

	log_instance(rip, B_TRUE, "Rereading configuration.");
	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
	    rip->ri_i.i_fmri);

rep_retry:
	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
	switch (r) {
	case 0:
		break;

	case ECONNABORTED:
		libscf_handle_rebind(h);
		goto rep_retry;

	case ENOENT:
		/* Must have been deleted. */
		return;

	case EINVAL:
	case ENOTSUP:
	default:
		bad_error("libscf_fmri_get_instance", r);
	}

	snap = libscf_get_running_snapshot(inst);

	/* Refresh the cached flags (style, etc.) and utmpx prefix. */
	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
	    &rip->ri_utmpx_prefix);
	switch (r) {
	case 0:
		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
		break;

	case ECONNABORTED:
		scf_instance_destroy(inst);
		scf_snapshot_destroy(snap);
		libscf_handle_rebind(h);
		goto rep_retry;

	case ECANCELED:
	case ENOENT:
		/* Succeed in anticipation of REMOVE_INSTANCE. */
		break;

	default:
		bad_error("libscf_get_startd_properties", r);
	}

	if (instance_started(rip)) {
		/* Refresh does not change the state. */
		(void) restarter_instance_update_states(h, rip,
		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
		    restarter_str_refresh);

		/* Run the refresh method in its own thread. */
		info = startd_zalloc(sizeof (*info));
		info->sf_id = rip->ri_id;
		info->sf_method_type = METHOD_REFRESH;
		info->sf_event_type = RERR_REFRESH;
		info->sf_reason = 0;

		assert(rip->ri_method_thread == 0);
		rip->ri_method_thread =
		    startd_thread_create(method_thread, info);
	}

	scf_snapshot_destroy(snap);
	scf_instance_destroy(inst);
}
1752
/*
 * Printable names for restarter event types, used in log messages.  The
 * array is indexed directly by the event type value (event_names[riq_type]
 * / event_names[rpe_type]), so the entries are assumed to match the
 * restarter_event_type_t ordering -- keep them in sync when adding events.
 */
const char *event_names[] = { "INVALID", "ADD_INSTANCE", "REMOVE_INSTANCE",
	"ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
	"ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY", "ADMIN_DISABLE", "STOP_RESET"
};
1759
1760 /*
1761 * void *restarter_process_events()
1762 *
1763 * Called in a separate thread to process the events on an instance's
1764 * queue. Empties the queue completely, and tries to keep the thread
1765 * around for a little while after the queue is empty to save on
1766 * startup costs.
1767 */
1768 static void *
restarter_process_events(void * arg)1769 restarter_process_events(void *arg)
1770 {
1771 scf_handle_t *h;
1772 restarter_instance_qentry_t *event;
1773 restarter_inst_t *rip;
1774 char *fmri = (char *)arg;
1775 struct timespec to;
1776
1777 (void) pthread_setname_np(pthread_self(), "restarter_process_events");
1778
1779 assert(fmri != NULL);
1780
1781 h = libscf_handle_create_bound_loop();
1782
1783 /* grab the queue lock */
1784 rip = inst_lookup_queue(fmri);
1785 if (rip == NULL)
1786 goto out;
1787
1788 again:
1789
1790 while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1791 restarter_inst_t *inst;
1792
1793 /* drop the queue lock */
1794 MUTEX_UNLOCK(&rip->ri_queue_lock);
1795
1796 /*
1797 * Grab the inst lock -- this waits until any outstanding
1798 * method finishes running.
1799 */
1800 inst = inst_lookup_by_name(fmri);
1801 if (inst == NULL) {
1802 /* Getting deleted in the middle isn't an error. */
1803 goto cont;
1804 }
1805
1806 assert(instance_in_transition(inst) == 0);
1807
1808 /* process the event */
1809 switch (event->riq_type) {
1810 case RESTARTER_EVENT_TYPE_ENABLE:
1811 case RESTARTER_EVENT_TYPE_DISABLE:
1812 (void) enable_inst(h, inst, event);
1813 break;
1814
1815 case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1816 if (enable_inst(h, inst, event) == 0)
1817 reset_start_times(inst);
1818 break;
1819
1820 case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1821 restarter_delete_inst(inst);
1822 inst = NULL;
1823 goto cont;
1824
1825 case RESTARTER_EVENT_TYPE_STOP_RESET:
1826 reset_start_times(inst);
1827 /* FALLTHROUGH */
1828 case RESTARTER_EVENT_TYPE_STOP:
1829 (void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1830 break;
1831
1832 case RESTARTER_EVENT_TYPE_START:
1833 start_instance(h, inst, event->riq_reason);
1834 break;
1835
1836 case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1837 maintain_instance(h, inst, 0,
1838 restarter_str_dependency_cycle);
1839 break;
1840
1841 case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1842 maintain_instance(h, inst, 0,
1843 restarter_str_invalid_dependency);
1844 break;
1845
1846 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1847 if (event_from_tty(h, inst) == 0)
1848 maintain_instance(h, inst, 0,
1849 restarter_str_service_request);
1850 else
1851 maintain_instance(h, inst, 0,
1852 restarter_str_administrative_request);
1853 break;
1854
1855 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1856 if (event_from_tty(h, inst) == 0)
1857 maintain_instance(h, inst, 1,
1858 restarter_str_service_request);
1859 else
1860 maintain_instance(h, inst, 1,
1861 restarter_str_administrative_request);
1862 break;
1863
1864 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1865 unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1866 reset_start_times(inst);
1867 break;
1868
1869 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1870 refresh_instance(h, inst);
1871 break;
1872
1873 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1874 log_framework(LOG_WARNING, "Restarter: "
1875 "%s command (for %s) unimplemented.\n",
1876 event_names[event->riq_type], inst->ri_i.i_fmri);
1877 break;
1878
1879 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1880 if (!instance_started(inst)) {
1881 log_framework(LOG_DEBUG, "Restarter: "
1882 "Not restarting %s; not running.\n",
1883 inst->ri_i.i_fmri);
1884 } else {
1885 /*
1886 * Stop the instance. If it can be restarted,
1887 * the graph engine will send a new event.
1888 */
1889 if (restart_dump(h, inst)) {
1890 (void) contract_kill(
1891 inst->ri_i.i_primary_ctid, SIGABRT,
1892 inst->ri_i.i_fmri);
1893 } else if (stop_instance(h, inst,
1894 RSTOP_RESTART) == 0) {
1895 reset_start_times(inst);
1896 }
1897 }
1898 break;
1899
1900 case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1901 default:
1902 #ifndef NDEBUG
1903 uu_warn("%s:%d: Bad restarter event %d. "
1904 "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1905 #endif
1906 abort();
1907 }
1908
1909 assert(inst != NULL);
1910 MUTEX_UNLOCK(&inst->ri_lock);
1911
1912 cont:
1913 /* grab the queue lock */
1914 rip = inst_lookup_queue(fmri);
1915 if (rip == NULL)
1916 goto out;
1917
1918 /* delete the event */
1919 uu_list_remove(rip->ri_queue, event);
1920 startd_free(event, sizeof (restarter_instance_qentry_t));
1921 }
1922
1923 assert(rip != NULL);
1924
1925 /*
1926 * Try to preserve the thread for a little while for future use.
1927 */
1928 to.tv_sec = 3;
1929 to.tv_nsec = 0;
1930 (void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1931 &rip->ri_queue_lock, &to);
1932
1933 if (uu_list_first(rip->ri_queue) != NULL)
1934 goto again;
1935
1936 rip->ri_queue_thread = 0;
1937 MUTEX_UNLOCK(&rip->ri_queue_lock);
1938
1939 out:
1940 (void) scf_handle_unbind(h);
1941 scf_handle_destroy(h);
1942 free(fmri);
1943 return (NULL);
1944 }
1945
1946 static int
is_admin_event(restarter_event_type_t t)1947 is_admin_event(restarter_event_type_t t)
1948 {
1949 switch (t) {
1950 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1951 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1952 case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1953 case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1954 case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1955 case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1956 return (1);
1957 default:
1958 return (0);
1959 }
1960 }
1961
1962 static void
restarter_queue_event(restarter_inst_t * ri,restarter_protocol_event_t * e)1963 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1964 {
1965 restarter_instance_qentry_t *qe;
1966 int r;
1967
1968 assert(MUTEX_HELD(&ri->ri_queue_lock));
1969 assert(!MUTEX_HELD(&ri->ri_lock));
1970
1971 qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1972 qe->riq_type = e->rpe_type;
1973 qe->riq_reason = e->rpe_reason;
1974
1975 uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1976 r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1977 assert(r == 0);
1978 }
1979
1980 /*
1981 * void *restarter_event_thread()
1982 *
1983 * Handle incoming graph events by placing them on a per-instance
1984 * queue. We can't lock the main part of the instance structure, so
1985 * just modify the seprarately locked event queue portion.
1986 */
1987 /*ARGSUSED*/
1988 static void *
restarter_event_thread(void * unused)1989 restarter_event_thread(void *unused)
1990 {
1991 scf_handle_t *h;
1992
1993 (void) pthread_setname_np(pthread_self(), "restarter_event");
1994
1995 /*
1996 * This is a new thread, and thus, gets its own handle
1997 * to the repository.
1998 */
1999 h = libscf_handle_create_bound_loop();
2000
2001 MUTEX_LOCK(&ru->restarter_update_lock);
2002
2003 /*CONSTCOND*/
2004 while (1) {
2005 restarter_protocol_event_t *e;
2006
2007 while (ru->restarter_update_wakeup == 0)
2008 (void) pthread_cond_wait(&ru->restarter_update_cv,
2009 &ru->restarter_update_lock);
2010
2011 ru->restarter_update_wakeup = 0;
2012
2013 while ((e = restarter_event_dequeue()) != NULL) {
2014 restarter_inst_t *rip;
2015 char *fmri;
2016
2017 MUTEX_UNLOCK(&ru->restarter_update_lock);
2018
2019 /*
2020 * ADD_INSTANCE is special: there's likely no
2021 * instance structure yet, so we need to handle the
2022 * addition synchronously.
2023 */
2024 switch (e->rpe_type) {
2025 case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
2026 if (restarter_insert_inst(h, e->rpe_inst) != 0)
2027 log_error(LOG_INFO, "Restarter: "
2028 "Could not add %s.\n", e->rpe_inst);
2029
2030 MUTEX_LOCK(&st->st_load_lock);
2031 if (--st->st_load_instances == 0)
2032 (void) pthread_cond_broadcast(
2033 &st->st_load_cv);
2034 MUTEX_UNLOCK(&st->st_load_lock);
2035
2036 goto nolookup;
2037 }
2038
2039 /*
2040 * Lookup the instance, locking only the event queue.
2041 * Can't grab ri_lock here because it might be held
2042 * by a long-running method.
2043 */
2044 rip = inst_lookup_queue(e->rpe_inst);
2045 if (rip == NULL) {
2046 log_error(LOG_INFO, "Restarter: "
2047 "Ignoring %s command for unknown service "
2048 "%s.\n", event_names[e->rpe_type],
2049 e->rpe_inst);
2050 goto nolookup;
2051 }
2052
2053 /* Keep ADMIN events from filling up the queue. */
2054 if (is_admin_event(e->rpe_type) &&
2055 uu_list_numnodes(rip->ri_queue) >
2056 RINST_QUEUE_THRESHOLD) {
2057 MUTEX_UNLOCK(&rip->ri_queue_lock);
2058 log_instance(rip, B_TRUE, "Instance event "
2059 "queue overflow. Dropping administrative "
2060 "request.");
2061 log_framework(LOG_DEBUG, "%s: Instance event "
2062 "queue overflow. Dropping administrative "
2063 "request.\n", rip->ri_i.i_fmri);
2064 goto nolookup;
2065 }
2066
2067 /* Now add the event to the instance queue. */
2068 restarter_queue_event(rip, e);
2069
2070 if (rip->ri_queue_thread == 0) {
2071 /*
2072 * Start a thread if one isn't already
2073 * running.
2074 */
2075 fmri = safe_strdup(e->rpe_inst);
2076 rip->ri_queue_thread = startd_thread_create(
2077 restarter_process_events, (void *)fmri);
2078 } else {
2079 /*
2080 * Signal the existing thread that there's
2081 * a new event.
2082 */
2083 (void) pthread_cond_broadcast(
2084 &rip->ri_queue_cv);
2085 }
2086
2087 MUTEX_UNLOCK(&rip->ri_queue_lock);
2088 nolookup:
2089 restarter_event_release(e);
2090
2091 MUTEX_LOCK(&ru->restarter_update_lock);
2092 }
2093 }
2094 }
2095
2096 static restarter_inst_t *
contract_to_inst(ctid_t ctid)2097 contract_to_inst(ctid_t ctid)
2098 {
2099 restarter_inst_t *inst;
2100 int id;
2101
2102 id = lookup_inst_by_contract(ctid);
2103 if (id == -1)
2104 return (NULL);
2105
2106 inst = inst_lookup_by_id(id);
2107 if (inst != NULL) {
2108 /*
2109 * Since ri_lock isn't held by the contract id lookup, this
2110 * instance may have been restarted and now be in a new
2111 * contract, making the old contract no longer valid for this
2112 * instance.
2113 */
2114 if (ctid != inst->ri_i.i_primary_ctid) {
2115 MUTEX_UNLOCK(&inst->ri_lock);
2116 inst = NULL;
2117 }
2118 }
2119 return (inst);
2120 }
2121
2122 /*
2123 * void contract_action()
2124 * Take action on contract events.
2125 */
2126 static void
contract_action(scf_handle_t * h,restarter_inst_t * inst,ctid_t id,uint32_t type)2127 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2128 uint32_t type)
2129 {
2130 const char *fmri = inst->ri_i.i_fmri;
2131
2132 assert(MUTEX_HELD(&inst->ri_lock));
2133
2134 /*
2135 * If startd has stopped this contract, there is no need to
2136 * stop it again.
2137 */
2138 if (inst->ri_i.i_primary_ctid > 0 &&
2139 inst->ri_i.i_primary_ctid_stopped)
2140 return;
2141
2142 if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2143 | CT_PR_EV_HWERR)) == 0) {
2144 /*
2145 * There shouldn't be other events, since that's not how we set
2146 * the terms. Thus, just log an error and drive on.
2147 */
2148 log_framework(LOG_NOTICE,
2149 "%s: contract %ld received unexpected critical event "
2150 "(%d)\n", fmri, id, type);
2151 return;
2152 }
2153
2154 assert(instance_in_transition(inst) == 0);
2155
2156 if (instance_is_wait_style(inst)) {
2157 /*
2158 * We ignore all events; if they impact the
2159 * process we're monitoring, then the
2160 * wait_thread will stop the instance.
2161 */
2162 log_framework(LOG_DEBUG,
2163 "%s: ignoring contract event on wait-style service\n",
2164 fmri);
2165 } else {
2166 /*
2167 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2168 */
2169 switch (type) {
2170 case CT_PR_EV_EMPTY:
2171 (void) stop_instance(h, inst, RSTOP_EXIT);
2172 break;
2173 case CT_PR_EV_CORE:
2174 (void) stop_instance(h, inst, RSTOP_CORE);
2175 break;
2176 case CT_PR_EV_SIGNAL:
2177 (void) stop_instance(h, inst, RSTOP_SIGNAL);
2178 break;
2179 case CT_PR_EV_HWERR:
2180 (void) stop_instance(h, inst, RSTOP_HWERR);
2181 break;
2182 }
2183 }
2184 }
2185
2186 /*
2187 * void *restarter_contract_event_thread(void *)
2188 * Listens to the process contract bundle for critical events, taking action
2189 * on events from contracts we know we are responsible for.
2190 */
/*ARGSUSED*/
static void *
restarter_contracts_event_thread(void *unused)
{
	int fd, err;
	scf_handle_t *local_handle;

	(void) pthread_setname_np(pthread_self(), "restarter_contracts_event");

	/*
	 * Await graph load completion. That is, stop here, until we've scanned
	 * the repository for contract - instance associations.
	 */
	MUTEX_LOCK(&st->st_load_lock);
	while (!(st->st_load_complete && st->st_load_instances == 0))
		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
	MUTEX_UNLOCK(&st->st_load_lock);

	/*
	 * This is a new thread, and thus, gets its own handle
	 * to the repository.
	 */
	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
		uu_die("Unable to bind a new repository handle: %s\n",
		    scf_strerror(scf_error()));

	/* The bundle endpoint delivers events for all process contracts. */
	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
	if (fd == -1)
		uu_die("process bundle open failed");

	/*
	 * Make sure we get all events (including those generated by configd
	 * before this thread was started).
	 */
	err = ct_event_reset(fd);
	assert(err == 0);

	for (;;) {
		int efd, sfd;
		ct_evthdl_t ev;
		uint32_t type;
		ctevid_t evid;
		ct_stathdl_t status;
		ctid_t ctid;
		restarter_inst_t *inst;
		uint64_t cookie;

		/* Intentional assignment-in-condition: nonzero is an errno. */
		if (err = ct_event_read_critical(fd, &ev)) {
			log_error(LOG_WARNING,
			    "Error reading next contract event: %s",
			    strerror(err));
			continue;
		}

		evid = ct_event_get_evid(ev);
		ctid = ct_event_get_ctid(ev);
		type = ct_event_get_type(ev);

		/* Fetch cookie. */
		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
		    < 0) {
			ct_event_free(ev);
			continue;
		}

		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
			log_framework(LOG_WARNING, "Could not get status for "
			    "contract %ld: %s\n", ctid, strerror(err));

			startd_close(sfd);
			ct_event_free(ev);
			continue;
		}

		cookie = ct_status_get_cookie(status);

		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
		    "cookie %lld\n", type, ctid, cookie);

		ct_status_free(status);

		startd_close(sfd);

		/*
		 * svc.configd(8) restart handling performed by the
		 * fork_configd_thread. We don't acknowledge, as that thread
		 * will do so.
		 */
		if (cookie == CONFIGD_COOKIE) {
			ct_event_free(ev);
			continue;
		}

		inst = NULL;
		if (storing_contract != 0 &&
		    (inst = contract_to_inst(ctid)) == NULL) {
			/*
			 * This can happen for two reasons:
			 * - method_run() has not yet stored the
			 *   the contract into the internal hash table.
			 * - we receive an EMPTY event for an abandoned
			 *   contract.
			 * If there is any contract in the process of
			 * being stored into the hash table then re-read
			 * the event later.
			 */
			log_framework(LOG_DEBUG,
			    "Reset event %d for unknown "
			    "contract id %ld\n", type, ctid);

			/* don't go too fast */
			(void) poll(NULL, 0, 100);

			/* Rewind so this event will be delivered again. */
			(void) ct_event_reset(fd);
			ct_event_free(ev);
			continue;
		}

		/*
		 * Do not call contract_to_inst() again if first
		 * call succeeded.
		 */
		if (inst == NULL)
			inst = contract_to_inst(ctid);
		if (inst == NULL) {
			/*
			 * This can happen if we receive an EMPTY
			 * event for an abandoned contract.
			 */
			log_framework(LOG_DEBUG,
			    "Received event %d for unknown contract id "
			    "%ld\n", type, ctid);
		} else {
			log_framework(LOG_DEBUG,
			    "Received event %d for contract id "
			    "%ld (%s)\n", type, ctid,
			    inst->ri_i.i_fmri);

			contract_action(local_handle, inst, ctid, type);

			/*
			 * contract_to_inst() returned inst with ri_lock
			 * held; release it now that we've acted.
			 */
			MUTEX_UNLOCK(&inst->ri_lock);
		}

		/*
		 * Acknowledge the critical event so the contract subsystem
		 * does not redeliver it.
		 */
		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
		    O_WRONLY);
		if (efd != -1) {
			(void) ct_ctl_ack(efd, evid);
			startd_close(efd);
		}

		ct_event_free(ev);

	}

	/*NOTREACHED*/
	return (NULL);
}
2348
2349 /*
2350 * Timeout queue, processed by restarter_timeouts_event_thread().
2351 */
2352 timeout_queue_t *timeouts;
2353 static uu_list_pool_t *timeout_pool;
2354
2355 typedef struct timeout_update {
2356 pthread_mutex_t tu_lock;
2357 pthread_cond_t tu_cv;
2358 int tu_wakeup;
2359 } timeout_update_t;
2360
2361 timeout_update_t *tu;
2362
2363 static const char *timeout_ovr_svcs[] = {
2364 "svc:/system/manifest-import:default",
2365 "svc:/network/initial:default",
2366 "svc:/network/service:default",
2367 "svc:/system/rmtmpfiles:default",
2368 "svc:/network/loopback:default",
2369 "svc:/network/physical:default",
2370 "svc:/system/device/local:default",
2371 "svc:/system/filesystem/usr:default",
2372 "svc:/system/filesystem/minimal:default",
2373 "svc:/system/filesystem/local:default",
2374 NULL
2375 };
2376
2377 int
is_timeout_ovr(restarter_inst_t * inst)2378 is_timeout_ovr(restarter_inst_t *inst)
2379 {
2380 int i;
2381
2382 for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2383 if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2384 log_instance(inst, B_TRUE, "Timeout override by "
2385 "svc.startd. Using infinite timeout.");
2386 return (1);
2387 }
2388 }
2389
2390 return (0);
2391 }
2392
2393 /*ARGSUSED*/
2394 static int
timeout_compare(const void * lc_arg,const void * rc_arg,void * private)2395 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2396 {
2397 hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2398 hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2399
2400 if (t1 > t2)
2401 return (1);
2402 else if (t1 < t2)
2403 return (-1);
2404 return (0);
2405 }
2406
/*
 * void timeout_init()
 *   Set up the global timeout queue (a uu_list kept sorted by deadline
 *   via timeout_compare()) and the wakeup channel used to rouse the
 *   timeout thread.  Must run before any timeouts are inserted.
 */
void
timeout_init()
{
	timeouts = startd_zalloc(sizeof (timeout_queue_t));

	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);

	timeout_pool = startd_list_pool_create("timeouts",
	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
	    timeout_compare, UU_LIST_POOL_DEBUG);
	assert(timeout_pool != NULL);

	/* Sorted list keeps the earliest deadline at the head. */
	timeouts->tq_list = startd_list_create(timeout_pool,
	    timeouts, UU_LIST_SORTED);
	assert(timeouts->tq_list != NULL);

	tu = startd_zalloc(sizeof (timeout_update_t));
	(void) pthread_cond_init(&tu->tu_cv, NULL);
	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
}
2427
/*
 * void timeout_insert(restarter_inst_t *, ctid_t, uint64_t)
 *   Arm a timeout of timeout_sec seconds for the instance's contract cid
 *   and wake the timeout thread.  A timeout_sec large enough to overflow
 *   the hrtime_t deadline is treated as infinite: nothing is queued and
 *   inst->ri_timeout stays NULL.  Caller must hold inst->ri_lock.
 */
void
timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
{
	hrtime_t now, timeout;
	timeout_entry_t *entry;
	uu_list_index_t idx;

	assert(MUTEX_HELD(&inst->ri_lock));

	now = gethrtime();

	/*
	 * If we overflow LLONG_MAX, we're never timing out anyways, so
	 * just return.
	 */
	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
		log_instance(inst, B_TRUE, "timeout_seconds too large, "
		    "treating as infinite.");
		return;
	}

	/* hrtime is in nanoseconds. Convert timeout_sec. */
	timeout = now + (timeout_sec * 1000000000LL);

	entry = startd_alloc(sizeof (timeout_entry_t));
	entry->te_timeout = timeout;
	entry->te_ctid = cid;
	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
	entry->te_logstem = safe_strdup(inst->ri_logstem);
	entry->te_fired = 0;
	/* Insert the calculated timeout time onto the queue. */
	MUTEX_LOCK(&timeouts->tq_lock);
	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
	uu_list_node_init(entry, &entry->te_link, timeout_pool);
	uu_list_insert(timeouts->tq_list, entry, idx);
	MUTEX_UNLOCK(&timeouts->tq_lock);

	/* At most one outstanding timeout per instance. */
	assert(inst->ri_timeout == NULL);
	inst->ri_timeout = entry;

	/* Wake the timeout thread, which may be blocked on an empty queue. */
	MUTEX_LOCK(&tu->tu_lock);
	tu->tu_wakeup = 1;
	(void) pthread_cond_broadcast(&tu->tu_cv);
	MUTEX_UNLOCK(&tu->tu_lock);
}
2473
2474
2475 void
timeout_remove(restarter_inst_t * inst,ctid_t cid)2476 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2477 {
2478 assert(MUTEX_HELD(&inst->ri_lock));
2479
2480 if (inst->ri_timeout == NULL)
2481 return;
2482
2483 assert(inst->ri_timeout->te_ctid == cid);
2484
2485 MUTEX_LOCK(&timeouts->tq_lock);
2486 uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2487 MUTEX_UNLOCK(&timeouts->tq_lock);
2488
2489 free(inst->ri_timeout->te_fmri);
2490 free(inst->ri_timeout->te_logstem);
2491 startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2492 inst->ri_timeout = NULL;
2493 }
2494
/*
 * static int timeout_now()
 *   Kill the contract of every queued timeout whose deadline has passed.
 *   Expired entries stay on the queue until timeout_remove() is called,
 *   so their contracts are re-killed on each subsequent pass.  Returns 0
 *   if the queue still has entries to watch, -1 if it is empty.
 */
static int
timeout_now()
{
	timeout_entry_t *e;
	hrtime_t now;
	int ret;

	now = gethrtime();

	/*
	 * Walk through the (sorted) timeouts list. While the timeout
	 * at the head of the list is <= the current time, kill the
	 * method.
	 */
	MUTEX_LOCK(&timeouts->tq_lock);

	for (e = uu_list_first(timeouts->tq_list);
	    e != NULL && e->te_timeout <= now;
	    e = uu_list_next(timeouts->tq_list, e)) {
		log_framework(LOG_WARNING, "%s: Method or service exit timed "
		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
		    "Method or service exit timed out.  Killing contract %ld.",
		    e->te_ctid);
		/* Note that this entry fired (consulted elsewhere). */
		e->te_fired = 1;
		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
	}

	if (uu_list_numnodes(timeouts->tq_list) > 0)
		ret = 0;
	else
		ret = -1;

	MUTEX_UNLOCK(&timeouts->tq_lock);

	return (ret);
}
2532
2533 /*
2534 * void *restarter_timeouts_event_thread(void *)
2535 * Responsible for monitoring the method timeouts. This thread must
2536 * be started before any methods are called.
2537 */
2538 /*ARGSUSED*/
2539 static void *
restarter_timeouts_event_thread(void * unused)2540 restarter_timeouts_event_thread(void *unused)
2541 {
2542 /*
2543 * Timeouts are entered on a priority queue, which is processed by
2544 * this thread. As timeouts are specified in seconds, we'll do
2545 * the necessary processing every second, as long as the queue
2546 * is not empty.
2547 */
2548
2549 (void) pthread_setname_np(pthread_self(), "restarter_timeouts_event");
2550
2551 /*CONSTCOND*/
2552 while (1) {
2553 /*
2554 * As long as the timeout list isn't empty, process it
2555 * every second.
2556 */
2557 if (timeout_now() == 0) {
2558 (void) sleep(1);
2559 continue;
2560 }
2561
2562 /* The list is empty, wait until we have more timeouts. */
2563 MUTEX_LOCK(&tu->tu_lock);
2564
2565 while (tu->tu_wakeup == 0)
2566 (void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2567
2568 tu->tu_wakeup = 0;
2569 MUTEX_UNLOCK(&tu->tu_lock);
2570 }
2571
2572 return (NULL);
2573 }
2574
2575 void
restarter_start()2576 restarter_start()
2577 {
2578 (void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2579 (void) startd_thread_create(restarter_event_thread, NULL);
2580 (void) startd_thread_create(restarter_contracts_event_thread, NULL);
2581 (void) startd_thread_create(wait_thread, NULL);
2582 }
2583
2584
/*
 * void restarter_init()
 *   Set up the restarter's global state: the sorted instance list and
 *   its backing uu_list pool, the per-instance event queue pool, the
 *   contract list pool, and the ctid -> instance hash table.  Called
 *   once during startd initialization.
 */
void
restarter_init()
{
	restarter_instance_pool = startd_list_pool_create("restarter_instances",
	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
	(void) memset(&instance_list, 0, sizeof (instance_list));

	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
	instance_list.ril_instance_list = startd_list_create(
	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);

	restarter_queue_pool = startd_list_pool_create(
	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
	    offsetof(restarter_instance_qentry_t, riq_link), NULL,
	    UU_LIST_POOL_DEBUG);

	contract_list_pool = startd_list_pool_create(
	    "contract_list", sizeof (contract_entry_t),
	    offsetof(contract_entry_t, ce_link), NULL,
	    UU_LIST_POOL_DEBUG);
	contract_hash_init();

	log_framework(LOG_DEBUG, "Initialized restarter\n");
}
2610