xref: /illumos-gate/usr/src/cmd/svc/startd/transition.c (revision 1fa2a66491e7d8ae0be84e7da4da8e812480c710)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Copyright 2016-2018 RackTop Systems.
26  */
27 
28 
29 /*
30  * transition.c - Graph State Machine
31  *
32  * The graph state machine is implemented here, with a typical approach
33  * of a function per state.  Separating the implementation allows more
34  * clarity into the actions taken on notification of state change, as well
35  * as a place for future expansion including hooks for configurable actions.
36  * All functions are called with dgraph_lock held.
37  *
38  * The start action for this state machine is not explicit.  The states
39  * (ONLINE and DEGRADED) which need to know when they're entering the state
40  * due to a daemon restart implement this understanding by checking for
41  * transition from uninitialized.  In the future, this would likely be better
42  * as an explicit start action instead of relying on an overloaded transition.
43  *
44  * All gt_enter functions use the same set of return codes.
45  *    0              success
46  *    ECONNABORTED   repository connection aborted
47  */
48 
49 #include "startd.h"
50 
51 static int
52 gt_running(restarter_instance_state_t state)
53 {
54 	if (state == RESTARTER_STATE_ONLINE ||
55 	    state == RESTARTER_STATE_DEGRADED)
56 		return (1);
57 
58 	return (0);
59 }
60 
61 static int
62 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
63     restarter_instance_state_t old_state, restarter_error_t rerr)
64 {
65 	int err;
66 	scf_instance_t *inst;
67 
68 	/* Initialize instance by refreshing it. */
69 
70 	err = libscf_fmri_get_instance(h, v->gv_name, &inst);
71 	switch (err) {
72 	case 0:
73 		break;
74 
75 	case ECONNABORTED:
76 		return (ECONNABORTED);
77 
78 	case ENOENT:
79 		return (0);
80 
81 	case EINVAL:
82 	case ENOTSUP:
83 	default:
84 		bad_error("libscf_fmri_get_instance", err);
85 	}
86 
87 	err = refresh_vertex(v, inst);
88 	if (err == 0)
89 		graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
90 
91 	scf_instance_destroy(inst);
92 
93 	/* If the service was running, propagate a stop event. */
94 	if (gt_running(old_state)) {
95 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
96 		    v->gv_name);
97 
98 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
99 	}
100 
101 	graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
102 	return (0);
103 }
104 
105 /* ARGSUSED */
106 static int
107 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
108     restarter_instance_state_t old_state, restarter_error_t rerr)
109 {
110 	int to_offline = v->gv_flags & GV_TOOFFLINE;
111 
112 	/*
113 	 * If the service was running, propagate a stop event.  If the
114 	 * service was not running the maintenance transition may satisfy
115 	 * optional dependencies and should be propagated to determine
116 	 * whether new dependents are satisfiable.
117 	 * Instances that transition to maintenance and have the GV_TOOFFLINE
118 	 * flag are special because they can expose new subtree leaves so
119 	 * propagate the offline to the instance dependencies.
120 	 */
121 
122 	/* instance transitioning to maintenance is considered disabled */
123 	v->gv_flags &= ~GV_TODISABLE;
124 	v->gv_flags &= ~GV_TOOFFLINE;
125 
126 	if (gt_running(old_state)) {
127 		/*
128 		 * Handle state change during instance disabling.
129 		 * Propagate offline to the new exposed leaves.
130 		 */
131 		if (to_offline) {
132 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
133 			    v->gv_name);
134 
135 			graph_offline_subtree_leaves(v, (void *)h);
136 		}
137 
138 		log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
139 		    "%s.\n", v->gv_name);
140 
141 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
142 
143 		/*
144 		 * The maintenance transition may satisfy optional_all/restart
145 		 * dependencies and should be propagated to determine
146 		 * whether new dependents are satisfiable.
147 		 */
148 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
149 	} else {
150 		log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
151 		    v->gv_name);
152 
153 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
154 	}
155 
156 	graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
157 	return (0);
158 }
159 
160 /* ARGSUSED */
161 static int
162 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
163     restarter_instance_state_t old_state, restarter_error_t rerr)
164 {
165 	int to_offline = v->gv_flags & GV_TOOFFLINE;
166 
167 	v->gv_flags &= ~GV_TOOFFLINE;
168 
169 	/*
170 	 * If the instance should be disabled send it a disable command.
171 	 * Otherwise, if GV_TOOFFLINE was not set, see if we can start it.
172 	 */
173 	if (v->gv_flags & GV_TODISABLE) {
174 		if (gt_running(old_state) && v->gv_post_disable_f)
175 			v->gv_post_disable_f();
176 
177 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
178 	} else if (v->gv_flags & GV_ENABLED) {
179 		if (to_offline == 0)
180 			graph_start_if_satisfied(v);
181 	}
182 
183 	/*
184 	 * If the service was running, propagate a stop event.  If the
185 	 * service was not running the offline transition may satisfy
186 	 * optional dependencies and should be propagated to determine
187 	 * whether new dependents are satisfiable.
188 	 * Instances that transition to offline and have the GV_TOOFFLINE flag
189 	 * are special because they can expose new subtree leaves so propagate
190 	 * the offline to the instance dependencies.
191 	 */
192 	if (gt_running(old_state)) {
193 		/*
194 		 * Handle state change during instance disabling.
195 		 * Propagate offline to the new exposed leaves.
196 		 */
197 		if (to_offline) {
198 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
199 			    v->gv_name);
200 
201 			graph_offline_subtree_leaves(v, (void *)h);
202 		}
203 
204 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
205 		    v->gv_name);
206 
207 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
208 
209 		/*
210 		 * The offline transition may satisfy require_any/restart
211 		 * dependencies and should be propagated to determine
212 		 * whether new dependents are satisfiable.
213 		 */
214 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
215 	} else {
216 		log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
217 		    v->gv_name);
218 
219 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
220 	}
221 
222 	graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
223 	return (0);
224 }
225 
226 /* ARGSUSED */
227 static int
228 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
229     restarter_instance_state_t old_state, restarter_error_t rerr)
230 {
231 	int to_offline = v->gv_flags & GV_TOOFFLINE;
232 
233 	v->gv_flags &= ~GV_TODISABLE;
234 	v->gv_flags &= ~GV_TOOFFLINE;
235 
236 	/*
237 	 * If the instance should be disabled, no problem.  Otherwise,
238 	 * send an enable command, which should result in the instance
239 	 * moving to OFFLINE unless the instance is part of a subtree
240 	 * (non root) and in this case the result is unpredictable.
241 	 */
242 	if (v->gv_flags & GV_ENABLED) {
243 		vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
244 	} else if (gt_running(old_state) && v->gv_post_disable_f) {
245 		v->gv_post_disable_f();
246 	}
247 
248 	/*
249 	 * If the service was running, propagate this as a stop.  If the
250 	 * service was not running the disabled transition may satisfy
251 	 * optional dependencies and should be propagated to determine
252 	 * whether new dependents are satisfiable.
253 	 */
254 	if (gt_running(old_state)) {
255 		/*
256 		 * We need to propagate the offline to new exposed leaves in
257 		 * case we've just disabled an instance that was part of a
258 		 * subtree.
259 		 */
260 		if (to_offline) {
261 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
262 			    v->gv_name);
263 
264 			/*
265 			 * Handle state change during instance disabling.
266 			 * Propagate offline to the new exposed leaves.
267 			 */
268 			graph_offline_subtree_leaves(v, (void *)h);
269 		}
270 
271 
272 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
273 		    v->gv_name);
274 
275 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
276 
277 		/*
278 		 * The disable transition may satisfy optional_all/restart
279 		 * dependencies and should be propagated to determine
280 		 * whether new dependents are satisfiable.
281 		 */
282 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
283 	} else {
284 		log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
285 		    v->gv_name);
286 
287 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
288 	}
289 
290 	graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
291 	return (0);
292 }
293 
294 static int
295 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
296     restarter_instance_state_t old_state, restarter_error_t rerr)
297 {
298 	int r;
299 
300 	/*
301 	 * If the instance has just come up, update the start
302 	 * snapshot.
303 	 */
304 	if (gt_running(old_state) == 0) {
305 		/*
306 		 * Don't fire if we're just recovering state
307 		 * after a restart.
308 		 */
309 		if (old_state != RESTARTER_STATE_UNINIT &&
310 		    v->gv_post_online_f)
311 			v->gv_post_online_f();
312 
313 		r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
314 		switch (r) {
315 		case 0:
316 		case ENOENT:
317 			/*
318 			 * If ENOENT, the instance must have been
319 			 * deleted.  Pretend we were successful since
320 			 * we should get a delete event later.
321 			 */
322 			break;
323 
324 		case ECONNABORTED:
325 			return (ECONNABORTED);
326 
327 		case EACCES:
328 		case ENOTSUP:
329 		default:
330 			bad_error("libscf_snapshots_poststart", r);
331 		}
332 	}
333 
334 	if (!(v->gv_flags & GV_ENABLED)) {
335 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
336 	} else if (v->gv_flags & GV_TOOFFLINE) {
337 		/*
338 		 * If the vertex has the GV_TOOFFLINE flag set then that's
339 		 * because the instance was transitioning from offline to
340 		 * online and the reverse disable algorithm doesn't offline
341 		 * those instances because it was already appearing offline.
342 		 * So do it now.
343 		 */
344 		offline_vertex(v);
345 	}
346 
347 	if (gt_running(old_state) == 0) {
348 		log_framework(LOG_DEBUG, "Propagating start of %s.\n",
349 		    v->gv_name);
350 
351 		graph_transition_propagate(v, PROPAGATE_START, rerr);
352 	} else if (rerr == RERR_REFRESH) {
353 		/* For refresh we'll get a message sans state change */
354 
355 		log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
356 		    v->gv_name);
357 
358 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
359 	}
360 
361 	return (0);
362 }
363 
364 static int
365 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
366     restarter_instance_state_t old_state, restarter_error_t rerr)
367 {
368 	int r;
369 
370 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
371 	if (r != 0)
372 		return (r);
373 
374 	graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
375 	return (0);
376 }
377 
378 static int
379 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
380     restarter_instance_state_t old_state, restarter_error_t rerr)
381 {
382 	int r;
383 
384 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
385 	if (r != 0)
386 		return (r);
387 
388 	graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
389 	return (0);
390 }
391 
392 /*
393  * gt_transition() implements the state transition for the graph
394  * state machine.  It can return:
395  *    0              success
396  *    ECONNABORTED   repository connection aborted
397  *
398  * v->gv_state should be set to the state we're transitioning to before
399  * calling this function.
400  */
401 int
402 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
403     restarter_instance_state_t old_state)
404 {
405 	int err;
406 	int lost_repository = 0;
407 
408 	/*
409 	 * If there's a common set of work to be done on exit from the
410 	 * old_state, include it as a separate set of functions here.  For
411 	 * now there's no such work, so there are no gt_exit functions.
412 	 */
413 
414 	err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
415 	switch (err) {
416 	case 0:
417 		break;
418 
419 	case ECONNABORTED:
420 		lost_repository = 1;
421 		break;
422 
423 	default:
424 		bad_error("vertex_subgraph_dependencies_shutdown", err);
425 	}
426 
427 	/*
428 	 * Now call the appropriate gt_enter function for the new state.
429 	 */
430 	switch (v->gv_state) {
431 	case RESTARTER_STATE_UNINIT:
432 		err = gt_enter_uninit(h, v, old_state, rerr);
433 		break;
434 
435 	case RESTARTER_STATE_DISABLED:
436 		err = gt_enter_disabled(h, v, old_state, rerr);
437 		break;
438 
439 	case RESTARTER_STATE_OFFLINE:
440 		err = gt_enter_offline(h, v, old_state, rerr);
441 		break;
442 
443 	case RESTARTER_STATE_ONLINE:
444 		err = gt_enter_online(h, v, old_state, rerr);
445 		break;
446 
447 	case RESTARTER_STATE_DEGRADED:
448 		err = gt_enter_degraded(h, v, old_state, rerr);
449 		break;
450 
451 	case RESTARTER_STATE_MAINT:
452 		err = gt_enter_maint(h, v, old_state, rerr);
453 		break;
454 
455 	default:
456 		/* Shouldn't be in an invalid state. */
457 #ifndef NDEBUG
458 		uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
459 		    v->gv_state);
460 #endif
461 		abort();
462 	}
463 
464 	switch (err) {
465 	case 0:
466 		break;
467 
468 	case ECONNABORTED:
469 		lost_repository = 1;
470 		break;
471 
472 	default:
473 #ifndef NDEBUG
474 		uu_warn("%s:%d: "
475 		    "gt_enter_%s() failed with unexpected error %d.\n",
476 		    __FILE__, __LINE__, instance_state_str[v->gv_state], err);
477 #endif
478 		abort();
479 	}
480 
481 	return (lost_repository ? ECONNABORTED : 0);
482 }
483