xref: /illumos-gate/usr/src/cmd/svc/startd/transition.c (revision 4eaa471005973e11a6110b69fe990530b3b95a38)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * transition.c - Graph State Machine
29  *
30  * The graph state machine is implemented here, with a typical approach
31  * of a function per state.  Separating the implementation allows more
32  * clarity into the actions taken on notification of state change, as well
33  * as a place for future expansion including hooks for configurable actions.
34  * All functions are called with dgraph_lock held.
35  *
36  * The start action for this state machine is not explicit.  The states
37  * (ONLINE and DEGRADED) which need to know when they're entering the state
38  * due to a daemon restart implement this understanding by checking for
39  * transition from uninitialized.  In the future, this would likely be better
40  * as an explicit start action instead of relying on an overloaded transition.
41  *
42  * All gt_enter functions use the same set of return codes.
43  *    0              success
44  *    ECONNABORTED   repository connection aborted
45  */
46 
47 #include "startd.h"
48 
49 static int
50 gt_running(restarter_instance_state_t state)
51 {
52 	if (state == RESTARTER_STATE_ONLINE ||
53 	    state == RESTARTER_STATE_DEGRADED)
54 		return (1);
55 
56 	return (0);
57 }
58 
59 static int
60 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
61     restarter_instance_state_t old_state, restarter_error_t rerr)
62 {
63 	int err;
64 	scf_instance_t *inst;
65 
66 	/* Initialize instance by refreshing it. */
67 
68 	err = libscf_fmri_get_instance(h, v->gv_name, &inst);
69 	switch (err) {
70 	case 0:
71 		break;
72 
73 	case ECONNABORTED:
74 		return (ECONNABORTED);
75 
76 	case ENOENT:
77 		return (0);
78 
79 	case EINVAL:
80 	case ENOTSUP:
81 	default:
82 		bad_error("libscf_fmri_get_instance", err);
83 	}
84 
85 	err = refresh_vertex(v, inst);
86 	if (err == 0)
87 		graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
88 
89 	scf_instance_destroy(inst);
90 
91 	/* If the service was running, propagate a stop event. */
92 	if (gt_running(old_state)) {
93 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
94 		    v->gv_name);
95 
96 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
97 	}
98 
99 	graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
100 	return (0);
101 }
102 
103 /* ARGSUSED */
104 static int
105 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
106     restarter_instance_state_t old_state, restarter_error_t rerr)
107 {
108 	int to_offline = v->gv_flags & GV_TOOFFLINE;
109 
110 	/*
111 	 * If the service was running, propagate a stop event.  If the
112 	 * service was not running the maintenance transition may satisfy
113 	 * optional dependencies and should be propagated to determine
114 	 * whether new dependents are satisfiable.
115 	 * Instances that transition to maintenance and have the GV_TOOFFLINE
116 	 * flag are special because they can expose new subtree leaves so
117 	 * propagate the offline to the instance dependencies.
118 	 */
119 
120 	/* instance transitioning to maintenance is considered disabled */
121 	v->gv_flags &= ~GV_TODISABLE;
122 	v->gv_flags &= ~GV_TOOFFLINE;
123 
124 	if (gt_running(old_state)) {
125 		/*
126 		 * Handle state change during instance disabling.
127 		 * Propagate offline to the new exposed leaves.
128 		 */
129 		if (to_offline) {
130 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
131 			    v->gv_name);
132 
133 			graph_offline_subtree_leaves(v, (void *)h);
134 		}
135 
136 		log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
137 		    "%s.\n", v->gv_name);
138 
139 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
140 	} else {
141 		log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
142 		    v->gv_name);
143 
144 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
145 	}
146 
147 	graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
148 	return (0);
149 }
150 
151 /* ARGSUSED */
152 static int
153 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
154     restarter_instance_state_t old_state, restarter_error_t rerr)
155 {
156 	int to_offline = v->gv_flags & GV_TOOFFLINE;
157 
158 	v->gv_flags &= ~GV_TOOFFLINE;
159 
160 	/*
161 	 * If the instance should be enabled, see if we can start it.
162 	 * Otherwise send a disable command.
163 	 * If a instance has the GV_TOOFFLINE flag set then it must
164 	 * remains offline until the disable process completes.
165 	 */
166 	if (v->gv_flags & GV_ENABLED) {
167 		if (to_offline == 0)
168 			graph_start_if_satisfied(v);
169 	} else {
170 		if (gt_running(old_state) && v->gv_post_disable_f)
171 			v->gv_post_disable_f();
172 
173 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
174 	}
175 
176 	/*
177 	 * If the service was running, propagate a stop event.  If the
178 	 * service was not running the offline transition may satisfy
179 	 * optional dependencies and should be propagated to determine
180 	 * whether new dependents are satisfiable.
181 	 * Instances that transition to offline and have the GV_TOOFFLINE flag
182 	 * are special because they can expose new subtree leaves so propagate
183 	 * the offline to the instance dependencies.
184 	 */
185 	if (gt_running(old_state)) {
186 		/*
187 		 * Handle state change during instance disabling.
188 		 * Propagate offline to the new exposed leaves.
189 		 */
190 		if (to_offline) {
191 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
192 			    v->gv_name);
193 
194 			graph_offline_subtree_leaves(v, (void *)h);
195 		}
196 
197 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
198 		    v->gv_name);
199 
200 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
201 
202 		/*
203 		 * The offline transition may satisfy require_any/restart
204 		 * dependencies and should be propagated to determine
205 		 * whether new dependents are satisfiable.
206 		 */
207 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
208 	} else {
209 		log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
210 		    v->gv_name);
211 
212 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
213 	}
214 
215 	graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
216 	return (0);
217 }
218 
219 /* ARGSUSED */
220 static int
221 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
222     restarter_instance_state_t old_state, restarter_error_t rerr)
223 {
224 	int to_offline = v->gv_flags & GV_TOOFFLINE;
225 
226 	v->gv_flags &= ~GV_TODISABLE;
227 	v->gv_flags &= ~GV_TOOFFLINE;
228 
229 	/*
230 	 * If the instance should be disabled, no problem.  Otherwise,
231 	 * send an enable command, which should result in the instance
232 	 * moving to OFFLINE unless the instance is part of a subtree
233 	 * (non root) and in this case the result is unpredictable.
234 	 */
235 	if (v->gv_flags & GV_ENABLED) {
236 		vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
237 	} else if (gt_running(old_state) && v->gv_post_disable_f) {
238 		v->gv_post_disable_f();
239 	}
240 
241 	/*
242 	 * If the service was running, propagate this as a stop.  If the
243 	 * service was not running the disabled transition may satisfy
244 	 * optional dependencies and should be propagated to determine
245 	 * whether new dependents are satisfiable.
246 	 */
247 	if (gt_running(old_state)) {
248 		/*
249 		 * We need to propagate the offline to new exposed leaves in
250 		 * case we've just disabled an instance that was part of a
251 		 * subtree.
252 		 */
253 		if (to_offline) {
254 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
255 			    v->gv_name);
256 
257 			/*
258 			 * Handle state change during instance disabling.
259 			 * Propagate offline to the new exposed leaves.
260 			 */
261 			graph_offline_subtree_leaves(v, (void *)h);
262 		}
263 
264 
265 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
266 		    v->gv_name);
267 
268 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
269 
270 	} else {
271 		log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
272 		    v->gv_name);
273 
274 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
275 	}
276 
277 	graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
278 	return (0);
279 }
280 
281 static int
282 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
283     restarter_instance_state_t old_state, restarter_error_t rerr)
284 {
285 	int r;
286 
287 	/*
288 	 * If the instance has just come up, update the start
289 	 * snapshot.
290 	 */
291 	if (gt_running(old_state) == 0) {
292 		/*
293 		 * Don't fire if we're just recovering state
294 		 * after a restart.
295 		 */
296 		if (old_state != RESTARTER_STATE_UNINIT &&
297 		    v->gv_post_online_f)
298 			v->gv_post_online_f();
299 
300 		r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
301 		switch (r) {
302 		case 0:
303 		case ENOENT:
304 			/*
305 			 * If ENOENT, the instance must have been
306 			 * deleted.  Pretend we were successful since
307 			 * we should get a delete event later.
308 			 */
309 			break;
310 
311 		case ECONNABORTED:
312 			return (ECONNABORTED);
313 
314 		case EACCES:
315 		case ENOTSUP:
316 		default:
317 			bad_error("libscf_snapshots_poststart", r);
318 		}
319 	}
320 
321 	if (!(v->gv_flags & GV_ENABLED)) {
322 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
323 	} else if (v->gv_flags & GV_TOOFFLINE) {
324 		/*
325 		 * If the vertex has the GV_TOOFFLINE flag set then that's
326 		 * because the instance was transitioning from offline to
327 		 * online and the reverse disable algorithm doesn't offline
328 		 * those instances because it was already appearing offline.
329 		 * So do it now.
330 		 */
331 		offline_vertex(v);
332 	}
333 
334 	if (gt_running(old_state) == 0) {
335 		log_framework(LOG_DEBUG, "Propagating start of %s.\n",
336 		    v->gv_name);
337 
338 		graph_transition_propagate(v, PROPAGATE_START, rerr);
339 	} else if (rerr == RERR_REFRESH) {
340 		/* For refresh we'll get a message sans state change */
341 
342 		log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
343 		    v->gv_name);
344 
345 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
346 	}
347 
348 	return (0);
349 }
350 
351 static int
352 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
353     restarter_instance_state_t old_state, restarter_error_t rerr)
354 {
355 	int r;
356 
357 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
358 	if (r != 0)
359 		return (r);
360 
361 	graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
362 	return (0);
363 }
364 
365 static int
366 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
367     restarter_instance_state_t old_state, restarter_error_t rerr)
368 {
369 	int r;
370 
371 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
372 	if (r != 0)
373 		return (r);
374 
375 	graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
376 	return (0);
377 }
378 
379 /*
380  * gt_transition() implements the state transition for the graph
381  * state machine.  It can return:
382  *    0              success
383  *    ECONNABORTED   repository connection aborted
384  *
385  * v->gv_state should be set to the state we're transitioning to before
386  * calling this function.
387  */
388 int
389 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
390     restarter_instance_state_t old_state)
391 {
392 	int err;
393 	int lost_repository = 0;
394 
395 	/*
396 	 * If there's a common set of work to be done on exit from the
397 	 * old_state, include it as a separate set of functions here.  For
398 	 * now there's no such work, so there are no gt_exit functions.
399 	 */
400 
401 	err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
402 	switch (err) {
403 	case 0:
404 		break;
405 
406 	case ECONNABORTED:
407 		lost_repository = 1;
408 		break;
409 
410 	default:
411 		bad_error("vertex_subgraph_dependencies_shutdown", err);
412 	}
413 
414 	/*
415 	 * Now call the appropriate gt_enter function for the new state.
416 	 */
417 	switch (v->gv_state) {
418 	case RESTARTER_STATE_UNINIT:
419 		err = gt_enter_uninit(h, v, old_state, rerr);
420 		break;
421 
422 	case RESTARTER_STATE_DISABLED:
423 		err = gt_enter_disabled(h, v, old_state, rerr);
424 		break;
425 
426 	case RESTARTER_STATE_OFFLINE:
427 		err = gt_enter_offline(h, v, old_state, rerr);
428 		break;
429 
430 	case RESTARTER_STATE_ONLINE:
431 		err = gt_enter_online(h, v, old_state, rerr);
432 		break;
433 
434 	case RESTARTER_STATE_DEGRADED:
435 		err = gt_enter_degraded(h, v, old_state, rerr);
436 		break;
437 
438 	case RESTARTER_STATE_MAINT:
439 		err = gt_enter_maint(h, v, old_state, rerr);
440 		break;
441 
442 	default:
443 		/* Shouldn't be in an invalid state. */
444 #ifndef NDEBUG
445 		uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
446 		    v->gv_state);
447 #endif
448 		abort();
449 	}
450 
451 	switch (err) {
452 	case 0:
453 		break;
454 
455 	case ECONNABORTED:
456 		lost_repository = 1;
457 		break;
458 
459 	default:
460 #ifndef NDEBUG
461 		uu_warn("%s:%d: "
462 		    "gt_enter_%s() failed with unexpected error %d.\n",
463 		    __FILE__, __LINE__, instance_state_str[v->gv_state], err);
464 #endif
465 		abort();
466 	}
467 
468 	return (lost_repository ? ECONNABORTED : 0);
469 }
470