xref: /illumos-gate/usr/src/cmd/svc/startd/transition.c (revision c26dc428cee0f025b14a5ad03a7722f2a52f8383)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * transition.c - Graph State Machine
29  *
30  * The graph state machine is implemented here, with a typical approach
31  * of a function per state.  Separating the implementation allows more
32  * clarity into the actions taken on notification of state change, as well
33  * as a place for future expansion including hooks for configurable actions.
34  * All functions are called with dgraph_lock held.
35  *
36  * The start action for this state machine is not explicit.  The states
37  * (ONLINE and DEGRADED) which need to know when they're entering the state
38  * due to a daemon restart implement this understanding by checking for
39  * transition from uninitialized.  In the future, this would likely be better
40  * as an explicit start action instead of relying on an overloaded transition.
41  *
42  * All gt_enter functions use the same set of return codes.
43  *    0              success
44  *    ECONNABORTED   repository connection aborted
45  */
46 
47 #include "startd.h"
48 
49 static int
50 gt_running(restarter_instance_state_t state)
51 {
52 	if (state == RESTARTER_STATE_ONLINE ||
53 	    state == RESTARTER_STATE_DEGRADED)
54 		return (1);
55 
56 	return (0);
57 }
58 
59 static int
60 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
61     restarter_instance_state_t old_state, restarter_error_t rerr)
62 {
63 	int err;
64 	scf_instance_t *inst;
65 
66 	/* Initialize instance by refreshing it. */
67 
68 	err = libscf_fmri_get_instance(h, v->gv_name, &inst);
69 	switch (err) {
70 	case 0:
71 		break;
72 
73 	case ECONNABORTED:
74 		return (ECONNABORTED);
75 
76 	case ENOENT:
77 		return (0);
78 
79 	case EINVAL:
80 	case ENOTSUP:
81 	default:
82 		bad_error("libscf_fmri_get_instance", err);
83 	}
84 
85 	err = refresh_vertex(v, inst);
86 	if (err == 0)
87 		graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
88 
89 	scf_instance_destroy(inst);
90 
91 	/* If the service was running, propagate a stop event. */
92 	if (gt_running(old_state)) {
93 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
94 		    v->gv_name);
95 
96 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
97 	}
98 
99 	graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
100 	return (0);
101 }
102 
103 /* ARGSUSED */
104 static int
105 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
106     restarter_instance_state_t old_state, restarter_error_t rerr)
107 {
108 	/*
109 	 * If the service was running, propagate a stop event.  If the
110 	 * service was not running the maintenance transition may satisfy
111 	 * optional dependencies and should be propagated to determine
112 	 * whether new dependents are satisfiable.
113 	 * Instances that transition to maintenance and have the GV_TOOFFLINE
114 	 * flag are special because they can expose new subtree leaves so
115 	 * propagate the offline to the instance dependencies.
116 	 */
117 	if (gt_running(old_state)) {
118 		/*
119 		 * Handle state change during instance disabling.
120 		 * Propagate offline to the new exposed leaves.
121 		 */
122 		if (v->gv_flags & GV_TOOFFLINE) {
123 			v->gv_flags &= ~GV_TOOFFLINE;
124 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
125 			    v->gv_name);
126 			graph_offline_subtree_leaves(v, (void *)h);
127 		}
128 
129 		log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
130 		    "%s.\n", v->gv_name);
131 
132 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
133 	} else {
134 		log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
135 		    v->gv_name);
136 
137 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
138 	}
139 
140 	graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
141 	return (0);
142 }
143 
144 /* ARGSUSED */
145 static int
146 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
147     restarter_instance_state_t old_state, restarter_error_t rerr)
148 {
149 	/*
150 	 * If the instance should be enabled, see if we can start it.
151 	 * Otherwise send a disable command.
152 	 * If a instance has the GV_TOOFFLINE flag set then it must
153 	 * remains offline until the disable process completes.
154 	 */
155 	if (v->gv_flags & GV_ENABLED) {
156 		if (!(v->gv_flags & GV_TOOFFLINE))
157 			graph_start_if_satisfied(v);
158 	} else {
159 		if (gt_running(old_state) && v->gv_post_disable_f)
160 			v->gv_post_disable_f();
161 
162 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
163 	}
164 
165 	/*
166 	 * If the service was running, propagate a stop event.  If the
167 	 * service was not running the offline transition may satisfy
168 	 * optional dependencies and should be propagated to determine
169 	 * whether new dependents are satisfiable.
170 	 * Instances that transition to offline and have the GV_TOOFFLINE flag
171 	 * are special because they can expose new subtree leaves so propagate
172 	 * the offline to the instance dependencies.
173 	 */
174 	if (gt_running(old_state)) {
175 		/*
176 		 * Handle state change during instance disabling.
177 		 * Propagate offline to the new exposed leaves.
178 		 */
179 		if (v->gv_flags & GV_TOOFFLINE) {
180 			v->gv_flags &= ~GV_TOOFFLINE;
181 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
182 			    v->gv_name);
183 			graph_offline_subtree_leaves(v, (void *)h);
184 		}
185 
186 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
187 		    v->gv_name);
188 
189 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
190 	} else {
191 		log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
192 		    v->gv_name);
193 
194 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
195 	}
196 
197 	graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
198 	return (0);
199 }
200 
201 /* ARGSUSED */
202 static int
203 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
204     restarter_instance_state_t old_state, restarter_error_t rerr)
205 {
206 
207 	/*
208 	 * If the instance should be disabled, no problem.  Otherwise,
209 	 * send an enable command, which should result in the instance
210 	 * moving to OFFLINE unless the instance is part of a subtree
211 	 * (non root) and in this case the result is unpredictable.
212 	 */
213 	if (v->gv_flags & GV_ENABLED) {
214 		vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
215 	} else if (gt_running(old_state) && v->gv_post_disable_f) {
216 		v->gv_post_disable_f();
217 	}
218 
219 	/*
220 	 * If the service was running, propagate this as a stop.  If the
221 	 * service was not running the disabled transition may satisfy
222 	 * optional dependencies and should be propagated to determine
223 	 * whether new dependents are satisfiable.
224 	 */
225 	if (gt_running(old_state)) {
226 		/*
227 		 * We need to propagate the offline to new exposed leaves in
228 		 * case we've just disabled an instance that was part of a
229 		 * subtree.
230 		 */
231 		if (v->gv_flags & GV_TOOFFLINE) {
232 			/*
233 			 * If the vertex is in the subtree and is transitionning
234 			 * to DISABLED then remove the GV_TODISABLE flag also.
235 			 */
236 			v->gv_flags &= ~GV_TODISABLE;
237 			v->gv_flags &= ~GV_TOOFFLINE;
238 
239 			log_framework(LOG_DEBUG, "%s removed from subtree\n",
240 			    v->gv_name);
241 
242 			/*
243 			 * Handle state change during instance disabling.
244 			 * Propagate offline to the new exposed leaves.
245 			 */
246 			graph_offline_subtree_leaves(v, (void *)h);
247 		}
248 
249 
250 		log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
251 		    v->gv_name);
252 
253 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
254 
255 	} else {
256 		log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
257 		    v->gv_name);
258 
259 		graph_transition_propagate(v, PROPAGATE_SAT, rerr);
260 	}
261 
262 	graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
263 	return (0);
264 }
265 
266 static int
267 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
268     restarter_instance_state_t old_state, restarter_error_t rerr)
269 {
270 	int r;
271 
272 	/*
273 	 * If the instance has just come up, update the start
274 	 * snapshot.
275 	 */
276 	if (gt_running(old_state) == 0) {
277 		/*
278 		 * Don't fire if we're just recovering state
279 		 * after a restart.
280 		 */
281 		if (old_state != RESTARTER_STATE_UNINIT &&
282 		    v->gv_post_online_f)
283 			v->gv_post_online_f();
284 
285 		r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
286 		switch (r) {
287 		case 0:
288 		case ENOENT:
289 			/*
290 			 * If ENOENT, the instance must have been
291 			 * deleted.  Pretend we were successful since
292 			 * we should get a delete event later.
293 			 */
294 			break;
295 
296 		case ECONNABORTED:
297 			return (ECONNABORTED);
298 
299 		case EACCES:
300 		case ENOTSUP:
301 		default:
302 			bad_error("libscf_snapshots_poststart", r);
303 		}
304 	}
305 	if (!(v->gv_flags & GV_ENABLED))
306 		vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
307 
308 	if (gt_running(old_state) == 0) {
309 		log_framework(LOG_DEBUG, "Propagating start of %s.\n",
310 		    v->gv_name);
311 
312 		graph_transition_propagate(v, PROPAGATE_START, rerr);
313 	} else if (rerr == RERR_REFRESH) {
314 		/* For refresh we'll get a message sans state change */
315 
316 		log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
317 		    v->gv_name);
318 
319 		graph_transition_propagate(v, PROPAGATE_STOP, rerr);
320 	}
321 
322 	return (0);
323 }
324 
325 static int
326 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
327     restarter_instance_state_t old_state, restarter_error_t rerr)
328 {
329 	int r;
330 
331 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
332 	if (r != 0)
333 		return (r);
334 
335 	graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
336 	return (0);
337 }
338 
339 static int
340 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
341     restarter_instance_state_t old_state, restarter_error_t rerr)
342 {
343 	int r;
344 
345 	r = gt_internal_online_or_degraded(h, v, old_state, rerr);
346 	if (r != 0)
347 		return (r);
348 
349 	graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
350 	return (0);
351 }
352 
353 /*
354  * gt_transition() implements the state transition for the graph
355  * state machine.  It can return:
356  *    0              success
357  *    ECONNABORTED   repository connection aborted
358  *
359  * v->gv_state should be set to the state we're transitioning to before
360  * calling this function.
361  */
362 int
363 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
364     restarter_instance_state_t old_state)
365 {
366 	int err;
367 	int lost_repository = 0;
368 
369 	/*
370 	 * If there's a common set of work to be done on exit from the
371 	 * old_state, include it as a separate set of functions here.  For
372 	 * now there's no such work, so there are no gt_exit functions.
373 	 */
374 
375 	err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
376 	switch (err) {
377 	case 0:
378 		break;
379 
380 	case ECONNABORTED:
381 		lost_repository = 1;
382 		break;
383 
384 	default:
385 		bad_error("vertex_subgraph_dependencies_shutdown", err);
386 	}
387 
388 	/*
389 	 * Now call the appropriate gt_enter function for the new state.
390 	 */
391 	switch (v->gv_state) {
392 	case RESTARTER_STATE_UNINIT:
393 		err = gt_enter_uninit(h, v, old_state, rerr);
394 		break;
395 
396 	case RESTARTER_STATE_DISABLED:
397 		err = gt_enter_disabled(h, v, old_state, rerr);
398 		break;
399 
400 	case RESTARTER_STATE_OFFLINE:
401 		err = gt_enter_offline(h, v, old_state, rerr);
402 		break;
403 
404 	case RESTARTER_STATE_ONLINE:
405 		err = gt_enter_online(h, v, old_state, rerr);
406 		break;
407 
408 	case RESTARTER_STATE_DEGRADED:
409 		err = gt_enter_degraded(h, v, old_state, rerr);
410 		break;
411 
412 	case RESTARTER_STATE_MAINT:
413 		err = gt_enter_maint(h, v, old_state, rerr);
414 		break;
415 
416 	default:
417 		/* Shouldn't be in an invalid state. */
418 #ifndef NDEBUG
419 		uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
420 		    v->gv_state);
421 #endif
422 		abort();
423 	}
424 
425 	switch (err) {
426 	case 0:
427 		break;
428 
429 	case ECONNABORTED:
430 		lost_repository = 1;
431 		break;
432 
433 	default:
434 #ifndef NDEBUG
435 		uu_warn("%s:%d: "
436 		    "gt_enter_%s() failed with unexpected error %d.\n",
437 		    __FILE__, __LINE__, instance_state_str[v->gv_state], err);
438 #endif
439 		abort();
440 	}
441 
442 	return (lost_repository ? ECONNABORTED : 0);
443 }
444