1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2016-2018 RackTop Systems.
26 */
27
28
29 /*
30 * transition.c - Graph State Machine
31 *
32 * The graph state machine is implemented here, with a typical approach
33 * of a function per state. Separating the implementation allows more
34 * clarity into the actions taken on notification of state change, as well
35 * as a place for future expansion including hooks for configurable actions.
36 * All functions are called with dgraph_lock held.
37 *
38 * The start action for this state machine is not explicit. The states
39 * (ONLINE and DEGRADED) which need to know when they're entering the state
40 * due to a daemon restart implement this understanding by checking for
41 * transition from uninitialized. In the future, this would likely be better
42 * as an explicit start action instead of relying on an overloaded transition.
43 *
44 * All gt_enter functions use the same set of return codes.
45 * 0 success
46 * ECONNABORTED repository connection aborted
47 */
48
49 #include "startd.h"
50
51 static int
gt_running(restarter_instance_state_t state)52 gt_running(restarter_instance_state_t state)
53 {
54 if (state == RESTARTER_STATE_ONLINE ||
55 state == RESTARTER_STATE_DEGRADED)
56 return (1);
57
58 return (0);
59 }
60
61 static int
gt_enter_uninit(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)62 gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
63 restarter_instance_state_t old_state, restarter_error_t rerr)
64 {
65 int err;
66 scf_instance_t *inst;
67
68 /* Initialize instance by refreshing it. */
69
70 err = libscf_fmri_get_instance(h, v->gv_name, &inst);
71 switch (err) {
72 case 0:
73 break;
74
75 case ECONNABORTED:
76 return (ECONNABORTED);
77
78 case ENOENT:
79 return (0);
80
81 case EINVAL:
82 case ENOTSUP:
83 default:
84 bad_error("libscf_fmri_get_instance", err);
85 }
86
87 err = refresh_vertex(v, inst);
88 if (err == 0)
89 graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
90
91 scf_instance_destroy(inst);
92
93 /* If the service was running, propagate a stop event. */
94 if (gt_running(old_state)) {
95 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
96 v->gv_name);
97
98 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
99 }
100
101 graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
102 return (0);
103 }
104
105 /* ARGSUSED */
106 static int
gt_enter_maint(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)107 gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
108 restarter_instance_state_t old_state, restarter_error_t rerr)
109 {
110 int to_offline = v->gv_flags & GV_TOOFFLINE;
111
112 /*
113 * If the service was running, propagate a stop event. If the
114 * service was not running the maintenance transition may satisfy
115 * optional dependencies and should be propagated to determine
116 * whether new dependents are satisfiable.
117 * Instances that transition to maintenance and have the GV_TOOFFLINE
118 * flag are special because they can expose new subtree leaves so
119 * propagate the offline to the instance dependencies.
120 */
121
122 /* instance transitioning to maintenance is considered disabled */
123 v->gv_flags &= ~GV_TODISABLE;
124 v->gv_flags &= ~GV_TOOFFLINE;
125
126 if (gt_running(old_state)) {
127 /*
128 * Handle state change during instance disabling.
129 * Propagate offline to the new exposed leaves.
130 */
131 if (to_offline) {
132 log_framework(LOG_DEBUG, "%s removed from subtree\n",
133 v->gv_name);
134
135 graph_offline_subtree_leaves(v, (void *)h);
136 }
137
138 log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
139 "%s.\n", v->gv_name);
140
141 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
142
143 /*
144 * The maintenance transition may satisfy optional_all/restart
145 * dependencies and should be propagated to determine
146 * whether new dependents are satisfiable.
147 */
148 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
149 } else {
150 log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
151 v->gv_name);
152
153 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
154 }
155
156 graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
157 return (0);
158 }
159
160 /* ARGSUSED */
161 static int
gt_enter_offline(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)162 gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
163 restarter_instance_state_t old_state, restarter_error_t rerr)
164 {
165 int to_offline = v->gv_flags & GV_TOOFFLINE;
166
167 v->gv_flags &= ~GV_TOOFFLINE;
168
169 /*
170 * If the instance should be disabled send it a disable command.
171 * Otherwise, if GV_TOOFFLINE was not set, see if we can start it.
172 */
173 if (v->gv_flags & GV_TODISABLE) {
174 if (gt_running(old_state) && v->gv_post_disable_f)
175 v->gv_post_disable_f();
176
177 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
178 } else if (v->gv_flags & GV_ENABLED) {
179 if (to_offline == 0)
180 graph_start_if_satisfied(v);
181 }
182
183 /*
184 * If the service was running, propagate a stop event. If the
185 * service was not running the offline transition may satisfy
186 * optional dependencies and should be propagated to determine
187 * whether new dependents are satisfiable.
188 * Instances that transition to offline and have the GV_TOOFFLINE flag
189 * are special because they can expose new subtree leaves so propagate
190 * the offline to the instance dependencies.
191 */
192 if (gt_running(old_state)) {
193 /*
194 * Handle state change during instance disabling.
195 * Propagate offline to the new exposed leaves.
196 */
197 if (to_offline) {
198 log_framework(LOG_DEBUG, "%s removed from subtree\n",
199 v->gv_name);
200
201 graph_offline_subtree_leaves(v, (void *)h);
202 }
203
204 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
205 v->gv_name);
206
207 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
208
209 /*
210 * The offline transition may satisfy require_any/restart
211 * dependencies and should be propagated to determine
212 * whether new dependents are satisfiable.
213 */
214 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
215 } else {
216 log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
217 v->gv_name);
218
219 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
220 }
221
222 graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
223 return (0);
224 }
225
226 /* ARGSUSED */
227 static int
gt_enter_disabled(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)228 gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
229 restarter_instance_state_t old_state, restarter_error_t rerr)
230 {
231 int to_offline = v->gv_flags & GV_TOOFFLINE;
232
233 v->gv_flags &= ~GV_TODISABLE;
234 v->gv_flags &= ~GV_TOOFFLINE;
235
236 /*
237 * If the instance should be disabled, no problem. Otherwise,
238 * send an enable command, which should result in the instance
239 * moving to OFFLINE unless the instance is part of a subtree
240 * (non root) and in this case the result is unpredictable.
241 */
242 if (v->gv_flags & GV_ENABLED) {
243 vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
244 } else if (gt_running(old_state) && v->gv_post_disable_f) {
245 v->gv_post_disable_f();
246 }
247
248 /*
249 * If the service was running, propagate this as a stop. If the
250 * service was not running the disabled transition may satisfy
251 * optional dependencies and should be propagated to determine
252 * whether new dependents are satisfiable.
253 */
254 if (gt_running(old_state)) {
255 /*
256 * We need to propagate the offline to new exposed leaves in
257 * case we've just disabled an instance that was part of a
258 * subtree.
259 */
260 if (to_offline) {
261 log_framework(LOG_DEBUG, "%s removed from subtree\n",
262 v->gv_name);
263
264 /*
265 * Handle state change during instance disabling.
266 * Propagate offline to the new exposed leaves.
267 */
268 graph_offline_subtree_leaves(v, (void *)h);
269 }
270
271
272 log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
273 v->gv_name);
274
275 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
276
277 /*
278 * The disable transition may satisfy optional_all/restart
279 * dependencies and should be propagated to determine
280 * whether new dependents are satisfiable.
281 */
282 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
283 } else {
284 log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
285 v->gv_name);
286
287 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
288 }
289
290 graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
291 return (0);
292 }
293
294 static int
gt_internal_online_or_degraded(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)295 gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
296 restarter_instance_state_t old_state, restarter_error_t rerr)
297 {
298 int r;
299
300 /*
301 * If the instance has just come up, update the start
302 * snapshot.
303 */
304 if (gt_running(old_state) == 0) {
305 /*
306 * Don't fire if we're just recovering state
307 * after a restart.
308 */
309 if (old_state != RESTARTER_STATE_UNINIT &&
310 v->gv_post_online_f)
311 v->gv_post_online_f();
312
313 r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
314 switch (r) {
315 case 0:
316 case ENOENT:
317 /*
318 * If ENOENT, the instance must have been
319 * deleted. Pretend we were successful since
320 * we should get a delete event later.
321 */
322 break;
323
324 case ECONNABORTED:
325 return (ECONNABORTED);
326
327 case EACCES:
328 case ENOTSUP:
329 default:
330 bad_error("libscf_snapshots_poststart", r);
331 }
332 }
333
334 if (!(v->gv_flags & GV_ENABLED)) {
335 vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
336 } else if (v->gv_flags & GV_TOOFFLINE) {
337 /*
338 * If the vertex has the GV_TOOFFLINE flag set then that's
339 * because the instance was transitioning from offline to
340 * online and the reverse disable algorithm doesn't offline
341 * those instances because it was already appearing offline.
342 * So do it now.
343 */
344 offline_vertex(v);
345 }
346
347 if (gt_running(old_state) == 0) {
348 log_framework(LOG_DEBUG, "Propagating start of %s.\n",
349 v->gv_name);
350
351 graph_transition_propagate(v, PROPAGATE_START, rerr);
352 } else if (rerr == RERR_REFRESH) {
353 /* For refresh we'll get a message sans state change */
354
355 log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
356 v->gv_name);
357
358 graph_transition_propagate(v, PROPAGATE_STOP, rerr);
359 }
360
361 return (0);
362 }
363
364 static int
gt_enter_online(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)365 gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
366 restarter_instance_state_t old_state, restarter_error_t rerr)
367 {
368 int r;
369
370 r = gt_internal_online_or_degraded(h, v, old_state, rerr);
371 if (r != 0)
372 return (r);
373
374 graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
375 return (0);
376 }
377
378 static int
gt_enter_degraded(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)379 gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
380 restarter_instance_state_t old_state, restarter_error_t rerr)
381 {
382 int r;
383
384 r = gt_internal_online_or_degraded(h, v, old_state, rerr);
385 if (r != 0)
386 return (r);
387
388 graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
389 return (0);
390 }
391
392 /*
393 * gt_transition() implements the state transition for the graph
394 * state machine. It can return:
395 * 0 success
396 * ECONNABORTED repository connection aborted
397 *
398 * v->gv_state should be set to the state we're transitioning to before
399 * calling this function.
400 */
401 int
gt_transition(scf_handle_t * h,graph_vertex_t * v,restarter_error_t rerr,restarter_instance_state_t old_state)402 gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
403 restarter_instance_state_t old_state)
404 {
405 int err;
406 int lost_repository = 0;
407
408 /*
409 * If there's a common set of work to be done on exit from the
410 * old_state, include it as a separate set of functions here. For
411 * now there's no such work, so there are no gt_exit functions.
412 */
413
414 err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
415 switch (err) {
416 case 0:
417 break;
418
419 case ECONNABORTED:
420 lost_repository = 1;
421 break;
422
423 default:
424 bad_error("vertex_subgraph_dependencies_shutdown", err);
425 }
426
427 /*
428 * Now call the appropriate gt_enter function for the new state.
429 */
430 switch (v->gv_state) {
431 case RESTARTER_STATE_UNINIT:
432 err = gt_enter_uninit(h, v, old_state, rerr);
433 break;
434
435 case RESTARTER_STATE_DISABLED:
436 err = gt_enter_disabled(h, v, old_state, rerr);
437 break;
438
439 case RESTARTER_STATE_OFFLINE:
440 err = gt_enter_offline(h, v, old_state, rerr);
441 break;
442
443 case RESTARTER_STATE_ONLINE:
444 err = gt_enter_online(h, v, old_state, rerr);
445 break;
446
447 case RESTARTER_STATE_DEGRADED:
448 err = gt_enter_degraded(h, v, old_state, rerr);
449 break;
450
451 case RESTARTER_STATE_MAINT:
452 err = gt_enter_maint(h, v, old_state, rerr);
453 break;
454
455 default:
456 /* Shouldn't be in an invalid state. */
457 #ifndef NDEBUG
458 uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
459 v->gv_state);
460 #endif
461 abort();
462 }
463
464 switch (err) {
465 case 0:
466 break;
467
468 case ECONNABORTED:
469 lost_repository = 1;
470 break;
471
472 default:
473 #ifndef NDEBUG
474 uu_warn("%s:%d: "
475 "gt_enter_%s() failed with unexpected error %d.\n",
476 __FILE__, __LINE__, instance_state_str[v->gv_state], err);
477 #endif
478 abort();
479 }
480
481 return (lost_repository ? ECONNABORTED : 0);
482 }
483