xref: /illumos-gate/usr/src/cmd/fm/modules/common/disk-monitor/schg_mgr.c (revision 2a8bcb4efb45d99ac41c94a75c396b362c414f7f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <string.h>
28 #include <inttypes.h>
29 #include <atomic.h>
30 #include <fm/fmd_api.h>
31 #include <sys/fm/protocol.h>
32 
33 #include "disk_monitor.h"
34 #include "schg_mgr.h"
35 #include "hotplug_mgr.h"
36 #include "topo_gather.h"
37 #include "dm_platform.h"
38 
39 /* State-change event processing thread data */
40 static pthread_t	g_schg_tid;
41 static thread_state_t	g_schgt_state = TS_NOT_RUNNING;
42 static pthread_mutex_t	g_schgt_state_mutex = PTHREAD_MUTEX_INITIALIZER;
43 static pthread_cond_t	g_schgt_state_cvar = PTHREAD_COND_INITIALIZER;
44 static pthread_mutex_t	g_schgt_add_mutex = PTHREAD_MUTEX_INITIALIZER;
45 static qu_t		*g_schg_queue = NULL;
46 
47 static void dm_state_change_nolock(diskmon_t *diskp, hotplug_state_t newstate);
48 
49 /*
50  * Each disk state change is described by an instance of the following
51  * structure (which includes the disk object and the new state)
52  */
53 typedef struct disk_statechg {
54 	diskmon_t	*diskp;
55 	hotplug_state_t	newstate;
56 } disk_statechg_t;
57 
58 static disk_statechg_t *
new_statechange(diskmon_t * diskp,hotplug_state_t state)59 new_statechange(diskmon_t *diskp, hotplug_state_t state)
60 {
61 	disk_statechg_t *dscp =
62 	    (disk_statechg_t *)dmalloc(sizeof (disk_statechg_t));
63 
64 	/*
65 	 * The states are additive -- we don't need to preserve
66 	 * the current faulted state in the newstate:
67 	 */
68 	dscp->diskp = diskp;
69 	dscp->newstate = state;
70 
71 	return (dscp);
72 }
73 
74 static void
free_statechange(void * dscp)75 free_statechange(void *dscp)
76 {
77 	dfree(dscp, sizeof (disk_statechg_t));
78 }
79 
80 static void
add_to_statechange_queue(diskmon_t * diskp,hotplug_state_t newstate)81 add_to_statechange_queue(diskmon_t *diskp, hotplug_state_t newstate)
82 {
83 	queue_add(g_schg_queue, new_statechange(diskp, newstate));
84 }
85 
86 static const char *
lookup_action_string(indicator_t * ind_listp,ind_state_t state,char * name)87 lookup_action_string(indicator_t *ind_listp, ind_state_t state, char *name)
88 {
89 	const char *str = NULL;
90 
91 	while (ind_listp != NULL) {
92 
93 		if (state == ind_listp->ind_state &&
94 		    strcasecmp(ind_listp->ind_name, name) == 0) {
95 
96 			str = ind_listp->ind_instr_spec;
97 			break;
98 		}
99 
100 		ind_listp = ind_listp->next;
101 	}
102 
103 	return (str);
104 }
105 
106 void
dm_fault_indicator_set(diskmon_t * diskp,ind_state_t istate)107 dm_fault_indicator_set(diskmon_t *diskp, ind_state_t istate)
108 {
109 	const char *astring;
110 
111 	dm_assert(pthread_mutex_lock(&diskp->fault_indicator_mutex) == 0);
112 
113 	/*
114 	 * No need to execute redundant indicator actions
115 	 */
116 	if (istate == INDICATOR_UNKNOWN ||
117 	    diskp->fault_indicator_state == istate) {
118 		dm_assert(pthread_mutex_unlock(&diskp->fault_indicator_mutex)
119 		    == 0);
120 		return;
121 	}
122 
123 	astring = lookup_action_string(diskp->ind_list, istate,
124 	    INDICATOR_FAULT_IDENTIFIER);
125 
126 	if (astring != NULL) {
127 		log_msg(MM_SCHGMGR, "Executing action `%s'\n", astring);
128 
129 		if (dm_platform_indicator_execute(astring) != 0) {
130 			log_warn("[Disk in %s] Action `%s' did not complete "
131 			    "successfully.\n",
132 			    diskp->location,
133 			    astring);
134 		} else  {
135 
136 			diskp->fault_indicator_state = istate;
137 
138 			log_msg(MM_SCHGMGR, "Action `%s' executed "
139 			    "successfully\n", astring);
140 		}
141 	}
142 
143 	dm_assert(pthread_mutex_unlock(&diskp->fault_indicator_mutex) == 0);
144 }
145 
146 static void
schg_execute_state_change_action(diskmon_t * diskp,hotplug_state_t oldstate,hotplug_state_t newstate)147 schg_execute_state_change_action(diskmon_t *diskp, hotplug_state_t oldstate,
148     hotplug_state_t newstate)
149 {
150 	indrule_t *rulelist;
151 	ind_action_t *actions;
152 	const char *astring;
153 
154 	log_msg(MM_SCHGMGR, "[Disk in %s] State change action: %s -> %s\n",
155 	    diskp->location,
156 	    hotplug_state_string(oldstate),
157 	    hotplug_state_string(newstate));
158 
159 	/*
160 	 * Find the list of actions that correspond to this state change.
161 	 * If the old state is UNKNOWN, then we'll match to first action
162 	 * whose transition state is the new state.
163 	 */
164 	rulelist = diskp->indrule_list;
165 
166 	while (rulelist != NULL) {
167 
168 		if ((oldstate == HPS_UNKNOWN ||
169 		    rulelist->strans.begin == oldstate) &&
170 		    rulelist->strans.end == newstate)
171 			break;
172 
173 		rulelist = rulelist->next;
174 	}
175 
176 	if (rulelist != NULL) {
177 		/* Now we have a set of actions to perform: */
178 		actions = rulelist->action_list;
179 
180 		while (actions != NULL) {
181 
182 			astring = lookup_action_string(diskp->ind_list,
183 			    actions->ind_state, actions->ind_name);
184 
185 			dm_assert(astring != NULL);
186 
187 			log_msg(MM_SCHGMGR, "Executing action `%s'\n", astring);
188 
189 			if (dm_platform_indicator_execute(astring) != 0) {
190 				log_warn("[Disk in %s][State transition from "
191 				    "%s to %s] Action `%s' did not complete "
192 				    "successfully.\n",
193 				    diskp->location,
194 				    hotplug_state_string(oldstate),
195 				    hotplug_state_string(newstate),
196 				    astring);
197 
198 			} else
199 				log_msg(MM_SCHGMGR,
200 				    "Action `%s' executed successfully\n",
201 				    astring);
202 
203 			actions = actions->next;
204 		}
205 	}
206 
207 }
208 
209 static void
schg_send_fru_update(diskmon_t * diskp,dm_fru_t * frup)210 schg_send_fru_update(diskmon_t *diskp, dm_fru_t *frup)
211 {
212 	const char *action = dm_prop_lookup(diskp->props, DISK_PROP_FRUACTION);
213 
214 	if (action == NULL) {
215 		log_msg(MM_SCHGMGR|MM_NOTE, "No FRU update action for disk "
216 		    "in %s\n", diskp->location);
217 		return;
218 	}
219 
220 	if (dm_platform_update_fru(action, frup) != 0) {
221 		log_warn("Error updating FRU information for disk in %s.\n",
222 		    diskp->location);
223 	}
224 }
225 
226 static void
schg_update_fru_info(diskmon_t * diskp)227 schg_update_fru_info(diskmon_t *diskp)
228 {
229 	if (diskp->initial_configuration ||
230 	    update_configuration_from_topo(g_fm_hdl, diskp) == TOPO_SUCCESS) {
231 		diskp->initial_configuration = B_FALSE;
232 		dm_assert(pthread_mutex_lock(&diskp->fru_mutex) == 0);
233 		if (diskp->frup != NULL)
234 			schg_send_fru_update(diskp, diskp->frup);
235 		else
236 			log_warn("frup unexpectedly went away: not updating "
237 			    "FRU information for disk %s!\n", diskp->location);
238 		dm_assert(pthread_mutex_unlock(&diskp->fru_mutex) == 0);
239 	} else {
240 		log_warn_e("Error retrieving FRU information "
241 		    "for disk in %s", diskp->location);
242 	}
243 }
244 
245 void
block_state_change_events(void)246 block_state_change_events(void)
247 {
248 	dm_assert(pthread_mutex_lock(&g_schgt_add_mutex) == 0);
249 }
250 
251 void
unblock_state_change_events(void)252 unblock_state_change_events(void)
253 {
254 	dm_assert(pthread_mutex_unlock(&g_schgt_add_mutex) == 0);
255 }
256 
257 static void
disk_state_change_first_time(diskmon_t * diskp)258 disk_state_change_first_time(diskmon_t *diskp)
259 {
260 	hotplug_state_t firststate;
261 
262 	/*
263 	 * Grab the current state of the attachment point to initialize the
264 	 * initial disk state.  Create a disk state change with this new
265 	 * state so it will be processed in the loop below.  If we can't get
266 	 * the initial state for some reason, then we'll just end up doing it
267 	 * later when we get a state change from the hotplug monitor or the
268 	 * fault monitor.
269 	 */
270 	firststate = disk_ap_state_to_hotplug_state(diskp);
271 	if (firststate != HPS_UNKNOWN)
272 		dm_state_change_nolock(diskp, firststate);
273 
274 	/*
275 	 * The fault indicators will be updated when faults are replayed
276 	 * based on the state of the disk as faulty in the fmd resource cache.
277 	 * A FAULTED state change will come from the _recv function when the
278 	 * fault component event is replayed.
279 	 */
280 }
281 
282 static void
disk_state_change_thread(void * vdisklistp)283 disk_state_change_thread(void *vdisklistp)
284 {
285 	diskmon_t	*disklistp = (diskmon_t *)vdisklistp;
286 	diskmon_t	*diskp;
287 	disk_statechg_t	*dscp;
288 	hotplug_state_t	nextstate;
289 	const char	*pth;
290 
291 	/*
292 	 * Perform startup activities to initialize the state of the
293 	 * indicators for each disk.
294 	 */
295 	diskp = disklistp;
296 	while (diskp != NULL) {
297 		disk_state_change_first_time(diskp);
298 		diskp = diskp->next;
299 	}
300 
301 	unblock_state_change_events();
302 
303 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
304 	if (g_schgt_state != TS_EXIT_REQUESTED) {
305 		g_schgt_state = TS_RUNNING;
306 		dm_assert(pthread_cond_broadcast(&g_schgt_state_cvar) == 0);
307 	}
308 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
309 
310 	while (g_schgt_state != TS_EXIT_REQUESTED) {
311 
312 		if ((dscp = (disk_statechg_t *)queue_remove(g_schg_queue))
313 		    == NULL) {
314 			dm_assert(g_schgt_state == TS_EXIT_REQUESTED);
315 			continue;
316 		}
317 
318 		diskp = dscp->diskp;
319 
320 		/*
321 		 * If the new state is the faulted state, add that state to
322 		 * the disk's current state.
323 		 */
324 		if (dscp->newstate == HPS_FAULTED) {
325 
326 			/*
327 			 * If the disk wasn't previously in the faulted state,
328 			 * execute the generic fault action.  Even if we're
329 			 * in the faulted state, accept additional faults.
330 			 */
331 			nextstate = DISK_STATE(diskp->state) | HPS_FAULTED;
332 
333 		} else if (dscp->newstate == HPS_REPAIRED) {
334 			nextstate = DISK_STATE(diskp->state);
335 
336 		} else if (dscp->newstate == HPS_ABSENT) {
337 			/*
338 			 * If the new state is ABSENT, forget any faults
339 			 */
340 
341 			nextstate = HPS_ABSENT;
342 		} else
343 			nextstate = dscp->newstate | DISK_FAULTED(diskp->state);
344 
345 		/*
346 		 * When a new disk is inserted and reaches the CONFIGURED state,
347 		 * the following actions must be done in the following order:
348 		 *
349 		 * (1) Execute the configuration-specified action on the
350 		 * state change.
351 		 * (2) Retreive the FRU information from the disk and execute
352 		 * the FRU-update action specified,
353 		 * (3) Initialize the fault monitor state associated with
354 		 * the new drive.
355 		 *
356 		 * Once the disk is no longer "new" (a disk is "new" when it
357 		 * has not yet reached the CONFIGURED state), subsequent
358 		 * transitions away and back to CONFIGURED (as long as the
359 		 * disk is not physically removed) will result in the
360 		 * execution of the predefined action ONLY.
361 		 *
362 		 */
363 
364 		if (dscp->newstate != HPS_FAULTED &&
365 		    DISK_STATE(nextstate) != HPS_UNKNOWN &&
366 		    dscp->newstate != HPS_REPAIRED) {
367 
368 			schg_execute_state_change_action(diskp,
369 			    DISK_STATE(diskp->state), DISK_STATE(nextstate));
370 		}
371 
372 		if (!diskp->configured_yet &&
373 		    DISK_STATE(nextstate) == HPS_CONFIGURED) {
374 
375 			schg_update_fru_info(diskp);
376 
377 			/*
378 			 * If this state transition is lagging the true
379 			 * state of the system (e.g. if the true state of
380 			 * the disk is UNCONFIGURED, there's another
381 			 * state change somewhere later in the queue), then
382 			 * it's possible for the disk path property to not
383 			 * exist.
384 			 */
385 			if (dm_prop_lookup(diskp->props,
386 			    DISK_PROP_DEVPATH) == NULL) {
387 
388 				log_msg(MM_SCHGMGR,
389 				    "Processed stale state change "
390 				    "for disk %s\n", diskp->location);
391 
392 			} else {
393 				diskp->configured_yet = B_TRUE;
394 			}
395 
396 		}
397 
398 		dm_assert(pthread_mutex_lock(&diskp->manager_mutex) == 0);
399 
400 		/*
401 		 * Make the new state visible to all observers
402 		 */
403 		diskp->state = nextstate;
404 
405 		/*
406 		 * Now, update the diskmon if the disk is now absent -- it's
407 		 * essential to do this after the state is set (above) so that
408 		 * state observers in other threads don't try to access the
409 		 * data structures that we're freeing here.
410 		 */
411 
412 		if (diskp->configured_yet &&
413 		    DISK_STATE(nextstate) == HPS_ABSENT) {
414 			/*
415 			 * When the disk is removed, the fault monitor state is
416 			 * useless, so discard it.
417 			 */
418 			dm_assert(DISK_STATE(nextstate) != HPS_CONFIGURED);
419 
420 			diskp->configured_yet = B_FALSE;
421 
422 		}
423 		dm_assert(pthread_mutex_unlock(&diskp->manager_mutex) == 0);
424 
425 		pth = dm_prop_lookup(diskp->props, DISK_PROP_DEVPATH);
426 
427 		log_msg(MM_SCHGMGR,
428 		    "[State change #%d][%s]: Disk path = %s\n",
429 		    diskp->state_change_count,
430 		    diskp->location, pth == NULL ? "Unknown" : pth);
431 
432 		log_msg(MM_SCHGMGR,
433 		    "[State change #%d][%s]: New state = %s%s\n",
434 		    diskp->state_change_count, diskp->location,
435 		    hotplug_state_string(diskp->state),
436 		    DISK_FAULTED(diskp->state) ? "+FAULTED" : "");
437 
438 		atomic_inc_uint(&diskp->state_change_count);
439 
440 		/* The caller is responsible for freeing the state change: */
441 		free_statechange(dscp);
442 	}
443 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
444 	g_schgt_state = TS_EXITED;
445 	dm_assert(pthread_cond_broadcast(&g_schgt_state_cvar) == 0);
446 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
447 
448 	log_msg(MM_SCHGMGR, "State change thread exiting...\n");
449 }
450 
451 static void
dm_state_change_nolock(diskmon_t * diskp,hotplug_state_t newstate)452 dm_state_change_nolock(diskmon_t *diskp, hotplug_state_t newstate)
453 {
454 	/* Enqueue a new state change for the state-change thread */
455 	add_to_statechange_queue(diskp, newstate);
456 }
457 
458 void
dm_state_change(diskmon_t * diskp,hotplug_state_t newstate)459 dm_state_change(diskmon_t *diskp, hotplug_state_t newstate)
460 {
461 	dm_assert(pthread_mutex_lock(&g_schgt_add_mutex) == 0);
462 	dm_state_change_nolock(diskp, newstate);
463 	dm_assert(pthread_mutex_unlock(&g_schgt_add_mutex) == 0);
464 }
465 
466 int
init_state_change_manager(cfgdata_t * cfgdatap)467 init_state_change_manager(cfgdata_t *cfgdatap)
468 {
469 	/* new_queue() is guaranteed to succeed */
470 	g_schg_queue = new_queue(B_TRUE, dmalloc, dfree, free_statechange);
471 
472 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
473 	g_schg_tid = fmd_thr_create(g_fm_hdl, disk_state_change_thread,
474 	    cfgdatap->disk_list);
475 
476 	/*
477 	 * Now, wait for the thread to enter the TS_RUNNING state.  This
478 	 * is important because we want the state-change thread to pull the
479 	 * initial state of the disks on startup (without the wait, we could
480 	 * have the hotplug event handler race and deliver a state change
481 	 * before the state-change thread initialized the initial disk state).
482 	 */
483 
484 	while (g_schgt_state != TS_RUNNING) {
485 		(void) pthread_cond_wait(&g_schgt_state_cvar,
486 		    &g_schgt_state_mutex);
487 	}
488 
489 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
490 
491 	return (0);
492 }
493 
494 /*ARGSUSED*/
495 void
cleanup_state_change_manager(cfgdata_t * cfgdatap)496 cleanup_state_change_manager(cfgdata_t *cfgdatap)
497 {
498 	if (g_schgt_state != TS_RUNNING)
499 		return;
500 
501 	g_schgt_state = TS_EXIT_REQUESTED;
502 	queue_add(g_schg_queue, NULL);
503 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
504 	while (g_schgt_state != TS_EXITED)
505 		dm_assert(pthread_cond_wait(&g_schgt_state_cvar,
506 		    &g_schgt_state_mutex) == 0);
507 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
508 	(void) pthread_join(g_schg_tid, NULL);
509 	fmd_thr_destroy(g_fm_hdl, g_schg_tid);
510 	queue_free(&g_schg_queue);
511 	g_schgt_state = TS_NOT_RUNNING;
512 }
513