xref: /titanic_52/usr/src/cmd/fm/modules/common/disk-monitor/schg_mgr.c (revision 184cd04c26b064536977dfbb913a1240eaf6f708)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <string.h>
30 #include <inttypes.h>
31 #include <atomic.h>
32 #include <fm/fmd_api.h>
33 #include <sys/fm/protocol.h>
34 
35 #include "disk_monitor.h"
36 #include "schg_mgr.h"
37 #include "hotplug_mgr.h"
38 #include "topo_gather.h"
39 #include "dm_platform.h"
40 
/*
 * State-change event processing thread data.
 *
 * The thread's lifecycle is tracked in g_schgt_state; transitions to
 * TS_RUNNING and TS_EXITED are made under g_schgt_state_mutex and announced
 * on g_schgt_state_cvar, which the init and cleanup routines wait on.
 */
/* Thread id returned by fmd_thr_create() for the state-change thread */
static pthread_t	g_schg_tid;
/* Lifecycle state of the state-change thread */
static thread_state_t	g_schgt_state = TS_NOT_RUNNING;
/* Mutex paired with g_schgt_state_cvar for g_schgt_state transitions */
static pthread_mutex_t	g_schgt_state_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Broadcast when g_schgt_state becomes TS_RUNNING or TS_EXITED */
static pthread_cond_t	g_schgt_state_cvar = PTHREAD_COND_INITIALIZER;
/*
 * Held by dm_state_change() while it enqueues a state change; also taken
 * and released by block/unblock_state_change_events().
 */
static pthread_mutex_t	g_schgt_add_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Queue of disk_statechg_t records consumed by the state-change thread */
static qu_t		*g_schg_queue = NULL;

/*
 * Forward declaration: disk_state_change_first_time() calls this before
 * its definition appears in the file.
 */
static void dm_state_change_nolock(diskmon_t *diskp, hotplug_state_t newstate);
50 
/*
 * Each disk state change is described by an instance of the following
 * structure (which includes the disk object and the new state).
 *
 * Instances are created by new_statechange(), placed on g_schg_queue, and
 * released with free_statechange() once the state-change thread has
 * processed them.
 */
typedef struct disk_statechg {
	diskmon_t	*diskp;		/* disk the change applies to */
	hotplug_state_t	newstate;	/* state requested for that disk */
} disk_statechg_t;
59 
60 static disk_statechg_t *
61 new_statechange(diskmon_t *diskp, hotplug_state_t state)
62 {
63 	disk_statechg_t *dscp =
64 	    (disk_statechg_t *)dmalloc(sizeof (disk_statechg_t));
65 
66 	/*
67 	 * The states are additive -- we don't need to preserve
68 	 * the current faulted state in the newstate:
69 	 */
70 	dscp->diskp = diskp;
71 	dscp->newstate = state;
72 
73 	return (dscp);
74 }
75 
76 static void
77 free_statechange(void *dscp)
78 {
79 	dfree(dscp, sizeof (disk_statechg_t));
80 }
81 
82 static void
83 add_to_statechange_queue(diskmon_t *diskp, hotplug_state_t newstate)
84 {
85 	queue_add(g_schg_queue, new_statechange(diskp, newstate));
86 }
87 
88 static const char *
89 lookup_action_string(indicator_t *ind_listp, ind_state_t state, char *name)
90 {
91 	const char *str = NULL;
92 
93 	while (ind_listp != NULL) {
94 
95 		if (state == ind_listp->ind_state &&
96 		    strcasecmp(ind_listp->ind_name, name) == 0) {
97 
98 			str = ind_listp->ind_instr_spec;
99 			break;
100 		}
101 
102 		ind_listp = ind_listp->next;
103 	}
104 
105 	return (str);
106 }
107 
108 void
109 dm_fault_indicator_set(diskmon_t *diskp, ind_state_t istate)
110 {
111 	const char *astring;
112 
113 	dm_assert(pthread_mutex_lock(&diskp->fault_indicator_mutex) == 0);
114 
115 	/*
116 	 * No need to execute redundant indicator actions
117 	 */
118 	if (istate == INDICATOR_UNKNOWN ||
119 	    diskp->fault_indicator_state == istate) {
120 		dm_assert(pthread_mutex_unlock(&diskp->fault_indicator_mutex)
121 		    == 0);
122 		return;
123 	}
124 
125 	astring = lookup_action_string(diskp->ind_list, istate,
126 	    INDICATOR_FAULT_IDENTIFIER);
127 
128 	if (astring != NULL) {
129 		log_msg(MM_SCHGMGR, "Executing action `%s'\n", astring);
130 
131 		if (dm_platform_indicator_execute(astring) != 0) {
132 			log_warn("[Disk in %s] Action `%s' did not complete "
133 			    "successfully.\n",
134 			    diskp->location,
135 			    astring);
136 		} else  {
137 
138 			diskp->fault_indicator_state = istate;
139 
140 			log_msg(MM_SCHGMGR, "Action `%s' executed "
141 			    "successfully\n", astring);
142 		}
143 	}
144 
145 	dm_assert(pthread_mutex_unlock(&diskp->fault_indicator_mutex) == 0);
146 }
147 
148 static void
149 schg_execute_state_change_action(diskmon_t *diskp, hotplug_state_t oldstate,
150     hotplug_state_t newstate)
151 {
152 	indrule_t *rulelist;
153 	ind_action_t *actions;
154 	const char *astring;
155 
156 	log_msg(MM_SCHGMGR, "[Disk in %s] State change action: %s -> %s\n",
157 	    diskp->location,
158 	    hotplug_state_string(oldstate),
159 	    hotplug_state_string(newstate));
160 
161 	/*
162 	 * Find the list of actions that correspond to this state change.
163 	 * If the old state is UNKNOWN, then we'll match to first action
164 	 * whose transition state is the new state.
165 	 */
166 	rulelist = diskp->indrule_list;
167 
168 	while (rulelist != NULL) {
169 
170 		if ((oldstate == HPS_UNKNOWN ||
171 		    rulelist->strans.begin == oldstate) &&
172 		    rulelist->strans.end == newstate)
173 			break;
174 
175 		rulelist = rulelist->next;
176 	}
177 
178 	if (rulelist != NULL) {
179 		/* Now we have a set of actions to perform: */
180 		actions = rulelist->action_list;
181 
182 		while (actions != NULL) {
183 
184 			astring = lookup_action_string(diskp->ind_list,
185 			    actions->ind_state, actions->ind_name);
186 
187 			dm_assert(astring != NULL);
188 
189 			log_msg(MM_SCHGMGR, "Executing action `%s'\n", astring);
190 
191 			if (dm_platform_indicator_execute(astring) != 0) {
192 				log_warn("[Disk in %s][State transition from "
193 				    "%s to %s] Action `%s' did not complete "
194 				    "successfully.\n",
195 				    diskp->location,
196 				    hotplug_state_string(oldstate),
197 				    hotplug_state_string(newstate),
198 				    astring);
199 
200 			} else
201 				log_msg(MM_SCHGMGR,
202 				    "Action `%s' executed successfully\n",
203 				    astring);
204 
205 			actions = actions->next;
206 		}
207 	}
208 
209 }
210 
211 static void
212 schg_send_fru_update(diskmon_t *diskp, dm_fru_t *frup)
213 {
214 	const char *action = dm_prop_lookup(diskp->props, DISK_PROP_FRUACTION);
215 
216 	if (action == NULL) {
217 		log_msg(MM_SCHGMGR|MM_NOTE, "No FRU update action for disk "
218 		    "in %s\n", diskp->location);
219 		return;
220 	}
221 
222 	if (dm_platform_update_fru(action, frup) != 0) {
223 		log_warn("Error updating FRU information for disk in %s.\n",
224 		    diskp->location);
225 	}
226 }
227 
228 static void
229 schg_update_fru_info(diskmon_t *diskp)
230 {
231 	if (diskp->initial_configuration ||
232 	    update_configuration_from_topo(g_fm_hdl, diskp) == TOPO_SUCCESS) {
233 		diskp->initial_configuration = B_FALSE;
234 		dm_assert(pthread_mutex_lock(&diskp->fru_mutex) == 0);
235 		if (diskp->frup != NULL)
236 			schg_send_fru_update(diskp, diskp->frup);
237 		else
238 			log_warn("frup unexpectedly went away: not updating "
239 			    "FRU information for disk %s!\n", diskp->location);
240 		dm_assert(pthread_mutex_unlock(&diskp->fru_mutex) == 0);
241 	} else {
242 		log_warn_e("Error retrieving FRU information "
243 		    "for disk in %s", diskp->location);
244 	}
245 }
246 
/*
 * Hold off dm_state_change() callers by acquiring g_schgt_add_mutex, the
 * same mutex dm_state_change() takes before enqueueing.  The mutex is
 * still held on return; it is released by unblock_state_change_events().
 */
void
block_state_change_events(void)
{
	dm_assert(pthread_mutex_lock(&g_schgt_add_mutex) == 0);
}
252 
/*
 * Release g_schgt_add_mutex so that dm_state_change() callers can enqueue
 * state changes again.  The state-change thread calls this once it has
 * queued the initial state of every disk.
 *
 * NOTE(review): presumably the matching block_state_change_events() call is
 * made by whoever starts the state-change thread -- confirm against caller.
 */
void
unblock_state_change_events(void)
{
	dm_assert(pthread_mutex_unlock(&g_schgt_add_mutex) == 0);
}
258 
259 static void
260 disk_state_change_first_time(diskmon_t *diskp)
261 {
262 	hotplug_state_t firststate;
263 
264 	/*
265 	 * Grab the current state of the attachment point to initialize the
266 	 * initial disk state.  Create a disk state change with this new
267 	 * state so it will be processed in the loop below.  If we can't get
268 	 * the initial state for some reason, then we'll just end up doing it
269 	 * later when we get a state change from the hotplug monitor or the
270 	 * fault monitor.
271 	 */
272 	firststate = disk_ap_state_to_hotplug_state(diskp);
273 	if (firststate != HPS_UNKNOWN)
274 		dm_state_change_nolock(diskp, firststate);
275 
276 	/*
277 	 * The fault indicators will be updated when faults are replayed
278 	 * based on the state of the disk as faulty in the fmd resource cache.
279 	 * A FAULTED state change will come from the _recv function when the
280 	 * fault component event is replayed.
281 	 */
282 }
283 
/*
 * Body of the state-change thread (started through fmd_thr_create() by
 * init_state_change_manager()).
 *
 * vdisklistp is the head of the list of disks to manage.  The thread:
 *
 *   1. queues the initial state of every disk,
 *   2. calls unblock_state_change_events(),
 *   3. moves g_schgt_state to TS_RUNNING (unless an exit was already
 *      requested) and broadcasts g_schgt_state_cvar,
 *   4. dequeues and applies state changes until g_schgt_state becomes
 *      TS_EXIT_REQUESTED,
 *   5. sets g_schgt_state to TS_EXITED and broadcasts again.
 */
static void
disk_state_change_thread(void *vdisklistp)
{
	diskmon_t	*disklistp = (diskmon_t *)vdisklistp;
	diskmon_t	*diskp;
	disk_statechg_t	*dscp;
	hotplug_state_t	nextstate;
	const char	*pth;

	/*
	 * Perform startup activities to initialize the state of the
	 * indicators for each disk.
	 */
	diskp = disklistp;
	while (diskp != NULL) {
		disk_state_change_first_time(diskp);
		diskp = diskp->next;
	}

	/*
	 * The initial states are queued; let dm_state_change() callers in.
	 * NOTE(review): this unlocks g_schgt_add_mutex from this thread;
	 * presumably it was locked by the thread that called
	 * block_state_change_events() -- confirm against caller.
	 */
	unblock_state_change_events();

	/* Announce TS_RUNNING to the waiter in init_state_change_manager() */
	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
	if (g_schgt_state != TS_EXIT_REQUESTED) {
		g_schgt_state = TS_RUNNING;
		dm_assert(pthread_cond_broadcast(&g_schgt_state_cvar) == 0);
	}
	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);

	/*
	 * NOTE(review): g_schgt_state is read here (and in the assertion
	 * below) without holding g_schgt_state_mutex.
	 */
	while (g_schgt_state != TS_EXIT_REQUESTED) {

		/*
		 * A NULL item is what cleanup_state_change_manager() queues
		 * after requesting an exit; re-test the loop condition.
		 */
		if ((dscp = (disk_statechg_t *)queue_remove(g_schg_queue))
		    == NULL) {
			dm_assert(g_schgt_state == TS_EXIT_REQUESTED);
			continue;
		}

		diskp = dscp->diskp;

		/*
		 * If the new state is the faulted state, add that state to
		 * the disk's current state.
		 */
		if (dscp->newstate == HPS_FAULTED) {

			/*
			 * If the disk wasn't previously in the faulted state,
			 * execute the generic fault action.  Even if we're
			 * in the faulted state, accept additional faults.
			 */
			nextstate = DISK_STATE(diskp->state) | HPS_FAULTED;

		} else if (dscp->newstate == HPS_REPAIRED) {
			/*
			 * Keep only DISK_STATE() of the current state
			 * (presumably this drops the faulted flag -- confirm
			 * against the macro definition).
			 */
			nextstate = DISK_STATE(diskp->state);

		} else if (dscp->newstate == HPS_ABSENT) {
			/*
			 * If the new state is ABSENT, forget any faults
			 */

			nextstate = HPS_ABSENT;
		} else
			/* Any other state: carry the current fault flag over */
			nextstate = dscp->newstate | DISK_FAULTED(diskp->state);

		/*
		 * When a new disk is inserted and reaches the CONFIGURED state,
		 * the following actions must be done in the following order:
		 *
		 * (1) Execute the configuration-specified action on the
		 * state change.
		 * (2) Retrieve the FRU information from the disk and execute
		 * the FRU-update action specified,
		 * (3) Initialize the fault monitor state associated with
		 * the new drive.
		 *
		 * Once the disk is no longer "new" (a disk is "new" when it
		 * has not yet reached the CONFIGURED state), subsequent
		 * transitions away and back to CONFIGURED (as long as the
		 * disk is not physically removed) will result in the
		 * execution of the predefined action ONLY.
		 *
		 */

		/*
		 * Transition actions are skipped for FAULTED and REPAIRED
		 * requests and when the resulting state is UNKNOWN.
		 */
		if (dscp->newstate != HPS_FAULTED &&
		    DISK_STATE(nextstate) != HPS_UNKNOWN &&
		    dscp->newstate != HPS_REPAIRED) {

			schg_execute_state_change_action(diskp,
			    DISK_STATE(diskp->state), DISK_STATE(nextstate));
		}

		/* First arrival in CONFIGURED: refresh and send FRU data */
		if (!diskp->configured_yet &&
		    DISK_STATE(nextstate) == HPS_CONFIGURED) {

			schg_update_fru_info(diskp);

			/*
			 * If this state transition is lagging the true
			 * state of the system (e.g. if the true state of
			 * the disk is UNCONFIGURED, there's another
			 * state change somewhere later in the queue), then
			 * it's possible for the disk path property to not
			 * exist.
			 */
			if (dm_prop_lookup(diskp->props,
			    DISK_PROP_DEVPATH) == NULL) {

				log_msg(MM_SCHGMGR,
				    "Processed stale state change "
				    "for disk %s\n", diskp->location);

			} else {
				diskp->configured_yet = B_TRUE;
			}

		}

		dm_assert(pthread_mutex_lock(&diskp->manager_mutex) == 0);

		/*
		 * Make the new state visible to all observers
		 */
		diskp->state = nextstate;

		/*
		 * Now, update the diskmon if the disk is now absent -- it's
		 * essential to do this after the state is set (above) so that
		 * state observers in other threads don't try to access the
		 * data structures that we're freeing here.
		 */

		if (diskp->configured_yet &&
		    DISK_STATE(nextstate) == HPS_ABSENT) {
			/*
			 * When the disk is removed, the fault monitor state is
			 * useless, so discard it.
			 */
			dm_assert(DISK_STATE(nextstate) != HPS_CONFIGURED);

			/* The next insertion is treated as a "new" disk */
			diskp->configured_yet = B_FALSE;

		}
		dm_assert(pthread_mutex_unlock(&diskp->manager_mutex) == 0);

		pth = dm_prop_lookup(diskp->props, DISK_PROP_DEVPATH);

		/*
		 * NOTE(review): state_change_count is printed with %d but
		 * incremented with atomic_inc_uint() below -- confirm the
		 * field's type and whether %u is intended.
		 */
		log_msg(MM_SCHGMGR,
		    "[State change #%d][%s]: Disk path = %s\n",
		    diskp->state_change_count,
		    diskp->location, pth == NULL ? "Unknown" : pth);

		log_msg(MM_SCHGMGR,
		    "[State change #%d][%s]: New state = %s%s\n",
		    diskp->state_change_count, diskp->location,
		    hotplug_state_string(diskp->state),
		    DISK_FAULTED(diskp->state) ? "+FAULTED" : "");

		atomic_inc_uint(&diskp->state_change_count);

		/* The caller is responsible for freeing the state change: */
		free_statechange(dscp);
	}

	/* Tell cleanup_state_change_manager() that the loop has finished */
	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
	g_schgt_state = TS_EXITED;
	dm_assert(pthread_cond_broadcast(&g_schgt_state_cvar) == 0);
	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);

	log_msg(MM_SCHGMGR, "State change thread exiting...\n");
}
452 
/*
 * Queue a state change for `diskp' without acquiring g_schgt_add_mutex.
 * dm_state_change() wraps this call with that mutex; the state-change
 * thread's startup path calls it directly.
 */
static void
dm_state_change_nolock(diskmon_t *diskp, hotplug_state_t newstate)
{
	/* Enqueue a new state change for the state-change thread */
	add_to_statechange_queue(diskp, newstate);
}
459 
460 void
461 dm_state_change(diskmon_t *diskp, hotplug_state_t newstate)
462 {
463 	dm_assert(pthread_mutex_lock(&g_schgt_add_mutex) == 0);
464 	dm_state_change_nolock(diskp, newstate);
465 	dm_assert(pthread_mutex_unlock(&g_schgt_add_mutex) == 0);
466 }
467 
468 int
469 init_state_change_manager(cfgdata_t *cfgdatap)
470 {
471 	/* new_queue() is guaranteed to succeed */
472 	g_schg_queue = new_queue(B_TRUE, dmalloc, dfree, free_statechange);
473 
474 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
475 	g_schg_tid = fmd_thr_create(g_fm_hdl, disk_state_change_thread,
476 	    cfgdatap->disk_list);
477 
478 	/*
479 	 * Now, wait for the thread to enter the TS_RUNNING state.  This
480 	 * is important because we want the state-change thread to pull the
481 	 * initial state of the disks on startup (without the wait, we could
482 	 * have the hotplug event handler race and deliver a state change
483 	 * before the state-change thread initialized the initial disk state).
484 	 */
485 
486 	while (g_schgt_state != TS_RUNNING) {
487 		(void) pthread_cond_wait(&g_schgt_state_cvar,
488 		    &g_schgt_state_mutex);
489 	}
490 
491 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
492 
493 	return (0);
494 }
495 
496 /*ARGSUSED*/
497 void
498 cleanup_state_change_manager(cfgdata_t *cfgdatap)
499 {
500 	if (g_schgt_state != TS_RUNNING)
501 		return;
502 
503 	g_schgt_state = TS_EXIT_REQUESTED;
504 	queue_add(g_schg_queue, NULL);
505 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
506 	while (g_schgt_state != TS_EXITED)
507 		dm_assert(pthread_cond_wait(&g_schgt_state_cvar,
508 		    &g_schgt_state_mutex) == 0);
509 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
510 	(void) pthread_join(g_schg_tid, NULL);
511 	fmd_thr_destroy(g_fm_hdl, g_schg_tid);
512 	queue_free(&g_schg_queue);
513 	g_schgt_state = TS_NOT_RUNNING;
514 }
515