xref: /titanic_50/usr/src/cmd/fm/fmd/common/fmd_dr.c (revision 447603b54aaea470ed1dcdb5c52d0be1d7801f84)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * FMD Dynamic Reconfiguration (DR) Event Handling
27  *
28  * Fault manager scheme plug-ins must track characteristics of individual
29  * pieces of hardware.  As these components can be added or removed by a DR
30  * operation, we need to provide a means by which plug-ins can determine when
31  * they need to re-examine the current configuration.  We provide a simple
32  * mechanism whereby this task can be implemented using lazy evaluation: a
33  * simple 64-bit generation counter is maintained and incremented on *any* DR.
34  * Schemes can store the generation number in scheme-specific data structures,
35  * and then revalidate their contents if the current generation number has
36  * changed since the resource information was cached.  This method saves time,
37  * avoids the complexity of direct participation in DR, avoids the need for
38  * resource-specific processing of DR events, and is relatively easy to port
39  * to other systems that support dynamic reconfiguration.
40  *
41  * The dr generation is only incremented in response to hardware changes.  Since
42  * ASRUs can be in any scheme, including the device scheme, we must also be
43  * aware of software configuration changes which may affect the resource cache.
44  * In addition, we take a snapshot of the topology whenever a reconfiguration
45  * event occurs and notify any modules of the change.
46  */
47 
48 #include <sys/types.h>
49 #include <sys/sunddi.h>
50 #include <sys/sysevent/dr.h>
51 #include <sys/sysevent/eventdefs.h>
52 
53 #include <stdio.h>
54 #include <string.h>
55 #include <unistd.h>
56 #include <libsysevent.h>
57 
58 #undef MUTEX_HELD
59 #undef RW_READ_HELD
60 #undef RW_WRITE_HELD
61 
62 #include <fmd_asru.h>
63 #include <fmd_error.h>
64 #include <fmd_event.h>
65 #include <fmd_fmri.h>
66 #include <fmd_module.h>
67 #include <fmd_subr.h>
68 #include <fmd_topo.h>
69 #include <fmd.h>
70 
71 void
72 fmd_dr_event(sysevent_t *sep)
73 {
74 	uint64_t gen;
75 	fmd_event_t *e;
76 	const char *class = sysevent_get_class_name(sep);
77 	const char *subclass = sysevent_get_subclass_name(sep);
78 	hrtime_t evtime;
79 	fmd_topo_t *ftp, *prev;
80 
81 	if (strcmp(class, EC_DR) == 0) {
82 		if (strcmp(subclass, ESC_DR_AP_STATE_CHANGE) != 0 &&
83 		    strcmp(subclass, ESC_DR_TARGET_STATE_CHANGE) != 0)
84 			return;
85 	/* LINTED: E_NOP_IF_STMT */
86 	} else if (strcmp(class, EC_DEVFS) == 0) {
87 		/*
88 		 * A devfs configuration event can change the topology,
89 		 * as disk nodes only exist when the device is configured.
90 		 */
91 	} else if (strcmp(class, EC_PLATFORM) == 0) {
92 		/*
93 		 * Since we rely on the SP to enumerate fans,
94 		 * power-supplies and sensors/leds, it would be prudent
95 		 * to take a new snapshot if the SP resets.
96 		 */
97 		if (strcmp(subclass, ESC_PLATFORM_SP_RESET) != 0)
98 			return;
99 	} else if (strcmp(class, EC_DEV_ADD) == 0 ||
100 	    strcmp(class, EC_DEV_REMOVE) == 0) {
101 		if (strcmp(subclass, ESC_DISK) != 0)
102 			return;
103 	} else
104 		return;
105 
106 	/*
107 	 * Take a topo snapshot and notify modules of the change.  Picking an
108 	 * accurate time here is difficult.  On one hand, we have the timestamp
109 	 * of the underlying sysevent, indicating when the reconfiguration event
110 	 * occurred.  On the other hand, we are taking the topo snapshot
111 	 * asynchronously, and hence the timestamp of the snapshot is the
112 	 * current time.  Pretending this topo snapshot was valid at the time
113 	 * the sysevent was posted seems wrong, so we instead opt for the
114 	 * current time as an upper bound on the snapshot validity.
115 	 *
116 	 * Along these lines, we keep track of the last time we dispatched a
117 	 * topo snapshot.  If the sysevent occurred before the last topo
118 	 * snapshot, then don't bother dispatching another topo change event.
119 	 * We've already indicated (to the best of our ability) the change in
120 	 * topology.  This prevents endless topo snapshots in response to a
121 	 * flurry of sysevents.
122 	 */
123 	sysevent_get_time(sep, &evtime);
124 	prev = fmd_topo_hold();
125 	if (evtime <= prev->ft_time_begin &&
126 	    fmd.d_clockops == &fmd_timeops_native) {
127 		fmd_topo_rele(prev);
128 		return;
129 	}
130 	fmd_topo_rele(prev);
131 
132 	(void) pthread_mutex_lock(&fmd.d_stats_lock);
133 	gen = fmd.d_stats->ds_dr_gen.fmds_value.ui64++;
134 	(void) pthread_mutex_unlock(&fmd.d_stats_lock);
135 
136 	TRACE((FMD_DBG_XPRT, "dr event %p, gen=%llu", (void *)sep, gen));
137 	fmd_topo_update();
138 
139 	ftp = fmd_topo_hold();
140 	e = fmd_event_create(FMD_EVT_TOPO, ftp->ft_time_end, NULL, ftp);
141 	fmd_modhash_dispatch(fmd.d_mod_hash, e);
142 }
143