xref: /illumos-gate/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c (revision ac20c57d6652cecf7859e3346336b9a48e5d5f82)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
/*
 * The ZFS retire agent is responsible for managing hot spares across all pools.
 * When we see a device fault or a device removal, we try to open the associated
 * pool and look for any hot spares.  We iterate over any available hot spares
 * and attempt a 'zpool replace' with each one in turn until one succeeds.
 *
 * For vdevs diagnosed as faulty, the agent is also responsible for proactively
 * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors).
 */
37 
38 #include <fm/fmd_api.h>
39 #include <sys/fs/zfs.h>
40 #include <sys/fm/protocol.h>
41 #include <sys/fm/fs/zfs.h>
42 #include <libzfs.h>
43 #include <string.h>
44 
/*
 * Callback state used to find a pool with a matching GUID.  find_by_guid()
 * fills in the GUID to search for and find_pool() records the handle of the
 * pool whose GUID matches.
 */
typedef struct find_cbdata {
	uint64_t	cb_guid;	/* in: GUID of the pool being sought */
	zpool_handle_t	*cb_zhp;	/* out: pool found by find_pool() */
} find_cbdata_t;
52 
53 static int
54 find_pool(zpool_handle_t *zhp, void *data)
55 {
56 	find_cbdata_t *cbp = data;
57 
58 	if (cbp->cb_guid == zpool_get_guid(zhp)) {
59 		cbp->cb_zhp = zhp;
60 		return (1);
61 	}
62 
63 	zpool_close(zhp);
64 	return (0);
65 }
66 
67 /*
68  * Find a vdev within a tree with a matching GUID.
69  */
70 static nvlist_t *
71 find_vdev(nvlist_t *nv, uint64_t search)
72 {
73 	uint64_t guid;
74 	nvlist_t **child;
75 	uint_t c, children;
76 	nvlist_t *ret;
77 
78 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
79 	    guid == search)
80 		return (nv);
81 
82 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
83 	    &child, &children) != 0)
84 		return (NULL);
85 
86 	for (c = 0; c < children; c++) {
87 		if ((ret = find_vdev(child[c], search)) != NULL)
88 			return (ret);
89 	}
90 
91 	return (NULL);
92 }
93 
94 /*
95  * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
96  */
97 static zpool_handle_t *
98 find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
99     nvlist_t **vdevp)
100 {
101 	find_cbdata_t cb;
102 	zpool_handle_t *zhp;
103 	nvlist_t *config, *nvroot;
104 
105 	/*
106 	 * Find the corresponding pool and make sure the vdev still exists.
107 	 */
108 	cb.cb_guid = pool_guid;
109 	if (zpool_iter(zhdl, find_pool, &cb) != 1)
110 		return (NULL);
111 
112 	zhp = cb.cb_zhp;
113 	config = zpool_get_config(zhp, NULL);
114 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
115 	    &nvroot) != 0) {
116 		zpool_close(zhp);
117 		return (NULL);
118 	}
119 
120 	if ((*vdevp = find_vdev(nvroot, vdev_guid)) == NULL) {
121 		zpool_close(zhp);
122 		return (NULL);
123 	}
124 
125 	return (zhp);
126 }
127 
128 /*
129  * Given a vdev, attempt to replace it with every known spare until one
130  * succeeds.
131  */
132 static void
133 replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev)
134 {
135 	nvlist_t *config, *nvroot, *replacement;
136 	nvlist_t **spares;
137 	uint_t s, nspares;
138 	char *dev_name;
139 
140 	config = zpool_get_config(zhp, NULL);
141 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
142 	    &nvroot) != 0)
143 		return;
144 
145 	/*
146 	 * Find out if there are any hot spares available in the pool.
147 	 */
148 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
149 	    &spares, &nspares) != 0)
150 		return;
151 
152 	if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0)
153 		return;
154 
155 	if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
156 	    VDEV_TYPE_ROOT) != 0) {
157 		nvlist_free(replacement);
158 		return;
159 	}
160 
161 	dev_name = zpool_vdev_name(NULL, zhp, vdev);
162 
163 	/*
164 	 * Try to replace each spare, ending when we successfully
165 	 * replace it.
166 	 */
167 	for (s = 0; s < nspares; s++) {
168 		char *spare_name;
169 
170 		if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
171 		    &spare_name) != 0)
172 			continue;
173 
174 		if (nvlist_add_nvlist_array(replacement,
175 		    ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0)
176 			continue;
177 
178 		if (zpool_vdev_attach(zhp, dev_name, spare_name,
179 		    replacement, B_TRUE) == 0)
180 			break;
181 	}
182 
183 	free(dev_name);
184 	nvlist_free(replacement);
185 }
186 
187 /*ARGSUSED*/
188 static void
189 zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
190     const char *class)
191 {
192 	uint64_t pool_guid, vdev_guid;
193 	zpool_handle_t *zhp;
194 	nvlist_t *resource, *fault;
195 	nvlist_t **faults;
196 	uint_t f, nfaults;
197 	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
198 	boolean_t fault_device, degrade_device;
199 	boolean_t is_repair;
200 	char *scheme;
201 	nvlist_t *vdev;
202 
203 	/*
204 	 * If this is a resource notifying us of device removal, then simply
205 	 * check for an available spare and continue.
206 	 */
207 	if (strcmp(class, "resource.fs.zfs.removed") == 0) {
208 		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
209 		    &pool_guid) != 0 ||
210 		    nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
211 		    &vdev_guid) != 0)
212 			return;
213 
214 		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
215 		    &vdev)) == NULL)
216 			return;
217 
218 		if (fmd_prop_get_int32(hdl, "spare_on_remove"))
219 			replace_with_spare(zhp, vdev);
220 		zpool_close(zhp);
221 		return;
222 	}
223 
224 	if (strcmp(class, "list.repaired") == 0)
225 		is_repair = B_TRUE;
226 	else
227 		is_repair = B_FALSE;
228 
229 	/*
230 	 * We subscribe to zfs faults as well as all repair events.
231 	 */
232 	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
233 	    &faults, &nfaults) != 0)
234 		return;
235 
236 	for (f = 0; f < nfaults; f++) {
237 		fault = faults[f];
238 
239 		fault_device = B_FALSE;
240 		degrade_device = B_FALSE;
241 
242 		/*
243 		 * While we subscribe to fault.fs.zfs.*, we only take action
244 		 * for faults targeting a specific vdev (open failure or SERD
245 		 * failure).
246 		 */
247 		if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io"))
248 			fault_device = B_TRUE;
249 		else if (fmd_nvl_class_match(hdl, fault,
250 		    "fault.fs.zfs.vdev.checksum"))
251 			degrade_device = B_TRUE;
252 		else if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.device"))
253 			fault_device = B_FALSE;
254 		else
255 			continue;
256 
257 		if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE,
258 		    &resource) != 0 ||
259 		    nvlist_lookup_string(resource, FM_FMRI_SCHEME,
260 		    &scheme) != 0)
261 			continue;
262 
263 		if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0)
264 			continue;
265 
266 		if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
267 		    &pool_guid) != 0 ||
268 		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
269 		    &vdev_guid) != 0)
270 			continue;
271 
272 		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
273 		    &vdev)) == NULL)
274 			continue;
275 
276 		/*
277 		 * If this is a repair event, then mark the vdev as repaired and
278 		 * continue.
279 		 */
280 		if (is_repair) {
281 			(void) zpool_vdev_clear(zhp, vdev_guid);
282 			zpool_close(zhp);
283 			continue;
284 		}
285 
286 		/*
287 		 * Actively fault the device if needed.
288 		 */
289 		if (fault_device)
290 			(void) zpool_vdev_fault(zhp, vdev_guid);
291 		if (degrade_device)
292 			(void) zpool_vdev_degrade(zhp, vdev_guid);
293 
294 		/*
295 		 * Attempt to substitute a hot spare.
296 		 */
297 		replace_with_spare(zhp, vdev);
298 		zpool_close(zhp);
299 	}
300 }
301 
/*
 * Module entry points.  Only the receive routine is provided; every other
 * slot is left NULL.
 */
static const fmd_hdl_ops_t fmd_ops = {
	zfs_retire_recv,	/* fmdo_recv */
	NULL,			/* fmdo_timeout */
	NULL,			/* fmdo_close */
	NULL,			/* fmdo_stats */
	NULL,			/* fmdo_gc */
};
309 
/*
 * Module properties.  "spare_on_remove" (boolean, default "true") is read by
 * zfs_retire_recv() to decide whether a device removal event should trigger
 * hot spare replacement.  The all-NULL entry terminates the table.
 */
static const fmd_prop_t fmd_props[] = {
	{ "spare_on_remove", FMD_TYPE_BOOL, "true" },
	{ NULL, 0, NULL }
};
314 
/*
 * Module description handed to fmd_hdl_register() by _fmd_init(): the module
 * name and version strings followed by the ops and property tables above.
 */
static const fmd_hdl_info_t fmd_info = {
	"ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
};
318 
319 void
320 _fmd_init(fmd_hdl_t *hdl)
321 {
322 	libzfs_handle_t *zhdl;
323 
324 	if ((zhdl = libzfs_init()) == NULL)
325 		return;
326 
327 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
328 		libzfs_fini(zhdl);
329 		return;
330 	}
331 
332 	fmd_hdl_setspecific(hdl, zhdl);
333 }
334 
335 void
336 _fmd_fini(fmd_hdl_t *hdl)
337 {
338 	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
339 
340 	if (zhdl != NULL)
341 		libzfs_fini(zhdl);
342 }
343