xref: /freebsd/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c (revision df58e8b1506f241670be86a560fb6e8432043aee)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
7  * You can obtain a copy of the license from the top-level file
8  * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
9  * You may not use this file except in compliance with the license.
10  *
11  * CDDL HEADER END
12  */
13 
14 /*
15  * Copyright (c) 2016, Intel Corporation.
16  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
17  * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
18  */
19 
20 #include <libnvpair.h>
21 #include <libzfs.h>
22 #include <stddef.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/list.h>
26 #include <sys/time.h>
27 #include <sys/sysevent/eventdefs.h>
28 #include <sys/sysevent/dev.h>
29 #include <sys/fm/protocol.h>
30 #include <sys/fm/fs/zfs.h>
31 #include <pthread.h>
32 #include <unistd.h>
33 
34 #include "zfs_agents.h"
35 #include "fmd_api.h"
36 #include "../zed_log.h"
37 
38 /*
39  * agent dispatch code
40  */
41 
static pthread_mutex_t	agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	agent_cond = PTHREAD_COND_INITIALIZER;
static list_t		agent_events;	/* list of pending events */
static int		agent_exiting;	/* set by zfs_agent_fini() to stop consumer */

/*
 * One queued event: class/subclass copies plus a private duplicate of the
 * event nvlist, linked into agent_events until consumed and freed.
 */
typedef struct agent_event {
	char		ae_class[64];	/* event class (strlcpy'd, may truncate) */
	char		ae_subclass[32];	/* event subclass (may truncate) */
	nvlist_t	*ae_nvl;	/* owned; freed after dispatch */
	list_node_t	ae_node;	/* agent_events linkage */
} agent_event_t;

pthread_t g_agents_tid;		/* consumer thread; joined in zfs_agent_fini() */

libzfs_handle_t *g_zfs_hdl;	/* set in zfs_agent_init(), cleared in fini */

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;

/*
 * In/out parameters for the pool/vdev walk: fields left zero/NULL act as
 * wildcards and are filled in from the first matching vdev found.
 */
typedef struct guid_search {
	uint64_t	gs_pool_guid;
	uint64_t	gs_vdev_guid;
	const char	*gs_devid;	/* NOTE(review): may point into pool config nvlist — verify lifetime */
	device_type_t	gs_vdev_type;
	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;
72 
73 /*
74  * Walks the vdev tree recursively looking for a matching devid.
75  * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
76  */
77 static boolean_t
zfs_agent_iter_vdev(zpool_handle_t * zhp,nvlist_t * nvl,void * arg)78 zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
79 {
80 	guid_search_t *gsp = arg;
81 	const char *path = NULL;
82 	uint_t c, children;
83 	nvlist_t **child;
84 	uint64_t vdev_guid;
85 
86 	/*
87 	 * First iterate over any children.
88 	 */
89 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
90 	    &child, &children) == 0) {
91 		for (c = 0; c < children; c++) {
92 			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
93 				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
94 				return (B_TRUE);
95 			}
96 		}
97 	}
98 	/*
99 	 * Iterate over any spares and cache devices
100 	 */
101 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
102 	    &child, &children) == 0) {
103 		for (c = 0; c < children; c++) {
104 			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
105 				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
106 				return (B_TRUE);
107 			}
108 		}
109 	}
110 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
111 	    &child, &children) == 0) {
112 		for (c = 0; c < children; c++) {
113 			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
114 				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
115 				return (B_TRUE);
116 			}
117 		}
118 	}
119 	/*
120 	 * On a devid match, grab the vdev guid and expansion time, if any.
121 	 */
122 	if (gsp->gs_devid != NULL &&
123 	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
124 	    (strcmp(gsp->gs_devid, path) == 0)) {
125 		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
126 		    &gsp->gs_vdev_guid);
127 		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
128 		    &gsp->gs_vdev_expandtime);
129 		return (B_TRUE);
130 	}
131 	/*
132 	 * Otherwise, on a vdev guid match, grab the devid and expansion
133 	 * time. The devid might be missing on removal since its not part
134 	 * of blkid cache and L2ARC VDEV does not contain pool guid in its
135 	 * blkid, so this is a special case for L2ARC VDEV.
136 	 */
137 	else if (gsp->gs_vdev_guid != 0 &&
138 	    nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
139 	    gsp->gs_vdev_guid == vdev_guid) {
140 		if (gsp->gs_devid == NULL) {
141 			(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
142 			    &gsp->gs_devid);
143 		}
144 		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
145 		    &gsp->gs_vdev_expandtime);
146 		return (B_TRUE);
147 	}
148 
149 	return (B_FALSE);
150 }
151 
152 static int
zfs_agent_iter_pool(zpool_handle_t * zhp,void * arg)153 zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
154 {
155 	guid_search_t *gsp = arg;
156 	nvlist_t *config, *nvl;
157 
158 	/*
159 	 * For each vdev in this pool, look for a match by devid
160 	 */
161 	boolean_t found = B_FALSE;
162 	uint64_t pool_guid;
163 
164 	/* Get pool configuration and extract pool GUID */
165 	if ((config = zpool_get_config(zhp, NULL)) == NULL ||
166 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
167 	    &pool_guid) != 0)
168 		goto out;
169 
170 	/* Skip this pool if we're looking for a specific pool */
171 	if (gsp->gs_pool_guid != 0 && pool_guid != gsp->gs_pool_guid)
172 		goto out;
173 
174 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0)
175 		found = zfs_agent_iter_vdev(zhp, nvl, gsp);
176 
177 	if (found && gsp->gs_pool_guid == 0)
178 		gsp->gs_pool_guid = pool_guid;
179 
180 out:
181 	zpool_close(zhp);
182 	return (found);
183 }
184 
/*
 * Queue an event for the agent consumer thread.
 *
 * Duplicates 'nvl' into a private agent_event_t and appends it to
 * agent_events.  EC_DEV_REMOVE/ESC_DISK events that identify a vdev are
 * remapped to "resource.fs.zfs.removed" with pool/vdev guid and vdev type
 * payload filled in via a pool walk.  On allocation failure the event is
 * silently dropped (best-effort delivery).
 */
void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}

	/* Normalize the vdev_check sysevent to its EC_ZFS class/subclass. */
	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
	 * from the vdev_disk layer after a hot unplug. Fortunately we do
	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
	 * proxy so we remap it here for the benefit of the diagnosis engine.
	 * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
	 * layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;
		const char *devid = NULL;

		class = "resource.fs.zfs.removed";
		subclass = "";

		/* Pull the identifying fields out of the original event. */
		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

		/* Timestamp the synthesized ereport. */
		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * If devid is missing but vdev_guid is available, find devid
		 * and pool_guid from vdev_guid.
		 * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
		 * ZFS_EV_POOL_GUID may be missing so find them.
		 */
		search.gs_devid = devid;
		search.gs_vdev_guid = vdev_guid;
		search.gs_pool_guid = pool_guid;
		zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
		/*
		 * NOTE(review): gs_devid may point into a pool config nvlist
		 * whose handle zfs_agent_iter_pool() has already closed —
		 * confirm the config's lifetime outlives this use.
		 */
		if (devid == NULL)
			devid = search.gs_devid;
		if (pool_guid == 0)
			pool_guid = search.gs_pool_guid;
		if (vdev_guid == 0)
			vdev_guid = search.gs_vdev_guid;
		devtype = search.gs_vdev_type;

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for VDEVs which were expanded recently (10s) and
		 * avoid activating spares in response to partitions being
		 * deleted and created in rapid succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    devid);
			fnvlist_free(payload);
			free(event);
			/* Still signal below; the consumer re-checks its list. */
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	/* Queue the event; the flag/list change happens under agent_lock. */
	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

out:
	(void) pthread_cond_signal(&agent_cond);
}
307 
308 static void
zfs_agent_dispatch(const char * class,const char * subclass,nvlist_t * nvl)309 zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
310 {
311 	/*
312 	 * The diagnosis engine subscribes to the following events.
313 	 * On illumos these subscriptions reside in:
314 	 * 	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
315 	 */
316 	if (strstr(class, "ereport.fs.zfs.") != NULL ||
317 	    strstr(class, "resource.fs.zfs.") != NULL ||
318 	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
319 	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
320 	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
321 		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
322 	}
323 
324 	/*
325 	 * The retire agent subscribes to the following events.
326 	 * On illumos these subscriptions reside in:
327 	 * 	/usr/lib/fm/fmd/plugins/zfs-retire.conf
328 	 *
329 	 * NOTE: faults events come directly from our diagnosis engine
330 	 * and will not pass through the zfs kernel module.
331 	 */
332 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
333 	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
334 	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
335 	    strcmp(class, "sysevent.fs.zfs.vdev_remove")  == 0) {
336 		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
337 	}
338 
339 	/*
340 	 * The SLM module only consumes disk events and vdev check events
341 	 *
342 	 * NOTE: disk events come directly from disk monitor and will
343 	 * not pass through the zfs kernel module.
344 	 */
345 	if (strstr(class, "EC_dev_") != NULL ||
346 	    strcmp(class, EC_ZFS) == 0) {
347 		(void) zfs_slm_event(class, subclass, nvl);
348 	}
349 }
350 
351 /*
352  * Events are consumed and dispatched from this thread
353  * An agent can also post an event so event list lock
354  * is not held when calling an agent.
355  * One event is consumed at a time.
356  */
357 static void *
zfs_agent_consumer_thread(void * arg)358 zfs_agent_consumer_thread(void *arg)
359 {
360 	(void) arg;
361 
362 	for (;;) {
363 		agent_event_t *event;
364 
365 		(void) pthread_mutex_lock(&agent_lock);
366 
367 		/* wait for an event to show up */
368 		while (!agent_exiting && list_is_empty(&agent_events))
369 			(void) pthread_cond_wait(&agent_cond, &agent_lock);
370 
371 		if (agent_exiting) {
372 			(void) pthread_mutex_unlock(&agent_lock);
373 			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
374 			    "exiting");
375 			return (NULL);
376 		}
377 
378 		if ((event = list_remove_head(&agent_events)) != NULL) {
379 			(void) pthread_mutex_unlock(&agent_lock);
380 
381 			/* dispatch to all event subscribers */
382 			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
383 			    event->ae_nvl);
384 
385 			nvlist_free(event->ae_nvl);
386 			free(event);
387 			continue;
388 		}
389 
390 		(void) pthread_mutex_unlock(&agent_lock);
391 	}
392 
393 	return (NULL);
394 }
395 
396 void
zfs_agent_init(libzfs_handle_t * zfs_hdl)397 zfs_agent_init(libzfs_handle_t *zfs_hdl)
398 {
399 	fmd_hdl_t *hdl;
400 
401 	g_zfs_hdl = zfs_hdl;
402 
403 	if (zfs_slm_init() != 0)
404 		zed_log_die("Failed to initialize zfs slm");
405 	zed_log_msg(LOG_INFO, "Add Agent: init");
406 
407 	hdl = fmd_module_hdl("zfs-diagnosis");
408 	_zfs_diagnosis_init(hdl);
409 	if (!fmd_module_initialized(hdl))
410 		zed_log_die("Failed to initialize zfs diagnosis");
411 
412 	hdl = fmd_module_hdl("zfs-retire");
413 	_zfs_retire_init(hdl);
414 	if (!fmd_module_initialized(hdl))
415 		zed_log_die("Failed to initialize zfs retire");
416 
417 	list_create(&agent_events, sizeof (agent_event_t),
418 	    offsetof(struct agent_event, ae_node));
419 
420 	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
421 	    NULL) != 0) {
422 		list_destroy(&agent_events);
423 		zed_log_die("Failed to initialize agents");
424 	}
425 	pthread_setname_np(g_agents_tid, "agents");
426 }
427 
428 void
zfs_agent_fini(void)429 zfs_agent_fini(void)
430 {
431 	fmd_hdl_t *hdl;
432 	agent_event_t *event;
433 
434 	agent_exiting = 1;
435 	(void) pthread_cond_signal(&agent_cond);
436 
437 	/* wait for zfs_enum_pools thread to complete */
438 	(void) pthread_join(g_agents_tid, NULL);
439 
440 	/* drain any pending events */
441 	while ((event = list_remove_head(&agent_events)) != NULL) {
442 		nvlist_free(event->ae_nvl);
443 		free(event);
444 	}
445 
446 	list_destroy(&agent_events);
447 
448 	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
449 		_zfs_retire_fini(hdl);
450 		fmd_hdl_unregister(hdl);
451 	}
452 	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
453 		_zfs_diagnosis_fini(hdl);
454 		fmd_hdl_unregister(hdl);
455 	}
456 
457 	zed_log_msg(LOG_INFO, "Add Agent: fini");
458 	zfs_slm_fini();
459 
460 	g_zfs_hdl = NULL;
461 }
462