xref: /freebsd/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c (revision f9fd7337f63698f33239c58c07bf430198235a22)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
6  * You can obtain a copy of the license from the top-level file
7  * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
8  * You may not use this file except in compliance with the license.
9  *
10  * CDDL HEADER END
11  */
12 
13 /*
14  * Copyright (c) 2016, Intel Corporation.
15  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
16  */
17 
18 #include <libnvpair.h>
19 #include <libzfs.h>
20 #include <stddef.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/list.h>
24 #include <sys/time.h>
25 #include <sys/sysevent/eventdefs.h>
26 #include <sys/sysevent/dev.h>
27 #include <sys/fm/protocol.h>
28 #include <sys/fm/fs/zfs.h>
29 #include <pthread.h>
30 #include <unistd.h>
31 
32 #include "zfs_agents.h"
33 #include "fmd_api.h"
34 #include "../zed_log.h"
35 
/*
 * agent dispatch code
 *
 * Events are appended to agent_events by zfs_agent_post_event() and removed
 * by zfs_agent_consumer_thread(); both touch the list while holding
 * agent_lock.
 */

/* held while the pending event list is modified or inspected */
static pthread_mutex_t	agent_lock = PTHREAD_MUTEX_INITIALIZER;
/* the consumer thread waits on this; signalled on post and on shutdown */
static pthread_cond_t	agent_cond = PTHREAD_COND_INITIALIZER;
static list_t		agent_events;	/* list of pending events */
/* set to 1 by zfs_agent_fini() to make the consumer thread return */
static int		agent_exiting;
44 
/*
 * One queued event.  Instances are allocated by zfs_agent_post_event() and
 * freed by whoever takes them off agent_events (the consumer thread, or
 * zfs_agent_fini() when draining).
 */
typedef struct agent_event {
	/* event class; copied with strlcpy(), so longer names are truncated */
	char		ae_class[64];
	/* event subclass; copied with strlcpy(), "" when none was given */
	char		ae_subclass[32];
	/* private nvlist_dup() copy of the payload, released by nvlist_free() */
	nvlist_t	*ae_nvl;
	/* linkage for the agent_events list (see list_create() in init) */
	list_node_t	ae_node;
} agent_event_t;
51 
/* consumer thread id: created in zfs_agent_init(), joined in zfs_agent_fini() */
pthread_t g_agents_tid;

/*
 * libzfs handle passed to zfs_agent_init(); used for zpool_iter() when an
 * event is posted and reset to NULL by zfs_agent_fini().
 */
libzfs_handle_t *g_zfs_hdl;
55 
/*
 * guid search data
 *
 * Kind of vdev a devid search matched.  DEVICE_TYPE_L2ARC is the first
 * enumerator and therefore has the value 0, which is also what a
 * zero-initialized guid_search_t carries before any match is recorded.
 */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;
62 
/*
 * Input and results of a devid search across all pools.  gs_devid is the
 * only input; the remaining fields are filled in by zfs_agent_iter_vdev()
 * and zfs_agent_iter_pool() when a vdev with that devid is found.
 */
typedef struct guid_search {
	uint64_t	gs_pool_guid;	/* guid of the pool owning the match */
	uint64_t	gs_vdev_guid;	/* guid of the matching vdev, 0 if none */
	char		*gs_devid;	/* devid to look for; may be NULL */
	device_type_t	gs_vdev_type;	/* kind of vdev that matched */
	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;
70 
71 /*
72  * Walks the vdev tree recursively looking for a matching devid.
73  * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
74  */
75 static boolean_t
76 zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
77 {
78 	guid_search_t *gsp = arg;
79 	char *path = NULL;
80 	uint_t c, children;
81 	nvlist_t **child;
82 
83 	/*
84 	 * First iterate over any children.
85 	 */
86 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
87 	    &child, &children) == 0) {
88 		for (c = 0; c < children; c++) {
89 			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
90 				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
91 				return (B_TRUE);
92 			}
93 		}
94 	}
95 	/*
96 	 * Iterate over any spares and cache devices
97 	 */
98 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
99 	    &child, &children) == 0) {
100 		for (c = 0; c < children; c++) {
101 			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
102 				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
103 				return (B_TRUE);
104 			}
105 		}
106 	}
107 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
108 	    &child, &children) == 0) {
109 		for (c = 0; c < children; c++) {
110 			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
111 				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
112 				return (B_TRUE);
113 			}
114 		}
115 	}
116 	/*
117 	 * On a devid match, grab the vdev guid and expansion time, if any.
118 	 */
119 	if (gsp->gs_devid != NULL &&
120 	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
121 	    (strcmp(gsp->gs_devid, path) == 0)) {
122 		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
123 		    &gsp->gs_vdev_guid);
124 		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
125 		    &gsp->gs_vdev_expandtime);
126 		return (B_TRUE);
127 	}
128 
129 	return (B_FALSE);
130 }
131 
132 static int
133 zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
134 {
135 	guid_search_t *gsp = arg;
136 	nvlist_t *config, *nvl;
137 
138 	/*
139 	 * For each vdev in this pool, look for a match by devid
140 	 */
141 	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
142 		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
143 		    &nvl) == 0) {
144 			(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
145 		}
146 	}
147 	/*
148 	 * if a match was found then grab the pool guid
149 	 */
150 	if (gsp->gs_vdev_guid) {
151 		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
152 		    &gsp->gs_pool_guid);
153 	}
154 
155 	zpool_close(zhp);
156 	return (gsp->gs_vdev_guid != 0);
157 }
158 
159 void
160 zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
161 {
162 	agent_event_t *event;
163 
164 	if (subclass == NULL)
165 		subclass = "";
166 
167 	event = malloc(sizeof (agent_event_t));
168 	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
169 		if (event)
170 			free(event);
171 		return;
172 	}
173 
174 	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
175 		class = EC_ZFS;
176 		subclass = ESC_ZFS_VDEV_CHECK;
177 	}
178 
179 	/*
180 	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
181 	 * from the vdev_disk layer after a hot unplug. Fortunately we do
182 	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
183 	 * proxy so we remap it here for the benefit of the diagnosis engine.
184 	 */
185 	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
186 	    (strcmp(subclass, ESC_DISK) == 0) &&
187 	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
188 	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
189 		nvlist_t *payload = event->ae_nvl;
190 		struct timeval tv;
191 		int64_t tod[2];
192 		uint64_t pool_guid = 0, vdev_guid = 0;
193 		guid_search_t search = { 0 };
194 		device_type_t devtype = DEVICE_TYPE_PRIMARY;
195 
196 		class = "resource.fs.zfs.removed";
197 		subclass = "";
198 
199 		(void) nvlist_add_string(payload, FM_CLASS, class);
200 		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
201 		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
202 
203 		(void) gettimeofday(&tv, NULL);
204 		tod[0] = tv.tv_sec;
205 		tod[1] = tv.tv_usec;
206 		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
207 
208 		/*
209 		 * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
210 		 * ZFS_EV_POOL_GUID may be missing so find them.
211 		 */
212 		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
213 		    &search.gs_devid);
214 		(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
215 		pool_guid = search.gs_pool_guid;
216 		vdev_guid = search.gs_vdev_guid;
217 		devtype = search.gs_vdev_type;
218 
219 		/*
220 		 * We want to avoid reporting "remove" events coming from
221 		 * libudev for VDEVs which were expanded recently (10s) and
222 		 * avoid activating spares in response to partitions being
223 		 * deleted and created in rapid succession.
224 		 */
225 		if (search.gs_vdev_expandtime != 0 &&
226 		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
227 			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
228 			    "for recently expanded device '%s'", EC_DEV_REMOVE,
229 			    search.gs_devid);
230 			goto out;
231 		}
232 
233 		(void) nvlist_add_uint64(payload,
234 		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
235 		(void) nvlist_add_uint64(payload,
236 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
237 		switch (devtype) {
238 		case DEVICE_TYPE_L2ARC:
239 			(void) nvlist_add_string(payload,
240 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
241 			    VDEV_TYPE_L2CACHE);
242 			break;
243 		case DEVICE_TYPE_SPARE:
244 			(void) nvlist_add_string(payload,
245 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
246 			break;
247 		case DEVICE_TYPE_PRIMARY:
248 			(void) nvlist_add_string(payload,
249 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
250 			break;
251 		}
252 
253 		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
254 		    EC_DEV_REMOVE, class);
255 	}
256 
257 	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
258 	(void) strlcpy(event->ae_subclass, subclass,
259 	    sizeof (event->ae_subclass));
260 
261 	(void) pthread_mutex_lock(&agent_lock);
262 	list_insert_tail(&agent_events, event);
263 	(void) pthread_mutex_unlock(&agent_lock);
264 
265 out:
266 	(void) pthread_cond_signal(&agent_cond);
267 }
268 
269 static void
270 zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
271 {
272 	/*
273 	 * The diagnosis engine subscribes to the following events.
274 	 * On illumos these subscriptions reside in:
275 	 * 	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
276 	 */
277 	if (strstr(class, "ereport.fs.zfs.") != NULL ||
278 	    strstr(class, "resource.fs.zfs.") != NULL ||
279 	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
280 	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
281 	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
282 		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
283 	}
284 
285 	/*
286 	 * The retire agent subscribes to the following events.
287 	 * On illumos these subscriptions reside in:
288 	 * 	/usr/lib/fm/fmd/plugins/zfs-retire.conf
289 	 *
290 	 * NOTE: faults events come directly from our diagnosis engine
291 	 * and will not pass through the zfs kernel module.
292 	 */
293 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
294 	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
295 	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
296 	    strcmp(class, "sysevent.fs.zfs.vdev_remove")  == 0) {
297 		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
298 	}
299 
300 	/*
301 	 * The SLM module only consumes disk events and vdev check events
302 	 *
303 	 * NOTE: disk events come directly from disk monitor and will
304 	 * not pass through the zfs kernel module.
305 	 */
306 	if (strstr(class, "EC_dev_") != NULL ||
307 	    strcmp(class, EC_ZFS) == 0) {
308 		(void) zfs_slm_event(class, subclass, nvl);
309 	}
310 }
311 
312 /*
313  * Events are consumed and dispatched from this thread
314  * An agent can also post an event so event list lock
315  * is not held when calling an agent.
316  * One event is consumed at a time.
317  */
318 static void *
319 zfs_agent_consumer_thread(void *arg)
320 {
321 	for (;;) {
322 		agent_event_t *event;
323 
324 		(void) pthread_mutex_lock(&agent_lock);
325 
326 		/* wait for an event to show up */
327 		while (!agent_exiting && list_is_empty(&agent_events))
328 			(void) pthread_cond_wait(&agent_cond, &agent_lock);
329 
330 		if (agent_exiting) {
331 			(void) pthread_mutex_unlock(&agent_lock);
332 			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
333 			    "exiting");
334 			return (NULL);
335 		}
336 
337 		if ((event = (list_head(&agent_events))) != NULL) {
338 			list_remove(&agent_events, event);
339 
340 			(void) pthread_mutex_unlock(&agent_lock);
341 
342 			/* dispatch to all event subscribers */
343 			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
344 			    event->ae_nvl);
345 
346 			nvlist_free(event->ae_nvl);
347 			free(event);
348 			continue;
349 		}
350 
351 		(void) pthread_mutex_unlock(&agent_lock);
352 	}
353 
354 	return (NULL);
355 }
356 
357 void
358 zfs_agent_init(libzfs_handle_t *zfs_hdl)
359 {
360 	fmd_hdl_t *hdl;
361 
362 	g_zfs_hdl = zfs_hdl;
363 
364 	if (zfs_slm_init() != 0)
365 		zed_log_die("Failed to initialize zfs slm");
366 	zed_log_msg(LOG_INFO, "Add Agent: init");
367 
368 	hdl = fmd_module_hdl("zfs-diagnosis");
369 	_zfs_diagnosis_init(hdl);
370 	if (!fmd_module_initialized(hdl))
371 		zed_log_die("Failed to initialize zfs diagnosis");
372 
373 	hdl = fmd_module_hdl("zfs-retire");
374 	_zfs_retire_init(hdl);
375 	if (!fmd_module_initialized(hdl))
376 		zed_log_die("Failed to initialize zfs retire");
377 
378 	list_create(&agent_events, sizeof (agent_event_t),
379 	    offsetof(struct agent_event, ae_node));
380 
381 	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
382 	    NULL) != 0) {
383 		list_destroy(&agent_events);
384 		zed_log_die("Failed to initialize agents");
385 	}
386 }
387 
388 void
389 zfs_agent_fini(void)
390 {
391 	fmd_hdl_t *hdl;
392 	agent_event_t *event;
393 
394 	agent_exiting = 1;
395 	(void) pthread_cond_signal(&agent_cond);
396 
397 	/* wait for zfs_enum_pools thread to complete */
398 	(void) pthread_join(g_agents_tid, NULL);
399 
400 	/* drain any pending events */
401 	while ((event = (list_head(&agent_events))) != NULL) {
402 		list_remove(&agent_events, event);
403 		nvlist_free(event->ae_nvl);
404 		free(event);
405 	}
406 
407 	list_destroy(&agent_events);
408 
409 	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
410 		_zfs_retire_fini(hdl);
411 		fmd_hdl_unregister(hdl);
412 	}
413 	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
414 		_zfs_diagnosis_fini(hdl);
415 		fmd_hdl_unregister(hdl);
416 	}
417 
418 	zed_log_msg(LOG_INFO, "Add Agent: fini");
419 	zfs_slm_fini();
420 
421 	g_zfs_hdl = NULL;
422 }
423