1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  *
25  * Copyright (c) 2016, Intel Corporation.
26  * Copyright (c) 2023, Klara Inc.
27  */
28 
29 /*
30  * This file implements the minimal FMD module API required to support the
31  * fault logic modules in ZED. This support includes module registration,
32  * memory allocation, module property accessors, basic case management,
33  * one-shot timers and SERD engines.
34  *
35  * In the ZED runtime, the modules are called from a single thread so no
36  * locking is required in this emulated FMD environment.
37  */
38 
39 #include <sys/types.h>
40 #include <sys/fm/protocol.h>
41 #include <uuid/uuid.h>
42 #include <signal.h>
43 #include <string.h>
44 #include <time.h>
45 
46 #include "fmd_api.h"
47 #include "fmd_serd.h"
48 
49 #include "zfs_agents.h"
50 #include "../zed_log.h"
51 
52 typedef struct fmd_modstat {
53 	fmd_stat_t	ms_accepted;	/* total events accepted by module */
54 	fmd_stat_t	ms_caseopen;	/* cases currently open */
55 	fmd_stat_t	ms_casesolved;	/* total cases solved by module */
56 	fmd_stat_t	ms_caseclosed;	/* total cases closed by module */
57 } fmd_modstat_t;
58 
59 typedef struct fmd_module {
60 	const char	*mod_name;	/* basename of module (ro) */
61 	const fmd_hdl_info_t *mod_info;	/* module info registered with handle */
62 	void		*mod_spec;	/* fmd_hdl_get/setspecific data value */
63 	fmd_stat_t	*mod_ustat;	/* module specific custom stats */
64 	uint_t		mod_ustat_cnt;	/* count of ustat stats */
65 	fmd_modstat_t	mod_stats;	/* fmd built-in per-module statistics */
66 	fmd_serd_hash_t	mod_serds;	/* hash of serd engs owned by module */
67 	char		*mod_vers;	/* a copy of module version string */
68 } fmd_module_t;
69 
70 /*
71  * ZED has two FMD hardwired module instances
72  */
73 fmd_module_t	zfs_retire_module;
74 fmd_module_t	zfs_diagnosis_module;
75 
76 /*
77  * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
78  */
79 
80 #ifdef DEBUG
/*
 * Default $UMEM_DEBUG setting used for libumem debugging.
 */
const char *
_umem_debug_init(void)
{
	static const char umem_debug_opts[] = "default,verbose";

	return (umem_debug_opts);
}
86 
/*
 * Default $UMEM_LOGGING setting used for libumem debugging.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging_opts[] = "fail,contents";

	return (umem_logging_opts);
}
92 #endif
93 
94 /*
95  * Register a module with fmd and finish module initialization.
96  * Returns an integer indicating whether it succeeded (zero) or
97  * failed (non-zero).
98  */
99 int
fmd_hdl_register(fmd_hdl_t * hdl,int version,const fmd_hdl_info_t * mip)100 fmd_hdl_register(fmd_hdl_t *hdl, int version, const fmd_hdl_info_t *mip)
101 {
102 	(void) version;
103 	fmd_module_t *mp = (fmd_module_t *)hdl;
104 
105 	mp->mod_info = mip;
106 	mp->mod_name = mip->fmdi_desc + 4;	/* drop 'ZFS ' prefix */
107 	mp->mod_spec = NULL;
108 
109 	/* bare minimum module stats */
110 	(void) strcpy(mp->mod_stats.ms_accepted.fmds_name, "fmd.accepted");
111 	(void) strcpy(mp->mod_stats.ms_caseopen.fmds_name, "fmd.caseopen");
112 	(void) strcpy(mp->mod_stats.ms_casesolved.fmds_name, "fmd.casesolved");
113 	(void) strcpy(mp->mod_stats.ms_caseclosed.fmds_name, "fmd.caseclosed");
114 
115 	fmd_serd_hash_create(&mp->mod_serds);
116 
117 	fmd_hdl_debug(hdl, "register module");
118 
119 	return (0);
120 }
121 
122 void
fmd_hdl_unregister(fmd_hdl_t * hdl)123 fmd_hdl_unregister(fmd_hdl_t *hdl)
124 {
125 	fmd_module_t *mp = (fmd_module_t *)hdl;
126 	fmd_modstat_t *msp = &mp->mod_stats;
127 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
128 
129 	/* dump generic module stats */
130 	fmd_hdl_debug(hdl, "%s: %llu", msp->ms_accepted.fmds_name,
131 	    msp->ms_accepted.fmds_value.ui64);
132 	if (ops->fmdo_close != NULL) {
133 		fmd_hdl_debug(hdl, "%s: %llu", msp->ms_caseopen.fmds_name,
134 		    msp->ms_caseopen.fmds_value.ui64);
135 		fmd_hdl_debug(hdl, "%s: %llu", msp->ms_casesolved.fmds_name,
136 		    msp->ms_casesolved.fmds_value.ui64);
137 		fmd_hdl_debug(hdl, "%s: %llu", msp->ms_caseclosed.fmds_name,
138 		    msp->ms_caseclosed.fmds_value.ui64);
139 	}
140 
141 	/* dump module specific stats */
142 	if (mp->mod_ustat != NULL) {
143 		int i;
144 
145 		for (i = 0; i < mp->mod_ustat_cnt; i++) {
146 			fmd_hdl_debug(hdl, "%s: %llu",
147 			    mp->mod_ustat[i].fmds_name,
148 			    mp->mod_ustat[i].fmds_value.ui64);
149 		}
150 	}
151 
152 	fmd_serd_hash_destroy(&mp->mod_serds);
153 
154 	fmd_hdl_debug(hdl, "unregister module");
155 }
156 
157 /*
158  * fmd_hdl_setspecific() is used to associate a data pointer with
159  * the specified handle for the duration of the module's lifetime.
160  * This pointer can be retrieved using fmd_hdl_getspecific().
161  */
162 void
fmd_hdl_setspecific(fmd_hdl_t * hdl,void * spec)163 fmd_hdl_setspecific(fmd_hdl_t *hdl, void *spec)
164 {
165 	fmd_module_t *mp = (fmd_module_t *)hdl;
166 
167 	mp->mod_spec = spec;
168 }
169 
170 /*
171  * Return the module-specific data pointer previously associated
172  * with the handle using fmd_hdl_setspecific().
173  */
174 void *
fmd_hdl_getspecific(fmd_hdl_t * hdl)175 fmd_hdl_getspecific(fmd_hdl_t *hdl)
176 {
177 	fmd_module_t *mp = (fmd_module_t *)hdl;
178 
179 	return (mp->mod_spec);
180 }
181 
182 void *
fmd_hdl_alloc(fmd_hdl_t * hdl,size_t size,int flags)183 fmd_hdl_alloc(fmd_hdl_t *hdl, size_t size, int flags)
184 {
185 	(void) hdl;
186 	return (umem_alloc(size, flags));
187 }
188 
189 void *
fmd_hdl_zalloc(fmd_hdl_t * hdl,size_t size,int flags)190 fmd_hdl_zalloc(fmd_hdl_t *hdl, size_t size, int flags)
191 {
192 	(void) hdl;
193 	return (umem_zalloc(size, flags));
194 }
195 
196 void
fmd_hdl_free(fmd_hdl_t * hdl,void * data,size_t size)197 fmd_hdl_free(fmd_hdl_t *hdl, void *data, size_t size)
198 {
199 	(void) hdl;
200 	umem_free(data, size);
201 }
202 
203 /*
204  * Record a module debug message using the specified format.
205  */
206 void
fmd_hdl_debug(fmd_hdl_t * hdl,const char * format,...)207 fmd_hdl_debug(fmd_hdl_t *hdl, const char *format, ...)
208 {
209 	char message[256];
210 	va_list vargs;
211 	fmd_module_t *mp = (fmd_module_t *)hdl;
212 
213 	va_start(vargs, format);
214 	(void) vsnprintf(message, sizeof (message), format, vargs);
215 	va_end(vargs);
216 
217 	/* prefix message with module name */
218 	zed_log_msg(LOG_INFO, "%s: %s", mp->mod_name, message);
219 }
220 
221 /* Property Retrieval */
222 
223 int32_t
fmd_prop_get_int32(fmd_hdl_t * hdl,const char * name)224 fmd_prop_get_int32(fmd_hdl_t *hdl, const char *name)
225 {
226 	(void) hdl;
227 
228 	/*
229 	 * These can be looked up in mp->modinfo->fmdi_props
230 	 * For now we just hard code for phase 2. In the
231 	 * future, there can be a ZED based override.
232 	 */
233 	if (strcmp(name, "spare_on_remove") == 0)
234 		return (1);
235 
236 	return (0);
237 }
238 
239 /* FMD Statistics */
240 
241 fmd_stat_t *
fmd_stat_create(fmd_hdl_t * hdl,uint_t flags,uint_t nstats,fmd_stat_t * statv)242 fmd_stat_create(fmd_hdl_t *hdl, uint_t flags, uint_t nstats, fmd_stat_t *statv)
243 {
244 	fmd_module_t *mp = (fmd_module_t *)hdl;
245 
246 	if (flags == FMD_STAT_NOALLOC) {
247 		mp->mod_ustat = statv;
248 		mp->mod_ustat_cnt = nstats;
249 	}
250 
251 	return (statv);
252 }
253 
254 /* Case Management */
255 
256 fmd_case_t *
fmd_case_open(fmd_hdl_t * hdl,void * data)257 fmd_case_open(fmd_hdl_t *hdl, void *data)
258 {
259 	fmd_module_t *mp = (fmd_module_t *)hdl;
260 	uuid_t uuid;
261 
262 	fmd_case_t *cp;
263 
264 	cp = fmd_hdl_zalloc(hdl, sizeof (fmd_case_t), FMD_SLEEP);
265 	cp->ci_mod = hdl;
266 	cp->ci_state = FMD_CASE_UNSOLVED;
267 	cp->ci_flags = FMD_CF_DIRTY;
268 	cp->ci_data = data;
269 	cp->ci_bufptr = NULL;
270 	cp->ci_bufsiz = 0;
271 
272 	uuid_generate(uuid);
273 	uuid_unparse(uuid, cp->ci_uuid);
274 
275 	fmd_hdl_debug(hdl, "case opened (%s)", cp->ci_uuid);
276 	mp->mod_stats.ms_caseopen.fmds_value.ui64++;
277 
278 	return (cp);
279 }
280 
281 void
fmd_case_solve(fmd_hdl_t * hdl,fmd_case_t * cp)282 fmd_case_solve(fmd_hdl_t *hdl, fmd_case_t *cp)
283 {
284 	fmd_module_t *mp = (fmd_module_t *)hdl;
285 
286 	/*
287 	 * For ZED, the event was already sent from fmd_case_add_suspect()
288 	 */
289 
290 	if (cp->ci_state >= FMD_CASE_SOLVED)
291 		fmd_hdl_debug(hdl, "case is already solved or closed");
292 
293 	cp->ci_state = FMD_CASE_SOLVED;
294 
295 	fmd_hdl_debug(hdl, "case solved (%s)", cp->ci_uuid);
296 	mp->mod_stats.ms_casesolved.fmds_value.ui64++;
297 }
298 
299 void
fmd_case_close(fmd_hdl_t * hdl,fmd_case_t * cp)300 fmd_case_close(fmd_hdl_t *hdl, fmd_case_t *cp)
301 {
302 	fmd_module_t *mp = (fmd_module_t *)hdl;
303 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
304 
305 	fmd_hdl_debug(hdl, "case closed (%s)", cp->ci_uuid);
306 
307 	if (ops->fmdo_close != NULL)
308 		ops->fmdo_close(hdl, cp);
309 
310 	mp->mod_stats.ms_caseopen.fmds_value.ui64--;
311 	mp->mod_stats.ms_caseclosed.fmds_value.ui64++;
312 
313 	if (cp->ci_bufptr != NULL && cp->ci_bufsiz > 0)
314 		fmd_hdl_free(hdl, cp->ci_bufptr, cp->ci_bufsiz);
315 
316 	fmd_hdl_free(hdl, cp, sizeof (fmd_case_t));
317 }
318 
319 void
fmd_case_uuresolved(fmd_hdl_t * hdl,const char * uuid)320 fmd_case_uuresolved(fmd_hdl_t *hdl, const char *uuid)
321 {
322 	fmd_hdl_debug(hdl, "case resolved by uuid (%s)", uuid);
323 }
324 
325 boolean_t
fmd_case_solved(fmd_hdl_t * hdl,fmd_case_t * cp)326 fmd_case_solved(fmd_hdl_t *hdl, fmd_case_t *cp)
327 {
328 	(void) hdl;
329 	return (cp->ci_state >= FMD_CASE_SOLVED);
330 }
331 
332 void
fmd_case_add_ereport(fmd_hdl_t * hdl,fmd_case_t * cp,fmd_event_t * ep)333 fmd_case_add_ereport(fmd_hdl_t *hdl, fmd_case_t *cp, fmd_event_t *ep)
334 {
335 	(void) hdl, (void) cp, (void) ep;
336 }
337 
338 static void
zed_log_fault(nvlist_t * nvl,const char * uuid,const char * code)339 zed_log_fault(nvlist_t *nvl, const char *uuid, const char *code)
340 {
341 	nvlist_t *rsrc;
342 	const char *strval;
343 	uint64_t guid;
344 	uint8_t byte;
345 
346 	zed_log_msg(LOG_INFO, "\nzed_fault_event:");
347 
348 	if (uuid != NULL)
349 		zed_log_msg(LOG_INFO, "\t%s: %s", FM_SUSPECT_UUID, uuid);
350 	if (nvlist_lookup_string(nvl, FM_CLASS, &strval) == 0)
351 		zed_log_msg(LOG_INFO, "\t%s: %s", FM_CLASS, strval);
352 	if (code != NULL)
353 		zed_log_msg(LOG_INFO, "\t%s: %s", FM_SUSPECT_DIAG_CODE, code);
354 	if (nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &byte) == 0)
355 		zed_log_msg(LOG_INFO, "\t%s: %hhu", FM_FAULT_CERTAINTY, byte);
356 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) {
357 		if (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &strval) == 0)
358 			zed_log_msg(LOG_INFO, "\t%s: %s", FM_FMRI_SCHEME,
359 			    strval);
360 		if (nvlist_lookup_uint64(rsrc, FM_FMRI_ZFS_POOL, &guid) == 0)
361 			zed_log_msg(LOG_INFO, "\t%s: %llu", FM_FMRI_ZFS_POOL,
362 			    guid);
363 		if (nvlist_lookup_uint64(rsrc, FM_FMRI_ZFS_VDEV, &guid) == 0)
364 			zed_log_msg(LOG_INFO, "\t%s: %llu \n", FM_FMRI_ZFS_VDEV,
365 			    guid);
366 	}
367 }
368 
369 static const char *
fmd_fault_mkcode(nvlist_t * fault)370 fmd_fault_mkcode(nvlist_t *fault)
371 {
372 	const char *class;
373 	const char *code = "-";
374 
375 	/*
376 	 * Note: message codes come from: openzfs/usr/src/cmd/fm/dicts/ZFS.po
377 	 */
378 	if (nvlist_lookup_string(fault, FM_CLASS, &class) == 0) {
379 		if (strcmp(class, "fault.fs.zfs.vdev.io") == 0)
380 			code = "ZFS-8000-FD";
381 		else if (strcmp(class, "fault.fs.zfs.vdev.checksum") == 0)
382 			code = "ZFS-8000-GH";
383 		else if (strcmp(class, "fault.fs.zfs.io_failure_wait") == 0)
384 			code = "ZFS-8000-HC";
385 		else if (strcmp(class, "fault.fs.zfs.io_failure_continue") == 0)
386 			code = "ZFS-8000-JQ";
387 		else if (strcmp(class, "fault.fs.zfs.log_replay") == 0)
388 			code = "ZFS-8000-K4";
389 		else if (strcmp(class, "fault.fs.zfs.pool") == 0)
390 			code = "ZFS-8000-CS";
391 		else if (strcmp(class, "fault.fs.zfs.device") == 0)
392 			code = "ZFS-8000-D3";
393 
394 	}
395 	return (code);
396 }
397 
398 void
fmd_case_add_suspect(fmd_hdl_t * hdl,fmd_case_t * cp,nvlist_t * fault)399 fmd_case_add_suspect(fmd_hdl_t *hdl, fmd_case_t *cp, nvlist_t *fault)
400 {
401 	nvlist_t *nvl;
402 	const char *code = fmd_fault_mkcode(fault);
403 	int64_t tod[2];
404 	int err = 0;
405 
406 	/*
407 	 * payload derived from fmd_protocol_list()
408 	 */
409 
410 	(void) gettimeofday(&cp->ci_tv, NULL);
411 	tod[0] = cp->ci_tv.tv_sec;
412 	tod[1] = cp->ci_tv.tv_usec;
413 
414 	nvl = fmd_nvl_alloc(hdl, FMD_SLEEP);
415 
416 	err |= nvlist_add_uint8(nvl, FM_VERSION, FM_SUSPECT_VERSION);
417 	err |= nvlist_add_string(nvl, FM_CLASS, FM_LIST_SUSPECT_CLASS);
418 	err |= nvlist_add_string(nvl, FM_SUSPECT_UUID, cp->ci_uuid);
419 	err |= nvlist_add_string(nvl, FM_SUSPECT_DIAG_CODE, code);
420 	err |= nvlist_add_int64_array(nvl, FM_SUSPECT_DIAG_TIME, tod, 2);
421 	err |= nvlist_add_uint32(nvl, FM_SUSPECT_FAULT_SZ, 1);
422 	err |= nvlist_add_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
423 	    (const nvlist_t **)&fault, 1);
424 
425 	if (err)
426 		zed_log_die("failed to populate nvlist");
427 
428 	zed_log_fault(fault, cp->ci_uuid, code);
429 	zfs_agent_post_event(FM_LIST_SUSPECT_CLASS, NULL, nvl);
430 
431 	nvlist_free(nvl);
432 	nvlist_free(fault);
433 }
434 
435 void
fmd_case_setspecific(fmd_hdl_t * hdl,fmd_case_t * cp,void * data)436 fmd_case_setspecific(fmd_hdl_t *hdl, fmd_case_t *cp, void *data)
437 {
438 	(void) hdl;
439 	cp->ci_data = data;
440 }
441 
442 void *
fmd_case_getspecific(fmd_hdl_t * hdl,fmd_case_t * cp)443 fmd_case_getspecific(fmd_hdl_t *hdl, fmd_case_t *cp)
444 {
445 	(void) hdl;
446 	return (cp->ci_data);
447 }
448 
449 void
fmd_buf_create(fmd_hdl_t * hdl,fmd_case_t * cp,const char * name,size_t size)450 fmd_buf_create(fmd_hdl_t *hdl, fmd_case_t *cp, const char *name, size_t size)
451 {
452 	assert(strcmp(name, "data") == 0), (void) name;
453 	assert(cp->ci_bufptr == NULL);
454 	assert(size < (1024 * 1024));
455 
456 	cp->ci_bufptr = fmd_hdl_alloc(hdl, size, FMD_SLEEP);
457 	cp->ci_bufsiz = size;
458 }
459 
460 void
fmd_buf_read(fmd_hdl_t * hdl,fmd_case_t * cp,const char * name,void * buf,size_t size)461 fmd_buf_read(fmd_hdl_t *hdl, fmd_case_t *cp,
462     const char *name, void *buf, size_t size)
463 {
464 	(void) hdl;
465 	assert(strcmp(name, "data") == 0), (void) name;
466 	assert(cp->ci_bufptr != NULL);
467 	assert(size <= cp->ci_bufsiz);
468 
469 	memcpy(buf, cp->ci_bufptr, size);
470 }
471 
472 void
fmd_buf_write(fmd_hdl_t * hdl,fmd_case_t * cp,const char * name,const void * buf,size_t size)473 fmd_buf_write(fmd_hdl_t *hdl, fmd_case_t *cp,
474     const char *name, const void *buf, size_t size)
475 {
476 	(void) hdl;
477 	assert(strcmp(name, "data") == 0), (void) name;
478 	assert(cp->ci_bufptr != NULL);
479 	assert(cp->ci_bufsiz >= size);
480 
481 	memcpy(cp->ci_bufptr, buf, size);
482 }
483 
484 /* SERD Engines */
485 
486 void
fmd_serd_create(fmd_hdl_t * hdl,const char * name,uint_t n,hrtime_t t)487 fmd_serd_create(fmd_hdl_t *hdl, const char *name, uint_t n, hrtime_t t)
488 {
489 	fmd_module_t *mp = (fmd_module_t *)hdl;
490 
491 	if (fmd_serd_eng_lookup(&mp->mod_serds, name) != NULL) {
492 		zed_log_msg(LOG_ERR, "failed to create SERD engine '%s': "
493 		    " name already exists", name);
494 		return;
495 	}
496 
497 	(void) fmd_serd_eng_insert(&mp->mod_serds, name, n, t);
498 }
499 
500 void
fmd_serd_destroy(fmd_hdl_t * hdl,const char * name)501 fmd_serd_destroy(fmd_hdl_t *hdl, const char *name)
502 {
503 	fmd_module_t *mp = (fmd_module_t *)hdl;
504 
505 	fmd_serd_eng_delete(&mp->mod_serds, name);
506 
507 	fmd_hdl_debug(hdl, "serd_destroy %s", name);
508 }
509 
510 int
fmd_serd_exists(fmd_hdl_t * hdl,const char * name)511 fmd_serd_exists(fmd_hdl_t *hdl, const char *name)
512 {
513 	fmd_module_t *mp = (fmd_module_t *)hdl;
514 
515 	return (fmd_serd_eng_lookup(&mp->mod_serds, name) != NULL);
516 }
517 
518 int
fmd_serd_active(fmd_hdl_t * hdl,const char * name)519 fmd_serd_active(fmd_hdl_t *hdl, const char *name)
520 {
521 	fmd_module_t *mp = (fmd_module_t *)hdl;
522 	fmd_serd_eng_t *sgp;
523 
524 	if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
525 		zed_log_msg(LOG_ERR, "serd engine '%s' does not exist", name);
526 		return (0);
527 	}
528 	return (fmd_serd_eng_fired(sgp) || !fmd_serd_eng_empty(sgp));
529 }
530 
531 void
fmd_serd_reset(fmd_hdl_t * hdl,const char * name)532 fmd_serd_reset(fmd_hdl_t *hdl, const char *name)
533 {
534 	fmd_module_t *mp = (fmd_module_t *)hdl;
535 	fmd_serd_eng_t *sgp;
536 
537 	if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
538 		zed_log_msg(LOG_ERR, "serd engine '%s' does not exist", name);
539 	} else {
540 		fmd_serd_eng_reset(sgp);
541 		fmd_hdl_debug(hdl, "serd_reset %s", name);
542 	}
543 }
544 
545 int
fmd_serd_record(fmd_hdl_t * hdl,const char * name,fmd_event_t * ep)546 fmd_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep)
547 {
548 	fmd_module_t *mp = (fmd_module_t *)hdl;
549 	fmd_serd_eng_t *sgp;
550 
551 	if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
552 		zed_log_msg(LOG_ERR, "failed to add record to SERD engine '%s'",
553 		    name);
554 		return (0);
555 	}
556 	return (fmd_serd_eng_record(sgp, ep->ev_hrt));
557 }
558 
559 void
fmd_serd_gc(fmd_hdl_t * hdl)560 fmd_serd_gc(fmd_hdl_t *hdl)
561 {
562 	fmd_module_t *mp = (fmd_module_t *)hdl;
563 
564 	fmd_serd_hash_apply(&mp->mod_serds, fmd_serd_eng_gc, NULL);
565 }
566 
567 /* FMD Timers */
568 
569 static void
_timer_notify(union sigval sv)570 _timer_notify(union sigval sv)
571 {
572 	fmd_timer_t *ftp = sv.sival_ptr;
573 	fmd_hdl_t *hdl = ftp->ft_hdl;
574 	fmd_module_t *mp = (fmd_module_t *)hdl;
575 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
576 	struct itimerspec its;
577 
578 	fmd_hdl_debug(hdl, "%s timer fired (%p)", mp->mod_name, ftp->ft_tid);
579 
580 	/* disarm the timer */
581 	memset(&its, 0, sizeof (struct itimerspec));
582 	timer_settime(ftp->ft_tid, 0, &its, NULL);
583 
584 	/* Note that the fmdo_timeout can remove this timer */
585 	if (ops->fmdo_timeout != NULL)
586 		ops->fmdo_timeout(hdl, ftp, ftp->ft_arg);
587 }
588 
589 /*
590  * Install a new timer which will fire at least delta nanoseconds after the
591  * current time. After the timeout has expired, the module's fmdo_timeout
592  * entry point is called.
593  */
594 fmd_timer_t *
fmd_timer_install(fmd_hdl_t * hdl,void * arg,fmd_event_t * ep,hrtime_t delta)595 fmd_timer_install(fmd_hdl_t *hdl, void *arg, fmd_event_t *ep, hrtime_t delta)
596 {
597 	(void) ep;
598 	struct sigevent sev;
599 	struct itimerspec its;
600 	fmd_timer_t *ftp;
601 
602 	ftp = fmd_hdl_alloc(hdl, sizeof (fmd_timer_t), FMD_SLEEP);
603 	ftp->ft_arg = arg;
604 	ftp->ft_hdl = hdl;
605 
606 	its.it_value.tv_sec = delta / 1000000000;
607 	its.it_value.tv_nsec = delta % 1000000000;
608 	its.it_interval.tv_sec = its.it_value.tv_sec;
609 	its.it_interval.tv_nsec = its.it_value.tv_nsec;
610 
611 	sev.sigev_notify = SIGEV_THREAD;
612 	sev.sigev_notify_function = _timer_notify;
613 	sev.sigev_notify_attributes = NULL;
614 	sev.sigev_value.sival_ptr = ftp;
615 	sev.sigev_signo = 0;
616 
617 	timer_create(CLOCK_REALTIME, &sev, &ftp->ft_tid);
618 	timer_settime(ftp->ft_tid, 0, &its, NULL);
619 
620 	fmd_hdl_debug(hdl, "installing timer for %d secs (%p)",
621 	    (int)its.it_value.tv_sec, ftp->ft_tid);
622 
623 	return (ftp);
624 }
625 
626 void
fmd_timer_remove(fmd_hdl_t * hdl,fmd_timer_t * ftp)627 fmd_timer_remove(fmd_hdl_t *hdl, fmd_timer_t *ftp)
628 {
629 	fmd_hdl_debug(hdl, "removing timer (%p)", ftp->ft_tid);
630 
631 	timer_delete(ftp->ft_tid);
632 
633 	fmd_hdl_free(hdl, ftp, sizeof (fmd_timer_t));
634 }
635 
636 /* Name-Value Pair Lists */
637 
638 nvlist_t *
fmd_nvl_create_fault(fmd_hdl_t * hdl,const char * class,uint8_t certainty,nvlist_t * asru,nvlist_t * fru,nvlist_t * resource)639 fmd_nvl_create_fault(fmd_hdl_t *hdl, const char *class, uint8_t certainty,
640     nvlist_t *asru, nvlist_t *fru, nvlist_t *resource)
641 {
642 	(void) hdl;
643 	nvlist_t *nvl;
644 	int err = 0;
645 
646 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
647 		zed_log_die("failed to xalloc fault nvlist");
648 
649 	err |= nvlist_add_uint8(nvl, FM_VERSION, FM_FAULT_VERSION);
650 	err |= nvlist_add_string(nvl, FM_CLASS, class);
651 	err |= nvlist_add_uint8(nvl, FM_FAULT_CERTAINTY, certainty);
652 
653 	if (asru != NULL)
654 		err |= nvlist_add_nvlist(nvl, FM_FAULT_ASRU, asru);
655 	if (fru != NULL)
656 		err |= nvlist_add_nvlist(nvl, FM_FAULT_FRU, fru);
657 	if (resource != NULL)
658 		err |= nvlist_add_nvlist(nvl, FM_FAULT_RESOURCE, resource);
659 
660 	if (err)
661 		zed_log_die("failed to populate nvlist: %s\n", strerror(err));
662 
663 	return (nvl);
664 }
665 
/*
 * Match string s against pattern p, where '*' in the pattern matches any
 * sequence of characters (including none) and every other character
 * matches only itself.  Returns 1 on a match and 0 otherwise.
 *
 * A NULL pattern never matches; a NULL string is treated as the empty
 * string.
 *
 * sourced from fmd_string.c
 */
static int
fmd_strmatch(const char *s, const char *p)
{
	if (p == NULL)
		return (0);

	if (s == NULL)
		s = ""; /* treat NULL string as the empty string */

	for (;;) {
		char pc = *p++;

		/* end of pattern: a match only if the string ended too */
		if (pc == '\0')
			return (*s == '\0');

		if (pc == '*') {
			while (*p == '*')
				p++; /* consecutive *'s can be collapsed */

			/* a trailing '*' swallows the rest of the string */
			if (*p == '\0')
				return (1);

			/* try the rest of the pattern at every suffix of s */
			for (; *s != '\0'; s++) {
				if (fmd_strmatch(s, p) != 0)
					return (1);
			}

			return (0);
		}

		/* literal character: must equal the next string character */
		if (pc != *s++)
			return (0);
	}
}
702 
703 int
fmd_nvl_class_match(fmd_hdl_t * hdl,nvlist_t * nvl,const char * pattern)704 fmd_nvl_class_match(fmd_hdl_t *hdl, nvlist_t *nvl, const char *pattern)
705 {
706 	(void) hdl;
707 	const char *class;
708 
709 	return (nvl != NULL &&
710 	    nvlist_lookup_string(nvl, FM_CLASS, &class) == 0 &&
711 	    fmd_strmatch(class, pattern));
712 }
713 
714 nvlist_t *
fmd_nvl_alloc(fmd_hdl_t * hdl,int flags)715 fmd_nvl_alloc(fmd_hdl_t *hdl, int flags)
716 {
717 	(void) hdl, (void) flags;
718 	nvlist_t *nvl = NULL;
719 
720 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
721 		return (NULL);
722 
723 	return (nvl);
724 }
725 
726 
727 /*
728  * ZED Agent specific APIs
729  */
730 
731 fmd_hdl_t *
fmd_module_hdl(const char * name)732 fmd_module_hdl(const char *name)
733 {
734 	if (strcmp(name, "zfs-retire") == 0)
735 		return ((fmd_hdl_t *)&zfs_retire_module);
736 	if (strcmp(name, "zfs-diagnosis") == 0)
737 		return ((fmd_hdl_t *)&zfs_diagnosis_module);
738 
739 	return (NULL);
740 }
741 
742 boolean_t
fmd_module_initialized(fmd_hdl_t * hdl)743 fmd_module_initialized(fmd_hdl_t *hdl)
744 {
745 	fmd_module_t *mp = (fmd_module_t *)hdl;
746 
747 	return (mp->mod_info != NULL);
748 }
749 
750 /*
751  * fmd_module_recv is called for each event that is received by
752  * the fault manager that has a class that matches one of the
753  * module's subscriptions.
754  */
755 void
fmd_module_recv(fmd_hdl_t * hdl,nvlist_t * nvl,const char * class)756 fmd_module_recv(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class)
757 {
758 	fmd_module_t *mp = (fmd_module_t *)hdl;
759 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
760 	fmd_event_t faux_event = {0};
761 	int64_t *tv;
762 	uint_t n;
763 
764 	/*
765 	 * Will need to normalized this if we persistently store the case data
766 	 */
767 	if (nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tv, &n) == 0)
768 		faux_event.ev_hrt = tv[0] * NANOSEC + tv[1];
769 	else
770 		faux_event.ev_hrt = 0;
771 
772 	ops->fmdo_recv(hdl, &faux_event, nvl, class);
773 
774 	mp->mod_stats.ms_accepted.fmds_value.ui64++;
775 
776 	/* TBD - should we initiate fm_module_gc() periodically? */
777 }
778