// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
 * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
 */

#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <pthread.h>
#include <unistd.h>

#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"

/*
 * agent dispatch code
 */

static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER;
static list_t agent_events;	/* list of pending events */
static int agent_exiting;

typedef struct agent_event {
	char		ae_class[64];
	char		ae_subclass[32];
	nvlist_t	*ae_nvl;
	list_node_t	ae_node;
} agent_event_t;

pthread_t g_agents_tid;

libzfs_handle_t *g_zfs_hdl;

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;

typedef struct guid_search {
	uint64_t	gs_pool_guid;
	uint64_t	gs_vdev_guid;
	const char	*gs_devid;
	device_type_t	gs_vdev_type;
	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;

/*
 * Walks the vdev tree recursively looking for a matching devid.
 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
 */
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
	guid_search_t *gsp = arg;
	const char *path = NULL;
	uint_t c, children;
	nvlist_t **child;
	uint64_t vdev_guid;

	/*
	 * First iterate over any children.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
				return (B_TRUE);
			}
		}
	}
	/*
	 * Iterate over any spares and cache devices.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
				return (B_TRUE);
			}
		}
	}
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
				return (B_TRUE);
			}
		}
	}
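	/*
	 * Note: the recursive calls above tag a hit by the top-level
	 * array it was found through (primary children, spares, or
	 * l2cache); the devid and guid checks below apply only to the
	 * vdev currently being visited.
	 */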
	/*
	 * On a devid match, grab the vdev guid and expansion time, if any.
	 */
	if (gsp->gs_devid != NULL &&
	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
	    (strcmp(gsp->gs_devid, path) == 0)) {
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &gsp->gs_vdev_guid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}
	/*
	 * Otherwise, on a vdev guid match, grab the devid and expansion
	 * time. The devid might be missing on removal since it is not part
	 * of the blkid cache, and an L2ARC vdev does not store the pool guid
	 * in its blkid, so this is a special case for L2ARC vdevs.
	 */
	else if (gsp->gs_vdev_guid != 0 &&
	    nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
	    gsp->gs_vdev_guid == vdev_guid) {
		if (gsp->gs_devid == NULL) {
			(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
			    &gsp->gs_devid);
		}
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}

	return (B_FALSE);
}

static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
	guid_search_t *gsp = arg;
	nvlist_t *config, *nvl;

	/*
	 * For each vdev in this pool, look for a match by devid.
	 */
	boolean_t found = B_FALSE;
	uint64_t pool_guid;

	/* Get pool configuration and extract pool GUID */
	if ((config = zpool_get_config(zhp, NULL)) == NULL ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pool_guid) != 0)
		goto out;

	/* Skip this pool if we're looking for a specific pool */
	if (gsp->gs_pool_guid != 0 && pool_guid != gsp->gs_pool_guid)
		goto out;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0)
		found = zfs_agent_iter_vdev(zhp, nvl, gsp);

	if (found && gsp->gs_pool_guid == 0)
		gsp->gs_pool_guid = pool_guid;

out:
	zpool_close(zhp);
	return (found);
}

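/*
 * Queue an event for the agent consumer thread.  The payload is
 * duplicated, so the caller retains ownership of nvl.
 */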
void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}

	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
	 * from the vdev_disk layer after a hot unplug. Fortunately we do
	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
	 * proxy so we remap it here for the benefit of the diagnosis engine.
	 * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
	 * layer. Processing multiple FM_RESOURCE_REMOVED events is not
	 * harmful.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;
		const char *devid = NULL;

		class = "resource.fs.zfs.removed";
		subclass = "";

		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * If the devid is missing but the vdev guid is available,
		 * find the devid and pool guid from the vdev guid.
		 * For multipath, spare, and l2arc devices, ZFS_EV_VDEV_GUID
		 * or ZFS_EV_POOL_GUID may be missing, so find them.
		 */
		search.gs_devid = devid;
		search.gs_vdev_guid = vdev_guid;
		search.gs_pool_guid = pool_guid;
		zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
		if (devid == NULL)
			devid = search.gs_devid;
		if (pool_guid == 0)
			pool_guid = search.gs_pool_guid;
		if (vdev_guid == 0)
			vdev_guid = search.gs_vdev_guid;
		devtype = search.gs_vdev_type;

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for vdevs which were expanded recently (within
		 * 10 seconds), and avoid activating spares in response to
		 * partitions being deleted and created in rapid succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    devid);
			fnvlist_free(payload);
			free(event);
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

out:
	(void) pthread_cond_signal(&agent_cond);
}

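/*
 * Fan a single event out to every in-process agent (the diagnosis
 * engine, the retire agent, and the SLM module) whose subscriptions
 * match its class.
 */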
static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
	/*
	 * The diagnosis engine subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
	 */
	if (strstr(class, "ereport.fs.zfs.") != NULL ||
	    strstr(class, "resource.fs.zfs.") != NULL ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
	}

	/*
	 * The retire agent subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-retire.conf
	 *
	 * NOTE: fault events come directly from our diagnosis engine
	 * and will not pass through the zfs kernel module.
	 */
	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
	}

	/*
	 * The SLM module only consumes disk events and vdev check events.
	 *
	 * NOTE: disk events come directly from the disk monitor and will
	 * not pass through the zfs kernel module.
	 */
	if (strstr(class, "EC_dev_") != NULL ||
	    strcmp(class, EC_ZFS) == 0) {
		(void) zfs_slm_event(class, subclass, nvl);
	}
}

/*
 * Events are consumed and dispatched from this thread.  An agent can
 * also post an event, so the event list lock is not held when calling
 * an agent.  One event is consumed at a time.
 */
static void *
zfs_agent_consumer_thread(void *arg)
{
	(void) arg;

	for (;;) {
		agent_event_t *event;

		(void) pthread_mutex_lock(&agent_lock);

		/* wait for an event to show up */
		while (!agent_exiting && list_is_empty(&agent_events))
			(void) pthread_cond_wait(&agent_cond, &agent_lock);

		if (agent_exiting) {
			(void) pthread_mutex_unlock(&agent_lock);
			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
			    "exiting");
			return (NULL);
		}

		if ((event = list_remove_head(&agent_events)) != NULL) {
			(void) pthread_mutex_unlock(&agent_lock);

			/* dispatch to all event subscribers */
			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
			    event->ae_nvl);

			nvlist_free(event->ae_nvl);
			free(event);
			continue;
		}

		(void) pthread_mutex_unlock(&agent_lock);
	}

	return (NULL);
}

void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
	fmd_hdl_t *hdl;

	g_zfs_hdl = zfs_hdl;

	if (zfs_slm_init() != 0)
		zed_log_die("Failed to initialize zfs slm");
	zed_log_msg(LOG_INFO, "Add Agent: init");

	hdl = fmd_module_hdl("zfs-diagnosis");
	_zfs_diagnosis_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs diagnosis");

	hdl = fmd_module_hdl("zfs-retire");
	_zfs_retire_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs retire");

	list_create(&agent_events, sizeof (agent_event_t),
	    offsetof(struct agent_event, ae_node));

	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
	    NULL) != 0) {
		list_destroy(&agent_events);
		zed_log_die("Failed to initialize agents");
	}
	pthread_setname_np(g_agents_tid, "agents");
}

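/*
 * Tear down in roughly the reverse order of zfs_agent_init(): stop and
 * join the consumer thread, drain any queued events, then shut down
 * the fmd modules and the SLM.
 */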
void
zfs_agent_fini(void)
{
	fmd_hdl_t *hdl;
	agent_event_t *event;

	agent_exiting = 1;
	(void) pthread_cond_signal(&agent_cond);

	/* wait for the agent consumer thread to complete */
	(void) pthread_join(g_agents_tid, NULL);

	/* drain any pending events */
	while ((event = list_remove_head(&agent_events)) != NULL) {
		nvlist_free(event->ae_nvl);
		free(event);
	}

	list_destroy(&agent_events);

	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
		_zfs_retire_fini(hdl);
		fmd_hdl_unregister(hdl);
	}
	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
		_zfs_diagnosis_fini(hdl);
		fmd_hdl_unregister(hdl);
	}

	zed_log_msg(LOG_INFO, "Add Agent: fini");
	zfs_slm_fini();

	g_zfs_hdl = NULL;
}