/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
 * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
 */

#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <pthread.h>
#include <unistd.h>

#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"

/*
 * agent dispatch code
 */

static pthread_mutex_t	agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	agent_cond = PTHREAD_COND_INITIALIZER;
static list_t		agent_events;	/* list of pending events */
static int		agent_exiting;

typedef struct agent_event {
	char		ae_class[64];
	char		ae_subclass[32];
	nvlist_t	*ae_nvl;
	list_node_t	ae_node;
} agent_event_t;

pthread_t g_agents_tid;

libzfs_handle_t *g_zfs_hdl;

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;

typedef struct guid_search {
	uint64_t	gs_pool_guid;
	uint64_t	gs_vdev_guid;
	const char	*gs_devid;
	device_type_t	gs_vdev_type;
	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;

/*
 * Walks the vdev tree recursively looking for a matching devid.
 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
 */
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
	guid_search_t *gsp = arg;
	const char *path = NULL;
	uint_t c, children;
	nvlist_t **child;
	uint64_t vdev_guid;

	/*
	 * First iterate over any children.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
				return (B_TRUE);
			}
		}
	}
	/*
	 * Iterate over any spares and cache devices.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
				return (B_TRUE);
			}
		}
	}
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
				return (B_TRUE);
			}
		}
	}
	/*
	 * On a devid match, grab the vdev guid and expansion time, if any.
	 */
	if (gsp->gs_devid != NULL &&
	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
	    (strcmp(gsp->gs_devid, path) == 0)) {
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &gsp->gs_vdev_guid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}
	/*
	 * Otherwise, on a vdev guid match, grab the devid and expansion
	 * time. The devid might be missing on removal since it is no
	 * longer part of the blkid cache, and an L2ARC vdev does not
	 * store the pool guid in its blkid, so this is a special case
	 * for L2ARC vdevs.
	 */
	else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL &&
	    nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
	    gsp->gs_vdev_guid == vdev_guid) {
		(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
		    &gsp->gs_devid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}

	return (B_FALSE);
}
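
/*
 * zpool_iter() callback: search a single pool for the device described
 * by the guid_search_t argument. Returns nonzero once both the devid
 * and the vdev guid have been resolved, which ends the pool iteration.
 */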
static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
	guid_search_t *gsp = arg;
	nvlist_t *config, *nvl;

	/*
	 * For each vdev in this pool, look for a match by devid.
	 */
	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvl) == 0) {
			(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
		}
	}
	/*
	 * If a match was found, grab the pool guid.
	 */
	if (gsp->gs_vdev_guid && gsp->gs_devid) {
		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &gsp->gs_pool_guid);
	}

	zpool_close(zhp);
	return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0);
}
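
/*
 * Queue an event for the agent consumer thread, which fans it out to
 * the diagnosis, retire, and SLM modules. Linux device-removal events
 * are remapped below before being queued.
 */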
void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}

	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
	 * from the vdev_disk layer after a hot unplug. Fortunately we do
	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
	 * proxy, so we remap it here for the benefit of the diagnosis
	 * engine. Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED
	 * from the spa layer. Processing multiple FM_RESOURCE_REMOVED
	 * events is not harmful.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;
		const char *devid = NULL;

		class = "resource.fs.zfs.removed";
		subclass = "";

		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * If the devid is missing but the vdev guid is available,
		 * look up the devid and pool guid from the vdev guid. For
		 * multipath, spare, and l2arc devices, ZFS_EV_VDEV_GUID or
		 * ZFS_EV_POOL_GUID may be missing, so look them up as well.
		 */
		if (devid == NULL || pool_guid == 0 || vdev_guid == 0) {
			if (devid == NULL)
				search.gs_vdev_guid = vdev_guid;
			else
				search.gs_devid = devid;
			zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
			if (devid == NULL)
				devid = search.gs_devid;
			if (pool_guid == 0)
				pool_guid = search.gs_pool_guid;
			if (vdev_guid == 0)
				vdev_guid = search.gs_vdev_guid;
			devtype = search.gs_vdev_type;
		}

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for vdevs which were expanded recently (within
		 * 10 seconds), and to avoid activating spares in response
		 * to partitions being deleted and created in rapid
		 * succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    devid);
			fnvlist_free(payload);
			free(event);
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

out:
	(void) pthread_cond_signal(&agent_cond);
}
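
/*
 * Deliver one event to every agent whose subscriptions match its class.
 */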
static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
	/*
	 * The diagnosis engine subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
	 */
	if (strstr(class, "ereport.fs.zfs.") != NULL ||
	    strstr(class, "resource.fs.zfs.") != NULL ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
	}

	/*
	 * The retire agent subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-retire.conf
	 *
	 * NOTE: fault events come directly from our diagnosis engine
	 * and will not pass through the zfs kernel module.
	 */
	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
	}

	/*
	 * The SLM module only consumes disk events and vdev check events.
	 *
	 * NOTE: disk events come directly from the disk monitor and will
	 * not pass through the zfs kernel module.
	 */
	if (strstr(class, "EC_dev_") != NULL ||
	    strcmp(class, EC_ZFS) == 0) {
		(void) zfs_slm_event(class, subclass, nvl);
	}
}

/*
 * Events are consumed and dispatched from this thread. An agent can
 * also post an event, so the event list lock is not held when calling
 * into an agent. One event is consumed at a time.
 */
static void *
zfs_agent_consumer_thread(void *arg)
{
	(void) arg;

	for (;;) {
		agent_event_t *event;

		(void) pthread_mutex_lock(&agent_lock);

		/* wait for an event to show up */
		while (!agent_exiting && list_is_empty(&agent_events))
			(void) pthread_cond_wait(&agent_cond, &agent_lock);

		if (agent_exiting) {
			(void) pthread_mutex_unlock(&agent_lock);
			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
			    "exiting");
			return (NULL);
		}

		if ((event = (list_head(&agent_events))) != NULL) {
			list_remove(&agent_events, event);

			(void) pthread_mutex_unlock(&agent_lock);

			/* dispatch to all event subscribers */
			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
			    event->ae_nvl);

			nvlist_free(event->ae_nvl);
			free(event);
			continue;
		}

		(void) pthread_mutex_unlock(&agent_lock);
	}

	return (NULL);
}

void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
	fmd_hdl_t *hdl;

	g_zfs_hdl = zfs_hdl;

	if (zfs_slm_init() != 0)
		zed_log_die("Failed to initialize zfs slm");
	zed_log_msg(LOG_INFO, "Add Agent: init");

	hdl = fmd_module_hdl("zfs-diagnosis");
	_zfs_diagnosis_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs diagnosis");

	hdl = fmd_module_hdl("zfs-retire");
	_zfs_retire_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs retire");

	list_create(&agent_events, sizeof (agent_event_t),
	    offsetof(struct agent_event, ae_node));

	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
	    NULL) != 0) {
		list_destroy(&agent_events);
		zed_log_die("Failed to initialize agents");
	}
	pthread_setname_np(g_agents_tid, "agents");
}
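
/*
 * Signal the consumer thread to exit, drain any queued events, and tear
 * down the fmd modules in the reverse order of initialization.
 */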
void
zfs_agent_fini(void)
{
	fmd_hdl_t *hdl;
	agent_event_t *event;

	/*
	 * Set the exit flag and signal under the lock so the consumer
	 * thread cannot miss the wakeup between its empty-list check and
	 * its call to pthread_cond_wait().
	 */
	(void) pthread_mutex_lock(&agent_lock);
	agent_exiting = 1;
	(void) pthread_cond_signal(&agent_cond);
	(void) pthread_mutex_unlock(&agent_lock);

	/* wait for the event consumer thread to complete */
	(void) pthread_join(g_agents_tid, NULL);

	/* drain any pending events */
	while ((event = (list_head(&agent_events))) != NULL) {
		list_remove(&agent_events, event);
		nvlist_free(event->ae_nvl);
		free(event);
	}

	list_destroy(&agent_events);

	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
		_zfs_retire_fini(hdl);
		fmd_hdl_unregister(hdl);
	}
	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
		_zfs_diagnosis_fini(hdl);
		fmd_hdl_unregister(hdl);
	}

	zed_log_msg(LOG_INFO, "Add Agent: fini");
	zfs_slm_fini();

	g_zfs_hdl = NULL;
}