/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
 */

#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <pthread.h>
#include <unistd.h>

#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"

/*
 * agent dispatch code
 */

static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER;
static list_t agent_events;	/* list of pending events */
static int agent_exiting;

typedef struct agent_event {
	char		ae_class[64];
	char		ae_subclass[32];
	nvlist_t	*ae_nvl;
	list_node_t	ae_node;
} agent_event_t;

pthread_t g_agents_tid;

libzfs_handle_t *g_zfs_hdl;

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;

typedef struct guid_search {
	uint64_t	gs_pool_guid;
	uint64_t	gs_vdev_guid;
	char		*gs_devid;
	device_type_t	gs_vdev_type;
	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;

/*
 * Walks the vdev tree recursively looking for a matching devid.
 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
 */
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
	guid_search_t *gsp = arg;
	char *path = NULL;
	uint_t c, children;
	nvlist_t **child;

	/*
	 * First iterate over any children.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
				return (B_TRUE);
			}
		}
	}
	/*
	 * Iterate over any spares and cache devices
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
				return (B_TRUE);
			}
		}
	}
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
				return (B_TRUE);
			}
		}
	}
	/*
	 * On a devid match, grab the vdev guid and expansion time, if any.
	 */
	if (gsp->gs_devid != NULL &&
	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
	    (strcmp(gsp->gs_devid, path) == 0)) {
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &gsp->gs_vdev_guid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}

	return (B_FALSE);
}
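#if 0
/*
 * Illustrative sketch only (not compiled into zed): how the devid search
 * above can be exercised against a minimal, hand-built vdev tree.  The
 * devid string, GUID, and function name below are hypothetical.
 */
static void
example_iter_vdev_sketch(void)
{
	char devid[] = "ata-EXAMPLE-DISK_SERIAL";
	nvlist_t *leaf = fnvlist_alloc();
	nvlist_t *root = fnvlist_alloc();
	guid_search_t search = { 0 };

	/* one leaf vdev carrying a devid and guid, hung off the root */
	fnvlist_add_string(leaf, ZPOOL_CONFIG_DEVID, devid);
	fnvlist_add_uint64(leaf, ZPOOL_CONFIG_GUID, 0x1234ULL);
	fnvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN, &leaf, 1);

	search.gs_devid = devid;
	if (zfs_agent_iter_vdev(NULL, root, &search)) {
		/* search.gs_vdev_guid now holds 0x1234 */
		zed_log_msg(LOG_INFO, "found vdev guid %llu",
		    (u_longlong_t)search.gs_vdev_guid);
	}

	fnvlist_free(leaf);
	fnvlist_free(root);
}
#endif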
static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
	guid_search_t *gsp = arg;
	nvlist_t *config, *nvl;

	/*
	 * For each vdev in this pool, look for a match by devid
	 */
	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvl) == 0) {
			(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
		}
	}
	/*
	 * if a match was found then grab the pool guid
	 */
	if (gsp->gs_vdev_guid) {
		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &gsp->gs_pool_guid);
	}

	zpool_close(zhp);
	return (gsp->gs_vdev_guid != 0);
}
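#if 0
/*
 * Illustrative sketch only (not compiled into zed): resolving a devid to
 * its pool and vdev GUIDs with the two iterators above, the same way
 * zfs_agent_post_event() does for EC_DEV_REMOVE events.  The helper name
 * is hypothetical.
 */
static boolean_t
example_resolve_devid(char *devid, uint64_t *pool_guid, uint64_t *vdev_guid)
{
	guid_search_t search = { 0 };

	search.gs_devid = devid;
	(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);

	*pool_guid = search.gs_pool_guid;
	*vdev_guid = search.gs_vdev_guid;
	return (search.gs_vdev_guid != 0);
}
#endif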
void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}

	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
	 * from the vdev_disk layer after a hot unplug. Fortunately we do
	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
	 * proxy so we remap it here for the benefit of the diagnosis engine.
	 * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
	 * layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;

		class = "resource.fs.zfs.removed";
		subclass = "";

		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
		 * ZFS_EV_POOL_GUID may be missing so find them.
		 */
		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
		    &search.gs_devid);
		(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
		pool_guid = search.gs_pool_guid;
		vdev_guid = search.gs_vdev_guid;
		devtype = search.gs_vdev_type;

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for VDEVs which were expanded recently (10s) and
		 * avoid activating spares in response to partitions being
		 * deleted and created in rapid succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    search.gs_devid);
			/* the event is never queued, so free it here */
			nvlist_free(event->ae_nvl);
			free(event);
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

out:
	(void) pthread_cond_signal(&agent_cond);
}

static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
	/*
	 * The diagnosis engine subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
	 */
	if (strstr(class, "ereport.fs.zfs.") != NULL ||
	    strstr(class, "resource.fs.zfs.") != NULL ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
	}

	/*
	 * The retire agent subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-retire.conf
	 *
	 * NOTE: fault events come directly from our diagnosis engine
	 * and will not pass through the zfs kernel module.
	 */
	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
	}

	/*
	 * The SLM module only consumes disk events and vdev check events.
	 *
	 * NOTE: disk events come directly from the disk monitor and will
	 * not pass through the zfs kernel module.
	 */
	if (strstr(class, "EC_dev_") != NULL ||
	    strcmp(class, EC_ZFS) == 0) {
		(void) zfs_slm_event(class, subclass, nvl);
	}
}
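#if 0
/*
 * Illustrative sketch only (not compiled into zed): posting a disk
 * removal sysevent the way zed's disk monitor might.  The devid is a
 * hypothetical placeholder; zfs_agent_post_event() remaps this pair to
 * a "resource.fs.zfs.removed" event for the diagnosis and retire agents.
 */
static void
example_post_disk_remove_sketch(void)
{
	nvlist_t *nvl = fnvlist_alloc();

	fnvlist_add_string(nvl, DEV_IDENTIFIER, "ata-EXAMPLE-DISK_SERIAL");
	zfs_agent_post_event(EC_DEV_REMOVE, ESC_DISK, nvl);

	/* the event payload is duplicated internally, so free our copy */
	fnvlist_free(nvl);
}
#endif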
/*
 * Events are consumed and dispatched from this thread.  An agent can
 * also post an event, so the event list lock is not held when calling
 * an agent.  One event is consumed at a time.
 */
static void *
zfs_agent_consumer_thread(void *arg)
{
	for (;;) {
		agent_event_t *event;

		(void) pthread_mutex_lock(&agent_lock);

		/* wait for an event to show up */
		while (!agent_exiting && list_is_empty(&agent_events))
			(void) pthread_cond_wait(&agent_cond, &agent_lock);

		if (agent_exiting) {
			(void) pthread_mutex_unlock(&agent_lock);
			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
			    "exiting");
			return (NULL);
		}

		if ((event = (list_head(&agent_events))) != NULL) {
			list_remove(&agent_events, event);

			(void) pthread_mutex_unlock(&agent_lock);

			/* dispatch to all event subscribers */
			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
			    event->ae_nvl);

			nvlist_free(event->ae_nvl);
			free(event);
			continue;
		}

		(void) pthread_mutex_unlock(&agent_lock);
	}

	return (NULL);
}

void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
	fmd_hdl_t *hdl;

	g_zfs_hdl = zfs_hdl;

	if (zfs_slm_init() != 0)
		zed_log_die("Failed to initialize zfs slm");
	zed_log_msg(LOG_INFO, "Add Agent: init");

	hdl = fmd_module_hdl("zfs-diagnosis");
	_zfs_diagnosis_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs diagnosis");

	hdl = fmd_module_hdl("zfs-retire");
	_zfs_retire_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs retire");

	list_create(&agent_events, sizeof (agent_event_t),
	    offsetof(struct agent_event, ae_node));

	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
	    NULL) != 0) {
		list_destroy(&agent_events);
		zed_log_die("Failed to initialize agents");
	}
}

void
zfs_agent_fini(void)
{
	fmd_hdl_t *hdl;
	agent_event_t *event;

	agent_exiting = 1;
	(void) pthread_cond_signal(&agent_cond);

	/* wait for the event consumer thread to complete */
	(void) pthread_join(g_agents_tid, NULL);

	/* drain any pending events */
	while ((event = (list_head(&agent_events))) != NULL) {
		list_remove(&agent_events, event);
		nvlist_free(event->ae_nvl);
		free(event);
	}

	list_destroy(&agent_events);

	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
		_zfs_retire_fini(hdl);
		fmd_hdl_unregister(hdl);
	}
	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
		_zfs_diagnosis_fini(hdl);
		fmd_hdl_unregister(hdl);
	}

	zed_log_msg(LOG_INFO, "Add Agent: fini");
	zfs_slm_fini();

	g_zfs_hdl = NULL;
}
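#if 0
/*
 * Illustrative sketch only (not compiled into zed): the init/post/fini
 * lifecycle a caller such as zed drives.  Error handling is elided and
 * the vdev check event is just an example.
 */
static void
example_agent_lifecycle_sketch(libzfs_handle_t *zhdl)
{
	nvlist_t *nvl = fnvlist_alloc();

	zfs_agent_init(zhdl);
	zfs_agent_post_event(EC_ZFS, ESC_ZFS_VDEV_CHECK, nvl);
	zfs_agent_fini();

	fnvlist_free(nvl);
}
#endif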