1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License Version 1.0 (CDDL-1.0). 6 * You can obtain a copy of the license from the top-level file 7 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. 8 * You may not use this file except in compliance with the license. 9 * 10 * CDDL HEADER END 11 */ 12 13 /* 14 * Copyright (c) 2016, 2017, Intel Corporation. 15 */ 16 17 #ifdef HAVE_LIBUDEV 18 19 #include <errno.h> 20 #include <fcntl.h> 21 #include <libnvpair.h> 22 #include <libudev.h> 23 #include <libzfs.h> 24 #include <libzutil.h> 25 #include <pthread.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include <sys/sysevent/eventdefs.h> 30 #include <sys/sysevent/dev.h> 31 32 #include "zed_log.h" 33 #include "zed_disk_event.h" 34 #include "agents/zfs_agents.h" 35 36 /* 37 * Portions of ZED need to see disk events for disks belonging to ZFS pools. 38 * A libudev monitor is established to monitor block device actions and pass 39 * them on to internal ZED logic modules. Initially, zfs_mod.c is the only 40 * consumer and is the Linux equivalent for the illumos syseventd ZFS SLM 41 * module responsible for handling disk events for ZFS. 42 */ 43 44 pthread_t g_mon_tid; 45 struct udev *g_udev; 46 struct udev_monitor *g_mon; 47 48 49 #define DEV_BYID_PATH "/dev/disk/by-id/" 50 51 /* 64MB is minimum usable disk for ZFS */ 52 #define MINIMUM_SECTORS 131072 53 54 55 /* 56 * Post disk event to SLM module 57 * 58 * occurs in the context of monitor thread 59 */ 60 static void 61 zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl) 62 { 63 char *strval; 64 uint64_t numval; 65 66 zed_log_msg(LOG_INFO, "zed_disk_event:"); 67 zed_log_msg(LOG_INFO, "\tclass: %s", class); 68 zed_log_msg(LOG_INFO, "\tsubclass: %s", subclass); 69 if (nvlist_lookup_string(nvl, DEV_NAME, &strval) == 0) 70 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_NAME, strval); 71 if (nvlist_lookup_string(nvl, DEV_PATH, &strval) == 0) 72 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval); 73 if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0) 74 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval); 75 if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0) 76 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval); 77 if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0) 78 zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval); 79 if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0) 80 zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval); 81 if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0) 82 zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_VDEV_GUID, numval); 83 84 (void) zfs_agent_post_event(class, subclass, nvl); 85 } 86 87 /* 88 * dev_event_nvlist: place event schema into an nv pair list 89 * 90 * NAME VALUE (example) 91 * -------------- -------------------------------------------------------- 92 * DEV_NAME /dev/sdl 93 * DEV_PATH /devices/pci0000:00/0000:00:03.0/0000:04:00.0/host0/... 94 * DEV_IDENTIFIER ata-Hitachi_HTS725050A9A362_100601PCG420VLJ37DMC 95 * DEV_PHYS_PATH pci-0000:04:00.0-sas-0x4433221101000000-lun-0 96 * DEV_IS_PART --- 97 * DEV_SIZE 500107862016 98 * ZFS_EV_POOL_GUID 17523635698032189180 99 * ZFS_EV_VDEV_GUID 14663607734290803088 100 */ 101 static nvlist_t * 102 dev_event_nvlist(struct udev_device *dev) 103 { 104 nvlist_t *nvl; 105 char strval[128]; 106 const char *value, *path; 107 uint64_t guid; 108 109 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) 110 return (NULL); 111 112 if (zfs_device_get_devid(dev, strval, sizeof (strval)) == 0) 113 (void) nvlist_add_string(nvl, DEV_IDENTIFIER, strval); 114 if (zfs_device_get_physical(dev, strval, sizeof (strval)) == 0) 115 (void) nvlist_add_string(nvl, DEV_PHYS_PATH, strval); 116 if ((path = udev_device_get_devnode(dev)) != NULL) 117 (void) nvlist_add_string(nvl, DEV_NAME, path); 118 if ((value = udev_device_get_devpath(dev)) != NULL) 119 (void) nvlist_add_string(nvl, DEV_PATH, value); 120 value = udev_device_get_devtype(dev); 121 if ((value != NULL && strcmp("partition", value) == 0) || 122 (udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER") 123 != NULL)) { 124 (void) nvlist_add_boolean(nvl, DEV_IS_PART); 125 } 126 if ((value = udev_device_get_sysattr_value(dev, "size")) != NULL) { 127 uint64_t numval = DEV_BSIZE; 128 129 numval *= strtoull(value, NULL, 10); 130 (void) nvlist_add_uint64(nvl, DEV_SIZE, numval); 131 } 132 133 /* 134 * Grab the pool and vdev guids from blkid cache 135 */ 136 value = udev_device_get_property_value(dev, "ID_FS_UUID"); 137 if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0) 138 (void) nvlist_add_uint64(nvl, ZFS_EV_POOL_GUID, guid); 139 140 value = udev_device_get_property_value(dev, "ID_FS_UUID_SUB"); 141 if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0) 142 (void) nvlist_add_uint64(nvl, ZFS_EV_VDEV_GUID, guid); 143 144 /* 145 * Either a vdev guid or a devid must be present for matching 146 */ 147 if (!nvlist_exists(nvl, DEV_IDENTIFIER) && 148 !nvlist_exists(nvl, ZFS_EV_VDEV_GUID)) { 149 nvlist_free(nvl); 150 return (NULL); 151 } 152 153 return (nvl); 154 } 155 156 /* 157 * Listen for block device uevents 158 */ 159 static void * 160 zed_udev_monitor(void *arg) 161 { 162 struct udev_monitor *mon = arg; 163 char *tmp, *tmp2; 164 165 zed_log_msg(LOG_INFO, "Waiting for new udev disk events..."); 166 167 while (1) { 168 struct udev_device *dev; 169 const char *action, *type, *part, *sectors; 170 const char *bus, *uuid; 171 const char *class, *subclass; 172 nvlist_t *nvl; 173 boolean_t is_zfs = B_FALSE; 174 175 /* allow a cancellation while blocked (recvmsg) */ 176 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); 177 178 /* blocks at recvmsg until an event occurs */ 179 if ((dev = udev_monitor_receive_device(mon)) == NULL) { 180 zed_log_msg(LOG_WARNING, "zed_udev_monitor: receive " 181 "device error %d", errno); 182 continue; 183 } 184 185 /* allow all steps to complete before a cancellation */ 186 pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); 187 188 /* 189 * Strongly typed device is the preferred filter 190 */ 191 type = udev_device_get_property_value(dev, "ID_FS_TYPE"); 192 if (type != NULL && type[0] != '\0') { 193 if (strcmp(type, "zfs_member") == 0) { 194 is_zfs = B_TRUE; 195 } else { 196 /* not ours, so skip */ 197 zed_log_msg(LOG_INFO, "zed_udev_monitor: skip " 198 "%s (in use by %s)", 199 udev_device_get_devnode(dev), type); 200 udev_device_unref(dev); 201 continue; 202 } 203 } 204 205 /* 206 * if this is a disk and it is partitioned, then the 207 * zfs label will reside in a DEVTYPE=partition and 208 * we can skip passing this event 209 */ 210 type = udev_device_get_property_value(dev, "DEVTYPE"); 211 part = udev_device_get_property_value(dev, 212 "ID_PART_TABLE_TYPE"); 213 if (type != NULL && type[0] != '\0' && 214 strcmp(type, "disk") == 0 && 215 part != NULL && part[0] != '\0') { 216 /* skip and wait for partition event */ 217 udev_device_unref(dev); 218 continue; 219 } 220 221 /* 222 * ignore small partitions 223 */ 224 sectors = udev_device_get_property_value(dev, 225 "ID_PART_ENTRY_SIZE"); 226 if (sectors == NULL) 227 sectors = udev_device_get_sysattr_value(dev, "size"); 228 if (sectors != NULL && 229 strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) { 230 udev_device_unref(dev); 231 continue; 232 } 233 234 /* 235 * If the blkid probe didn't find ZFS, then a persistent 236 * device id string is required in the message schema 237 * for matching with vdevs. Preflight here for expected 238 * udev information. 239 */ 240 bus = udev_device_get_property_value(dev, "ID_BUS"); 241 uuid = udev_device_get_property_value(dev, "DM_UUID"); 242 if (!is_zfs && (bus == NULL && uuid == NULL)) { 243 zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid " 244 "source", udev_device_get_devnode(dev)); 245 udev_device_unref(dev); 246 continue; 247 } 248 249 action = udev_device_get_action(dev); 250 if (strcmp(action, "add") == 0) { 251 class = EC_DEV_ADD; 252 subclass = ESC_DISK; 253 } else if (strcmp(action, "remove") == 0) { 254 class = EC_DEV_REMOVE; 255 subclass = ESC_DISK; 256 } else if (strcmp(action, "change") == 0) { 257 class = EC_DEV_STATUS; 258 subclass = ESC_DEV_DLE; 259 } else { 260 zed_log_msg(LOG_WARNING, "zed_udev_monitor: %s unknown", 261 action); 262 udev_device_unref(dev); 263 continue; 264 } 265 266 /* 267 * Special case an EC_DEV_ADD for multipath devices 268 * 269 * When a multipath device is created, udev reports the 270 * following: 271 * 272 * 1. "add" event of the dm device for the multipath device 273 * (like /dev/dm-3). 274 * 2. "change" event to create the actual multipath device 275 * symlink (like /dev/mapper/mpatha). The event also 276 * passes back the relevant DM vars we care about, like 277 * DM_UUID. 278 * 3. Another "change" event identical to #2 (that we ignore). 279 * 280 * To get the behavior we want, we treat the "change" event 281 * in #2 as a "add" event; as if "/dev/mapper/mpatha" was 282 * a new disk being added. 283 */ 284 if (strcmp(class, EC_DEV_STATUS) == 0 && 285 udev_device_get_property_value(dev, "DM_UUID") && 286 udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) { 287 tmp = (char *)udev_device_get_devnode(dev); 288 tmp2 = zfs_get_underlying_path(tmp); 289 if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) { 290 /* 291 * We have a real underlying device, which 292 * means that this multipath "change" event is 293 * an "add" event. 294 * 295 * If the multipath device and the underlying 296 * dev are the same name (i.e. /dev/dm-5), then 297 * there is no real underlying disk for this 298 * multipath device, and so this "change" event 299 * really is a multipath removal. 300 */ 301 class = EC_DEV_ADD; 302 subclass = ESC_DISK; 303 } else { 304 tmp = (char *) 305 udev_device_get_property_value(dev, 306 "DM_NR_VALID_PATHS"); 307 /* treat as a multipath remove */ 308 if (tmp != NULL && strcmp(tmp, "0") == 0) { 309 class = EC_DEV_REMOVE; 310 subclass = ESC_DISK; 311 } 312 } 313 free(tmp2); 314 } 315 316 /* 317 * Special case an EC_DEV_ADD for scsi_debug devices 318 * 319 * These devices require a udevadm trigger command after 320 * creation in order to register the vdev_id scsidebug alias 321 * rule (adds a persistent path (phys_path) used for fault 322 * management automated tests in the ZFS test suite. 323 * 324 * After udevadm trigger command, event registers as a "change" 325 * event but needs to instead be handled as another "add" event 326 * to allow for disk labeling and partitioning to occur. 327 */ 328 if (strcmp(class, EC_DEV_STATUS) == 0 && 329 udev_device_get_property_value(dev, "ID_VDEV") && 330 udev_device_get_property_value(dev, "ID_MODEL")) { 331 const char *id_model, *id_model_sd = "scsi_debug"; 332 333 id_model = udev_device_get_property_value(dev, 334 "ID_MODEL"); 335 if (strcmp(id_model, id_model_sd) == 0) { 336 class = EC_DEV_ADD; 337 subclass = ESC_DISK; 338 } 339 } 340 341 if ((nvl = dev_event_nvlist(dev)) != NULL) { 342 zed_udev_event(class, subclass, nvl); 343 nvlist_free(nvl); 344 } 345 346 udev_device_unref(dev); 347 } 348 349 return (NULL); 350 } 351 352 int 353 zed_disk_event_init() 354 { 355 int fd, fflags; 356 357 if ((g_udev = udev_new()) == NULL) { 358 zed_log_msg(LOG_WARNING, "udev_new failed (%d)", errno); 359 return (-1); 360 } 361 362 /* Set up a udev monitor for block devices */ 363 g_mon = udev_monitor_new_from_netlink(g_udev, "udev"); 364 udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", "disk"); 365 udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", 366 "partition"); 367 udev_monitor_enable_receiving(g_mon); 368 369 /* Make sure monitoring socket is blocking */ 370 fd = udev_monitor_get_fd(g_mon); 371 if ((fflags = fcntl(fd, F_GETFL)) & O_NONBLOCK) 372 (void) fcntl(fd, F_SETFL, fflags & ~O_NONBLOCK); 373 374 /* spawn a thread to monitor events */ 375 if (pthread_create(&g_mon_tid, NULL, zed_udev_monitor, g_mon) != 0) { 376 udev_monitor_unref(g_mon); 377 udev_unref(g_udev); 378 zed_log_msg(LOG_WARNING, "pthread_create failed"); 379 return (-1); 380 } 381 382 zed_log_msg(LOG_INFO, "zed_disk_event_init"); 383 384 return (0); 385 } 386 387 void 388 zed_disk_event_fini() 389 { 390 /* cancel monitor thread at recvmsg() */ 391 (void) pthread_cancel(g_mon_tid); 392 (void) pthread_join(g_mon_tid, NULL); 393 394 /* cleanup udev resources */ 395 udev_monitor_unref(g_mon); 396 udev_unref(g_udev); 397 398 zed_log_msg(LOG_INFO, "zed_disk_event_fini"); 399 } 400 401 #else 402 403 #include "zed_disk_event.h" 404 405 int 406 zed_disk_event_init() 407 { 408 return (0); 409 } 410 411 void 412 zed_disk_event_fini() 413 { 414 } 415 416 #endif /* HAVE_LIBUDEV */ 417