/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The ZFS retire agent is responsible for managing hot spares across all
 * pools.  When we see a device fault or a device removal, we try to open the
 * associated pool and look for any hot spares.  We iterate over any available
 * hot spares and attempt a 'zpool replace' for each one.
 *
 * For vdevs diagnosed as faulty, the agent is also responsible for proactively
 * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors).
 */

#include <fm/fmd_api.h>
#include <sys/fs/zfs.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <libzfs.h>
#include <stdlib.h>	/* free(), used on the name from zpool_vdev_name() */
#include <string.h>

/*
 * Find a pool with a matching GUID.
 */
typedef struct find_cbdata {
	uint64_t	cb_guid;
	zpool_handle_t	*cb_zhp;
} find_cbdata_t;

static int
find_pool(zpool_handle_t *zhp, void *data)
{
	find_cbdata_t *cbp = data;

	if (cbp->cb_guid ==
	    zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) {
		cbp->cb_zhp = zhp;
		return (1);
	}

	zpool_close(zhp);
	return (0);
}

/*
 * Find a vdev within a tree with a matching GUID.
 */
static nvlist_t *
find_vdev(nvlist_t *nv, uint64_t search)
{
	uint64_t guid;
	nvlist_t **child;
	uint_t c, children;
	nvlist_t *ret;

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
	    guid == search)
		return (nv);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = find_vdev(child[c], search)) != NULL)
			return (ret);
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = find_vdev(child[c], search)) != NULL)
			return (ret);
	}

	return (NULL);
}

/*
 * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
 */
static zpool_handle_t *
find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
    nvlist_t **vdevp)
{
	find_cbdata_t cb;
	zpool_handle_t *zhp;
	nvlist_t *config, *nvroot;

	/*
	 * Find the corresponding pool and make sure the vdev still exists.
	 */
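	/*
	 * Handle ownership: find_pool() closes every non-matching handle as
	 * zpool_iter() visits it and returns 1 to stop iteration, leaving the
	 * matching handle open in cb_zhp.  Any other zpool_iter() return
	 * value therefore means no pool matched and no handle remains open;
	 * on success, the caller owns the handle and must zpool_close() it.
	 */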
	cb.cb_guid = pool_guid;
	if (zpool_iter(zhdl, find_pool, &cb) != 1)
		return (NULL);

	zhp = cb.cb_zhp;
	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0) {
		zpool_close(zhp);
		return (NULL);
	}

	if ((*vdevp = find_vdev(nvroot, vdev_guid)) == NULL) {
		zpool_close(zhp);
		return (NULL);
	}

	return (zhp);
}

/*
 * Given a vdev, attempt to replace it with every known spare until one
 * succeeds.
 */
static void
replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev)
{
	nvlist_t *config, *nvroot, *replacement;
	nvlist_t **spares;
	uint_t s, nspares;
	char *dev_name;

	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0)
		return;

	/*
	 * Find out if there are any hot spares available in the pool.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) != 0)
		return;

	if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0)
		return;

	if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) != 0) {
		nvlist_free(replacement);
		return;
	}

	dev_name = zpool_vdev_name(NULL, zhp, vdev);

	/*
	 * Try to replace each spare, ending when we successfully
	 * replace it.
	 */
	for (s = 0; s < nspares; s++) {
		char *spare_name;

		if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
		    &spare_name) != 0)
			continue;

		if (nvlist_add_nvlist_array(replacement,
		    ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0)
			continue;

		if (zpool_vdev_attach(zhp, dev_name, spare_name,
		    replacement, B_TRUE) == 0)
			break;
	}

	free(dev_name);
	nvlist_free(replacement);
}

/*ARGSUSED*/
static void
zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
    const char *class)
{
	uint64_t pool_guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *resource, *fault;
	nvlist_t **faults;
	uint_t f, nfaults;
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
	boolean_t fault_device, degrade_device;
	boolean_t is_repair;
	char *scheme;
	nvlist_t *vdev;
	char *uuid;
	int repair_done = 0;
	boolean_t retire;

	/*
	 * If this is a resource notifying us of device removal, then simply
	 * check for an available spare and continue.
	 */
	if (strcmp(class, "resource.fs.zfs.removed") == 0) {
		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
		    &pool_guid) != 0 ||
		    nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
		    &vdev_guid) != 0)
			return;

		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
		    &vdev)) == NULL)
			return;

		if (fmd_prop_get_int32(hdl, "spare_on_remove"))
			replace_with_spare(zhp, vdev);
		zpool_close(zhp);
		return;
	}

	if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
		return;

	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
		is_repair = B_TRUE;
	else
		is_repair = B_FALSE;

	/*
	 * We subscribe to zfs faults as well as all repair events.
	 */
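	/*
	 * A list.suspect or list.repaired event carries an array of
	 * individual fault nvlists in FM_SUSPECT_FAULT_LIST.  Each suspect
	 * is examined independently below; only suspects with a zfs-scheme
	 * resource naming a (pool, vdev) pair result in any action.
	 */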
	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
	    &faults, &nfaults) != 0)
		return;

	for (f = 0; f < nfaults; f++) {
		fault = faults[f];

		fault_device = B_FALSE;
		degrade_device = B_FALSE;

		if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE,
		    &retire) == 0 && retire == 0)
			continue;

		/*
		 * While we subscribe to fault.fs.zfs.*, we only take action
		 * for faults targeting a specific vdev (open failure or SERD
		 * failure).
		 */
		if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io"))
			fault_device = B_TRUE;
		else if (fmd_nvl_class_match(hdl, fault,
		    "fault.fs.zfs.vdev.checksum"))
			degrade_device = B_TRUE;
		else if (fmd_nvl_class_match(hdl, fault,
		    "fault.fs.zfs.device"))
			fault_device = B_FALSE;
		else
			continue;

		if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE,
		    &resource) != 0 ||
		    nvlist_lookup_string(resource, FM_FMRI_SCHEME,
		    &scheme) != 0)
			continue;

		if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0)
			continue;

		if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
		    &pool_guid) != 0 ||
		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
		    &vdev_guid) != 0)
			continue;

		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
		    &vdev)) == NULL)
			continue;

		/*
		 * If this is a repair event, then mark the vdev as repaired
		 * and continue.
		 */
		if (is_repair) {
			repair_done = 1;
			(void) zpool_vdev_clear(zhp, vdev_guid);
			zpool_close(zhp);
			continue;
		}

		/*
		 * Actively fault the device if needed.
		 */
		if (fault_device)
			(void) zpool_vdev_fault(zhp, vdev_guid);
		if (degrade_device)
			(void) zpool_vdev_degrade(zhp, vdev_guid);

		/*
		 * Attempt to substitute a hot spare.
		 */
		replace_with_spare(zhp, vdev);
		zpool_close(zhp);
	}

	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done &&
	    nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
		fmd_case_uuresolved(hdl, uuid);
}

static const fmd_hdl_ops_t fmd_ops = {
	zfs_retire_recv,	/* fmdo_recv */
	NULL,			/* fmdo_timeout */
	NULL,			/* fmdo_close */
	NULL,			/* fmdo_stats */
	NULL,			/* fmdo_gc */
};

static const fmd_prop_t fmd_props[] = {
	{ "spare_on_remove", FMD_TYPE_BOOL, "true" },
	{ NULL, 0, NULL }
};

static const fmd_hdl_info_t fmd_info = {
	"ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
};

void
_fmd_init(fmd_hdl_t *hdl)
{
	libzfs_handle_t *zhdl;

	if ((zhdl = libzfs_init()) == NULL)
		return;

	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
		libzfs_fini(zhdl);
		return;
	}

	fmd_hdl_setspecific(hdl, zhdl);
}

void
_fmd_fini(fmd_hdl_t *hdl)
{
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);

	if (zhdl != NULL)
		libzfs_fini(zhdl);
}
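/*
 * Tuning note: "spare_on_remove" (declared in fmd_props above) defaults to
 * true, so a device-removal resource event triggers hot-spare substitution.
 * As with other fmd modules, the default can presumably be overridden with a
 * setprop directive in the module's .conf file under the fmd plugin
 * directory, e.g.:
 *
 *	setprop spare_on_remove false
 */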