/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * The ZFS retire agent is responsible for managing hot spares across all
 * pools.  When we see a device fault or a device removal, we try to open the
 * associated pool and look for any hot spares.  We iterate over any available
 * hot spares and attempt a 'zpool replace' for each one.
 *
 * For vdevs diagnosed as faulty, the agent is also responsible for proactively
 * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors).
 */

#include <fm/fmd_api.h>
#include <sys/fs/zfs.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <libzfs.h>
#include <string.h>

/*
 * Find a pool with a matching GUID.
 */
typedef struct find_cbdata {
	uint64_t	cb_guid;
	zpool_handle_t	*cb_zhp;
} find_cbdata_t;

static int
find_pool(zpool_handle_t *zhp, void *data)
{
	find_cbdata_t *cbp = data;

	if (cbp->cb_guid ==
	    zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) {
		cbp->cb_zhp = zhp;
		return (1);
	}

	zpool_close(zhp);
	return (0);
}

/*
 * Find a vdev within a tree with a matching GUID.
 */
static nvlist_t *
find_vdev(nvlist_t *nv, uint64_t search)
{
	uint64_t guid;
	nvlist_t **child;
	uint_t c, children;
	nvlist_t *ret;

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
	    guid == search)
		return (nv);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = find_vdev(child[c], search)) != NULL)
			return (ret);
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = find_vdev(child[c], search)) != NULL)
			return (ret);
	}

	return (NULL);
}
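/*
 * For reference, a simplified sketch of the config tree that find_vdev()
 * walks, for a typical mirrored pool (names from sys/fs/zfs.h; not
 * exhaustive):
 *
 *	vdev_tree
 *	    guid
 *	    children[0]			mirror
 *		guid
 *		children[0 .. 1]	leaf disks, each with its own guid
 *	    l2cache[0 .. n]		cache devices
 *
 * Note that the ZPOOL_CONFIG_SPARES array is not searched here; spares
 * are handled separately by replace_with_spare() below.
 */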
/*
 * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
 */
static zpool_handle_t *
find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
    nvlist_t **vdevp)
{
	find_cbdata_t cb;
	zpool_handle_t *zhp;
	nvlist_t *config, *nvroot;

	/*
	 * Find the corresponding pool and make sure the vdev still exists.
	 */
	cb.cb_guid = pool_guid;
	if (zpool_iter(zhdl, find_pool, &cb) != 1)
		return (NULL);

	zhp = cb.cb_zhp;
	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0) {
		zpool_close(zhp);
		return (NULL);
	}

	if ((*vdevp = find_vdev(nvroot, vdev_guid)) == NULL) {
		zpool_close(zhp);
		return (NULL);
	}

	return (zhp);
}

/*
 * Given a vdev, attempt to replace it with every known spare until one
 * succeeds.
 */
static void
replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev)
{
	nvlist_t *config, *nvroot, *replacement;
	nvlist_t **spares;
	uint_t s, nspares;
	char *dev_name;

	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0)
		return;

	/*
	 * Find out if there are any hot spares available in the pool.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) != 0)
		return;

	if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0)
		return;

	if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) != 0) {
		nvlist_free(replacement);
		return;
	}

	dev_name = zpool_vdev_name(NULL, zhp, vdev);

	/*
	 * Try to replace each spare, ending when we successfully
	 * replace it.
	 */
	for (s = 0; s < nspares; s++) {
		char *spare_name;

		if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
		    &spare_name) != 0)
			continue;

		if (nvlist_add_nvlist_array(replacement,
		    ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0)
			continue;

		if (zpool_vdev_attach(zhp, dev_name, spare_name,
		    replacement, B_TRUE) == 0)
			break;
	}

	free(dev_name);
	nvlist_free(replacement);
}
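/*
 * For reference, a sketch of the 'replacement' nvlist built above, as
 * handed to zpool_vdev_attach():
 *
 *	type = "root"			(ZPOOL_CONFIG_TYPE / VDEV_TYPE_ROOT)
 *	children[0] = <spare's config>	(ZPOOL_CONFIG_CHILDREN)
 *
 * zpool_vdev_attach() expects the new device wrapped in a root vdev this
 * way, mirroring the config that 'zpool replace' itself constructs.
 */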
/*ARGSUSED*/
static void
zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
    const char *class)
{
	uint64_t pool_guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *resource, *fault;
	nvlist_t **faults;
	uint_t f, nfaults;
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
	boolean_t fault_device, degrade_device;
	boolean_t is_repair;
	char *scheme;
	nvlist_t *vdev;
	char *uuid;
	int repair_done = 0;

	/*
	 * If this is a resource notifying us of device removal, then simply
	 * check for an available spare and continue.
	 */
	if (strcmp(class, "resource.fs.zfs.removed") == 0) {
		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
		    &pool_guid) != 0 ||
		    nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
		    &vdev_guid) != 0)
			return;

		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
		    &vdev)) == NULL)
			return;

		if (fmd_prop_get_int32(hdl, "spare_on_remove"))
			replace_with_spare(zhp, vdev);
		zpool_close(zhp);
		return;
	}

	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
		is_repair = B_TRUE;
	else
		is_repair = B_FALSE;

	/*
	 * We subscribe to zfs faults as well as all repair events.
	 */
	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
	    &faults, &nfaults) != 0)
		return;

	for (f = 0; f < nfaults; f++) {
		fault = faults[f];

		fault_device = B_FALSE;
		degrade_device = B_FALSE;

		/*
		 * While we subscribe to fault.fs.zfs.*, we only take action
		 * for faults targeting a specific vdev (open failure or SERD
		 * failure).
		 */
		if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io"))
			fault_device = B_TRUE;
		else if (fmd_nvl_class_match(hdl, fault,
		    "fault.fs.zfs.vdev.checksum"))
			degrade_device = B_TRUE;
		else if (fmd_nvl_class_match(hdl, fault,
		    "fault.fs.zfs.device"))
			fault_device = B_FALSE;
		else
			continue;

		if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE,
		    &resource) != 0 ||
		    nvlist_lookup_string(resource, FM_FMRI_SCHEME,
		    &scheme) != 0)
			continue;

		if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0)
			continue;

		if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
		    &pool_guid) != 0 ||
		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
		    &vdev_guid) != 0)
			continue;

		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
		    &vdev)) == NULL)
			continue;

		/*
		 * If this is a repair event, then mark the vdev as repaired
		 * and continue.
		 */
		if (is_repair) {
			repair_done = 1;
			(void) zpool_vdev_clear(zhp, vdev_guid);
			zpool_close(zhp);
			continue;
		}

		/*
		 * Actively fault the device if needed.
		 */
		if (fault_device)
			(void) zpool_vdev_fault(zhp, vdev_guid);
		if (degrade_device)
			(void) zpool_vdev_degrade(zhp, vdev_guid);

		/*
		 * Attempt to substitute a hot spare.
		 */
		replace_with_spare(zhp, vdev);
		zpool_close(zhp);
	}

	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done &&
	    nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
		fmd_case_uuresolved(hdl, uuid);
}

static const fmd_hdl_ops_t fmd_ops = {
	zfs_retire_recv,	/* fmdo_recv */
	NULL,			/* fmdo_timeout */
	NULL,			/* fmdo_close */
	NULL,			/* fmdo_stats */
	NULL,			/* fmdo_gc */
};

static const fmd_prop_t fmd_props[] = {
	{ "spare_on_remove", FMD_TYPE_BOOL, "true" },
	{ NULL, 0, NULL }
};

static const fmd_hdl_info_t fmd_info = {
	"ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
};

void
_fmd_init(fmd_hdl_t *hdl)
{
	libzfs_handle_t *zhdl;

	if ((zhdl = libzfs_init()) == NULL)
		return;

	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
		libzfs_fini(zhdl);
		return;
	}

	fmd_hdl_setspecific(hdl, zhdl);
}

void
_fmd_fini(fmd_hdl_t *hdl)
{
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);

	if (zhdl != NULL)
		libzfs_fini(zhdl);
}
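/*
 * Tuning note: the spare_on_remove property declared in fmd_props above
 * can be overridden without rebuilding the module via a 'setprop'
 * directive in the agent's fmd(1M) configuration file.  A minimal sketch,
 * assuming the stock plugin layout (the path below is an assumption, not
 * taken from this source):
 *
 *	# /usr/lib/fm/fmd/plugins/zfs-retire.conf
 *	setprop spare_on_remove false
 */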