/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * The ZFS retire agent is responsible for managing hot spares across all
 * pools.  When we see a device fault or a device removal, we try to open the
 * associated pool and look for any hot spares.  We iterate over any available
 * hot spares and attempt a 'zpool replace' for each one.
 *
 * For vdevs diagnosed as faulty, the agent is also responsible for proactively
 * marking the vdev FAULTED (for I/O errors) or DEGRADED (for checksum errors).
 */

#include <fm/fmd_api.h>
#include <sys/fs/zfs.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <libzfs.h>
#include <string.h>

/*
 * Find a pool with a matching GUID.
 */
typedef struct find_cbdata {
	uint64_t	cb_guid;
	zpool_handle_t	*cb_zhp;
} find_cbdata_t;

static int
find_pool(zpool_handle_t *zhp, void *data)
{
	find_cbdata_t *cbp = data;

	if (cbp->cb_guid ==
	    zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) {
		cbp->cb_zhp = zhp;
		return (1);
	}

	zpool_close(zhp);
	return (0);
}

/*
 * Find a vdev within a tree with a matching GUID.
 */
static nvlist_t *
find_vdev(nvlist_t *nv, uint64_t search)
{
	uint64_t guid;
	nvlist_t **child;
	uint_t c, children;
	nvlist_t *ret;

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
	    guid == search)
		return (nv);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = find_vdev(child[c], search)) != NULL)
			return (ret);
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = find_vdev(child[c], search)) != NULL)
			return (ret);
	}

	return (NULL);
}
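
/*
 * For illustration only: a sketch of the config tree find_vdev() walks for
 * a simple mirrored pool with one cache device.  The exact nvpairs present
 * vary by pool; this is an assumed shape, not the literal nvlist layout.
 *
 *	nvroot (ZPOOL_CONFIG_VDEV_TREE)
 *	    ZPOOL_CONFIG_GUID
 *	    ZPOOL_CONFIG_CHILDREN:
 *		mirror
 *		    ZPOOL_CONFIG_CHILDREN:
 *			disk (ZPOOL_CONFIG_GUID, ZPOOL_CONFIG_PATH)
 *			disk (ZPOOL_CONFIG_GUID, ZPOOL_CONFIG_PATH)
 *	    ZPOOL_CONFIG_L2CACHE:
 *		disk (ZPOOL_CONFIG_GUID, ZPOOL_CONFIG_PATH)
 *
 * The search above recurses depth-first through ZPOOL_CONFIG_CHILDREN and
 * only then falls back to the L2ARC (cache) devices.
 */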
/*
 * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
 */
static zpool_handle_t *
find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
    nvlist_t **vdevp)
{
	find_cbdata_t cb;
	zpool_handle_t *zhp;
	nvlist_t *config, *nvroot;

	/*
	 * Find the corresponding pool and make sure the vdev still exists.
	 */
	cb.cb_guid = pool_guid;
	if (zpool_iter(zhdl, find_pool, &cb) != 1)
		return (NULL);

	zhp = cb.cb_zhp;
	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0) {
		zpool_close(zhp);
		return (NULL);
	}

	if ((*vdevp = find_vdev(nvroot, vdev_guid)) == NULL) {
		zpool_close(zhp);
		return (NULL);
	}

	return (zhp);
}

/*
 * Given a vdev, attempt to replace it with every known spare until one
 * succeeds.
 */
static void
replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev)
{
	nvlist_t *config, *nvroot, *replacement;
	nvlist_t **spares;
	uint_t s, nspares;
	char *dev_name;

	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0)
		return;

	/*
	 * Find out if there are any hot spares available in the pool.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) != 0)
		return;

	if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0)
		return;

	if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) != 0) {
		nvlist_free(replacement);
		return;
	}

	dev_name = zpool_vdev_name(NULL, zhp, vdev);

	/*
	 * Try each available spare in turn, stopping as soon as one is
	 * attached successfully.
	 */
	for (s = 0; s < nspares; s++) {
		char *spare_name;

		if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
		    &spare_name) != 0)
			continue;

		if (nvlist_add_nvlist_array(replacement,
		    ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0)
			continue;

		if (zpool_vdev_attach(zhp, dev_name, spare_name,
		    replacement, B_TRUE) == 0)
			break;
	}

	free(dev_name);
	nvlist_free(replacement);
}
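
/*
 * For illustration only: the 'replacement' nvlist that replace_with_spare()
 * hands to zpool_vdev_attach() describes a one-level tree rooted at a
 * root vdev with the candidate spare as its sole child:
 *
 *	ZPOOL_CONFIG_TYPE = VDEV_TYPE_ROOT
 *	ZPOOL_CONFIG_CHILDREN = [ spares[s] ]
 *
 * With the replacing flag passed as B_TRUE, a successful attach is the
 * programmatic equivalent of 'zpool replace <pool> <dev_name> <spare_name>'.
 */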
/*ARGSUSED*/
static void
zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
    const char *class)
{
	uint64_t pool_guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *resource, *fault;
	nvlist_t **faults;
	uint_t f, nfaults;
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
	boolean_t fault_device, degrade_device;
	boolean_t is_repair;
	char *scheme;
	nvlist_t *vdev;

	/*
	 * If this is a resource notifying us of device removal, then simply
	 * check for an available spare and continue.
	 */
	if (strcmp(class, "resource.fs.zfs.removed") == 0) {
		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
		    &pool_guid) != 0 ||
		    nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
		    &vdev_guid) != 0)
			return;

		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
		    &vdev)) == NULL)
			return;

		if (fmd_prop_get_int32(hdl, "spare_on_remove"))
			replace_with_spare(zhp, vdev);
		zpool_close(zhp);
		return;
	}

	if (strcmp(class, "list.repaired") == 0)
		is_repair = B_TRUE;
	else
		is_repair = B_FALSE;

	/*
	 * We subscribe to zfs faults as well as all repair events.
	 */
	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
	    &faults, &nfaults) != 0)
		return;

	for (f = 0; f < nfaults; f++) {
		fault = faults[f];

		fault_device = B_FALSE;
		degrade_device = B_FALSE;

		/*
		 * While we subscribe to fault.fs.zfs.*, we only take action
		 * for faults targeting a specific vdev (open failure or SERD
		 * failure).
		 */
		if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io"))
			fault_device = B_TRUE;
		else if (fmd_nvl_class_match(hdl, fault,
		    "fault.fs.zfs.vdev.checksum"))
			degrade_device = B_TRUE;
		else if (fmd_nvl_class_match(hdl, fault,
		    "fault.fs.zfs.device"))
			fault_device = B_FALSE;
		else
			continue;

		if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE,
		    &resource) != 0 ||
		    nvlist_lookup_string(resource, FM_FMRI_SCHEME,
		    &scheme) != 0)
			continue;

		if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0)
			continue;

		if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
		    &pool_guid) != 0 ||
		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
		    &vdev_guid) != 0)
			continue;

		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
		    &vdev)) == NULL)
			continue;

		/*
		 * If this is a repair event, then mark the vdev as repaired
		 * and continue.
		 */
		if (is_repair) {
			(void) zpool_vdev_clear(zhp, vdev_guid);
			zpool_close(zhp);
			continue;
		}

		/*
		 * Actively fault the device if needed.
		 */
		if (fault_device)
			(void) zpool_vdev_fault(zhp, vdev_guid);
		if (degrade_device)
			(void) zpool_vdev_degrade(zhp, vdev_guid);

		/*
		 * Attempt to substitute a hot spare.
		 */
		replace_with_spare(zhp, vdev);
		zpool_close(zhp);
	}
}

static const fmd_hdl_ops_t fmd_ops = {
	zfs_retire_recv,	/* fmdo_recv */
	NULL,			/* fmdo_timeout */
	NULL,			/* fmdo_close */
	NULL,			/* fmdo_stats */
	NULL,			/* fmdo_gc */
};

static const fmd_prop_t fmd_props[] = {
	{ "spare_on_remove", FMD_TYPE_BOOL, "true" },
	{ NULL, 0, NULL }
};

static const fmd_hdl_info_t fmd_info = {
	"ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
};

void
_fmd_init(fmd_hdl_t *hdl)
{
	libzfs_handle_t *zhdl;

	if ((zhdl = libzfs_init()) == NULL)
		return;

	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
		libzfs_fini(zhdl);
		return;
	}

	fmd_hdl_setspecific(hdl, zhdl);
}

void
_fmd_fini(fmd_hdl_t *hdl)
{
	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);

	if (zhdl != NULL)
		libzfs_fini(zhdl);
}
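
/*
 * Note that spare_on_remove defaults to true (see fmd_props above).  As a
 * hypothetical example, an administrator could override it in the module's
 * fmd configuration file; the path and 'setprop' syntax below are assumed
 * from the usual fmd plugin conventions, not taken from this file:
 *
 *	# /usr/lib/fm/fmd/plugins/zfs-retire.conf
 *	setprop spare_on_remove false
 */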