1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * The ZFS retire agent is responsible for managing hot spares across all pools. 30 * When we see a device fault or a device removal, we try to open the associated 31 * pool and look for any hot spares. We iterate over any available hot spares 32 * and attempt a 'zpool replace' for each one. 33 * 34 * For vdevs diagnosed as faulty, the agent is also responsible for proactively 35 * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors). 36 */ 37 38 #include <fm/fmd_api.h> 39 #include <sys/fs/zfs.h> 40 #include <sys/fm/protocol.h> 41 #include <sys/fm/fs/zfs.h> 42 #include <libzfs.h> 43 #include <string.h> 44 45 /* 46 * Find a pool with a matching GUID. 47 */ 48 typedef struct find_cbdata { 49 uint64_t cb_guid; 50 zpool_handle_t *cb_zhp; 51 } find_cbdata_t; 52 53 static int 54 find_pool(zpool_handle_t *zhp, void *data) 55 { 56 find_cbdata_t *cbp = data; 57 58 if (cbp->cb_guid == zpool_get_guid(zhp)) { 59 cbp->cb_zhp = zhp; 60 return (1); 61 } 62 63 zpool_close(zhp); 64 return (0); 65 } 66 67 /* 68 * Find a vdev within a tree with a matching GUID. 69 */ 70 static nvlist_t * 71 find_vdev(nvlist_t *nv, uint64_t search) 72 { 73 uint64_t guid; 74 nvlist_t **child; 75 uint_t c, children; 76 nvlist_t *ret; 77 78 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && 79 guid == search) 80 return (nv); 81 82 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 83 &child, &children) != 0) 84 return (NULL); 85 86 for (c = 0; c < children; c++) { 87 if ((ret = find_vdev(child[c], search)) != NULL) 88 return (ret); 89 } 90 91 return (NULL); 92 } 93 94 /* 95 * Given a (pool, vdev) GUID pair, find the matching pool and vdev. 96 */ 97 static zpool_handle_t * 98 find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid, 99 nvlist_t **vdevp) 100 { 101 find_cbdata_t cb; 102 zpool_handle_t *zhp; 103 nvlist_t *config, *nvroot; 104 105 /* 106 * Find the corresponding pool and make sure the vdev still exists. 107 */ 108 cb.cb_guid = pool_guid; 109 if (zpool_iter(zhdl, find_pool, &cb) != 1) 110 return (NULL); 111 112 zhp = cb.cb_zhp; 113 config = zpool_get_config(zhp, NULL); 114 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 115 &nvroot) != 0) { 116 zpool_close(zhp); 117 return (NULL); 118 } 119 120 if ((*vdevp = find_vdev(nvroot, vdev_guid)) == NULL) { 121 zpool_close(zhp); 122 return (NULL); 123 } 124 125 return (zhp); 126 } 127 128 /* 129 * Given a vdev, attempt to replace it with every known spare until one 130 * succeeds. 131 */ 132 static void 133 replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev) 134 { 135 nvlist_t *config, *nvroot, *replacement; 136 nvlist_t **spares; 137 uint_t s, nspares; 138 char *dev_name; 139 140 config = zpool_get_config(zhp, NULL); 141 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 142 &nvroot) != 0) 143 return; 144 145 /* 146 * Find out if there are any hot spares available in the pool. 147 */ 148 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 149 &spares, &nspares) != 0) 150 return; 151 152 if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0) 153 return; 154 155 if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE, 156 VDEV_TYPE_ROOT) != 0) { 157 nvlist_free(replacement); 158 return; 159 } 160 161 dev_name = zpool_vdev_name(NULL, zhp, vdev); 162 163 /* 164 * Try to replace each spare, ending when we successfully 165 * replace it. 166 */ 167 for (s = 0; s < nspares; s++) { 168 char *spare_name; 169 170 if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH, 171 &spare_name) != 0) 172 continue; 173 174 if (nvlist_add_nvlist_array(replacement, 175 ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0) 176 continue; 177 178 if (zpool_vdev_attach(zhp, dev_name, spare_name, 179 replacement, B_TRUE) == 0) 180 break; 181 } 182 183 free(dev_name); 184 nvlist_free(replacement); 185 } 186 187 /*ARGSUSED*/ 188 static void 189 zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 190 const char *class) 191 { 192 uint64_t pool_guid, vdev_guid; 193 zpool_handle_t *zhp; 194 nvlist_t *resource, *fault; 195 nvlist_t **faults; 196 uint_t f, nfaults; 197 libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl); 198 boolean_t fault_device, degrade_device; 199 boolean_t is_repair; 200 char *scheme; 201 nvlist_t *vdev; 202 203 /* 204 * If this is a resource notifying us of device removal, then simply 205 * check for an available spare and continue. 206 */ 207 if (strcmp(class, "resource.fs.zfs.removed") == 0) { 208 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 209 &pool_guid) != 0 || 210 nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, 211 &vdev_guid) != 0) 212 return; 213 214 if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 215 &vdev)) == NULL) 216 return; 217 218 if (fmd_prop_get_int32(hdl, "spare_on_remove")) 219 replace_with_spare(zhp, vdev); 220 zpool_close(zhp); 221 return; 222 } 223 224 if (strcmp(class, "list.repaired") == 0) 225 is_repair = B_TRUE; 226 else 227 is_repair = B_FALSE; 228 229 /* 230 * We subscribe to zfs faults as well as all repair events. 231 */ 232 if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 233 &faults, &nfaults) != 0) 234 return; 235 236 for (f = 0; f < nfaults; f++) { 237 fault = faults[f]; 238 239 fault_device = B_FALSE; 240 degrade_device = B_FALSE; 241 242 /* 243 * While we subscribe to fault.fs.zfs.*, we only take action 244 * for faults targeting a specific vdev (open failure or SERD 245 * failure). 246 */ 247 if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io")) 248 fault_device = B_TRUE; 249 else if (fmd_nvl_class_match(hdl, fault, 250 "fault.fs.zfs.vdev.checksum")) 251 degrade_device = B_TRUE; 252 else if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.device")) 253 fault_device = B_FALSE; 254 else 255 continue; 256 257 if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, 258 &resource) != 0 || 259 nvlist_lookup_string(resource, FM_FMRI_SCHEME, 260 &scheme) != 0) 261 continue; 262 263 if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0) 264 continue; 265 266 if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL, 267 &pool_guid) != 0 || 268 nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV, 269 &vdev_guid) != 0) 270 continue; 271 272 if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 273 &vdev)) == NULL) 274 continue; 275 276 /* 277 * If this is a repair event, then mark the vdev as repaired and 278 * continue. 279 */ 280 if (is_repair) { 281 (void) zpool_vdev_clear(zhp, vdev_guid); 282 zpool_close(zhp); 283 continue; 284 } 285 286 /* 287 * Actively fault the device if needed. 288 */ 289 if (fault_device) 290 (void) zpool_vdev_fault(zhp, vdev_guid); 291 if (degrade_device) 292 (void) zpool_vdev_degrade(zhp, vdev_guid); 293 294 /* 295 * Attempt to substitute a hot spare. 296 */ 297 replace_with_spare(zhp, vdev); 298 zpool_close(zhp); 299 } 300 } 301 302 static const fmd_hdl_ops_t fmd_ops = { 303 zfs_retire_recv, /* fmdo_recv */ 304 NULL, /* fmdo_timeout */ 305 NULL, /* fmdo_close */ 306 NULL, /* fmdo_stats */ 307 NULL, /* fmdo_gc */ 308 }; 309 310 static const fmd_prop_t fmd_props[] = { 311 { "spare_on_remove", FMD_TYPE_BOOL, "true" }, 312 { NULL, 0, NULL } 313 }; 314 315 static const fmd_hdl_info_t fmd_info = { 316 "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props 317 }; 318 319 void 320 _fmd_init(fmd_hdl_t *hdl) 321 { 322 libzfs_handle_t *zhdl; 323 324 if ((zhdl = libzfs_init()) == NULL) 325 return; 326 327 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 328 libzfs_fini(zhdl); 329 return; 330 } 331 332 fmd_hdl_setspecific(hdl, zhdl); 333 } 334 335 void 336 _fmd_fini(fmd_hdl_t *hdl) 337 { 338 libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl); 339 340 if (zhdl != NULL) 341 libzfs_fini(zhdl); 342 } 343