1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * The ZFS retire agent is responsible for managing hot spares across all pools. 30 * When we see a device fault or a device removal, we try to open the associated 31 * pool and look for any hot spares. We iterate over any available hot spares 32 * and attempt a 'zpool replace' for each one. 33 * 34 * For vdevs diagnosed as faulty, the agent is also responsible for proactively 35 * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors). 36 */ 37 38 #include <fm/fmd_api.h> 39 #include <sys/fs/zfs.h> 40 #include <sys/fm/protocol.h> 41 #include <sys/fm/fs/zfs.h> 42 #include <libzfs.h> 43 #include <string.h> 44 45 /* 46 * Find a pool with a matching GUID. 47 */ 48 typedef struct find_cbdata { 49 uint64_t cb_guid; 50 zpool_handle_t *cb_zhp; 51 } find_cbdata_t; 52 53 static int 54 find_pool(zpool_handle_t *zhp, void *data) 55 { 56 find_cbdata_t *cbp = data; 57 58 if (cbp->cb_guid == 59 zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) { 60 cbp->cb_zhp = zhp; 61 return (1); 62 } 63 64 zpool_close(zhp); 65 return (0); 66 } 67 68 /* 69 * Find a vdev within a tree with a matching GUID. 70 */ 71 static nvlist_t * 72 find_vdev(nvlist_t *nv, uint64_t search) 73 { 74 uint64_t guid; 75 nvlist_t **child; 76 uint_t c, children; 77 nvlist_t *ret; 78 79 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && 80 guid == search) 81 return (nv); 82 83 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 84 &child, &children) != 0) 85 return (NULL); 86 87 for (c = 0; c < children; c++) { 88 if ((ret = find_vdev(child[c], search)) != NULL) 89 return (ret); 90 } 91 92 return (NULL); 93 } 94 95 /* 96 * Given a (pool, vdev) GUID pair, find the matching pool and vdev. 97 */ 98 static zpool_handle_t * 99 find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid, 100 nvlist_t **vdevp) 101 { 102 find_cbdata_t cb; 103 zpool_handle_t *zhp; 104 nvlist_t *config, *nvroot; 105 106 /* 107 * Find the corresponding pool and make sure the vdev still exists. 108 */ 109 cb.cb_guid = pool_guid; 110 if (zpool_iter(zhdl, find_pool, &cb) != 1) 111 return (NULL); 112 113 zhp = cb.cb_zhp; 114 config = zpool_get_config(zhp, NULL); 115 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 116 &nvroot) != 0) { 117 zpool_close(zhp); 118 return (NULL); 119 } 120 121 if ((*vdevp = find_vdev(nvroot, vdev_guid)) == NULL) { 122 zpool_close(zhp); 123 return (NULL); 124 } 125 126 return (zhp); 127 } 128 129 /* 130 * Given a vdev, attempt to replace it with every known spare until one 131 * succeeds. 132 */ 133 static void 134 replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev) 135 { 136 nvlist_t *config, *nvroot, *replacement; 137 nvlist_t **spares; 138 uint_t s, nspares; 139 char *dev_name; 140 141 config = zpool_get_config(zhp, NULL); 142 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 143 &nvroot) != 0) 144 return; 145 146 /* 147 * Find out if there are any hot spares available in the pool. 148 */ 149 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 150 &spares, &nspares) != 0) 151 return; 152 153 if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0) 154 return; 155 156 if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE, 157 VDEV_TYPE_ROOT) != 0) { 158 nvlist_free(replacement); 159 return; 160 } 161 162 dev_name = zpool_vdev_name(NULL, zhp, vdev); 163 164 /* 165 * Try to replace each spare, ending when we successfully 166 * replace it. 167 */ 168 for (s = 0; s < nspares; s++) { 169 char *spare_name; 170 171 if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH, 172 &spare_name) != 0) 173 continue; 174 175 if (nvlist_add_nvlist_array(replacement, 176 ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0) 177 continue; 178 179 if (zpool_vdev_attach(zhp, dev_name, spare_name, 180 replacement, B_TRUE) == 0) 181 break; 182 } 183 184 free(dev_name); 185 nvlist_free(replacement); 186 } 187 188 /*ARGSUSED*/ 189 static void 190 zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 191 const char *class) 192 { 193 uint64_t pool_guid, vdev_guid; 194 zpool_handle_t *zhp; 195 nvlist_t *resource, *fault; 196 nvlist_t **faults; 197 uint_t f, nfaults; 198 libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl); 199 boolean_t fault_device, degrade_device; 200 boolean_t is_repair; 201 char *scheme; 202 nvlist_t *vdev; 203 204 /* 205 * If this is a resource notifying us of device removal, then simply 206 * check for an available spare and continue. 207 */ 208 if (strcmp(class, "resource.fs.zfs.removed") == 0) { 209 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 210 &pool_guid) != 0 || 211 nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, 212 &vdev_guid) != 0) 213 return; 214 215 if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 216 &vdev)) == NULL) 217 return; 218 219 if (fmd_prop_get_int32(hdl, "spare_on_remove")) 220 replace_with_spare(zhp, vdev); 221 zpool_close(zhp); 222 return; 223 } 224 225 if (strcmp(class, "list.repaired") == 0) 226 is_repair = B_TRUE; 227 else 228 is_repair = B_FALSE; 229 230 /* 231 * We subscribe to zfs faults as well as all repair events. 232 */ 233 if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 234 &faults, &nfaults) != 0) 235 return; 236 237 for (f = 0; f < nfaults; f++) { 238 fault = faults[f]; 239 240 fault_device = B_FALSE; 241 degrade_device = B_FALSE; 242 243 /* 244 * While we subscribe to fault.fs.zfs.*, we only take action 245 * for faults targeting a specific vdev (open failure or SERD 246 * failure). 247 */ 248 if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io")) 249 fault_device = B_TRUE; 250 else if (fmd_nvl_class_match(hdl, fault, 251 "fault.fs.zfs.vdev.checksum")) 252 degrade_device = B_TRUE; 253 else if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.device")) 254 fault_device = B_FALSE; 255 else 256 continue; 257 258 if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, 259 &resource) != 0 || 260 nvlist_lookup_string(resource, FM_FMRI_SCHEME, 261 &scheme) != 0) 262 continue; 263 264 if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0) 265 continue; 266 267 if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL, 268 &pool_guid) != 0 || 269 nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV, 270 &vdev_guid) != 0) 271 continue; 272 273 if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 274 &vdev)) == NULL) 275 continue; 276 277 /* 278 * If this is a repair event, then mark the vdev as repaired and 279 * continue. 280 */ 281 if (is_repair) { 282 (void) zpool_vdev_clear(zhp, vdev_guid); 283 zpool_close(zhp); 284 continue; 285 } 286 287 /* 288 * Actively fault the device if needed. 289 */ 290 if (fault_device) 291 (void) zpool_vdev_fault(zhp, vdev_guid); 292 if (degrade_device) 293 (void) zpool_vdev_degrade(zhp, vdev_guid); 294 295 /* 296 * Attempt to substitute a hot spare. 297 */ 298 replace_with_spare(zhp, vdev); 299 zpool_close(zhp); 300 } 301 } 302 303 static const fmd_hdl_ops_t fmd_ops = { 304 zfs_retire_recv, /* fmdo_recv */ 305 NULL, /* fmdo_timeout */ 306 NULL, /* fmdo_close */ 307 NULL, /* fmdo_stats */ 308 NULL, /* fmdo_gc */ 309 }; 310 311 static const fmd_prop_t fmd_props[] = { 312 { "spare_on_remove", FMD_TYPE_BOOL, "true" }, 313 { NULL, 0, NULL } 314 }; 315 316 static const fmd_hdl_info_t fmd_info = { 317 "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props 318 }; 319 320 void 321 _fmd_init(fmd_hdl_t *hdl) 322 { 323 libzfs_handle_t *zhdl; 324 325 if ((zhdl = libzfs_init()) == NULL) 326 return; 327 328 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 329 libzfs_fini(zhdl); 330 return; 331 } 332 333 fmd_hdl_setspecific(hdl, zhdl); 334 } 335 336 void 337 _fmd_fini(fmd_hdl_t *hdl) 338 { 339 libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl); 340 341 if (zhdl != NULL) 342 libzfs_fini(zhdl); 343 } 344