1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * This file contains the functions which analyze the status of a pool. This 30 * include both the status of an active pool, as well as the status exported 31 * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 32 * the pool. This status is independent (to a certain degree) from the state of 33 * the pool. A pool's state describes only whether or not it is capable of 34 * providing the necessary fault tolerance for data. The status describes the 35 * overall status of devices. A pool that is online can still have a device 36 * that is experiencing errors. 37 * 38 * Only a subset of the possible faults can be detected using 'zpool status', 39 * and not all possible errors correspond to a FMA message ID. The explanation 40 * is left up to the caller, depending on whether it is a live pool or an 41 * import. 42 */ 43 44 #include <libzfs.h> 45 #include <string.h> 46 #include <unistd.h> 47 #include "libzfs_impl.h" 48 49 /* 50 * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 51 * in libzfs.h. Note that there are some status results which go past the end 52 * of this table, and hence have no associated message ID. 53 */ 54 static char *zfs_msgid_table[] = { 55 "ZFS-8000-14", 56 "ZFS-8000-2Q", 57 "ZFS-8000-3C", 58 "ZFS-8000-4J", 59 "ZFS-8000-5E", 60 "ZFS-8000-6X", 61 "ZFS-8000-72", 62 "ZFS-8000-8A", 63 "ZFS-8000-9P", 64 "ZFS-8000-A5", 65 "ZFS-8000-EY", 66 "ZFS-8000-HC", 67 "ZFS-8000-JQ", 68 "ZFS-8000-K4", 69 }; 70 71 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 72 73 /* ARGSUSED */ 74 static int 75 vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 76 { 77 return (state == VDEV_STATE_CANT_OPEN && 78 aux == VDEV_AUX_OPEN_FAILED); 79 } 80 81 /* ARGSUSED */ 82 static int 83 vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 84 { 85 return (state == VDEV_STATE_FAULTED); 86 } 87 88 /* ARGSUSED */ 89 static int 90 vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 91 { 92 return (state == VDEV_STATE_DEGRADED || errs != 0); 93 } 94 95 /* ARGSUSED */ 96 static int 97 vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 98 { 99 return (state == VDEV_STATE_CANT_OPEN); 100 } 101 102 /* ARGSUSED */ 103 static int 104 vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 105 { 106 return (state == VDEV_STATE_OFFLINE); 107 } 108 109 /* 110 * Detect if any leaf devices that have seen errors or could not be opened. 111 */ 112 static boolean_t 113 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 114 { 115 nvlist_t **child; 116 vdev_stat_t *vs; 117 uint_t c, children; 118 char *type; 119 120 /* 121 * Ignore problems within a 'replacing' vdev, since we're presumably in 122 * the process of repairing any such errors, and don't want to call them 123 * out again. We'll pick up the fact that a resilver is happening 124 * later. 125 */ 126 verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 127 if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 128 return (B_FALSE); 129 130 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 131 &children) == 0) { 132 for (c = 0; c < children; c++) 133 if (find_vdev_problem(child[c], func)) 134 return (B_TRUE); 135 } else { 136 verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 137 (uint64_t **)&vs, &c) == 0); 138 139 if (func(vs->vs_state, vs->vs_aux, 140 vs->vs_read_errors + 141 vs->vs_write_errors + 142 vs->vs_checksum_errors)) 143 return (B_TRUE); 144 } 145 146 return (B_FALSE); 147 } 148 149 /* 150 * Active pool health status. 151 * 152 * To determine the status for a pool, we make several passes over the config, 153 * picking the most egregious error we find. In order of importance, we do the 154 * following: 155 * 156 * - Check for a complete and valid configuration 157 * - Look for any faulted or missing devices in a non-replicated config 158 * - Check for any data errors 159 * - Check for any faulted or missing devices in a replicated config 160 * - Look for any devices showing errors 161 * - Check for any resilvering devices 162 * 163 * There can obviously be multiple errors within a single pool, so this routine 164 * only picks the most damaging of all the current errors to report. 165 */ 166 static zpool_status_t 167 check_status(zpool_handle_t *zhp, nvlist_t *config, boolean_t isimport) 168 { 169 nvlist_t *nvroot; 170 vdev_stat_t *vs; 171 uint_t vsc; 172 uint64_t nerr; 173 uint64_t version; 174 uint64_t stateval; 175 uint64_t hostid = 0; 176 177 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 178 &version) == 0); 179 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 180 &nvroot) == 0); 181 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 182 (uint64_t **)&vs, &vsc) == 0); 183 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 184 &stateval) == 0); 185 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 186 187 /* 188 * Pool last accessed by another system. 189 */ 190 if (hostid != 0 && (unsigned long)hostid != gethostid() && 191 stateval == POOL_STATE_ACTIVE) 192 return (ZPOOL_STATUS_HOSTID_MISMATCH); 193 194 /* 195 * Newer on-disk version. 196 */ 197 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 198 vs->vs_aux == VDEV_AUX_VERSION_NEWER) 199 return (ZPOOL_STATUS_VERSION_NEWER); 200 201 /* 202 * Check that the config is complete. 203 */ 204 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 205 vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 206 return (ZPOOL_STATUS_BAD_GUID_SUM); 207 208 /* 209 * Pool has experienced failed I/O. 210 */ 211 if (stateval == POOL_STATE_IO_FAILURE) { 212 zpool_handle_t *tmp_zhp = NULL; 213 libzfs_handle_t *hdl = NULL; 214 char property[ZPOOL_MAXPROPLEN]; 215 char *failmode = NULL; 216 217 if (zhp == NULL) { 218 char *poolname; 219 220 verify(nvlist_lookup_string(config, 221 ZPOOL_CONFIG_POOL_NAME, &poolname) == 0); 222 if ((hdl = libzfs_init()) == NULL) 223 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 224 tmp_zhp = zpool_open_canfail(hdl, poolname); 225 if (tmp_zhp == NULL) { 226 libzfs_fini(hdl); 227 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 228 } 229 } 230 if (zpool_get_prop(zhp ? zhp : tmp_zhp, ZPOOL_PROP_FAILUREMODE, 231 property, sizeof (property), NULL) == 0) 232 failmode = property; 233 if (tmp_zhp != NULL) 234 zpool_close(tmp_zhp); 235 if (hdl != NULL) 236 libzfs_fini(hdl); 237 if (failmode == NULL) 238 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 239 240 if (strncmp(failmode, "continue", strlen("continue")) == 0) 241 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); 242 else 243 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 244 } 245 246 /* 247 * Could not read a log. 248 */ 249 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 250 vs->vs_aux == VDEV_AUX_BAD_LOG) { 251 return (ZPOOL_STATUS_BAD_LOG); 252 } 253 254 /* 255 * Bad devices in non-replicated config. 256 */ 257 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 258 find_vdev_problem(nvroot, vdev_faulted)) 259 return (ZPOOL_STATUS_FAULTED_DEV_NR); 260 261 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 262 find_vdev_problem(nvroot, vdev_missing)) 263 return (ZPOOL_STATUS_MISSING_DEV_NR); 264 265 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 266 find_vdev_problem(nvroot, vdev_broken)) 267 return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 268 269 /* 270 * Corrupted pool metadata 271 */ 272 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 273 vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 274 return (ZPOOL_STATUS_CORRUPT_POOL); 275 276 /* 277 * Persistent data errors. 278 */ 279 if (!isimport) { 280 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 281 &nerr) == 0 && nerr != 0) 282 return (ZPOOL_STATUS_CORRUPT_DATA); 283 } 284 285 /* 286 * Missing devices in a replicated config. 287 */ 288 if (find_vdev_problem(nvroot, vdev_faulted)) 289 return (ZPOOL_STATUS_FAULTED_DEV_R); 290 if (find_vdev_problem(nvroot, vdev_missing)) 291 return (ZPOOL_STATUS_MISSING_DEV_R); 292 if (find_vdev_problem(nvroot, vdev_broken)) 293 return (ZPOOL_STATUS_CORRUPT_LABEL_R); 294 295 /* 296 * Devices with errors 297 */ 298 if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 299 return (ZPOOL_STATUS_FAILING_DEV); 300 301 /* 302 * Offlined devices 303 */ 304 if (find_vdev_problem(nvroot, vdev_offlined)) 305 return (ZPOOL_STATUS_OFFLINE_DEV); 306 307 /* 308 * Currently resilvering 309 */ 310 if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 311 return (ZPOOL_STATUS_RESILVERING); 312 313 /* 314 * Outdated, but usable, version 315 */ 316 if (version < SPA_VERSION) 317 return (ZPOOL_STATUS_VERSION_OLDER); 318 319 return (ZPOOL_STATUS_OK); 320 } 321 322 zpool_status_t 323 zpool_get_status(zpool_handle_t *zhp, char **msgid) 324 { 325 zpool_status_t ret = check_status(zhp, zhp->zpool_config, B_FALSE); 326 327 if (ret >= NMSGID) 328 *msgid = NULL; 329 else 330 *msgid = zfs_msgid_table[ret]; 331 332 return (ret); 333 } 334 335 zpool_status_t 336 zpool_import_status(nvlist_t *config, char **msgid) 337 { 338 zpool_status_t ret = check_status(NULL, config, B_TRUE); 339 340 if (ret >= NMSGID) 341 *msgid = NULL; 342 else 343 *msgid = zfs_msgid_table[ret]; 344 345 return (ret); 346 } 347