/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool, as well as the status of
 * exported pools.  Returns one of the ZPOOL_STATUS_* defines describing the
 * status of the pool.  This status is independent (to a certain degree) from
 * the state of the pool.  A pool's state describes only whether or not it is
 * capable of providing the necessary fault tolerance for data.  The status
 * describes the overall status of devices.  A pool that is online can still
 * have a device that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to a FMA message ID.  The
 * explanation is left up to the caller, depending on whether it is a live
 * pool or an import.
 */

#include <libzfs.h>
#include <string.h>
#include <unistd.h>
#include "libzfs_impl.h"

/*
 * Message ID table.
This must be kept in sync with the ZPOOL_STATUS_* defines 49 * in libzfs.h. Note that there are some status results which go past the end 50 * of this table, and hence have no associated message ID. 51 */ 52 static char *zfs_msgid_table[] = { 53 "ZFS-8000-14", 54 "ZFS-8000-2Q", 55 "ZFS-8000-3C", 56 "ZFS-8000-4J", 57 "ZFS-8000-5E", 58 "ZFS-8000-6X", 59 "ZFS-8000-72", 60 "ZFS-8000-8A", 61 "ZFS-8000-9P", 62 "ZFS-8000-A5", 63 "ZFS-8000-EY", 64 "ZFS-8000-HC", 65 "ZFS-8000-JQ", 66 "ZFS-8000-K4", 67 }; 68 69 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 70 71 /* ARGSUSED */ 72 static int 73 vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 74 { 75 return (state == VDEV_STATE_CANT_OPEN && 76 aux == VDEV_AUX_OPEN_FAILED); 77 } 78 79 /* ARGSUSED */ 80 static int 81 vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 82 { 83 return (state == VDEV_STATE_FAULTED); 84 } 85 86 /* ARGSUSED */ 87 static int 88 vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 89 { 90 return (state == VDEV_STATE_DEGRADED || errs != 0); 91 } 92 93 /* ARGSUSED */ 94 static int 95 vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 96 { 97 return (state == VDEV_STATE_CANT_OPEN); 98 } 99 100 /* ARGSUSED */ 101 static int 102 vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 103 { 104 return (state == VDEV_STATE_OFFLINE); 105 } 106 107 /* 108 * Detect if any leaf devices that have seen errors or could not be opened. 109 */ 110 static boolean_t 111 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 112 { 113 nvlist_t **child; 114 vdev_stat_t *vs; 115 uint_t c, children; 116 char *type; 117 118 /* 119 * Ignore problems within a 'replacing' vdev, since we're presumably in 120 * the process of repairing any such errors, and don't want to call them 121 * out again. We'll pick up the fact that a resilver is happening 122 * later. 
123 */ 124 verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 125 if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 126 return (B_FALSE); 127 128 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 129 &children) == 0) { 130 for (c = 0; c < children; c++) 131 if (find_vdev_problem(child[c], func)) 132 return (B_TRUE); 133 } else { 134 verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 135 (uint64_t **)&vs, &c) == 0); 136 137 if (func(vs->vs_state, vs->vs_aux, 138 vs->vs_read_errors + 139 vs->vs_write_errors + 140 vs->vs_checksum_errors)) 141 return (B_TRUE); 142 } 143 144 return (B_FALSE); 145 } 146 147 /* 148 * Active pool health status. 149 * 150 * To determine the status for a pool, we make several passes over the config, 151 * picking the most egregious error we find. In order of importance, we do the 152 * following: 153 * 154 * - Check for a complete and valid configuration 155 * - Look for any faulted or missing devices in a non-replicated config 156 * - Check for any data errors 157 * - Check for any faulted or missing devices in a replicated config 158 * - Look for any devices showing errors 159 * - Check for any resilvering devices 160 * 161 * There can obviously be multiple errors within a single pool, so this routine 162 * only picks the most damaging of all the current errors to report. 
163 */ 164 static zpool_status_t 165 check_status(nvlist_t *config, boolean_t isimport) 166 { 167 nvlist_t *nvroot; 168 vdev_stat_t *vs; 169 uint_t vsc; 170 uint64_t nerr; 171 uint64_t version; 172 uint64_t stateval; 173 uint64_t suspended; 174 uint64_t hostid = 0; 175 176 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 177 &version) == 0); 178 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 179 &nvroot) == 0); 180 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 181 (uint64_t **)&vs, &vsc) == 0); 182 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 183 &stateval) == 0); 184 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 185 186 /* 187 * Pool last accessed by another system. 188 */ 189 if (hostid != 0 && (unsigned long)hostid != gethostid() && 190 stateval == POOL_STATE_ACTIVE) 191 return (ZPOOL_STATUS_HOSTID_MISMATCH); 192 193 /* 194 * Newer on-disk version. 195 */ 196 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 197 vs->vs_aux == VDEV_AUX_VERSION_NEWER) 198 return (ZPOOL_STATUS_VERSION_NEWER); 199 200 /* 201 * Check that the config is complete. 202 */ 203 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 204 vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 205 return (ZPOOL_STATUS_BAD_GUID_SUM); 206 207 /* 208 * Check whether the pool has suspended due to failed I/O. 209 */ 210 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, 211 &suspended) == 0) { 212 if (suspended == ZIO_FAILURE_MODE_CONTINUE) 213 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); 214 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 215 } 216 217 /* 218 * Could not read a log. 219 */ 220 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 221 vs->vs_aux == VDEV_AUX_BAD_LOG) { 222 return (ZPOOL_STATUS_BAD_LOG); 223 } 224 225 /* 226 * Bad devices in non-replicated config. 
227 */ 228 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 229 find_vdev_problem(nvroot, vdev_faulted)) 230 return (ZPOOL_STATUS_FAULTED_DEV_NR); 231 232 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 233 find_vdev_problem(nvroot, vdev_missing)) 234 return (ZPOOL_STATUS_MISSING_DEV_NR); 235 236 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 237 find_vdev_problem(nvroot, vdev_broken)) 238 return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 239 240 /* 241 * Corrupted pool metadata 242 */ 243 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 244 vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 245 return (ZPOOL_STATUS_CORRUPT_POOL); 246 247 /* 248 * Persistent data errors. 249 */ 250 if (!isimport) { 251 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 252 &nerr) == 0 && nerr != 0) 253 return (ZPOOL_STATUS_CORRUPT_DATA); 254 } 255 256 /* 257 * Missing devices in a replicated config. 258 */ 259 if (find_vdev_problem(nvroot, vdev_faulted)) 260 return (ZPOOL_STATUS_FAULTED_DEV_R); 261 if (find_vdev_problem(nvroot, vdev_missing)) 262 return (ZPOOL_STATUS_MISSING_DEV_R); 263 if (find_vdev_problem(nvroot, vdev_broken)) 264 return (ZPOOL_STATUS_CORRUPT_LABEL_R); 265 266 /* 267 * Devices with errors 268 */ 269 if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 270 return (ZPOOL_STATUS_FAILING_DEV); 271 272 /* 273 * Offlined devices 274 */ 275 if (find_vdev_problem(nvroot, vdev_offlined)) 276 return (ZPOOL_STATUS_OFFLINE_DEV); 277 278 /* 279 * Currently resilvering 280 */ 281 if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 282 return (ZPOOL_STATUS_RESILVERING); 283 284 /* 285 * Outdated, but usable, version 286 */ 287 if (version < SPA_VERSION) 288 return (ZPOOL_STATUS_VERSION_OLDER); 289 290 return (ZPOOL_STATUS_OK); 291 } 292 293 zpool_status_t 294 zpool_get_status(zpool_handle_t *zhp, char **msgid) 295 { 296 zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 297 298 if (ret >= NMSGID) 299 *msgid = NULL; 300 else 301 *msgid = zfs_msgid_table[ret]; 302 303 return 
(ret); 304 } 305 306 zpool_status_t 307 zpool_import_status(nvlist_t *config, char **msgid) 308 { 309 zpool_status_t ret = check_status(config, B_TRUE); 310 311 if (ret >= NMSGID) 312 *msgid = NULL; 313 else 314 *msgid = zfs_msgid_table[ret]; 315 316 return (ret); 317 } 318