1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * This file contains the functions which analyze the status of a pool. This 30 * includes both the status of an active pool, as well as the status of exported 31 * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 32 * the pool. This status is independent (to a certain degree) from the state of 33 * the pool. A pool's state describes only whether or not it is capable of 34 * providing the necessary fault tolerance for data. The status describes the 35 * overall status of devices. A pool that is online can still have a device 36 * that is experiencing errors. 37 * 38 * Only a subset of the possible faults can be detected using 'zpool status', 39 * and not all possible errors correspond to an FMA message ID. The explanation 40 * is left up to the caller, depending on whether it is a live pool or an 41 * import. 42 */ 43 44 #include <libzfs.h> 45 #include <string.h> 46 #include <unistd.h> 47 #include "libzfs_impl.h" 48 49 /* 50 * Message ID table. 
This must be kept in sync with the ZPOOL_STATUS_* defines 51 * in libzfs.h. Note that there are some status results which go past the end 52 * of this table, and hence have no associated message ID. 53 */ 54 static char *zfs_msgid_table[] = { 55 "ZFS-8000-14", 56 "ZFS-8000-2Q", 57 "ZFS-8000-3C", 58 "ZFS-8000-4J", 59 "ZFS-8000-5E", 60 "ZFS-8000-6X", 61 "ZFS-8000-72", 62 "ZFS-8000-8A", 63 "ZFS-8000-9P", 64 "ZFS-8000-A5", 65 "ZFS-8000-EY" 66 }; 67 68 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 69 70 /* ARGSUSED */ 71 static int 72 vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 73 { 74 return (state == VDEV_STATE_CANT_OPEN && 75 aux == VDEV_AUX_OPEN_FAILED); 76 } 77 78 /* ARGSUSED */ 79 static int 80 vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 81 { 82 return (state == VDEV_STATE_FAULTED); 83 } 84 85 /* ARGSUSED */ 86 static int 87 vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 88 { 89 return (state == VDEV_STATE_DEGRADED || errs != 0); 90 } 91 92 /* ARGSUSED */ 93 static int 94 vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 95 { 96 return (state == VDEV_STATE_CANT_OPEN); 97 } 98 99 /* ARGSUSED */ 100 static int 101 vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 102 { 103 return (state == VDEV_STATE_OFFLINE); 104 } 105 106 /* 107 * Detect if any leaf devices that have seen errors or could not be opened. 108 */ 109 static boolean_t 110 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 111 { 112 nvlist_t **child; 113 vdev_stat_t *vs; 114 uint_t c, children; 115 char *type; 116 117 /* 118 * Ignore problems within a 'replacing' vdev, since we're presumably in 119 * the process of repairing any such errors, and don't want to call them 120 * out again. We'll pick up the fact that a resilver is happening 121 * later. 
122 */ 123 verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 124 if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 125 return (B_FALSE); 126 127 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 128 &children) == 0) { 129 for (c = 0; c < children; c++) 130 if (find_vdev_problem(child[c], func)) 131 return (B_TRUE); 132 } else { 133 verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 134 (uint64_t **)&vs, &c) == 0); 135 136 if (func(vs->vs_state, vs->vs_aux, 137 vs->vs_read_errors + 138 vs->vs_write_errors + 139 vs->vs_checksum_errors)) 140 return (B_TRUE); 141 } 142 143 return (B_FALSE); 144 } 145 146 /* 147 * Active pool health status. 148 * 149 * To determine the status for a pool, we make several passes over the config, 150 * picking the most egregious error we find. In order of importance, we do the 151 * following: 152 * 153 * - Check for a complete and valid configuration 154 * - Look for any faulted or missing devices in a non-replicated config 155 * - Check for any data errors 156 * - Check for any faulted or missing devices in a replicated config 157 * - Look for any devices showing errors 158 * - Check for any resilvering devices 159 * 160 * There can obviously be multiple errors within a single pool, so this routine 161 * only picks the most damaging of all the current errors to report. 
162 */ 163 static zpool_status_t 164 check_status(nvlist_t *config, boolean_t isimport) 165 { 166 nvlist_t *nvroot; 167 vdev_stat_t *vs; 168 uint_t vsc; 169 uint64_t nerr; 170 uint64_t version; 171 uint64_t stateval; 172 uint64_t hostid = 0; 173 174 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 175 &version) == 0); 176 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 177 &nvroot) == 0); 178 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 179 (uint64_t **)&vs, &vsc) == 0); 180 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 181 &stateval) == 0); 182 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 183 184 /* 185 * Pool last accessed by another system. 186 */ 187 if (hostid != 0 && (unsigned long)hostid != gethostid() && 188 stateval == POOL_STATE_ACTIVE) 189 return (ZPOOL_STATUS_HOSTID_MISMATCH); 190 191 /* 192 * Newer on-disk version. 193 */ 194 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 195 vs->vs_aux == VDEV_AUX_VERSION_NEWER) 196 return (ZPOOL_STATUS_VERSION_NEWER); 197 198 /* 199 * Check that the config is complete. 200 */ 201 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 202 vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 203 return (ZPOOL_STATUS_BAD_GUID_SUM); 204 205 /* 206 * Bad devices in non-replicated config. 207 */ 208 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 209 find_vdev_problem(nvroot, vdev_faulted)) 210 return (ZPOOL_STATUS_FAULTED_DEV_NR); 211 212 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 213 find_vdev_problem(nvroot, vdev_missing)) 214 return (ZPOOL_STATUS_MISSING_DEV_NR); 215 216 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 217 find_vdev_problem(nvroot, vdev_broken)) 218 return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 219 220 /* 221 * Corrupted pool metadata 222 */ 223 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 224 vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 225 return (ZPOOL_STATUS_CORRUPT_POOL); 226 227 /* 228 * Persistent data errors. 
229 */ 230 if (!isimport) { 231 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 232 &nerr) == 0 && nerr != 0) 233 return (ZPOOL_STATUS_CORRUPT_DATA); 234 } 235 236 /* 237 * Missing devices in a replicated config. 238 */ 239 if (find_vdev_problem(nvroot, vdev_faulted)) 240 return (ZPOOL_STATUS_FAULTED_DEV_R); 241 if (find_vdev_problem(nvroot, vdev_missing)) 242 return (ZPOOL_STATUS_MISSING_DEV_R); 243 if (find_vdev_problem(nvroot, vdev_broken)) 244 return (ZPOOL_STATUS_CORRUPT_LABEL_R); 245 246 /* 247 * Devices with errors 248 */ 249 if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 250 return (ZPOOL_STATUS_FAILING_DEV); 251 252 /* 253 * Offlined devices 254 */ 255 if (find_vdev_problem(nvroot, vdev_offlined)) 256 return (ZPOOL_STATUS_OFFLINE_DEV); 257 258 /* 259 * Currently resilvering 260 */ 261 if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 262 return (ZPOOL_STATUS_RESILVERING); 263 264 /* 265 * Outdated, but usable, version 266 */ 267 if (version < SPA_VERSION) 268 return (ZPOOL_STATUS_VERSION_OLDER); 269 270 return (ZPOOL_STATUS_OK); 271 } 272 273 zpool_status_t 274 zpool_get_status(zpool_handle_t *zhp, char **msgid) 275 { 276 zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 277 278 if (ret >= NMSGID) 279 *msgid = NULL; 280 else 281 *msgid = zfs_msgid_table[ret]; 282 283 return (ret); 284 } 285 286 zpool_status_t 287 zpool_import_status(nvlist_t *config, char **msgid) 288 { 289 zpool_status_t ret = check_status(config, B_TRUE); 290 291 if (ret >= NMSGID) 292 *msgid = NULL; 293 else 294 *msgid = zfs_msgid_table[ret]; 295 296 return (ret); 297 } 298