/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * This file contains the functions which analyze the status of a pool. This
 * includes both the status of an active pool, as well as the status of
 * exported pools. Returns one of the ZPOOL_STATUS_* defines describing the
 * status of the pool. This status is independent (to a certain degree) from
 * the state of the pool. A pool's state describes only whether or not it is
 * capable of providing the necessary fault tolerance for data. The status
 * describes the overall status of devices. A pool that is online can still
 * have a device that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to a FMA message ID. The explanation
 * is left up to the caller, depending on whether it is a live pool or an
 * import.
 */

#include <libzfs.h>
#include <string.h>
#include <unistd.h>
#include "libzfs_impl.h"

/*
 * Message ID table.
This must be kep in sync with the ZPOOL_STATUS_* defines 51 * in libzfs.h. Note that there are some status results which go past the end 52 * of this table, and hence have no associated message ID. 53 */ 54 static char *zfs_msgid_table[] = { 55 "ZFS-8000-14", 56 "ZFS-8000-2Q", 57 "ZFS-8000-3C", 58 "ZFS-8000-4J", 59 "ZFS-8000-5E", 60 "ZFS-8000-6X", 61 "ZFS-8000-72", 62 "ZFS-8000-8A", 63 "ZFS-8000-9P", 64 "ZFS-8000-A5", 65 "ZFS-8000-EY" 66 }; 67 68 /* 69 * If the pool is active, a certain class of static errors is overridden by the 70 * faults as analayzed by FMA. These faults have separate knowledge articles, 71 * and the article referred to by 'zpool status' must match that indicated by 72 * the syslog error message. We override missing data as well as corrupt pool. 73 */ 74 static char *zfs_msgid_table_active[] = { 75 "ZFS-8000-14", 76 "ZFS-8000-D3", /* overridden */ 77 "ZFS-8000-D3", /* overridden */ 78 "ZFS-8000-4J", 79 "ZFS-8000-5E", 80 "ZFS-8000-6X", 81 "ZFS-8000-CS", /* overridden */ 82 "ZFS-8000-8A", 83 "ZFS-8000-9P", 84 "ZFS-8000-CS", /* overridden */ 85 }; 86 87 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 88 89 /* ARGSUSED */ 90 static int 91 vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 92 { 93 return (state == VDEV_STATE_CANT_OPEN && 94 aux == VDEV_AUX_OPEN_FAILED); 95 } 96 97 /* ARGSUSED */ 98 static int 99 vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 100 { 101 return (errs != 0); 102 } 103 104 /* ARGSUSED */ 105 static int 106 vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 107 { 108 return (state == VDEV_STATE_CANT_OPEN); 109 } 110 111 /* ARGSUSED */ 112 static int 113 vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 114 { 115 return (state == VDEV_STATE_OFFLINE); 116 } 117 118 /* 119 * Detect if any leaf devices that have seen errors or could not be opened. 
120 */ 121 static boolean_t 122 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 123 { 124 nvlist_t **child; 125 vdev_stat_t *vs; 126 uint_t c, children; 127 char *type; 128 129 /* 130 * Ignore problems within a 'replacing' vdev, since we're presumably in 131 * the process of repairing any such errors, and don't want to call them 132 * out again. We'll pick up the fact that a resilver is happening 133 * later. 134 */ 135 verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 136 if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 137 return (B_FALSE); 138 139 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 140 &children) == 0) { 141 for (c = 0; c < children; c++) 142 if (find_vdev_problem(child[c], func)) 143 return (B_TRUE); 144 } else { 145 verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 146 (uint64_t **)&vs, &c) == 0); 147 148 if (func(vs->vs_state, vs->vs_aux, 149 vs->vs_read_errors + 150 vs->vs_write_errors + 151 vs->vs_checksum_errors)) 152 return (B_TRUE); 153 } 154 155 return (B_FALSE); 156 } 157 158 /* 159 * Active pool health status. 160 * 161 * To determine the status for a pool, we make several passes over the config, 162 * picking the most egregious error we find. In order of importance, we do the 163 * following: 164 * 165 * - Check for a complete and valid configuration 166 * - Look for any missing devices in a non-replicated config 167 * - Check for any data errors 168 * - Check for any missing devices in a replicated config 169 * - Look for any devices showing errors 170 * - Check for any resilvering devices 171 * 172 * There can obviously be multiple errors within a single pool, so this routine 173 * only picks the most damaging of all the current errors to report. 
174 */ 175 static zpool_status_t 176 check_status(nvlist_t *config, boolean_t isimport) 177 { 178 nvlist_t *nvroot; 179 vdev_stat_t *vs; 180 uint_t vsc; 181 uint64_t nerr; 182 uint64_t version; 183 uint64_t stateval; 184 uint64_t hostid = 0; 185 186 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 187 &version) == 0); 188 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 189 &nvroot) == 0); 190 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 191 (uint64_t **)&vs, &vsc) == 0); 192 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 193 &stateval) == 0); 194 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 195 196 /* 197 * Pool last accessed by another system. 198 */ 199 if (hostid != 0 && (unsigned long)hostid != gethostid() && 200 stateval == POOL_STATE_ACTIVE) 201 return (ZPOOL_STATUS_HOSTID_MISMATCH); 202 203 /* 204 * Newer on-disk version. 205 */ 206 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 207 vs->vs_aux == VDEV_AUX_VERSION_NEWER) 208 return (ZPOOL_STATUS_VERSION_NEWER); 209 210 /* 211 * Check that the config is complete. 212 */ 213 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 214 vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 215 return (ZPOOL_STATUS_BAD_GUID_SUM); 216 217 /* 218 * Missing devices in non-replicated config. 219 */ 220 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 221 find_vdev_problem(nvroot, vdev_missing)) 222 return (ZPOOL_STATUS_MISSING_DEV_NR); 223 224 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 225 find_vdev_problem(nvroot, vdev_broken)) 226 return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 227 228 /* 229 * Corrupted pool metadata 230 */ 231 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 232 vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 233 return (ZPOOL_STATUS_CORRUPT_POOL); 234 235 /* 236 * Persistent data errors. 
237 */ 238 if (!isimport) { 239 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 240 &nerr) == 0 && nerr != 0) 241 return (ZPOOL_STATUS_CORRUPT_DATA); 242 } 243 244 /* 245 * Missing devices in a replicated config. 246 */ 247 if (find_vdev_problem(nvroot, vdev_missing)) 248 return (ZPOOL_STATUS_MISSING_DEV_R); 249 if (find_vdev_problem(nvroot, vdev_broken)) 250 return (ZPOOL_STATUS_CORRUPT_LABEL_R); 251 252 /* 253 * Devices with errors 254 */ 255 if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 256 return (ZPOOL_STATUS_FAILING_DEV); 257 258 /* 259 * Offlined devices 260 */ 261 if (find_vdev_problem(nvroot, vdev_offlined)) 262 return (ZPOOL_STATUS_OFFLINE_DEV); 263 264 /* 265 * Currently resilvering 266 */ 267 if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 268 return (ZPOOL_STATUS_RESILVERING); 269 270 /* 271 * Outdated, but usable, version 272 */ 273 if (version < ZFS_VERSION) 274 return (ZPOOL_STATUS_VERSION_OLDER); 275 276 return (ZPOOL_STATUS_OK); 277 } 278 279 zpool_status_t 280 zpool_get_status(zpool_handle_t *zhp, char **msgid) 281 { 282 zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 283 284 if (ret >= NMSGID) 285 *msgid = NULL; 286 else 287 *msgid = zfs_msgid_table_active[ret]; 288 289 return (ret); 290 } 291 292 zpool_status_t 293 zpool_import_status(nvlist_t *config, char **msgid) 294 { 295 zpool_status_t ret = check_status(config, B_TRUE); 296 297 if (ret >= NMSGID) 298 *msgid = NULL; 299 else 300 *msgid = zfs_msgid_table[ret]; 301 302 return (ret); 303 } 304