/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * This file contains the functions which analyze the status of a pool. This
 * includes both the status of an active pool and the status of exported
 * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of
 * the pool. This status is independent (to a certain degree) from the state of
 * the pool. A pool's state describes only whether or not it is capable of
 * providing the necessary fault tolerance for data. The status describes the
 * overall status of devices. A pool that is online can still have a device
 * that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to an FMA message ID. The explanation
 * is left up to the caller, depending on whether it is a live pool or an
 * import.
 */

#include <libzfs.h>
#include <string.h>
#include "libzfs_impl.h"

/*
 * Message ID table.
This must be kep in sync with the ZPOOL_STATUS_* defines 50 * in libzfs.h. Note that there are some status results which go past the end 51 * of this table, and hence have no associated message ID. 52 */ 53 static char *msgid_table[] = { 54 "ZFS-8000-14", 55 "ZFS-8000-2Q", 56 "ZFS-8000-3C", 57 "ZFS-8000-4J", 58 "ZFS-8000-5E", 59 "ZFS-8000-6X", 60 "ZFS-8000-72", 61 "ZFS-8000-8A", 62 "ZFS-8000-9P", 63 "ZFS-8000-A5" 64 }; 65 66 /* 67 * If the pool is active, a certain class of static errors is overridden by the 68 * faults as analayzed by FMA. These faults have separate knowledge articles, 69 * and the article referred to by 'zpool status' must match that indicated by 70 * the syslog error message. We override missing data as well as corrupt pool. 71 */ 72 static char *msgid_table_active[] = { 73 "ZFS-8000-14", 74 "ZFS-8000-D3", /* overridden */ 75 "ZFS-8000-D3", /* overridden */ 76 "ZFS-8000-4J", 77 "ZFS-8000-5E", 78 "ZFS-8000-6X", 79 "ZFS-8000-CS", /* overridden */ 80 "ZFS-8000-8A", 81 "ZFS-8000-9P", 82 "ZFS-8000-CS", /* overridden */ 83 }; 84 85 #define NMSGID (sizeof (msgid_table) / sizeof (msgid_table[0])) 86 87 /* ARGSUSED */ 88 static int 89 vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 90 { 91 return (state == VDEV_STATE_CANT_OPEN && 92 aux == VDEV_AUX_OPEN_FAILED); 93 } 94 95 /* ARGSUSED */ 96 static int 97 vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 98 { 99 return (errs != 0); 100 } 101 102 /* ARGSUSED */ 103 static int 104 vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 105 { 106 return (state == VDEV_STATE_CANT_OPEN); 107 } 108 109 /* ARGSUSED */ 110 static int 111 vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 112 { 113 return (state == VDEV_STATE_OFFLINE); 114 } 115 116 /* 117 * Detect if any leaf devices that have seen errors or could not be opened. 
118 */ 119 static boolean_t 120 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 121 { 122 nvlist_t **child; 123 vdev_stat_t *vs; 124 uint_t c, children; 125 char *type; 126 127 /* 128 * Ignore problems within a 'replacing' vdev, since we're presumably in 129 * the process of repairing any such errors, and don't want to call them 130 * out again. We'll pick up the fact that a resilver is happening 131 * later. 132 */ 133 verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 134 if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 135 return (B_FALSE); 136 137 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 138 &children) == 0) { 139 for (c = 0; c < children; c++) 140 if (find_vdev_problem(child[c], func)) 141 return (B_TRUE); 142 } else { 143 verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 144 (uint64_t **)&vs, &c) == 0); 145 146 if (func(vs->vs_state, vs->vs_aux, 147 vs->vs_read_errors + 148 vs->vs_write_errors + 149 vs->vs_checksum_errors)) 150 return (B_TRUE); 151 } 152 153 return (B_FALSE); 154 } 155 156 /* 157 * Active pool health status. 158 * 159 * To determine the status for a pool, we make several passes over the config, 160 * picking the most egregious error we find. In order of importance, we do the 161 * following: 162 * 163 * - Check for a complete and valid configuration 164 * - Look for any missing devices in a non-replicated config 165 * - Check for any data errors 166 * - Check for any missing devices in a replicated config 167 * - Look for any devices showing errors 168 * - Check for any resilvering devices 169 * 170 * There can obviously be multiple errors within a single pool, so this routine 171 * only picks the most damaging of all the current errors to report. 
172 */ 173 static zpool_status_t 174 check_status(nvlist_t *config, boolean_t isimport) 175 { 176 nvlist_t *nvroot; 177 vdev_stat_t *vs; 178 uint_t vsc; 179 uint64_t nerr; 180 uint64_t version; 181 182 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 183 &version) == 0); 184 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 185 &nvroot) == 0); 186 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 187 (uint64_t **)&vs, &vsc) == 0); 188 189 /* 190 * Newer on-disk version. 191 */ 192 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 193 vs->vs_aux == VDEV_AUX_VERSION_NEWER) 194 return (ZPOOL_STATUS_VERSION_NEWER); 195 196 /* 197 * Check that the config is complete. 198 */ 199 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 200 vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 201 return (ZPOOL_STATUS_BAD_GUID_SUM); 202 203 /* 204 * Missing devices in non-replicated config. 205 */ 206 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 207 find_vdev_problem(nvroot, vdev_missing)) 208 return (ZPOOL_STATUS_MISSING_DEV_NR); 209 210 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 211 find_vdev_problem(nvroot, vdev_broken)) 212 return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 213 214 /* 215 * Corrupted pool metadata 216 */ 217 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 218 vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 219 return (ZPOOL_STATUS_CORRUPT_POOL); 220 221 /* 222 * Persistent data errors. 223 */ 224 if (!isimport) { 225 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 226 &nerr) == 0 && nerr != 0) 227 return (ZPOOL_STATUS_CORRUPT_DATA); 228 } 229 230 /* 231 * Missing devices in a replicated config. 
232 */ 233 if (find_vdev_problem(nvroot, vdev_missing)) 234 return (ZPOOL_STATUS_MISSING_DEV_R); 235 if (find_vdev_problem(nvroot, vdev_broken)) 236 return (ZPOOL_STATUS_CORRUPT_LABEL_R); 237 238 /* 239 * Devices with errors 240 */ 241 if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 242 return (ZPOOL_STATUS_FAILING_DEV); 243 244 /* 245 * Offlined devices 246 */ 247 if (find_vdev_problem(nvroot, vdev_offlined)) 248 return (ZPOOL_STATUS_OFFLINE_DEV); 249 250 /* 251 * Currently resilvering 252 */ 253 if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 254 return (ZPOOL_STATUS_RESILVERING); 255 256 /* 257 * Outdated, but usable, version 258 */ 259 if (version < ZFS_VERSION) 260 return (ZPOOL_STATUS_VERSION_OLDER); 261 262 return (ZPOOL_STATUS_OK); 263 } 264 265 zpool_status_t 266 zpool_get_status(zpool_handle_t *zhp, char **msgid) 267 { 268 zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 269 270 if (ret >= NMSGID) 271 *msgid = NULL; 272 else 273 *msgid = msgid_table_active[ret]; 274 275 return (ret); 276 } 277 278 zpool_status_t 279 zpool_import_status(nvlist_t *config, char **msgid) 280 { 281 zpool_status_t ret = check_status(config, B_TRUE); 282 283 if (ret >= NMSGID) 284 *msgid = NULL; 285 else 286 *msgid = msgid_table[ret]; 287 288 return (ret); 289 } 290