/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 */

/*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool, as well as the status of
 * exported pools.  Returns one of the ZPOOL_STATUS_* defines describing the
 * status of the pool.  This status is independent (to a certain degree) from
 * the state of the pool.  A pool's state describes only whether or not it is
 * capable of providing the necessary fault tolerance for data.  The status
 * describes the overall status of devices.  A pool that is online can still
 * have a device that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to a FMA message ID.  The
 * explanation is left up to the caller, depending on whether it is a live
 * pool or an import.
42 */ 43 44 #include <libzfs.h> 45 #include <libzutil.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include "libzfs_impl.h" 49 #include "zfeature_common.h" 50 51 /* 52 * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 53 * in libzfs.h. Note that there are some status results which go past the end 54 * of this table, and hence have no associated message ID. 55 */ 56 static char *zfs_msgid_table[] = { 57 "ZFS-8000-14", /* ZPOOL_STATUS_CORRUPT_CACHE */ 58 "ZFS-8000-2Q", /* ZPOOL_STATUS_MISSING_DEV_R */ 59 "ZFS-8000-3C", /* ZPOOL_STATUS_MISSING_DEV_NR */ 60 "ZFS-8000-4J", /* ZPOOL_STATUS_CORRUPT_LABEL_R */ 61 "ZFS-8000-5E", /* ZPOOL_STATUS_CORRUPT_LABEL_NR */ 62 "ZFS-8000-6X", /* ZPOOL_STATUS_BAD_GUID_SUM */ 63 "ZFS-8000-72", /* ZPOOL_STATUS_CORRUPT_POOL */ 64 "ZFS-8000-8A", /* ZPOOL_STATUS_CORRUPT_DATA */ 65 "ZFS-8000-9P", /* ZPOOL_STATUS_FAILING_DEV */ 66 "ZFS-8000-A5", /* ZPOOL_STATUS_VERSION_NEWER */ 67 "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_MISMATCH */ 68 "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_ACTIVE */ 69 "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_REQUIRED */ 70 "ZFS-8000-HC", /* ZPOOL_STATUS_IO_FAILURE_WAIT */ 71 "ZFS-8000-JQ", /* ZPOOL_STATUS_IO_FAILURE_CONTINUE */ 72 "ZFS-8000-MM", /* ZPOOL_STATUS_IO_FAILURE_MMP */ 73 "ZFS-8000-K4", /* ZPOOL_STATUS_BAD_LOG */ 74 /* 75 * The following results have no message ID. 
76 * ZPOOL_STATUS_UNSUP_FEAT_READ 77 * ZPOOL_STATUS_UNSUP_FEAT_WRITE 78 * ZPOOL_STATUS_FAULTED_DEV_R 79 * ZPOOL_STATUS_FAULTED_DEV_NR 80 * ZPOOL_STATUS_VERSION_OLDER 81 * ZPOOL_STATUS_FEAT_DISABLED 82 * ZPOOL_STATUS_RESILVERING 83 * ZPOOL_STATUS_OFFLINE_DEV 84 * ZPOOL_STATUS_REMOVED_DEV 85 * ZPOOL_STATUS_OK 86 */ 87 }; 88 89 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 90 91 /* ARGSUSED */ 92 static int 93 vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 94 { 95 return (state == VDEV_STATE_CANT_OPEN && 96 aux == VDEV_AUX_OPEN_FAILED); 97 } 98 99 /* ARGSUSED */ 100 static int 101 vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 102 { 103 return (state == VDEV_STATE_FAULTED); 104 } 105 106 /* ARGSUSED */ 107 static int 108 vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 109 { 110 return (state == VDEV_STATE_DEGRADED || errs != 0); 111 } 112 113 /* ARGSUSED */ 114 static int 115 vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 116 { 117 return (state == VDEV_STATE_CANT_OPEN); 118 } 119 120 /* ARGSUSED */ 121 static int 122 vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 123 { 124 return (state == VDEV_STATE_OFFLINE); 125 } 126 127 /* ARGSUSED */ 128 static int 129 vdev_removed(uint64_t state, uint64_t aux, uint64_t errs) 130 { 131 return (state == VDEV_STATE_REMOVED); 132 } 133 134 /* 135 * Detect if any leaf devices that have seen errors or could not be opened. 136 */ 137 static boolean_t 138 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 139 { 140 nvlist_t **child; 141 vdev_stat_t *vs; 142 uint_t c, children; 143 char *type; 144 145 /* 146 * Ignore problems within a 'replacing' vdev, since we're presumably in 147 * the process of repairing any such errors, and don't want to call them 148 * out again. We'll pick up the fact that a resilver is happening 149 * later. 
150 */ 151 verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 152 if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 153 return (B_FALSE); 154 155 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 156 &children) == 0) { 157 for (c = 0; c < children; c++) 158 if (find_vdev_problem(child[c], func)) 159 return (B_TRUE); 160 } else { 161 verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, 162 (uint64_t **)&vs, &c) == 0); 163 164 if (func(vs->vs_state, vs->vs_aux, 165 vs->vs_read_errors + 166 vs->vs_write_errors + 167 vs->vs_checksum_errors)) 168 return (B_TRUE); 169 } 170 171 /* 172 * Check any L2 cache devs 173 */ 174 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child, 175 &children) == 0) { 176 for (c = 0; c < children; c++) 177 if (find_vdev_problem(child[c], func)) 178 return (B_TRUE); 179 } 180 181 return (B_FALSE); 182 } 183 184 /* 185 * Active pool health status. 186 * 187 * To determine the status for a pool, we make several passes over the config, 188 * picking the most egregious error we find. In order of importance, we do the 189 * following: 190 * 191 * - Check for a complete and valid configuration 192 * - Look for any faulted or missing devices in a non-replicated config 193 * - Check for any data errors 194 * - Check for any faulted or missing devices in a replicated config 195 * - Look for any devices showing errors 196 * - Check for any resilvering devices 197 * 198 * There can obviously be multiple errors within a single pool, so this routine 199 * only picks the most damaging of all the current errors to report. 
 */
/*
 * NOTE: the checks below are ordered from most to least severe; the first
 * matching condition wins, so do not reorder them casually.  *erratap is
 * only written when an errata is found (ZPOOL_STATUS_ERRATA).
 */
static zpool_status_t
check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
{
	nvlist_t *nvroot;
	vdev_stat_t *vs;
	pool_scan_stat_t *ps = NULL;
	uint_t vsc, psc;
	uint64_t nerr;
	uint64_t version;
	uint64_t stateval;
	uint64_t suspended;
	uint64_t hostid = 0;
	uint64_t errata = 0;
	unsigned long system_hostid = get_system_hostid();

	/*
	 * Version, vdev tree, root vdev stats, and pool state are mandatory
	 * config entries; a config missing any of them is malformed.
	 */
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
	    &version) == 0);
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &vsc) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);

	/*
	 * Currently resilvering a vdev (scan stats are optional, hence the
	 * unchecked lookup and the ps != NULL guard).
	 */
	(void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
	    (uint64_t **)&ps, &psc);
	if (ps != NULL && ps->pss_func == POOL_SCAN_RESILVER &&
	    ps->pss_state == DSS_SCANNING)
		return (ZPOOL_STATUS_RESILVERING);

	/*
	 * The multihost property is set and the pool may be active.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_ACTIVE) {
		mmp_state_t mmp_state;
		nvlist_t *nvinfo;

		nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
		mmp_state = fnvlist_lookup_uint64(nvinfo,
		    ZPOOL_CONFIG_MMP_STATE);

		if (mmp_state == MMP_STATE_ACTIVE)
			return (ZPOOL_STATUS_HOSTID_ACTIVE);
		else if (mmp_state == MMP_STATE_NO_HOSTID)
			return (ZPOOL_STATUS_HOSTID_REQUIRED);
		else
			return (ZPOOL_STATUS_HOSTID_MISMATCH);
	}

	/*
	 * Pool last accessed by another system.
	 */
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
	if (hostid != 0 && (unsigned long)hostid != system_hostid &&
	    stateval == POOL_STATE_ACTIVE)
		return (ZPOOL_STATUS_HOSTID_MISMATCH);

	/*
	 * Newer on-disk version.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
		return (ZPOOL_STATUS_VERSION_NEWER);

	/*
	 * Unsupported feature(s).  If the pool could still be imported
	 * read-only, report the (less severe) write variant.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_UNSUP_FEAT) {
		nvlist_t *nvinfo;

		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
		    &nvinfo) == 0);
		if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY))
			return (ZPOOL_STATUS_UNSUP_FEAT_WRITE);
		return (ZPOOL_STATUS_UNSUP_FEAT_READ);
	}

	/*
	 * Check that the config is complete.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
		return (ZPOOL_STATUS_BAD_GUID_SUM);

	/*
	 * Check whether the pool has suspended.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
	    &suspended) == 0) {
		uint64_t reason;

		/* MMP-triggered suspension is reported specially. */
		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON,
		    &reason) == 0 && reason == ZIO_SUSPEND_MMP)
			return (ZPOOL_STATUS_IO_FAILURE_MMP);

		if (suspended == ZIO_FAILURE_MODE_CONTINUE)
			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
		return (ZPOOL_STATUS_IO_FAILURE_WAIT);
	}

	/*
	 * Could not read a log.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
		return (ZPOOL_STATUS_BAD_LOG);
	}

	/*
	 * Bad devices in non-replicated config (the root vdev itself could
	 * not be opened, so redundancy did not cover the failure).
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_faulted))
		return (ZPOOL_STATUS_FAULTED_DEV_NR);

	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_missing))
		return (ZPOOL_STATUS_MISSING_DEV_NR);

	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_broken))
		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);

	/*
	 * Corrupted pool metadata
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
		return (ZPOOL_STATUS_CORRUPT_POOL);

	/*
	 * Persistent data errors (only available for a live pool, not an
	 * import).
	 */
	if (!isimport) {
		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
		    &nerr) == 0 && nerr != 0)
			return (ZPOOL_STATUS_CORRUPT_DATA);
	}

	/*
	 * Missing devices in a replicated config.
	 */
	if (find_vdev_problem(nvroot, vdev_faulted))
		return (ZPOOL_STATUS_FAULTED_DEV_R);
	if (find_vdev_problem(nvroot, vdev_missing))
		return (ZPOOL_STATUS_MISSING_DEV_R);
	if (find_vdev_problem(nvroot, vdev_broken))
		return (ZPOOL_STATUS_CORRUPT_LABEL_R);

	/*
	 * Devices with errors
	 */
	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
		return (ZPOOL_STATUS_FAILING_DEV);

	/*
	 * Offlined devices
	 */
	if (find_vdev_problem(nvroot, vdev_offlined))
		return (ZPOOL_STATUS_OFFLINE_DEV);

	/*
	 * Removed device
	 */
	if (find_vdev_problem(nvroot, vdev_removed))
		return (ZPOOL_STATUS_REMOVED_DEV);

	/*
	 * Informational errata available.  This is the only path that
	 * writes through erratap.
	 */
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata);
	if (errata) {
		*erratap = errata;
		return (ZPOOL_STATUS_ERRATA);
	}

	/*
	 * Outdated, but usable, version
	 */
	if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION)
		return (ZPOOL_STATUS_VERSION_OLDER);

	/*
	 * Usable pool with disabled features.  For an import, enabled
	 * features come from the load info; for a live pool, from the
	 * feature stats.
	 */
	if (version >= SPA_VERSION_FEATURES) {
		int i;
		nvlist_t *feat;

		if (isimport) {
			feat = fnvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_LOAD_INFO);
			if (nvlist_exists(feat, ZPOOL_CONFIG_ENABLED_FEAT))
				feat = fnvlist_lookup_nvlist(feat,
				    ZPOOL_CONFIG_ENABLED_FEAT);
		} else {
			feat = fnvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_FEATURE_STATS);
		}

		/* Any known feature absent from the list is disabled. */
		for (i = 0; i < SPA_FEATURES; i++) {
			zfeature_info_t *fi = &spa_feature_table[i];
			if (!nvlist_exists(feat, fi->fi_guid))
				return (ZPOOL_STATUS_FEAT_DISABLED);
		}
	}

	return (ZPOOL_STATUS_OK);
}

/*
 * Return the status of the given live pool.  *msgid is set to the FMA
 * message ID from zfs_msgid_table, or NULL when the status has no
 * associated message ID.  *errata is only written when the returned
 * status is ZPOOL_STATUS_ERRATA.
 */
zpool_status_t
zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata)
{
	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata);

	if (ret >= NMSGID)
		*msgid = NULL;
	else
		*msgid = zfs_msgid_table[ret];

	return (ret);
}

/*
 * Same as zpool_get_status(), but for an exported/importable pool config
 * (data-error and failing-device checks are skipped in that case).
 */
zpool_status_t
zpool_import_status(nvlist_t *config, char **msgid, zpool_errata_t *errata)
{
	zpool_status_t ret = check_status(config, B_TRUE, errata);

	if (ret >= NMSGID)
		*msgid = NULL;
	else
		*msgid = zfs_msgid_table[ret];

	return (ret);
}