/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * This file contains the functions which analyze the status of a pool. This * includes both the status of an active pool, as well as the status of exported * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of * the pool. This status is independent (to a certain degree) from the state of * the pool. A pool's state describes only whether or not it is capable of * providing the necessary fault tolerance for data. The status describes the * overall status of devices. A pool that is online can still have a device * that is experiencing errors. * * Only a subset of the possible faults can be detected using 'zpool status', * and not all possible errors correspond to an FMA message ID. The explanation * is left up to the caller, depending on whether it is a live pool or an * import. */ #include #include #include "libzfs_impl.h" /* * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines * in libzfs.h. 
Note that there are some status results which go past the end * of this table, and hence have no associated message ID. */ static char *msgid_table[] = { "ZFS-8000-14", "ZFS-8000-2Q", "ZFS-8000-3C", "ZFS-8000-4J", "ZFS-8000-5E", "ZFS-8000-6X", "ZFS-8000-72", "ZFS-8000-8A", "ZFS-8000-9P", "ZFS-8000-A5" }; #define NMSGID (sizeof (msgid_table) / sizeof (msgid_table[0])) /* ARGSUSED */ static int vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) { return (state == VDEV_STATE_CANT_OPEN && aux == VDEV_AUX_OPEN_FAILED); } /* ARGSUSED */ static int vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) { return (errs != 0); } /* ARGSUSED */ static int vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) { return (state == VDEV_STATE_CANT_OPEN); } /* ARGSUSED */ static int vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) { return (state == VDEV_STATE_OFFLINE); } /* * Detect if any leaf devices that have seen errors or could not be opened. */ static int find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) { nvlist_t **child; vdev_stat_t *vs; uint_t c, children; char *type; /* * Ignore problems within a 'replacing' vdev, since we're presumably in * the process of repairing any such errors, and don't want to call them * out again. We'll pick up the fact that a resilver is happening * later. */ verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_REPLACING) == 0) return (FALSE); if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if (find_vdev_problem(child[c], func)) return (TRUE); } else { verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &c) == 0); if (func(vs->vs_state, vs->vs_aux, vs->vs_read_errors + vs->vs_write_errors + vs->vs_checksum_errors)) return (TRUE); } return (FALSE); } /* * Active pool health status. 
*
 * To determine the status for a pool, we make several passes over the config,
 * picking the most egregious error we find.  In order of importance, we do the
 * following:
 *
 *    - Check for a complete and valid configuration (GUID sum)
 *    - Look for any missing devices
 *    - Look for any devices that cannot be opened (corrupted labels)
 *    - Look for any devices showing errors (skipped when importing)
 *    - Look for any offlined devices
 *    - Check for any resilvering devices
 *
 * There can obviously be multiple errors within a single pool, so this routine
 * only picks the most damaging of all the current errors to report.
 */
static zpool_status_t
check_status(nvlist_t *config, int isimport)
{
	nvlist_t *root;
	vdev_stat_t *rstat;
	uint_t nstat;
	int root_cant_open;

	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &root) == 0);
	verify(nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_STATS,
	    (uint64_t **)&rstat, &nstat) == 0);

	/*
	 * Several checks below pick the "_NR" or "_R" flavour of a status
	 * depending on whether the root vdev itself could be opened.
	 */
	root_cant_open = (rstat->vs_state == VDEV_STATE_CANT_OPEN);

	/* Incomplete configuration. */
	if (root_cant_open && rstat->vs_aux == VDEV_AUX_BAD_GUID_SUM)
		return (ZPOOL_STATUS_BAD_GUID_SUM);

	/* Missing devices. */
	if (find_vdev_problem(root, vdev_missing)) {
		return (root_cant_open ? ZPOOL_STATUS_MISSING_DEV_NR :
		    ZPOOL_STATUS_MISSING_DEV_R);
	}

	/* Devices with corrupted labels. */
	if (find_vdev_problem(root, vdev_broken)) {
		return (root_cant_open ? ZPOOL_STATUS_CORRUPT_LABEL_NR :
		    ZPOOL_STATUS_CORRUPT_LABEL_R);
	}

	/* Devices with errors; this pass is not made for an import. */
	if (!isimport && find_vdev_problem(root, vdev_errors))
		return (ZPOOL_STATUS_FAILING_DEV);

	/* Offlined devices. */
	if (find_vdev_problem(root, vdev_offlined))
		return (ZPOOL_STATUS_OFFLINE_DEV);

	/* Currently resilvering. */
	if (!rstat->vs_scrub_complete &&
	    rstat->vs_scrub_type == POOL_SCRUB_RESILVER)
		return (ZPOOL_STATUS_RESILVERING);

	/*
	 * We currently have no way to detect the following errors:
	 *
	 *    CORRUPT_CACHE
	 *    VERSION_MISMATCH
	 *    CORRUPT_POOL
	 *    CORRUPT_DATA
	 */

	return (ZPOOL_STATUS_OK);
}

/*
 * Return the status of the pool behind 'zhp', computed from its config.
 * '*msgid' is set to the matching msgid_table entry, or to NULL when the
 * status value lies beyond the end of that table.
 */
zpool_status_t
zpool_get_status(zpool_handle_t *zhp, char **msgid)
{
	zpool_status_t status = check_status(zhp->zpool_config, FALSE);

	*msgid = (status < NMSGID) ? msgid_table[status] : NULL;

	return (status);
}

/*
 * Return the status described by 'config', evaluated as an import (the
 * per-device error-count pass is skipped).  '*msgid' is set to the matching
 * msgid_table entry, or to NULL when the status value lies beyond the end of
 * that table.
 */
zpool_status_t
zpool_import_status(nvlist_t *config, char **msgid)
{
	zpool_status_t status = check_status(config, TRUE);

	*msgid = (status < NMSGID) ? msgid_table[status] : NULL;

	return (status);
}