xref: /illumos-gate/usr/src/lib/libzfs/common/libzfs_status.c (revision aba1133a5077b2daf9217c517f6aa15731135d8e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This file contains the functions which analyze the status of a pool.  This
31  * includes both the status of an active pool and the status of exported
32  * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
33  * the pool.  This status is independent (to a certain degree) from the state of
34  * the pool.  A pool's state describes only whether or not it is capable of
35  * providing the necessary fault tolerance for data.  The status describes the
36  * overall status of devices.  A pool that is online can still have a device
37  * that is experiencing errors.
38  *
39  * Only a subset of the possible faults can be detected using 'zpool status',
40  * and not all possible errors correspond to a FMA message ID.  The explanation
41  * is left up to the caller, depending on whether it is a live pool or an
42  * import.
43  */
44 
45 #include <libzfs.h>
46 #include <string.h>
47 #include "libzfs_impl.h"
48 
/*
 * FMA message IDs, indexed by zpool_status_t value.  The order of the entries
 * must be kept in sync with the ZPOOL_STATUS_* defines in libzfs.h.  Status
 * values at or beyond NMSGID run past the end of this table and therefore
 * have no associated message ID.
 */
static char *msgid_table[] = {
	"ZFS-8000-14",		/* [0] */
	"ZFS-8000-2Q",		/* [1] */
	"ZFS-8000-3C",		/* [2] */
	"ZFS-8000-4J",		/* [3] */
	"ZFS-8000-5E",		/* [4] */
	"ZFS-8000-6X",		/* [5] */
	"ZFS-8000-72",		/* [6] */
	"ZFS-8000-8A",		/* [7] */
	"ZFS-8000-9P",		/* [8] */
	"ZFS-8000-A5"		/* [9] */
};

/* Number of entries in msgid_table. */
#define	NMSGID	(sizeof (msgid_table) / sizeof (*msgid_table))
68 
69 /* ARGSUSED */
70 static int
71 vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
72 {
73 	return (state == VDEV_STATE_CANT_OPEN &&
74 	    aux == VDEV_AUX_OPEN_FAILED);
75 }
76 
/*
 * Predicate for find_vdev_problem(): returns 1 when the device's combined
 * error count is non-zero and 0 otherwise.  The state and auxiliary state
 * are ignored.
 */
/* ARGSUSED */
static int
vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
{
	if (errs == 0)
		return (0);

	return (1);
}
83 
84 /* ARGSUSED */
85 static int
86 vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
87 {
88 	return (state == VDEV_STATE_CANT_OPEN);
89 }
90 
91 /* ARGSUSED */
92 static int
93 vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
94 {
95 	return (state == VDEV_STATE_OFFLINE);
96 }
97 
98 /*
99  * Detect if any leaf devices that have seen errors or could not be opened.
100  */
101 static int
102 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
103 {
104 	nvlist_t **child;
105 	vdev_stat_t *vs;
106 	uint_t c, children;
107 	char *type;
108 
109 	/*
110 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
111 	 * the process of repairing any such errors, and don't want to call them
112 	 * out again.  We'll pick up the fact that a resilver is happening
113 	 * later.
114 	 */
115 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
116 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
117 		return (FALSE);
118 
119 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
120 	    &children) == 0) {
121 		for (c = 0; c < children; c++)
122 			if (find_vdev_problem(child[c], func))
123 				return (TRUE);
124 	} else {
125 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
126 		    (uint64_t **)&vs, &c) == 0);
127 
128 		if (func(vs->vs_state, vs->vs_aux,
129 		    vs->vs_read_errors +
130 		    vs->vs_write_errors +
131 		    vs->vs_checksum_errors))
132 			return (TRUE);
133 	}
134 
135 	return (FALSE);
136 }
137 
138 /*
139  * Active pool health status.
140  *
141  * To determine the status for a pool, we make several passes over the config,
142  * picking the most egregious error we find.  In order of importance, we do the
143  * following:
144  *
145  *	- Check for a complete and valid configuration
146  *	- Look for any missing devices
147  *	- Look for any devices showing errors
148  *	- Check for any data errors
149  *	- Check for any resilvering devices
150  *
151  * There can obviously be multiple errors within a single pool, so this routine
152  * only picks the most damaging of all the current errors to report.
153  */
154 static zpool_status_t
155 check_status(nvlist_t *config, int isimport)
156 {
157 	nvlist_t *nvroot;
158 	vdev_stat_t *vs;
159 	uint_t vsc;
160 
161 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
162 	    &nvroot) == 0);
163 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
164 	    (uint64_t **)&vs, &vsc) == 0);
165 
166 	/*
167 	 * Check that the config is complete.
168 	 */
169 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
170 	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) {
171 		return (ZPOOL_STATUS_BAD_GUID_SUM);
172 	}
173 
174 	/*
175 	 * Missing devices
176 	 */
177 	if (find_vdev_problem(nvroot, vdev_missing)) {
178 		if (vs->vs_state == VDEV_STATE_CANT_OPEN)
179 			return (ZPOOL_STATUS_MISSING_DEV_NR);
180 		else
181 			return (ZPOOL_STATUS_MISSING_DEV_R);
182 	}
183 
184 	/*
185 	 * Devices with corrupted labels.
186 	 */
187 	if (find_vdev_problem(nvroot, vdev_broken)) {
188 		if (vs->vs_state == VDEV_STATE_CANT_OPEN)
189 			return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
190 		else
191 			return (ZPOOL_STATUS_CORRUPT_LABEL_R);
192 	}
193 
194 	/*
195 	 * Devices with errors
196 	 */
197 	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
198 		return (ZPOOL_STATUS_FAILING_DEV);
199 
200 	/*
201 	 * Offlined devices
202 	 */
203 	if (find_vdev_problem(nvroot, vdev_offlined))
204 		return (ZPOOL_STATUS_OFFLINE_DEV);
205 
206 	/*
207 	 * Currently resilvering
208 	 */
209 	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
210 		return (ZPOOL_STATUS_RESILVERING);
211 
212 	/*
213 	 * We currently have no way to detect the following errors:
214 	 *
215 	 * 	CORRUPT_CACHE
216 	 * 	VERSION_MISMATCH
217 	 * 	CORRUPT_POOL
218 	 * 	CORRUPT_DATA
219 	 */
220 
221 	return (ZPOOL_STATUS_OK);
222 }
223 
224 zpool_status_t
225 zpool_get_status(zpool_handle_t *zhp, char **msgid)
226 {
227 	zpool_status_t ret = check_status(zhp->zpool_config, FALSE);
228 
229 	if (ret >= NMSGID)
230 		*msgid = NULL;
231 	else
232 		*msgid = msgid_table[ret];
233 
234 	return (ret);
235 }
236 
237 zpool_status_t
238 zpool_import_status(nvlist_t *config, char **msgid)
239 {
240 	zpool_status_t ret = check_status(config, TRUE);
241 
242 	if (ret >= NMSGID)
243 		*msgid = NULL;
244 	else
245 		*msgid = msgid_table[ret];
246 
247 	return (ret);
248 }
249