1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012 by Delphix. All rights reserved. 25 */ 26 27 /* 28 * This file contains the functions which analyze the status of a pool. This 29 * include both the status of an active pool, as well as the status exported 30 * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 31 * the pool. This status is independent (to a certain degree) from the state of 32 * the pool. A pool's state describes only whether or not it is capable of 33 * providing the necessary fault tolerance for data. The status describes the 34 * overall status of devices. A pool that is online can still have a device 35 * that is experiencing errors. 36 * 37 * Only a subset of the possible faults can be detected using 'zpool status', 38 * and not all possible errors correspond to a FMA message ID. The explanation 39 * is left up to the caller, depending on whether it is a live pool or an 40 * import. 41 */ 42 43 #include <libzfs.h> 44 #include <string.h> 45 #include <unistd.h> 46 #include "libzfs_impl.h" 47 48 /* 49 * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 50 * in libzfs.h. Note that there are some status results which go past the end 51 * of this table, and hence have no associated message ID. 52 */ 53 static char *zfs_msgid_table[] = { 54 "ZFS-8000-14", 55 "ZFS-8000-2Q", 56 "ZFS-8000-3C", 57 "ZFS-8000-4J", 58 "ZFS-8000-5E", 59 "ZFS-8000-6X", 60 "ZFS-8000-72", 61 "ZFS-8000-8A", 62 "ZFS-8000-9P", 63 "ZFS-8000-A5", 64 "ZFS-8000-EY", 65 "ZFS-8000-HC", 66 "ZFS-8000-JQ", 67 "ZFS-8000-K4", 68 }; 69 70 #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 71 72 /* ARGSUSED */ 73 static int 74 vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 75 { 76 return (state == VDEV_STATE_CANT_OPEN && 77 aux == VDEV_AUX_OPEN_FAILED); 78 } 79 80 /* ARGSUSED */ 81 static int 82 vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 83 { 84 return (state == VDEV_STATE_FAULTED); 85 } 86 87 /* ARGSUSED */ 88 static int 89 vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 90 { 91 return (state == VDEV_STATE_DEGRADED || errs != 0); 92 } 93 94 /* ARGSUSED */ 95 static int 96 vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 97 { 98 return (state == VDEV_STATE_CANT_OPEN); 99 } 100 101 /* ARGSUSED */ 102 static int 103 vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 104 { 105 return (state == VDEV_STATE_OFFLINE); 106 } 107 108 /* ARGSUSED */ 109 static int 110 vdev_removed(uint64_t state, uint64_t aux, uint64_t errs) 111 { 112 return (state == VDEV_STATE_REMOVED); 113 } 114 115 /* 116 * Detect if any leaf devices that have seen errors or could not be opened. 117 */ 118 static boolean_t 119 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 120 { 121 nvlist_t **child; 122 vdev_stat_t *vs; 123 uint_t c, children; 124 char *type; 125 126 /* 127 * Ignore problems within a 'replacing' vdev, since we're presumably in 128 * the process of repairing any such errors, and don't want to call them 129 * out again. We'll pick up the fact that a resilver is happening 130 * later. 131 */ 132 verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 133 if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 134 return (B_FALSE); 135 136 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 137 &children) == 0) { 138 for (c = 0; c < children; c++) 139 if (find_vdev_problem(child[c], func)) 140 return (B_TRUE); 141 } else { 142 verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, 143 (uint64_t **)&vs, &c) == 0); 144 145 if (func(vs->vs_state, vs->vs_aux, 146 vs->vs_read_errors + 147 vs->vs_write_errors + 148 vs->vs_checksum_errors)) 149 return (B_TRUE); 150 } 151 152 return (B_FALSE); 153 } 154 155 /* 156 * Active pool health status. 157 * 158 * To determine the status for a pool, we make several passes over the config, 159 * picking the most egregious error we find. In order of importance, we do the 160 * following: 161 * 162 * - Check for a complete and valid configuration 163 * - Look for any faulted or missing devices in a non-replicated config 164 * - Check for any data errors 165 * - Check for any faulted or missing devices in a replicated config 166 * - Look for any devices showing errors 167 * - Check for any resilvering devices 168 * 169 * There can obviously be multiple errors within a single pool, so this routine 170 * only picks the most damaging of all the current errors to report. 171 */ 172 static zpool_status_t 173 check_status(nvlist_t *config, boolean_t isimport) 174 { 175 nvlist_t *nvroot; 176 vdev_stat_t *vs; 177 pool_scan_stat_t *ps = NULL; 178 uint_t vsc, psc; 179 uint64_t nerr; 180 uint64_t version; 181 uint64_t stateval; 182 uint64_t suspended; 183 uint64_t hostid = 0; 184 185 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 186 &version) == 0); 187 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 188 &nvroot) == 0); 189 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 190 (uint64_t **)&vs, &vsc) == 0); 191 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 192 &stateval) == 0); 193 194 /* 195 * Currently resilvering a vdev 196 */ 197 (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, 198 (uint64_t **)&ps, &psc); 199 if (ps && ps->pss_func == POOL_SCAN_RESILVER && 200 ps->pss_state == DSS_SCANNING) 201 return (ZPOOL_STATUS_RESILVERING); 202 203 /* 204 * Pool last accessed by another system. 205 */ 206 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 207 if (hostid != 0 && (unsigned long)hostid != gethostid() && 208 stateval == POOL_STATE_ACTIVE) 209 return (ZPOOL_STATUS_HOSTID_MISMATCH); 210 211 /* 212 * Newer on-disk version. 213 */ 214 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 215 vs->vs_aux == VDEV_AUX_VERSION_NEWER) 216 return (ZPOOL_STATUS_VERSION_NEWER); 217 218 /* 219 * Unsupported feature(s). 220 */ 221 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 222 vs->vs_aux == VDEV_AUX_UNSUP_FEAT) { 223 nvlist_t *nvinfo; 224 225 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, 226 &nvinfo) == 0); 227 if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY)) 228 return (ZPOOL_STATUS_UNSUP_FEAT_WRITE); 229 return (ZPOOL_STATUS_UNSUP_FEAT_READ); 230 } 231 232 /* 233 * Check that the config is complete. 234 */ 235 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 236 vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 237 return (ZPOOL_STATUS_BAD_GUID_SUM); 238 239 /* 240 * Check whether the pool has suspended due to failed I/O. 241 */ 242 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, 243 &suspended) == 0) { 244 if (suspended == ZIO_FAILURE_MODE_CONTINUE) 245 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); 246 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 247 } 248 249 /* 250 * Could not read a log. 251 */ 252 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 253 vs->vs_aux == VDEV_AUX_BAD_LOG) { 254 return (ZPOOL_STATUS_BAD_LOG); 255 } 256 257 /* 258 * Bad devices in non-replicated config. 259 */ 260 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 261 find_vdev_problem(nvroot, vdev_faulted)) 262 return (ZPOOL_STATUS_FAULTED_DEV_NR); 263 264 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 265 find_vdev_problem(nvroot, vdev_missing)) 266 return (ZPOOL_STATUS_MISSING_DEV_NR); 267 268 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 269 find_vdev_problem(nvroot, vdev_broken)) 270 return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 271 272 /* 273 * Corrupted pool metadata 274 */ 275 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 276 vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 277 return (ZPOOL_STATUS_CORRUPT_POOL); 278 279 /* 280 * Persistent data errors. 281 */ 282 if (!isimport) { 283 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 284 &nerr) == 0 && nerr != 0) 285 return (ZPOOL_STATUS_CORRUPT_DATA); 286 } 287 288 /* 289 * Missing devices in a replicated config. 290 */ 291 if (find_vdev_problem(nvroot, vdev_faulted)) 292 return (ZPOOL_STATUS_FAULTED_DEV_R); 293 if (find_vdev_problem(nvroot, vdev_missing)) 294 return (ZPOOL_STATUS_MISSING_DEV_R); 295 if (find_vdev_problem(nvroot, vdev_broken)) 296 return (ZPOOL_STATUS_CORRUPT_LABEL_R); 297 298 /* 299 * Devices with errors 300 */ 301 if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 302 return (ZPOOL_STATUS_FAILING_DEV); 303 304 /* 305 * Offlined devices 306 */ 307 if (find_vdev_problem(nvroot, vdev_offlined)) 308 return (ZPOOL_STATUS_OFFLINE_DEV); 309 310 /* 311 * Removed device 312 */ 313 if (find_vdev_problem(nvroot, vdev_removed)) 314 return (ZPOOL_STATUS_REMOVED_DEV); 315 316 /* 317 * Outdated, but usable, version 318 */ 319 if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION) 320 return (ZPOOL_STATUS_VERSION_OLDER); 321 322 return (ZPOOL_STATUS_OK); 323 } 324 325 zpool_status_t 326 zpool_get_status(zpool_handle_t *zhp, char **msgid) 327 { 328 zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 329 330 if (ret >= NMSGID) 331 *msgid = NULL; 332 else 333 *msgid = zfs_msgid_table[ret]; 334 335 return (ret); 336 } 337 338 zpool_status_t 339 zpool_import_status(nvlist_t *config, char **msgid) 340 { 341 zpool_status_t ret = check_status(config, B_TRUE); 342 343 if (ret >= NMSGID) 344 *msgid = NULL; 345 else 346 *msgid = zfs_msgid_table[ret]; 347 348 return (ret); 349 } 350 351 static void 352 dump_ddt_stat(const ddt_stat_t *dds, int h) 353 { 354 char refcnt[6]; 355 char blocks[6], lsize[6], psize[6], dsize[6]; 356 char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; 357 358 if (dds == NULL || dds->dds_blocks == 0) 359 return; 360 361 if (h == -1) 362 (void) strcpy(refcnt, "Total"); 363 else 364 zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt)); 365 366 zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks)); 367 zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize)); 368 zfs_nicenum(dds->dds_psize, psize, sizeof (psize)); 369 zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize)); 370 zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks)); 371 zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize)); 372 zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize)); 373 zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize)); 374 375 (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 376 refcnt, 377 blocks, lsize, psize, dsize, 378 ref_blocks, ref_lsize, ref_psize, ref_dsize); 379 } 380 381 /* 382 * Print the DDT histogram and the column totals. 383 */ 384 void 385 zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh) 386 { 387 int h; 388 389 (void) printf("\n"); 390 391 (void) printf("bucket " 392 " allocated " 393 " referenced \n"); 394 (void) printf("______ " 395 "______________________________ " 396 "______________________________\n"); 397 398 (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 399 "refcnt", 400 "blocks", "LSIZE", "PSIZE", "DSIZE", 401 "blocks", "LSIZE", "PSIZE", "DSIZE"); 402 403 (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 404 "------", 405 "------", "-----", "-----", "-----", 406 "------", "-----", "-----", "-----"); 407 408 for (h = 0; h < 64; h++) 409 dump_ddt_stat(&ddh->ddh_stat[h], h); 410 411 dump_ddt_stat(dds_total, -1); 412 413 (void) printf("\n"); 414 } 415