1 /*- 2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions, and the following disclaimer, 10 * without modification. 11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12 * substantially similar to the "NO WARRANTY" disclaimer below 13 * ("Disclaimer") and any redistribution must be conditioned upon 14 * including a substantially similar Disclaimer requirement for further 15 * binary redistribution. 16 * 17 * NO WARRANTY 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGES. 29 * 30 * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31 */ 32 33 /** 34 * \file case_file.cc 35 * 36 * We keep case files for any leaf vdev that is not in the optimal state. 37 * However, we only serialize to disk those events that need to be preserved 38 * across reboots. For now, this is just a log of soft errors which we 39 * accumulate in order to mark a device as degraded. 40 */ 41 #include <sys/cdefs.h> 42 #include <sys/byteorder.h> 43 #include <sys/time.h> 44 45 #include <sys/fs/zfs.h> 46 47 #include <dirent.h> 48 #include <fcntl.h> 49 #include <iomanip> 50 #include <fstream> 51 #include <functional> 52 #include <sstream> 53 #include <syslog.h> 54 #include <unistd.h> 55 56 #include <libzutil.h> 57 #include <libzfs.h> 58 59 #include <list> 60 #include <map> 61 #include <string> 62 #include <vector> 63 64 #include <devdctl/guid.h> 65 #include <devdctl/event.h> 66 #include <devdctl/event_factory.h> 67 #include <devdctl/exception.h> 68 #include <devdctl/consumer.h> 69 70 #include "callout.h" 71 #include "vdev_iterator.h" 72 #include "zfsd_event.h" 73 #include "case_file.h" 74 #include "vdev.h" 75 #include "zfsd.h" 76 #include "zfsd_exception.h" 77 #include "zpool_list.h" 78 /*============================ Namespace Control =============================*/ 79 using std::hex; 80 using std::ifstream; 81 using std::stringstream; 82 using std::setfill; 83 using std::setw; 84 85 using DevdCtl::Event; 86 using DevdCtl::EventFactory; 87 using DevdCtl::EventList; 88 using DevdCtl::Guid; 89 using DevdCtl::ParseException; 90 91 /*--------------------------------- CaseFile ---------------------------------*/ 92 //- CaseFile Static Data ------------------------------------------------------- 93 94 CaseFileList CaseFile::s_activeCases; 95 const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 96 97 //- CaseFile Static Public Methods --------------------------------------------- 98 CaseFile * 99 CaseFile::Find(Guid poolGUID, Guid vdevGUID) 100 { 101 for (CaseFileList::iterator curCase = s_activeCases.begin(); 102 curCase != s_activeCases.end(); curCase++) { 103 104 if (((*curCase)->PoolGUID() != poolGUID 105 && Guid::InvalidGuid() != poolGUID) 106 || (*curCase)->VdevGUID() != vdevGUID) 107 continue; 108 109 /* 110 * We only carry one active case per-vdev. 111 */ 112 return (*curCase); 113 } 114 return (NULL); 115 } 116 117 void 118 CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases) 119 { 120 for (CaseFileList::iterator curCase = s_activeCases.begin(); 121 curCase != s_activeCases.end(); curCase++) { 122 if (((*curCase)->PoolGUID() != poolGUID && 123 Guid::InvalidGuid() != poolGUID) || 124 (*curCase)->VdevGUID() != vdevGUID) 125 continue; 126 127 /* 128 * We can have multiple cases for spare vdevs 129 */ 130 cases.push_back(*curCase); 131 if (!(*curCase)->IsSpare()) { 132 return; 133 } 134 } 135 } 136 137 CaseFile * 138 CaseFile::Find(const string &physPath) 139 { 140 CaseFile *result = NULL; 141 142 for (CaseFileList::iterator curCase = s_activeCases.begin(); 143 curCase != s_activeCases.end(); curCase++) { 144 145 if ((*curCase)->PhysicalPath() != physPath) 146 continue; 147 148 if (result != NULL) { 149 syslog(LOG_WARNING, "Multiple casefiles found for " 150 "physical path %s. " 151 "This is most likely a bug in zfsd", 152 physPath.c_str()); 153 } 154 result = *curCase; 155 } 156 return (result); 157 } 158 159 160 void 161 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 162 { 163 CaseFileList::iterator casefile; 164 for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 165 CaseFileList::iterator next = casefile; 166 next++; 167 if (poolGUID == (*casefile)->PoolGUID()) 168 (*casefile)->ReEvaluate(event); 169 casefile = next; 170 } 171 } 172 173 CaseFile & 174 CaseFile::Create(Vdev &vdev) 175 { 176 CaseFile *activeCase; 177 178 activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 179 if (activeCase == NULL) 180 activeCase = new CaseFile(vdev); 181 182 return (*activeCase); 183 } 184 185 void 186 CaseFile::DeSerialize() 187 { 188 struct dirent **caseFiles; 189 190 int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 191 DeSerializeSelector, /*compar*/NULL)); 192 193 if (numCaseFiles == -1) 194 return; 195 if (numCaseFiles == 0) { 196 free(caseFiles); 197 return; 198 } 199 200 for (int i = 0; i < numCaseFiles; i++) { 201 202 DeSerializeFile(caseFiles[i]->d_name); 203 free(caseFiles[i]); 204 } 205 free(caseFiles); 206 } 207 208 bool 209 CaseFile::Empty() 210 { 211 return (s_activeCases.empty()); 212 } 213 214 void 215 CaseFile::LogAll() 216 { 217 for (CaseFileList::iterator curCase = s_activeCases.begin(); 218 curCase != s_activeCases.end(); curCase++) 219 (*curCase)->Log(); 220 } 221 222 void 223 CaseFile::PurgeAll() 224 { 225 /* 226 * Serialize casefiles before deleting them so that they can be reread 227 * and revalidated during BuildCaseFiles. 228 * CaseFiles remove themselves from this list on destruction. 229 */ 230 while (s_activeCases.size() != 0) { 231 CaseFile *casefile = s_activeCases.front(); 232 casefile->Serialize(); 233 delete casefile; 234 } 235 236 } 237 238 int 239 CaseFile::IsSpare() 240 { 241 return (m_is_spare); 242 } 243 244 //- CaseFile Public Methods ---------------------------------------------------- 245 bool 246 CaseFile::RefreshVdevState() 247 { 248 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 249 zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); 250 if (casePool == NULL) 251 return (false); 252 253 Vdev vd(casePool, CaseVdev(casePool)); 254 if (vd.DoesNotExist()) 255 return (false); 256 257 m_vdevState = vd.State(); 258 m_vdevPhysPath = vd.PhysicalPath(); 259 m_vdevName = vd.Name(casePool, false); 260 return (true); 261 } 262 263 bool 264 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 265 { 266 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 267 zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 268 int flags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE; 269 270 if (pool == NULL || !RefreshVdevState()) { 271 /* 272 * The pool or vdev for this case file is no longer 273 * part of the configuration. This can happen 274 * if we process a device arrival notification 275 * before seeing the ZFS configuration change 276 * event. 277 */ 278 syslog(LOG_INFO, 279 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 280 "Closing\n", 281 PoolGUIDString().c_str(), 282 VdevGUIDString().c_str()); 283 Close(); 284 285 /* 286 * Since this event was not used to close this 287 * case, do not report it as consumed. 288 */ 289 return (/*consumed*/false); 290 } 291 292 if (VdevState() > VDEV_STATE_FAULTED) { 293 /* 294 * For now, newly discovered devices only help for 295 * devices that are missing. In the future, we might 296 * use a newly inserted spare to replace a degraded 297 * or faulted device. 298 */ 299 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 300 PoolGUIDString().c_str(), VdevGUIDString().c_str()); 301 return (/*consumed*/false); 302 } 303 if (VdevState() == VDEV_STATE_OFFLINE) { 304 /* 305 * OFFLINE is an administrative decision. No need for zfsd to 306 * do anything. 307 */ 308 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 309 PoolGUIDString().c_str(), VdevGUIDString().c_str()); 310 return (/*consumed*/false); 311 } 312 313 if (vdev != NULL 314 && ( vdev->PoolGUID() == m_poolGUID 315 || vdev->PoolGUID() == Guid::InvalidGuid()) 316 && vdev->GUID() == m_vdevGUID) { 317 318 if (IsSpare()) 319 flags |= ZFS_ONLINE_SPARE; 320 if (zpool_vdev_online(pool, vdev->GUIDString().c_str(), 321 flags, &m_vdevState) != 0) { 322 syslog(LOG_ERR, 323 "Failed to online vdev(%s/%s:%s): %s: %s\n", 324 zpool_get_name(pool), vdev->GUIDString().c_str(), 325 devPath.c_str(), libzfs_error_action(g_zfsHandle), 326 libzfs_error_description(g_zfsHandle)); 327 return (/*consumed*/false); 328 } 329 330 syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 331 zpool_get_name(pool), vdev->GUIDString().c_str(), 332 devPath.c_str(), 333 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 334 335 /* 336 * Check the vdev state post the online action to see 337 * if we can retire this case. 338 */ 339 CloseIfSolved(); 340 341 return (/*consumed*/true); 342 } 343 344 /* 345 * If the auto-replace policy is enabled, and we have physical 346 * path information, try a physical path replacement. 347 */ 348 if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 349 syslog(LOG_INFO, 350 "CaseFile(%s:%s:%s): AutoReplace not set. " 351 "Ignoring device insertion.\n", 352 PoolGUIDString().c_str(), 353 VdevGUIDString().c_str(), 354 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 355 return (/*consumed*/false); 356 } 357 358 if (PhysicalPath().empty()) { 359 syslog(LOG_INFO, 360 "CaseFile(%s:%s:%s): No physical path information. " 361 "Ignoring device insertion.\n", 362 PoolGUIDString().c_str(), 363 VdevGUIDString().c_str(), 364 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 365 return (/*consumed*/false); 366 } 367 368 if (physPath != PhysicalPath()) { 369 syslog(LOG_INFO, 370 "CaseFile(%s:%s:%s): Physical path mismatch. " 371 "Ignoring device insertion.\n", 372 PoolGUIDString().c_str(), 373 VdevGUIDString().c_str(), 374 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 375 return (/*consumed*/false); 376 } 377 378 /* Write a label on the newly inserted disk. */ 379 if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { 380 syslog(LOG_ERR, 381 "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 382 zpool_get_name(pool), VdevGUIDString().c_str(), 383 libzfs_error_action(g_zfsHandle), 384 libzfs_error_description(g_zfsHandle)); 385 return (/*consumed*/false); 386 } 387 388 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 389 PoolGUIDString().c_str(), VdevGUIDString().c_str(), 390 devPath.c_str()); 391 return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 392 } 393 394 bool 395 CaseFile::ReEvaluate(const ZfsEvent &event) 396 { 397 bool consumed(false); 398 399 if (event.Value("type") == "sysevent.fs.zfs.vdev_remove") { 400 /* 401 * The Vdev we represent has been removed from the 402 * configuration. This case is no longer of value. 403 */ 404 Close(); 405 406 return (/*consumed*/true); 407 } else if (event.Value("type") == "sysevent.fs.zfs.pool_destroy") { 408 /* This Pool has been destroyed. Discard the case */ 409 Close(); 410 411 return (/*consumed*/true); 412 } else if (event.Value("type") == "sysevent.fs.zfs.config_sync") { 413 RefreshVdevState(); 414 if (VdevState() < VDEV_STATE_HEALTHY && 415 VdevState() != VDEV_STATE_OFFLINE) 416 consumed = ActivateSpare(); 417 } 418 419 420 if (event.Value("class") == "resource.fs.zfs.removed") { 421 bool spare_activated; 422 423 if (!RefreshVdevState()) { 424 /* 425 * The pool or vdev for this case file is no longer 426 * part of the configuration. This can happen 427 * if we process a device arrival notification 428 * before seeing the ZFS configuration change 429 * event. 430 */ 431 syslog(LOG_INFO, 432 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 433 "unconfigured. Closing\n", 434 PoolGUIDString().c_str(), 435 VdevGUIDString().c_str()); 436 /* 437 * Close the case now so we won't waste cycles in the 438 * system rescan 439 */ 440 Close(); 441 442 /* 443 * Since this event was not used to close this 444 * case, do not report it as consumed. 445 */ 446 return (/*consumed*/false); 447 } 448 449 /* 450 * Discard any tentative I/O error events for 451 * this case. They were most likely caused by the 452 * hot-unplug of this device. 453 */ 454 PurgeTentativeEvents(); 455 456 /* Try to activate spares if they are available */ 457 spare_activated = ActivateSpare(); 458 459 /* 460 * Rescan the drives in the system to see if a recent 461 * drive arrival can be used to solve this case. 462 */ 463 ZfsDaemon::RequestSystemRescan(); 464 465 /* 466 * Consume the event if we successfully activated a spare. 467 * Otherwise, leave it in the unconsumed events list so that the 468 * future addition of a spare to this pool might be able to 469 * close the case 470 */ 471 consumed = spare_activated; 472 } else if (event.Value("class") == "resource.fs.zfs.statechange") { 473 RefreshVdevState(); 474 /* 475 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 476 * activate a hotspare. Otherwise, ignore the event 477 */ 478 if (VdevState() == VDEV_STATE_FAULTED || 479 VdevState() == VDEV_STATE_DEGRADED || 480 VdevState() == VDEV_STATE_CANT_OPEN) 481 (void) ActivateSpare(); 482 consumed = true; 483 } 484 else if (event.Value("class") == "ereport.fs.zfs.io" || 485 event.Value("class") == "ereport.fs.zfs.checksum" || 486 event.Value("class") == "ereport.fs.zfs.delay") { 487 488 m_tentativeEvents.push_front(event.DeepCopy()); 489 RegisterCallout(event); 490 consumed = true; 491 } 492 493 bool closed(CloseIfSolved()); 494 495 return (consumed || closed); 496 } 497 498 /* Find a Vdev containing the vdev with the given GUID */ 499 static nvlist_t* 500 find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) 501 { 502 nvlist_t **vdevChildren; 503 int error; 504 unsigned ch, numChildren; 505 506 error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, 507 &vdevChildren, &numChildren); 508 509 if (error != 0 || numChildren == 0) 510 return (NULL); 511 512 for (ch = 0; ch < numChildren; ch++) { 513 nvlist *result; 514 Vdev vdev(pool_config, vdevChildren[ch]); 515 516 if (vdev.GUID() == child_guid) 517 return (config); 518 519 result = find_parent(pool_config, vdevChildren[ch], child_guid); 520 if (result != NULL) 521 return (result); 522 } 523 524 return (NULL); 525 } 526 527 /* 528 * Returns true if spare 'a' should be tried before spare 'b' when 529 * replacing a failed vdev with the given characteristics. 530 * 531 * Ordering criteria (most to least significant): 532 * 1. Distributed spare matching the failed vdev's dRAID is preferred 533 * most (distributed spares rebuild faster than traditional spares). 534 * Regular spares (no TOP_GUID) come next. Non-matching distributed 535 * spares are tried last, as the kernel will reject them anyway. 536 * 2. Matching rotational is preferred over mismatching. 537 * 3. Large enough is preferred over too small. 538 * 4. Smaller size is preferred over bigger (best fit). 539 */ 540 static bool 541 spare_is_preferred(nvlist_t *a, nvlist_t *b, bool have_rotational, 542 uint64_t vdev_rotational, uint64_t vdev_size, uint64_t top_guid) 543 { 544 uint64_t a_top, b_top, a_rotational, b_rotational; 545 uint64_t a_size, b_size; 546 uint64_t *nvlist_array; 547 int a_pri, b_pri; 548 vdev_stat_t *vs; 549 uint_t c; 550 bool a_ok, b_ok; 551 552 a_top = b_top = 0; 553 (void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_TOP_GUID, &a_top); 554 (void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_TOP_GUID, &b_top); 555 a_pri = (a_top == 0) ? 1 : 556 (a_top == top_guid || top_guid == 0) ? 2 : 0; 557 b_pri = (b_top == 0) ? 1 : 558 (b_top == top_guid || top_guid == 0) ? 2 : 0; 559 if (a_pri != b_pri) 560 return (a_pri > b_pri); 561 562 if (have_rotational) { 563 a_rotational = b_rotational = 0; 564 (void) nvlist_lookup_uint64(a, 565 ZPOOL_CONFIG_VDEV_ROTATIONAL, &a_rotational); 566 (void) nvlist_lookup_uint64(b, 567 ZPOOL_CONFIG_VDEV_ROTATIONAL, &b_rotational); 568 if ((a_rotational == vdev_rotational) != 569 (b_rotational == vdev_rotational)) 570 return (a_rotational == vdev_rotational); 571 } 572 573 a_size = b_size = 0; 574 if (nvlist_lookup_uint64_array(a, ZPOOL_CONFIG_VDEV_STATS, 575 &nvlist_array, &c) == 0) { 576 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 577 a_size = vs->vs_rsize; 578 } 579 if (nvlist_lookup_uint64_array(b, ZPOOL_CONFIG_VDEV_STATS, 580 &nvlist_array, &c) == 0) { 581 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 582 b_size = vs->vs_rsize; 583 } 584 a_ok = (a_size >= vdev_size); 585 b_ok = (b_size >= vdev_size); 586 if (a_ok != b_ok) 587 return (a_ok); 588 return (a_size < b_size); 589 } 590 591 bool 592 CaseFile::ActivateSpare() { 593 nvlist_t *config, *nvroot, *parent_config; 594 nvlist_t *vdev_config, **spares, *spare; 595 uint64_t *nvlist_array; 596 const char *devPath, *poolname, *vdev_type; 597 uint64_t vdev_rotational, vdev_size, top_guid; 598 vdev_stat_t *vs; 599 u_int nspares, i, key; 600 uint_t nstats; 601 int error, j; 602 bool have_vdev_rotational; 603 604 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 605 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 606 if (zhp == NULL) { 607 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 608 "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); 609 return (false); 610 } 611 poolname = zpool_get_name(zhp); 612 config = zpool_get_config(zhp, NULL); 613 if (config == NULL) { 614 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 615 "config for pool %s", poolname); 616 return (false); 617 } 618 error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 619 if (error != 0){ 620 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 621 "tree for pool %s", poolname); 622 return (false); 623 } 624 625 parent_config = find_parent(config, nvroot, m_vdevGUID); 626 if (parent_config != NULL) { 627 const char *parent_type; 628 629 /* 630 * Don't activate spares for members of a "replacing" vdev. 631 * They're already dealt with. Sparing them will just drag out 632 * the resilver process. 633 */ 634 error = nvlist_lookup_string(parent_config, 635 ZPOOL_CONFIG_TYPE, &parent_type); 636 if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) 637 return (false); 638 } 639 640 /* 641 * Don't activate a spare if one is already working on this vdev. 642 */ 643 { 644 Vdev replaced(BeingReplacedBy(zhp)); 645 if (!replaced.DoesNotExist() && (replaced.IsResilvering() || 646 replaced.State() == VDEV_STATE_HEALTHY)) 647 return (false); 648 } 649 650 nspares = 0; 651 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 652 &nspares); 653 if (nspares == 0) { 654 /* The pool has no spares configured */ 655 syslog(LOG_INFO, "CaseFile::ActivateSpare: " 656 "No spares available for pool %s", poolname); 657 return (false); 658 } 659 660 /* 661 * Collect the failed vdev's parameters for optimal spare selection. 662 */ 663 vdev_rotational = vdev_size = top_guid = 0; 664 have_vdev_rotational = false; 665 vdev_config = VdevIterator(zhp).Find(m_vdevGUID); 666 if (vdev_config != NULL) { 667 have_vdev_rotational = (nvlist_lookup_uint64(vdev_config, 668 ZPOOL_CONFIG_VDEV_ROTATIONAL, &vdev_rotational) == 0); 669 if (nvlist_lookup_uint64_array(vdev_config, 670 ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) == 0) { 671 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 672 vdev_size = vs->vs_rsize; 673 } 674 (void) nvlist_lookup_uint64(vdev_config, 675 ZPOOL_CONFIG_TOP_GUID, &top_guid); 676 } 677 678 /* 679 * Build a sorted index array over the spares, so that better 680 * candidates are tried first. 681 */ 682 std::vector<u_int> order(nspares); 683 for (i = 0; i < nspares; i++) 684 order[i] = i; 685 for (i = 1; i < nspares; i++) { 686 key = order[i]; 687 j = (int)i - 1; 688 while (j >= 0 && spare_is_preferred(spares[key], 689 spares[order[j]], have_vdev_rotational, vdev_rotational, 690 vdev_size, top_guid)) { 691 order[j + 1] = order[j]; 692 j--; 693 } 694 order[j + 1] = key; 695 } 696 697 /* 698 * Try each spare in sorted order until one succeeds. 699 */ 700 for (i = 0; i < nspares; i++) { 701 spare = spares[order[i]]; 702 703 if (nvlist_lookup_uint64_array(spare, 704 ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 705 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 706 "find vdev stats for pool %s, spare %d", 707 poolname, order[i]); 708 continue; 709 } 710 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 711 712 if ((vs->vs_aux == VDEV_AUX_SPARED) 713 || (vs->vs_state != VDEV_STATE_HEALTHY)) 714 continue; 715 716 error = nvlist_lookup_string(spare, ZPOOL_CONFIG_PATH, 717 &devPath); 718 if (error != 0) { 719 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot " 720 "determine the path of pool %s, spare %d. " 721 "Error %d", poolname, order[i], error); 722 continue; 723 } 724 725 error = nvlist_lookup_string(spare, ZPOOL_CONFIG_TYPE, 726 &vdev_type); 727 if (error != 0) { 728 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot " 729 "determine the vdev type of pool %s, " 730 "spare %d. Error %d", 731 poolname, order[i], error); 732 continue; 733 } 734 735 if (Replace(vdev_type, devPath, /*isspare*/true)) 736 return (true); 737 } 738 739 return (false); 740 } 741 742 /* Does the argument event refer to a checksum error? */ 743 static bool 744 IsChecksumEvent(const Event* const event) 745 { 746 return ("ereport.fs.zfs.checksum" == event->Value("type")); 747 } 748 749 /* Does the argument event refer to an IO error? */ 750 static bool 751 IsIOEvent(const Event* const event) 752 { 753 return ("ereport.fs.zfs.io" == event->Value("type")); 754 } 755 756 /* Does the argument event refer to an IO delay? */ 757 static bool 758 IsDelayEvent(const Event* const event) 759 { 760 return ("ereport.fs.zfs.delay" == event->Value("type")); 761 } 762 763 void 764 CaseFile::RegisterCallout(const Event &event) 765 { 766 timeval now, countdown, elapsed, timestamp, zero, remaining; 767 /** 768 * The time ZFSD waits before promoting a tentative event 769 * into a permanent event. 770 */ 771 int sec = -1; 772 if (IsChecksumEvent(&event)) 773 sec = CaseFile::GetVdevProp(VDEV_PROP_CHECKSUM_T); 774 else if (IsIOEvent(&event)) 775 sec = CaseFile::GetVdevProp(VDEV_PROP_IO_T); 776 else if (IsDelayEvent(&event)) 777 sec = CaseFile::GetVdevProp(VDEV_PROP_SLOW_IO_T); 778 779 if (sec == -1) 780 sec = 60; /* default */ 781 782 timeval removeGracePeriod = { 783 sec, /*sec*/ 784 0 /*usec*/ 785 }; 786 787 gettimeofday(&now, 0); 788 timestamp = event.GetTimestamp(); 789 timersub(&now, ×tamp, &elapsed); 790 timersub(&removeGracePeriod, &elapsed, &countdown); 791 /* 792 * If countdown is <= zero, Reset the timer to the 793 * smallest positive time value instead 794 */ 795 timerclear(&zero); 796 if (timercmp(&countdown, &zero, <=)) { 797 timerclear(&countdown); 798 countdown.tv_usec = 1; 799 } 800 801 remaining = m_tentativeTimer.TimeRemaining(); 802 803 if (!m_tentativeTimer.IsPending() 804 || timercmp(&countdown, &remaining, <)) 805 m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 806 } 807 808 809 bool 810 CaseFile::CloseIfSolved() 811 { 812 if (m_events.empty() 813 && m_tentativeEvents.empty()) { 814 815 /* 816 * We currently do not track or take actions on 817 * devices in the degraded or faulted state. 818 * Once we have support for spare pools, we'll 819 * retain these cases so that any spares added in 820 * the future can be applied to them. 821 */ 822 switch (VdevState()) { 823 case VDEV_STATE_HEALTHY: 824 /* No need to keep cases for healthy vdevs */ 825 case VDEV_STATE_OFFLINE: 826 /* 827 * Offline is a deliberate administrative action. zfsd 828 * doesn't need to do anything for this state. 829 */ 830 Close(); 831 return (true); 832 case VDEV_STATE_REMOVED: 833 case VDEV_STATE_CANT_OPEN: 834 /* 835 * Keep open. We may solve it with a newly inserted 836 * device. 837 */ 838 case VDEV_STATE_FAULTED: 839 case VDEV_STATE_DEGRADED: 840 /* 841 * Keep open. We may solve it with the future 842 * addition of a spare to the pool 843 */ 844 case VDEV_STATE_UNKNOWN: 845 case VDEV_STATE_CLOSED: 846 /* 847 * Keep open? This may not be the correct behavior, 848 * but it's what we've always done 849 */ 850 ; 851 } 852 853 /* 854 * Re-serialize the case in order to remove any 855 * previous event data. 856 */ 857 Serialize(); 858 } 859 860 return (false); 861 } 862 863 void 864 CaseFile::Log() 865 { 866 syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 867 VdevGUIDString().c_str(), PhysicalPath().c_str()); 868 syslog(LOG_INFO, "\tVdev State = %s\n", 869 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 870 if (m_tentativeEvents.size() != 0) { 871 syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 872 for (EventList::iterator event(m_tentativeEvents.begin()); 873 event != m_tentativeEvents.end(); event++) 874 (*event)->Log(LOG_INFO); 875 } 876 if (m_events.size() != 0) { 877 syslog(LOG_INFO, "\t=== Events ===\n"); 878 for (EventList::iterator event(m_events.begin()); 879 event != m_events.end(); event++) 880 (*event)->Log(LOG_INFO); 881 } 882 } 883 884 //- CaseFile Static Protected Methods ------------------------------------------ 885 void 886 CaseFile::OnGracePeriodEnded(void *arg) 887 { 888 CaseFile &casefile(*static_cast<CaseFile *>(arg)); 889 890 casefile.OnGracePeriodEnded(); 891 } 892 893 int 894 CaseFile::DeSerializeSelector(const struct dirent *dirEntry) 895 { 896 uint64_t poolGUID; 897 uint64_t vdevGUID; 898 899 if (dirEntry->d_type == DT_REG 900 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 901 &poolGUID, &vdevGUID) == 2) 902 return (1); 903 return (0); 904 } 905 906 void 907 CaseFile::DeSerializeFile(const char *fileName) 908 { 909 string fullName(s_caseFilePath + '/' + fileName); 910 CaseFile *existingCaseFile(NULL); 911 CaseFile *caseFile(NULL); 912 913 try { 914 uint64_t poolGUID; 915 uint64_t vdevGUID; 916 nvlist_t *vdevConf; 917 918 if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 919 &poolGUID, &vdevGUID) != 2) { 920 throw ZfsdException("CaseFile::DeSerialize: " 921 "Unintelligible CaseFile filename %s.\n", fileName); 922 } 923 existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 924 if (existingCaseFile != NULL) { 925 /* 926 * If the vdev is already degraded or faulted, 927 * there's no point in keeping the state around 928 * that we use to put a drive into the degraded 929 * state. However, if the vdev is simply missing, 930 * preserve the case data in the hopes that it will 931 * return. 932 */ 933 caseFile = existingCaseFile; 934 vdev_state curState(caseFile->VdevState()); 935 if (curState > VDEV_STATE_CANT_OPEN 936 && curState < VDEV_STATE_HEALTHY) { 937 unlink(fileName); 938 return; 939 } 940 } else { 941 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 942 if (zpl.empty() 943 || (vdevConf = VdevIterator(zpl.front()) 944 .Find(vdevGUID)) == NULL) { 945 /* 946 * Either the pool no longer exists 947 * or this vdev is no longer a member of 948 * the pool. 949 */ 950 unlink(fullName.c_str()); 951 return; 952 } 953 954 /* 955 * Any vdev we find that does not have a case file 956 * must be in the healthy state and thus worthy of 957 * continued SERD data tracking. 958 */ 959 caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 960 } 961 962 ifstream caseStream(fullName.c_str()); 963 if (!caseStream) 964 throw ZfsdException("CaseFile::DeSerialize: Unable to " 965 "read %s.\n", fileName); 966 967 caseFile->DeSerialize(caseStream); 968 } catch (const ParseException &exp) { 969 970 exp.Log(); 971 if (caseFile != existingCaseFile) 972 delete caseFile; 973 974 /* 975 * Since we can't parse the file, unlink it so we don't 976 * trip over it again. 977 */ 978 unlink(fileName); 979 } catch (const ZfsdException &zfsException) { 980 981 zfsException.Log(); 982 if (caseFile != existingCaseFile) 983 delete caseFile; 984 } 985 } 986 987 //- CaseFile Protected Methods ------------------------------------------------- 988 CaseFile::CaseFile(const Vdev &vdev) 989 : m_poolGUID(vdev.PoolGUID()), 990 m_vdevGUID(vdev.GUID()), 991 m_vdevState(vdev.State()), 992 m_vdevPhysPath(vdev.PhysicalPath()), 993 m_is_spare(vdev.IsSpare()) 994 { 995 stringstream guidString; 996 997 guidString << m_vdevGUID; 998 m_vdevGUIDString = guidString.str(); 999 guidString.str(""); 1000 guidString << m_poolGUID; 1001 m_poolGUIDString = guidString.str(); 1002 1003 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1004 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1005 m_vdevName = vdev.Name(zhp, false); 1006 1007 s_activeCases.push_back(this); 1008 1009 syslog(LOG_INFO, "Creating new CaseFile:\n"); 1010 Log(); 1011 } 1012 1013 CaseFile::~CaseFile() 1014 { 1015 PurgeEvents(); 1016 PurgeTentativeEvents(); 1017 m_tentativeTimer.Stop(); 1018 s_activeCases.remove(this); 1019 } 1020 1021 void 1022 CaseFile::PurgeEvents() 1023 { 1024 for (EventList::iterator event(m_events.begin()); 1025 event != m_events.end(); event++) 1026 delete *event; 1027 1028 m_events.clear(); 1029 } 1030 1031 void 1032 CaseFile::PurgeTentativeEvents() 1033 { 1034 for (EventList::iterator event(m_tentativeEvents.begin()); 1035 event != m_tentativeEvents.end(); event++) 1036 delete *event; 1037 1038 m_tentativeEvents.clear(); 1039 } 1040 1041 void 1042 CaseFile::SerializeEvList(const EventList events, int fd, 1043 const char* prefix) const 1044 { 1045 if (events.empty()) 1046 return; 1047 for (EventList::const_iterator curEvent = events.begin(); 1048 curEvent != events.end(); curEvent++) { 1049 const string &eventString((*curEvent)->GetEventString()); 1050 1051 // TODO: replace many write(2) calls with a single writev(2) 1052 if (prefix) 1053 write(fd, prefix, strlen(prefix)); 1054 write(fd, eventString.c_str(), eventString.length()); 1055 } 1056 } 1057 1058 void 1059 CaseFile::Serialize() 1060 { 1061 stringstream saveFile; 1062 1063 saveFile << setfill('0') 1064 << s_caseFilePath << "/" 1065 << "pool_" << PoolGUIDString() 1066 << "_vdev_" << VdevGUIDString() 1067 << ".case"; 1068 1069 if (m_events.empty() && m_tentativeEvents.empty()) { 1070 unlink(saveFile.str().c_str()); 1071 return; 1072 } 1073 1074 int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 1075 if (fd == -1) { 1076 syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 1077 saveFile.str().c_str()); 1078 return; 1079 } 1080 SerializeEvList(m_events, fd); 1081 SerializeEvList(m_tentativeEvents, fd, "tentative "); 1082 close(fd); 1083 } 1084 1085 /* 1086 * XXX: This method assumes that events may not contain embedded newlines. If 1087 * ever events can contain embedded newlines, then CaseFile must switch 1088 * serialization formats 1089 */ 1090 void 1091 CaseFile::DeSerialize(ifstream &caseStream) 1092 { 1093 string evString; 1094 const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 1095 1096 caseStream >> std::noskipws >> std::ws; 1097 while (caseStream.good()) { 1098 /* 1099 * Outline: 1100 * read the beginning of a line and check it for 1101 * "tentative". If found, discard "tentative". 1102 * Create a new event 1103 * continue 1104 */ 1105 EventList* destEvents; 1106 const string tentFlag("tentative "); 1107 string line; 1108 std::stringbuf lineBuf; 1109 1110 caseStream.get(lineBuf); 1111 caseStream.ignore(); /*discard the newline character*/ 1112 line = lineBuf.str(); 1113 if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 1114 /* Discard "tentative" */ 1115 line.erase(0, tentFlag.size()); 1116 destEvents = &m_tentativeEvents; 1117 } else { 1118 destEvents = &m_events; 1119 } 1120 Event *event(Event::CreateEvent(factory, line)); 1121 if (event != NULL) { 1122 destEvents->push_back(event); 1123 RegisterCallout(*event); 1124 } 1125 } 1126 } 1127 1128 void 1129 CaseFile::Close() 1130 { 1131 /* 1132 * This case is no longer relevant. Clean up our 1133 * serialization file, and delete the case. 1134 */ 1135 syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 1136 PoolGUIDString().c_str(), VdevGUIDString().c_str(), 1137 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 1138 1139 /* 1140 * Serialization of a Case with no event data, clears the 1141 * Serialization data for that event. 1142 */ 1143 PurgeEvents(); 1144 Serialize(); 1145 1146 delete this; 1147 } 1148 1149 void 1150 CaseFile::OnGracePeriodEnded() 1151 { 1152 bool should_fault, should_degrade; 1153 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1154 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1155 1156 m_events.splice(m_events.begin(), m_tentativeEvents); 1157 should_fault = ShouldFault(); 1158 should_degrade = ShouldDegrade(); 1159 1160 if (should_fault || should_degrade) { 1161 if (zhp == NULL 1162 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 1163 /* 1164 * Either the pool no longer exists 1165 * or this vdev is no longer a member of 1166 * the pool. 1167 */ 1168 Close(); 1169 return; 1170 } 1171 1172 } 1173 1174 /* A fault condition has priority over a degrade condition */ 1175 if (ShouldFault()) { 1176 /* Fault the vdev and close the case. */ 1177 if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 1178 VDEV_AUX_ERR_EXCEEDED) == 0) { 1179 syslog(LOG_INFO, "Faulting vdev(%s/%s)", 1180 PoolGUIDString().c_str(), 1181 VdevGUIDString().c_str()); 1182 Close(); 1183 return; 1184 } 1185 else { 1186 syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 1187 PoolGUIDString().c_str(), 1188 VdevGUIDString().c_str(), 1189 libzfs_error_action(g_zfsHandle), 1190 libzfs_error_description(g_zfsHandle)); 1191 } 1192 } 1193 else if (ShouldDegrade()) { 1194 /* Degrade the vdev and close the case. */ 1195 if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 1196 VDEV_AUX_ERR_EXCEEDED) == 0) { 1197 syslog(LOG_INFO, "Degrading vdev(%s/%s)", 1198 PoolGUIDString().c_str(), 1199 VdevGUIDString().c_str()); 1200 Close(); 1201 return; 1202 } 1203 else { 1204 syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 1205 PoolGUIDString().c_str(), 1206 VdevGUIDString().c_str(), 1207 libzfs_error_action(g_zfsHandle), 1208 libzfs_error_description(g_zfsHandle)); 1209 } 1210 } 1211 Serialize(); 1212 } 1213 1214 Vdev 1215 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 1216 Vdev vd(zhp, CaseVdev(zhp)); 1217 std::list<Vdev> children; 1218 std::list<Vdev>::iterator children_it; 1219 1220 Vdev parent(vd.Parent()); 1221 Vdev replacing(NonexistentVdev); 1222 1223 /* 1224 * To determine whether we are being replaced by another spare that 1225 * is still working, then make sure that it is currently spared and 1226 * that the spare is either resilvering or healthy. If any of these 1227 * conditions fail, then we are not being replaced by a spare. 1228 * 1229 * If the spare is healthy, then the case file should be closed very 1230 * soon after this check. 1231 */ 1232 if (parent.DoesNotExist() 1233 || parent.Name(zhp, /*verbose*/false) != "spare") 1234 return (NonexistentVdev); 1235 1236 children = parent.Children(); 1237 children_it = children.begin(); 1238 for (;children_it != children.end(); children_it++) { 1239 Vdev child = *children_it; 1240 1241 /* Skip our vdev. */ 1242 if (child.GUID() == VdevGUID()) 1243 continue; 1244 /* 1245 * Accept the first child that doesn't match our GUID, or 1246 * any resilvering/healthy device if one exists. 1247 */ 1248 if (replacing.DoesNotExist() || child.IsResilvering() 1249 || child.State() == VDEV_STATE_HEALTHY) 1250 replacing = child; 1251 } 1252 1253 return (replacing); 1254 } 1255 1256 bool 1257 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 1258 nvlist_t *nvroot, *newvd; 1259 const char *poolname; 1260 string oldstr(VdevGUIDString()); 1261 bool retval = true; 1262 1263 /* Figure out what pool we're working on */ 1264 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1265 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1266 if (zhp == NULL) { 1267 syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1268 "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); 1269 return (false); 1270 } 1271 poolname = zpool_get_name(zhp); 1272 Vdev vd(zhp, CaseVdev(zhp)); 1273 Vdev replaced(BeingReplacedBy(zhp)); 1274 1275 if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1276 /* If we are already being replaced by a working spare, pass. */ 1277 if (replaced.IsResilvering() 1278 || replaced.State() == VDEV_STATE_HEALTHY) { 1279 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1280 "replaced", VdevGUIDString().c_str(), path); 1281 return (/*consumed*/false); 1282 } 1283 /* 1284 * If we have already been replaced by a spare, but that spare 1285 * is broken, we must spare the spare, not the original device. 1286 */ 1287 oldstr = replaced.GUIDString(); 1288 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1289 "broken spare %s instead", VdevGUIDString().c_str(), 1290 path, oldstr.c_str()); 1291 } 1292 1293 /* 1294 * Build a root vdev/leaf vdev configuration suitable for 1295 * zpool_vdev_attach. Only enough data for the kernel to find 1296 * the device (i.e. type and disk device node path) are needed. 1297 */ 1298 nvroot = NULL; 1299 newvd = NULL; 1300 1301 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1302 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1303 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1304 "configuration data.", poolname, oldstr.c_str()); 1305 if (nvroot != NULL) 1306 nvlist_free(nvroot); 1307 return (false); 1308 } 1309 if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1310 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1311 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1312 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1313 &newvd, 1) != 0) { 1314 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1315 "configuration data.", poolname, oldstr.c_str()); 1316 nvlist_free(newvd); 1317 nvlist_free(nvroot); 1318 return (true); 1319 } 1320 1321 /* Data was copied when added to the root vdev. */ 1322 nvlist_free(newvd); 1323 1324 /* Prefer sequential resilvering for distributed spares. */ 1325 retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1326 /*replace*/B_TRUE, 1327 strcmp(vdev_type, VDEV_TYPE_DRAID_SPARE) == 0 ? 1328 B_TRUE : B_FALSE) == 0); 1329 if (retval) 1330 syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1331 poolname, oldstr.c_str(), path); 1332 else 1333 syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1334 poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1335 libzfs_error_description(g_zfsHandle)); 1336 nvlist_free(nvroot); 1337 1338 return (retval); 1339 } 1340 1341 /* Lookup the vdev prop. Used for checksum, IO, or slow IO props */ 1342 int 1343 CaseFile::GetVdevProp(vdev_prop_t vdev_prop) const 1344 { 1345 char val[ZFS_MAXPROPLEN]; 1346 zprop_source_t srctype; 1347 DevdCtl::Guid poolGUID = PoolGUID(); 1348 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 1349 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1350 1351 char *prop_str = (char *) vdev_prop_to_name(vdev_prop); 1352 if (zhp == NULL || zpool_get_vdev_prop(zhp, m_vdevName.c_str(), 1353 vdev_prop, prop_str, val, sizeof (val), &srctype, B_FALSE) != 0) 1354 return (-1); 1355 1356 /* we'll get "-" from libzfs for a prop that is not set */ 1357 if (zfs_isnumber(val) == B_FALSE) 1358 return (-1); 1359 1360 return (atoi(val)); 1361 } 1362 1363 bool 1364 CaseFile::ShouldDegrade() const 1365 { 1366 int checksum_n = GetVdevProp(VDEV_PROP_CHECKSUM_N); 1367 if (checksum_n == -1) 1368 checksum_n = DEFAULT_ZFS_DEGRADE_IO_COUNT; 1369 return (std::count_if(m_events.begin(), m_events.end(), 1370 IsChecksumEvent) > checksum_n); 1371 } 1372 1373 bool 1374 CaseFile::ShouldFault() const 1375 { 1376 bool should_fault_for_io, should_fault_for_delay; 1377 int io_n = GetVdevProp(VDEV_PROP_IO_N); 1378 int slow_io_n = GetVdevProp(VDEV_PROP_SLOW_IO_N); 1379 1380 if (io_n == -1) 1381 io_n = DEFAULT_ZFS_DEGRADE_IO_COUNT; 1382 if (slow_io_n == -1) 1383 slow_io_n = DEFAULT_ZFS_FAULT_SLOW_IO_COUNT; 1384 1385 should_fault_for_io = std::count_if(m_events.begin(), m_events.end(), 1386 IsIOEvent) > io_n; 1387 should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(), 1388 IsDelayEvent) > slow_io_n; 1389 1390 return (should_fault_for_io || should_fault_for_delay); 1391 } 1392 1393 nvlist_t * 1394 CaseFile::CaseVdev(zpool_handle_t *zhp) const 1395 { 1396 return (VdevIterator(zhp).Find(VdevGUID())); 1397 } 1398