1 /*- 2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions, and the following disclaimer, 10 * without modification. 11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12 * substantially similar to the "NO WARRANTY" disclaimer below 13 * ("Disclaimer") and any redistribution must be conditioned upon 14 * including a substantially similar Disclaimer requirement for further 15 * binary redistribution. 16 * 17 * NO WARRANTY 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGES. 29 * 30 * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31 */ 32 33 /** 34 * \file case_file.cc 35 * 36 * We keep case files for any leaf vdev that is not in the optimal state. 37 * However, we only serialize to disk those events that need to be preserved 38 * across reboots. For now, this is just a log of soft errors which we 39 * accumulate in order to mark a device as degraded. 40 */ 41 #include <sys/cdefs.h> 42 #include <sys/time.h> 43 44 #include <sys/fs/zfs.h> 45 46 #include <dirent.h> 47 #include <iomanip> 48 #include <fstream> 49 #include <functional> 50 #include <sstream> 51 #include <syslog.h> 52 #include <unistd.h> 53 54 #include <libzfs.h> 55 56 #include <list> 57 #include <map> 58 #include <string> 59 60 #include <devdctl/guid.h> 61 #include <devdctl/event.h> 62 #include <devdctl/event_factory.h> 63 #include <devdctl/exception.h> 64 #include <devdctl/consumer.h> 65 66 #include "callout.h" 67 #include "vdev_iterator.h" 68 #include "zfsd_event.h" 69 #include "case_file.h" 70 #include "vdev.h" 71 #include "zfsd.h" 72 #include "zfsd_exception.h" 73 #include "zpool_list.h" 74 75 __FBSDID("$FreeBSD$"); 76 77 /*============================ Namespace Control =============================*/ 78 using std::auto_ptr; 79 using std::hex; 80 using std::ifstream; 81 using std::stringstream; 82 using std::setfill; 83 using std::setw; 84 85 using DevdCtl::Event; 86 using DevdCtl::EventFactory; 87 using DevdCtl::EventList; 88 using DevdCtl::Guid; 89 using DevdCtl::ParseException; 90 91 /*--------------------------------- CaseFile ---------------------------------*/ 92 //- CaseFile Static Data ------------------------------------------------------- 93 94 CaseFileList CaseFile::s_activeCases; 95 const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 96 const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; 97 98 //- CaseFile Static Public Methods --------------------------------------------- 99 CaseFile * 100 CaseFile::Find(Guid poolGUID, Guid vdevGUID) 101 { 102 for (CaseFileList::iterator curCase = s_activeCases.begin(); 103 curCase != s_activeCases.end(); curCase++) { 104 105 if (((*curCase)->PoolGUID() != poolGUID 106 && Guid::InvalidGuid() != poolGUID) 107 || (*curCase)->VdevGUID() != vdevGUID) 108 continue; 109 110 /* 111 * We only carry one active case per-vdev. 112 */ 113 return (*curCase); 114 } 115 return (NULL); 116 } 117 118 CaseFile * 119 CaseFile::Find(const string &physPath) 120 { 121 CaseFile *result = NULL; 122 123 for (CaseFileList::iterator curCase = s_activeCases.begin(); 124 curCase != s_activeCases.end(); curCase++) { 125 126 if ((*curCase)->PhysicalPath() != physPath) 127 continue; 128 129 if (result != NULL) { 130 syslog(LOG_WARNING, "Multiple casefiles found for " 131 "physical path %s. " 132 "This is most likely a bug in zfsd", 133 physPath.c_str()); 134 } 135 result = *curCase; 136 } 137 return (result); 138 } 139 140 141 void 142 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 143 { 144 CaseFileList::iterator casefile; 145 for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 146 CaseFileList::iterator next = casefile; 147 next++; 148 if (poolGUID == (*casefile)->PoolGUID()) 149 (*casefile)->ReEvaluate(event); 150 casefile = next; 151 } 152 } 153 154 CaseFile & 155 CaseFile::Create(Vdev &vdev) 156 { 157 CaseFile *activeCase; 158 159 activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 160 if (activeCase == NULL) 161 activeCase = new CaseFile(vdev); 162 163 return (*activeCase); 164 } 165 166 void 167 CaseFile::DeSerialize() 168 { 169 struct dirent **caseFiles; 170 171 int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 172 DeSerializeSelector, /*compar*/NULL)); 173 174 if (numCaseFiles == -1) 175 return; 176 if (numCaseFiles == 0) { 177 free(caseFiles); 178 return; 179 } 180 181 for (int i = 0; i < numCaseFiles; i++) { 182 183 DeSerializeFile(caseFiles[i]->d_name); 184 free(caseFiles[i]); 185 } 186 free(caseFiles); 187 } 188 189 void 190 CaseFile::LogAll() 191 { 192 for (CaseFileList::iterator curCase = s_activeCases.begin(); 193 curCase != s_activeCases.end(); curCase++) 194 (*curCase)->Log(); 195 } 196 197 void 198 CaseFile::PurgeAll() 199 { 200 /* 201 * Serialize casefiles before deleting them so that they can be reread 202 * and revalidated during BuildCaseFiles. 203 * CaseFiles remove themselves from this list on destruction. 204 */ 205 while (s_activeCases.size() != 0) { 206 CaseFile *casefile = s_activeCases.front(); 207 casefile->Serialize(); 208 delete casefile; 209 } 210 211 } 212 213 //- CaseFile Public Methods ---------------------------------------------------- 214 bool 215 CaseFile::RefreshVdevState() 216 { 217 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 218 zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); 219 if (casePool == NULL) 220 return (false); 221 222 Vdev vd(casePool, CaseVdev(casePool)); 223 if (vd.DoesNotExist()) 224 return (false); 225 226 m_vdevState = vd.State(); 227 m_vdevPhysPath = vd.PhysicalPath(); 228 return (true); 229 } 230 231 bool 232 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 233 { 234 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 235 zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 236 237 if (pool == NULL || !RefreshVdevState()) { 238 /* 239 * The pool or vdev for this case file is no longer 240 * part of the configuration. This can happen 241 * if we process a device arrival notification 242 * before seeing the ZFS configuration change 243 * event. 244 */ 245 syslog(LOG_INFO, 246 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 247 "Closing\n", 248 PoolGUIDString().c_str(), 249 VdevGUIDString().c_str()); 250 Close(); 251 252 /* 253 * Since this event was not used to close this 254 * case, do not report it as consumed. 255 */ 256 return (/*consumed*/false); 257 } 258 259 if (VdevState() > VDEV_STATE_CANT_OPEN) { 260 /* 261 * For now, newly discovered devices only help for 262 * devices that are missing. In the future, we might 263 * use a newly inserted spare to replace a degraded 264 * or faulted device. 265 */ 266 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 267 PoolGUIDString().c_str(), VdevGUIDString().c_str()); 268 return (/*consumed*/false); 269 } 270 271 if (vdev != NULL 272 && ( vdev->PoolGUID() == m_poolGUID 273 || vdev->PoolGUID() == Guid::InvalidGuid()) 274 && vdev->GUID() == m_vdevGUID) { 275 276 zpool_vdev_online(pool, vdev->GUIDString().c_str(), 277 ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, 278 &m_vdevState); 279 syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 280 zpool_get_name(pool), vdev->GUIDString().c_str(), 281 devPath.c_str(), 282 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 283 284 /* 285 * Check the vdev state post the online action to see 286 * if we can retire this case. 287 */ 288 CloseIfSolved(); 289 290 return (/*consumed*/true); 291 } 292 293 /* 294 * If the auto-replace policy is enabled, and we have physical 295 * path information, try a physical path replacement. 296 */ 297 if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 298 syslog(LOG_INFO, 299 "CaseFile(%s:%s:%s): AutoReplace not set. " 300 "Ignoring device insertion.\n", 301 PoolGUIDString().c_str(), 302 VdevGUIDString().c_str(), 303 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 304 return (/*consumed*/false); 305 } 306 307 if (PhysicalPath().empty()) { 308 syslog(LOG_INFO, 309 "CaseFile(%s:%s:%s): No physical path information. " 310 "Ignoring device insertion.\n", 311 PoolGUIDString().c_str(), 312 VdevGUIDString().c_str(), 313 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 314 return (/*consumed*/false); 315 } 316 317 if (physPath != PhysicalPath()) { 318 syslog(LOG_INFO, 319 "CaseFile(%s:%s:%s): Physical path mismatch. " 320 "Ignoring device insertion.\n", 321 PoolGUIDString().c_str(), 322 VdevGUIDString().c_str(), 323 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 324 return (/*consumed*/false); 325 } 326 327 /* Write a label on the newly inserted disk. */ 328 if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { 329 syslog(LOG_ERR, 330 "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 331 zpool_get_name(pool), VdevGUIDString().c_str(), 332 libzfs_error_action(g_zfsHandle), 333 libzfs_error_description(g_zfsHandle)); 334 return (/*consumed*/false); 335 } 336 337 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 338 PoolGUIDString().c_str(), VdevGUIDString().c_str(), 339 devPath.c_str()); 340 return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 341 } 342 343 bool 344 CaseFile::ReEvaluate(const ZfsEvent &event) 345 { 346 bool consumed(false); 347 348 if (event.Value("type") == "misc.fs.zfs.vdev_remove") { 349 /* 350 * The Vdev we represent has been removed from the 351 * configuration. This case is no longer of value. 352 */ 353 Close(); 354 355 return (/*consumed*/true); 356 } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { 357 /* This Pool has been destroyed. Discard the case */ 358 Close(); 359 360 return (/*consumed*/true); 361 } else if (event.Value("type") == "misc.fs.zfs.config_sync") { 362 RefreshVdevState(); 363 if (VdevState() < VDEV_STATE_HEALTHY) 364 consumed = ActivateSpare(); 365 } 366 367 368 if (event.Value("class") == "resource.fs.zfs.removed") { 369 bool spare_activated; 370 371 if (!RefreshVdevState()) { 372 /* 373 * The pool or vdev for this case file is no longer 374 * part of the configuration. This can happen 375 * if we process a device arrival notification 376 * before seeing the ZFS configuration change 377 * event. 378 */ 379 syslog(LOG_INFO, 380 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 381 "unconfigured. Closing\n", 382 PoolGUIDString().c_str(), 383 VdevGUIDString().c_str()); 384 /* 385 * Close the case now so we won't waste cycles in the 386 * system rescan 387 */ 388 Close(); 389 390 /* 391 * Since this event was not used to close this 392 * case, do not report it as consumed. 393 */ 394 return (/*consumed*/false); 395 } 396 397 /* 398 * Discard any tentative I/O error events for 399 * this case. They were most likely caused by the 400 * hot-unplug of this device. 401 */ 402 PurgeTentativeEvents(); 403 404 /* Try to activate spares if they are available */ 405 spare_activated = ActivateSpare(); 406 407 /* 408 * Rescan the drives in the system to see if a recent 409 * drive arrival can be used to solve this case. 410 */ 411 ZfsDaemon::RequestSystemRescan(); 412 413 /* 414 * Consume the event if we successfully activated a spare. 415 * Otherwise, leave it in the unconsumed events list so that the 416 * future addition of a spare to this pool might be able to 417 * close the case 418 */ 419 consumed = spare_activated; 420 } else if (event.Value("class") == "resource.fs.zfs.statechange") { 421 RefreshVdevState(); 422 /* 423 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 424 * activate a hotspare. Otherwise, ignore the event 425 */ 426 if (VdevState() == VDEV_STATE_FAULTED || 427 VdevState() == VDEV_STATE_DEGRADED || 428 VdevState() == VDEV_STATE_CANT_OPEN) 429 (void) ActivateSpare(); 430 consumed = true; 431 } 432 else if (event.Value("class") == "ereport.fs.zfs.io" || 433 event.Value("class") == "ereport.fs.zfs.checksum") { 434 435 m_tentativeEvents.push_front(event.DeepCopy()); 436 RegisterCallout(event); 437 consumed = true; 438 } 439 440 bool closed(CloseIfSolved()); 441 442 return (consumed || closed); 443 } 444 445 446 bool 447 CaseFile::ActivateSpare() { 448 nvlist_t *config, *nvroot; 449 nvlist_t **spares; 450 char *devPath, *vdev_type; 451 const char *poolname; 452 u_int nspares, i; 453 int error; 454 455 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 456 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 457 if (zhp == NULL) { 458 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 459 "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); 460 return (false); 461 } 462 poolname = zpool_get_name(zhp); 463 config = zpool_get_config(zhp, NULL); 464 if (config == NULL) { 465 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 466 "config for pool %s", poolname); 467 return (false); 468 } 469 error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 470 if (error != 0){ 471 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 472 "tree for pool %s", poolname); 473 return (false); 474 } 475 nspares = 0; 476 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 477 &nspares); 478 if (nspares == 0) { 479 /* The pool has no spares configured */ 480 syslog(LOG_INFO, "CaseFile::ActivateSpare: " 481 "No spares available for pool %s", poolname); 482 return (false); 483 } 484 for (i = 0; i < nspares; i++) { 485 uint64_t *nvlist_array; 486 vdev_stat_t *vs; 487 uint_t nstats; 488 489 if (nvlist_lookup_uint64_array(spares[i], 490 ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 491 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 492 "find vdev stats for pool %s, spare %d", 493 poolname, i); 494 return (false); 495 } 496 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 497 498 if ((vs->vs_aux != VDEV_AUX_SPARED) 499 && (vs->vs_state == VDEV_STATE_HEALTHY)) { 500 /* We found a usable spare */ 501 break; 502 } 503 } 504 505 if (i == nspares) { 506 /* No available spares were found */ 507 return (false); 508 } 509 510 error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); 511 if (error != 0) { 512 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 513 "the path of pool %s, spare %d. Error %d", 514 poolname, i, error); 515 return (false); 516 } 517 518 error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); 519 if (error != 0) { 520 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 521 "the vdev type of pool %s, spare %d. Error %d", 522 poolname, i, error); 523 return (false); 524 } 525 526 return (Replace(vdev_type, devPath, /*isspare*/true)); 527 } 528 529 void 530 CaseFile::RegisterCallout(const Event &event) 531 { 532 timeval now, countdown, elapsed, timestamp, zero, remaining; 533 534 gettimeofday(&now, 0); 535 timestamp = event.GetTimestamp(); 536 timersub(&now, ×tamp, &elapsed); 537 timersub(&s_removeGracePeriod, &elapsed, &countdown); 538 /* 539 * If countdown is <= zero, Reset the timer to the 540 * smallest positive time value instead 541 */ 542 timerclear(&zero); 543 if (timercmp(&countdown, &zero, <=)) { 544 timerclear(&countdown); 545 countdown.tv_usec = 1; 546 } 547 548 remaining = m_tentativeTimer.TimeRemaining(); 549 550 if (!m_tentativeTimer.IsPending() 551 || timercmp(&countdown, &remaining, <)) 552 m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 553 } 554 555 556 bool 557 CaseFile::CloseIfSolved() 558 { 559 if (m_events.empty() 560 && m_tentativeEvents.empty()) { 561 562 /* 563 * We currently do not track or take actions on 564 * devices in the degraded or faulted state. 565 * Once we have support for spare pools, we'll 566 * retain these cases so that any spares added in 567 * the future can be applied to them. 568 */ 569 switch (VdevState()) { 570 case VDEV_STATE_HEALTHY: 571 /* No need to keep cases for healthy vdevs */ 572 Close(); 573 return (true); 574 case VDEV_STATE_REMOVED: 575 case VDEV_STATE_CANT_OPEN: 576 /* 577 * Keep open. We may solve it with a newly inserted 578 * device. 579 */ 580 case VDEV_STATE_FAULTED: 581 case VDEV_STATE_DEGRADED: 582 /* 583 * Keep open. We may solve it with the future 584 * addition of a spare to the pool 585 */ 586 case VDEV_STATE_UNKNOWN: 587 case VDEV_STATE_CLOSED: 588 case VDEV_STATE_OFFLINE: 589 /* 590 * Keep open? This may not be the correct behavior, 591 * but it's what we've always done 592 */ 593 ; 594 } 595 596 /* 597 * Re-serialize the case in order to remove any 598 * previous event data. 599 */ 600 Serialize(); 601 } 602 603 return (false); 604 } 605 606 void 607 CaseFile::Log() 608 { 609 syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 610 VdevGUIDString().c_str(), PhysicalPath().c_str()); 611 syslog(LOG_INFO, "\tVdev State = %s\n", 612 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 613 if (m_tentativeEvents.size() != 0) { 614 syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 615 for (EventList::iterator event(m_tentativeEvents.begin()); 616 event != m_tentativeEvents.end(); event++) 617 (*event)->Log(LOG_INFO); 618 } 619 if (m_events.size() != 0) { 620 syslog(LOG_INFO, "\t=== Events ===\n"); 621 for (EventList::iterator event(m_events.begin()); 622 event != m_events.end(); event++) 623 (*event)->Log(LOG_INFO); 624 } 625 } 626 627 //- CaseFile Static Protected Methods ------------------------------------------ 628 void 629 CaseFile::OnGracePeriodEnded(void *arg) 630 { 631 CaseFile &casefile(*static_cast<CaseFile *>(arg)); 632 633 casefile.OnGracePeriodEnded(); 634 } 635 636 int 637 CaseFile::DeSerializeSelector(const struct dirent *dirEntry) 638 { 639 uint64_t poolGUID; 640 uint64_t vdevGUID; 641 642 if (dirEntry->d_type == DT_REG 643 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 644 &poolGUID, &vdevGUID) == 2) 645 return (1); 646 return (0); 647 } 648 649 void 650 CaseFile::DeSerializeFile(const char *fileName) 651 { 652 string fullName(s_caseFilePath + '/' + fileName); 653 CaseFile *existingCaseFile(NULL); 654 CaseFile *caseFile(NULL); 655 656 try { 657 uint64_t poolGUID; 658 uint64_t vdevGUID; 659 nvlist_t *vdevConf; 660 661 if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 662 &poolGUID, &vdevGUID) != 2) { 663 throw ZfsdException("CaseFile::DeSerialize: " 664 "Unintelligible CaseFile filename %s.\n", fileName); 665 } 666 existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 667 if (existingCaseFile != NULL) { 668 /* 669 * If the vdev is already degraded or faulted, 670 * there's no point in keeping the state around 671 * that we use to put a drive into the degraded 672 * state. However, if the vdev is simply missing, 673 * preserve the case data in the hopes that it will 674 * return. 675 */ 676 caseFile = existingCaseFile; 677 vdev_state curState(caseFile->VdevState()); 678 if (curState > VDEV_STATE_CANT_OPEN 679 && curState < VDEV_STATE_HEALTHY) { 680 unlink(fileName); 681 return; 682 } 683 } else { 684 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 685 if (zpl.empty() 686 || (vdevConf = VdevIterator(zpl.front()) 687 .Find(vdevGUID)) == NULL) { 688 /* 689 * Either the pool no longer exists 690 * or this vdev is no longer a member of 691 * the pool. 692 */ 693 unlink(fullName.c_str()); 694 return; 695 } 696 697 /* 698 * Any vdev we find that does not have a case file 699 * must be in the healthy state and thus worthy of 700 * continued SERD data tracking. 701 */ 702 caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 703 } 704 705 ifstream caseStream(fullName.c_str()); 706 if (!caseStream) 707 throw ZfsdException("CaseFile::DeSerialize: Unable to " 708 "read %s.\n", fileName); 709 710 caseFile->DeSerialize(caseStream); 711 } catch (const ParseException &exp) { 712 713 exp.Log(); 714 if (caseFile != existingCaseFile) 715 delete caseFile; 716 717 /* 718 * Since we can't parse the file, unlink it so we don't 719 * trip over it again. 720 */ 721 unlink(fileName); 722 } catch (const ZfsdException &zfsException) { 723 724 zfsException.Log(); 725 if (caseFile != existingCaseFile) 726 delete caseFile; 727 } 728 } 729 730 //- CaseFile Protected Methods ------------------------------------------------- 731 CaseFile::CaseFile(const Vdev &vdev) 732 : m_poolGUID(vdev.PoolGUID()), 733 m_vdevGUID(vdev.GUID()), 734 m_vdevState(vdev.State()), 735 m_vdevPhysPath(vdev.PhysicalPath()) 736 { 737 stringstream guidString; 738 739 guidString << m_vdevGUID; 740 m_vdevGUIDString = guidString.str(); 741 guidString.str(""); 742 guidString << m_poolGUID; 743 m_poolGUIDString = guidString.str(); 744 745 s_activeCases.push_back(this); 746 747 syslog(LOG_INFO, "Creating new CaseFile:\n"); 748 Log(); 749 } 750 751 CaseFile::~CaseFile() 752 { 753 PurgeEvents(); 754 PurgeTentativeEvents(); 755 m_tentativeTimer.Stop(); 756 s_activeCases.remove(this); 757 } 758 759 void 760 CaseFile::PurgeEvents() 761 { 762 for (EventList::iterator event(m_events.begin()); 763 event != m_events.end(); event++) 764 delete *event; 765 766 m_events.clear(); 767 } 768 769 void 770 CaseFile::PurgeTentativeEvents() 771 { 772 for (EventList::iterator event(m_tentativeEvents.begin()); 773 event != m_tentativeEvents.end(); event++) 774 delete *event; 775 776 m_tentativeEvents.clear(); 777 } 778 779 void 780 CaseFile::SerializeEvList(const EventList events, int fd, 781 const char* prefix) const 782 { 783 if (events.empty()) 784 return; 785 for (EventList::const_iterator curEvent = events.begin(); 786 curEvent != events.end(); curEvent++) { 787 const string &eventString((*curEvent)->GetEventString()); 788 789 // TODO: replace many write(2) calls with a single writev(2) 790 if (prefix) 791 write(fd, prefix, strlen(prefix)); 792 write(fd, eventString.c_str(), eventString.length()); 793 } 794 } 795 796 void 797 CaseFile::Serialize() 798 { 799 stringstream saveFile; 800 801 saveFile << setfill('0') 802 << s_caseFilePath << "/" 803 << "pool_" << PoolGUIDString() 804 << "_vdev_" << VdevGUIDString() 805 << ".case"; 806 807 if (m_events.empty() && m_tentativeEvents.empty()) { 808 unlink(saveFile.str().c_str()); 809 return; 810 } 811 812 int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 813 if (fd == -1) { 814 syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 815 saveFile.str().c_str()); 816 return; 817 } 818 SerializeEvList(m_events, fd); 819 SerializeEvList(m_tentativeEvents, fd, "tentative "); 820 close(fd); 821 } 822 823 /* 824 * XXX: This method assumes that events may not contain embedded newlines. If 825 * ever events can contain embedded newlines, then CaseFile must switch 826 * serialization formats 827 */ 828 void 829 CaseFile::DeSerialize(ifstream &caseStream) 830 { 831 string evString; 832 const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 833 834 caseStream >> std::noskipws >> std::ws; 835 while (caseStream.good()) { 836 /* 837 * Outline: 838 * read the beginning of a line and check it for 839 * "tentative". If found, discard "tentative". 840 * Create a new event 841 * continue 842 */ 843 EventList* destEvents; 844 const string tentFlag("tentative "); 845 string line; 846 std::stringbuf lineBuf; 847 848 caseStream.get(lineBuf); 849 caseStream.ignore(); /*discard the newline character*/ 850 line = lineBuf.str(); 851 if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 852 /* Discard "tentative" */ 853 line.erase(0, tentFlag.size()); 854 destEvents = &m_tentativeEvents; 855 } else { 856 destEvents = &m_events; 857 } 858 Event *event(Event::CreateEvent(factory, line)); 859 if (event != NULL) { 860 destEvents->push_back(event); 861 RegisterCallout(*event); 862 } 863 } 864 } 865 866 void 867 CaseFile::Close() 868 { 869 /* 870 * This case is no longer relevant. Clean up our 871 * serialization file, and delete the case. 872 */ 873 syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 874 PoolGUIDString().c_str(), VdevGUIDString().c_str(), 875 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 876 877 /* 878 * Serialization of a Case with no event data, clears the 879 * Serialization data for that event. 880 */ 881 PurgeEvents(); 882 Serialize(); 883 884 delete this; 885 } 886 887 void 888 CaseFile::OnGracePeriodEnded() 889 { 890 bool should_fault, should_degrade; 891 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 892 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 893 894 m_events.splice(m_events.begin(), m_tentativeEvents); 895 should_fault = ShouldFault(); 896 should_degrade = ShouldDegrade(); 897 898 if (should_fault || should_degrade) { 899 if (zhp == NULL 900 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 901 /* 902 * Either the pool no longer exists 903 * or this vdev is no longer a member of 904 * the pool. 905 */ 906 Close(); 907 return; 908 } 909 910 } 911 912 /* A fault condition has priority over a degrade condition */ 913 if (ShouldFault()) { 914 /* Fault the vdev and close the case. */ 915 if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 916 VDEV_AUX_ERR_EXCEEDED) == 0) { 917 syslog(LOG_INFO, "Faulting vdev(%s/%s)", 918 PoolGUIDString().c_str(), 919 VdevGUIDString().c_str()); 920 Close(); 921 return; 922 } 923 else { 924 syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 925 PoolGUIDString().c_str(), 926 VdevGUIDString().c_str(), 927 libzfs_error_action(g_zfsHandle), 928 libzfs_error_description(g_zfsHandle)); 929 } 930 } 931 else if (ShouldDegrade()) { 932 /* Degrade the vdev and close the case. */ 933 if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 934 VDEV_AUX_ERR_EXCEEDED) == 0) { 935 syslog(LOG_INFO, "Degrading vdev(%s/%s)", 936 PoolGUIDString().c_str(), 937 VdevGUIDString().c_str()); 938 Close(); 939 return; 940 } 941 else { 942 syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 943 PoolGUIDString().c_str(), 944 VdevGUIDString().c_str(), 945 libzfs_error_action(g_zfsHandle), 946 libzfs_error_description(g_zfsHandle)); 947 } 948 } 949 Serialize(); 950 } 951 952 Vdev 953 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 954 Vdev vd(zhp, CaseVdev(zhp)); 955 std::list<Vdev> children; 956 std::list<Vdev>::iterator children_it; 957 958 Vdev parent(vd.Parent()); 959 Vdev replacing(NonexistentVdev); 960 961 /* 962 * To determine whether we are being replaced by another spare that 963 * is still working, then make sure that it is currently spared and 964 * that the spare is either resilvering or healthy. If any of these 965 * conditions fail, then we are not being replaced by a spare. 966 * 967 * If the spare is healthy, then the case file should be closed very 968 * soon after this check. 969 */ 970 if (parent.DoesNotExist() 971 || parent.Name(zhp, /*verbose*/false) != "spare") 972 return (NonexistentVdev); 973 974 children = parent.Children(); 975 children_it = children.begin(); 976 for (;children_it != children.end(); children_it++) { 977 Vdev child = *children_it; 978 979 /* Skip our vdev. */ 980 if (child.GUID() == VdevGUID()) 981 continue; 982 /* 983 * Accept the first child that doesn't match our GUID, or 984 * any resilvering/healthy device if one exists. 985 */ 986 if (replacing.DoesNotExist() || child.IsResilvering() 987 || child.State() == VDEV_STATE_HEALTHY) 988 replacing = child; 989 } 990 991 return (replacing); 992 } 993 994 bool 995 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 996 nvlist_t *nvroot, *newvd; 997 const char *poolname; 998 string oldstr(VdevGUIDString()); 999 bool retval = true; 1000 1001 /* Figure out what pool we're working on */ 1002 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1003 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1004 if (zhp == NULL) { 1005 syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1006 "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); 1007 return (false); 1008 } 1009 poolname = zpool_get_name(zhp); 1010 Vdev vd(zhp, CaseVdev(zhp)); 1011 Vdev replaced(BeingReplacedBy(zhp)); 1012 1013 if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1014 /* If we are already being replaced by a working spare, pass. */ 1015 if (replaced.IsResilvering() 1016 || replaced.State() == VDEV_STATE_HEALTHY) { 1017 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1018 "replaced", VdevGUIDString().c_str(), path); 1019 return (/*consumed*/false); 1020 } 1021 /* 1022 * If we have already been replaced by a spare, but that spare 1023 * is broken, we must spare the spare, not the original device. 1024 */ 1025 oldstr = replaced.GUIDString(); 1026 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1027 "broken spare %s instead", VdevGUIDString().c_str(), 1028 path, oldstr.c_str()); 1029 } 1030 1031 /* 1032 * Build a root vdev/leaf vdev configuration suitable for 1033 * zpool_vdev_attach. Only enough data for the kernel to find 1034 * the device (i.e. type and disk device node path) are needed. 1035 */ 1036 nvroot = NULL; 1037 newvd = NULL; 1038 1039 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1040 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1041 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1042 "configuration data.", poolname, oldstr.c_str()); 1043 if (nvroot != NULL) 1044 nvlist_free(nvroot); 1045 return (false); 1046 } 1047 if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1048 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1049 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1050 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1051 &newvd, 1) != 0) { 1052 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1053 "configuration data.", poolname, oldstr.c_str()); 1054 nvlist_free(newvd); 1055 nvlist_free(nvroot); 1056 return (true); 1057 } 1058 1059 /* Data was copied when added to the root vdev. */ 1060 nvlist_free(newvd); 1061 1062 retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1063 /*replace*/B_TRUE) == 0); 1064 if (retval) 1065 syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1066 poolname, oldstr.c_str(), path); 1067 else 1068 syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1069 poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1070 libzfs_error_description(g_zfsHandle)); 1071 nvlist_free(nvroot); 1072 1073 return (retval); 1074 } 1075 1076 /* Does the argument event refer to a checksum error? */ 1077 static bool 1078 IsChecksumEvent(const Event* const event) 1079 { 1080 return ("ereport.fs.zfs.checksum" == event->Value("type")); 1081 } 1082 1083 /* Does the argument event refer to an IO error? */ 1084 static bool 1085 IsIOEvent(const Event* const event) 1086 { 1087 return ("ereport.fs.zfs.io" == event->Value("type")); 1088 } 1089 1090 bool 1091 CaseFile::ShouldDegrade() const 1092 { 1093 return (std::count_if(m_events.begin(), m_events.end(), 1094 IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); 1095 } 1096 1097 bool 1098 CaseFile::ShouldFault() const 1099 { 1100 return (std::count_if(m_events.begin(), m_events.end(), 1101 IsIOEvent) > ZFS_DEGRADE_IO_COUNT); 1102 } 1103 1104 nvlist_t * 1105 CaseFile::CaseVdev(zpool_handle_t *zhp) const 1106 { 1107 return (VdevIterator(zhp).Find(VdevGUID())); 1108 } 1109