1 /*- 2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions, and the following disclaimer, 10 * without modification. 11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12 * substantially similar to the "NO WARRANTY" disclaimer below 13 * ("Disclaimer") and any redistribution must be conditioned upon 14 * including a substantially similar Disclaimer requirement for further 15 * binary redistribution. 16 * 17 * NO WARRANTY 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGES. 29 * 30 * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31 */ 32 33 /** 34 * \file case_file.cc 35 * 36 * We keep case files for any leaf vdev that is not in the optimal state. 37 * However, we only serialize to disk those events that need to be preserved 38 * across reboots. For now, this is just a log of soft errors which we 39 * accumulate in order to mark a device as degraded. 40 */ 41 #include <sys/cdefs.h> 42 #include <sys/time.h> 43 44 #include <sys/fs/zfs.h> 45 46 #include <dirent.h> 47 #include <iomanip> 48 #include <fstream> 49 #include <functional> 50 #include <sstream> 51 #include <syslog.h> 52 #include <unistd.h> 53 54 #include <libzfs.h> 55 56 #include <list> 57 #include <map> 58 #include <string> 59 60 #include <devdctl/guid.h> 61 #include <devdctl/event.h> 62 #include <devdctl/event_factory.h> 63 #include <devdctl/exception.h> 64 #include <devdctl/consumer.h> 65 66 #include "callout.h" 67 #include "vdev_iterator.h" 68 #include "zfsd_event.h" 69 #include "case_file.h" 70 #include "vdev.h" 71 #include "zfsd.h" 72 #include "zfsd_exception.h" 73 #include "zpool_list.h" 74 75 __FBSDID("$FreeBSD$"); 76 77 /*============================ Namespace Control =============================*/ 78 using std::auto_ptr; 79 using std::hex; 80 using std::ifstream; 81 using std::stringstream; 82 using std::setfill; 83 using std::setw; 84 85 using DevdCtl::Event; 86 using DevdCtl::EventBuffer; 87 using DevdCtl::EventFactory; 88 using DevdCtl::EventList; 89 using DevdCtl::Guid; 90 using DevdCtl::ParseException; 91 92 /*--------------------------------- CaseFile ---------------------------------*/ 93 //- CaseFile Static Data ------------------------------------------------------- 94 95 CaseFileList CaseFile::s_activeCases; 96 const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 97 const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; 98 99 //- CaseFile Static Public Methods --------------------------------------------- 100 CaseFile * 101 CaseFile::Find(Guid poolGUID, Guid vdevGUID) 102 { 103 for (CaseFileList::iterator curCase = s_activeCases.begin(); 104 curCase != s_activeCases.end(); curCase++) { 105 106 if ((*curCase)->PoolGUID() != poolGUID 107 || (*curCase)->VdevGUID() != vdevGUID) 108 continue; 109 110 /* 111 * We only carry one active case per-vdev. 112 */ 113 return (*curCase); 114 } 115 return (NULL); 116 } 117 118 CaseFile * 119 CaseFile::Find(const string &physPath) 120 { 121 CaseFile *result = NULL; 122 123 for (CaseFileList::iterator curCase = s_activeCases.begin(); 124 curCase != s_activeCases.end(); curCase++) { 125 126 if ((*curCase)->PhysicalPath() != physPath) 127 continue; 128 129 if (result != NULL) { 130 syslog(LOG_WARNING, "Multiple casefiles found for " 131 "physical path %s. " 132 "This is most likely a bug in zfsd", 133 physPath.c_str()); 134 } 135 result = *curCase; 136 } 137 return (result); 138 } 139 140 141 void 142 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 143 { 144 CaseFileList::iterator casefile; 145 for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 146 CaseFileList::iterator next = casefile; 147 next++; 148 if (poolGUID == (*casefile)->PoolGUID()) 149 (*casefile)->ReEvaluate(event); 150 casefile = next; 151 } 152 } 153 154 CaseFile & 155 CaseFile::Create(Vdev &vdev) 156 { 157 CaseFile *activeCase; 158 159 activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 160 if (activeCase == NULL) 161 activeCase = new CaseFile(vdev); 162 163 return (*activeCase); 164 } 165 166 void 167 CaseFile::DeSerialize() 168 { 169 struct dirent **caseFiles; 170 171 int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 172 DeSerializeSelector, /*compar*/NULL)); 173 174 if (numCaseFiles == -1) 175 return; 176 if (numCaseFiles == 0) { 177 free(caseFiles); 178 return; 179 } 180 181 for (int i = 0; i < numCaseFiles; i++) { 182 183 DeSerializeFile(caseFiles[i]->d_name); 184 free(caseFiles[i]); 185 } 186 free(caseFiles); 187 } 188 189 void 190 CaseFile::LogAll() 191 { 192 for (CaseFileList::iterator curCase = s_activeCases.begin(); 193 curCase != s_activeCases.end(); curCase++) 194 (*curCase)->Log(); 195 } 196 197 void 198 CaseFile::PurgeAll() 199 { 200 /* 201 * Serialize casefiles before deleting them so that they can be reread 202 * and revalidated during BuildCaseFiles. 203 * CaseFiles remove themselves from this list on destruction. 204 */ 205 while (s_activeCases.size() != 0) { 206 CaseFile *casefile = s_activeCases.front(); 207 casefile->Serialize(); 208 delete casefile; 209 } 210 211 } 212 213 //- CaseFile Public Methods ---------------------------------------------------- 214 bool 215 CaseFile::RefreshVdevState() 216 { 217 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 218 zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); 219 if (casePool == NULL) 220 return (false); 221 222 Vdev vd(casePool, CaseVdev(casePool)); 223 if (vd.DoesNotExist()) 224 return (false); 225 226 m_vdevState = vd.State(); 227 m_vdevPhysPath = vd.PhysicalPath(); 228 return (true); 229 } 230 231 bool 232 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 233 { 234 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 235 zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 236 237 if (pool == NULL || !RefreshVdevState()) { 238 /* 239 * The pool or vdev for this case file is no longer 240 * part of the configuration. This can happen 241 * if we process a device arrival notification 242 * before seeing the ZFS configuration change 243 * event. 244 */ 245 syslog(LOG_INFO, 246 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 247 "Closing\n", 248 PoolGUIDString().c_str(), 249 VdevGUIDString().c_str()); 250 Close(); 251 252 /* 253 * Since this event was not used to close this 254 * case, do not report it as consumed. 255 */ 256 return (/*consumed*/false); 257 } 258 259 if (VdevState() > VDEV_STATE_CANT_OPEN) { 260 /* 261 * For now, newly discovered devices only help for 262 * devices that are missing. In the future, we might 263 * use a newly inserted spare to replace a degraded 264 * or faulted device. 265 */ 266 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 267 PoolGUIDString().c_str(), VdevGUIDString().c_str()); 268 return (/*consumed*/false); 269 } 270 271 if (vdev != NULL 272 && vdev->PoolGUID() == m_poolGUID 273 && vdev->GUID() == m_vdevGUID) { 274 275 zpool_vdev_online(pool, vdev->GUIDString().c_str(), 276 ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, 277 &m_vdevState); 278 syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 279 zpool_get_name(pool), vdev->GUIDString().c_str(), 280 devPath.c_str(), 281 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 282 283 /* 284 * Check the vdev state post the online action to see 285 * if we can retire this case. 286 */ 287 CloseIfSolved(); 288 289 return (/*consumed*/true); 290 } 291 292 /* 293 * If the auto-replace policy is enabled, and we have physical 294 * path information, try a physical path replacement. 295 */ 296 if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 297 syslog(LOG_INFO, 298 "CaseFile(%s:%s:%s): AutoReplace not set. " 299 "Ignoring device insertion.\n", 300 PoolGUIDString().c_str(), 301 VdevGUIDString().c_str(), 302 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 303 return (/*consumed*/false); 304 } 305 306 if (PhysicalPath().empty()) { 307 syslog(LOG_INFO, 308 "CaseFile(%s:%s:%s): No physical path information. " 309 "Ignoring device insertion.\n", 310 PoolGUIDString().c_str(), 311 VdevGUIDString().c_str(), 312 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 313 return (/*consumed*/false); 314 } 315 316 if (physPath != PhysicalPath()) { 317 syslog(LOG_INFO, 318 "CaseFile(%s:%s:%s): Physical path mismatch. " 319 "Ignoring device insertion.\n", 320 PoolGUIDString().c_str(), 321 VdevGUIDString().c_str(), 322 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 323 return (/*consumed*/false); 324 } 325 326 /* Write a label on the newly inserted disk. */ 327 if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { 328 syslog(LOG_ERR, 329 "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 330 zpool_get_name(pool), VdevGUIDString().c_str(), 331 libzfs_error_action(g_zfsHandle), 332 libzfs_error_description(g_zfsHandle)); 333 return (/*consumed*/false); 334 } 335 336 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 337 PoolGUIDString().c_str(), VdevGUIDString().c_str(), 338 devPath.c_str()); 339 return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 340 } 341 342 bool 343 CaseFile::ReEvaluate(const ZfsEvent &event) 344 { 345 bool consumed(false); 346 347 if (event.Value("type") == "misc.fs.zfs.vdev_remove") { 348 /* 349 * The Vdev we represent has been removed from the 350 * configuration. This case is no longer of value. 351 */ 352 Close(); 353 354 return (/*consumed*/true); 355 } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { 356 /* This Pool has been destroyed. Discard the case */ 357 Close(); 358 359 return (/*consumed*/true); 360 } else if (event.Value("type") == "misc.fs.zfs.config_sync") { 361 RefreshVdevState(); 362 if (VdevState() < VDEV_STATE_HEALTHY) 363 consumed = ActivateSpare(); 364 } 365 366 367 if (event.Value("class") == "resource.fs.zfs.removed") { 368 bool spare_activated; 369 370 if (!RefreshVdevState()) { 371 /* 372 * The pool or vdev for this case file is no longer 373 * part of the configuration. This can happen 374 * if we process a device arrival notification 375 * before seeing the ZFS configuration change 376 * event. 377 */ 378 syslog(LOG_INFO, 379 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 380 "unconfigured. Closing\n", 381 PoolGUIDString().c_str(), 382 VdevGUIDString().c_str()); 383 /* 384 * Close the case now so we won't waste cycles in the 385 * system rescan 386 */ 387 Close(); 388 389 /* 390 * Since this event was not used to close this 391 * case, do not report it as consumed. 392 */ 393 return (/*consumed*/false); 394 } 395 396 /* 397 * Discard any tentative I/O error events for 398 * this case. They were most likely caused by the 399 * hot-unplug of this device. 400 */ 401 PurgeTentativeEvents(); 402 403 /* Try to activate spares if they are available */ 404 spare_activated = ActivateSpare(); 405 406 /* 407 * Rescan the drives in the system to see if a recent 408 * drive arrival can be used to solve this case. 409 */ 410 ZfsDaemon::RequestSystemRescan(); 411 412 /* 413 * Consume the event if we successfully activated a spare. 414 * Otherwise, leave it in the unconsumed events list so that the 415 * future addition of a spare to this pool might be able to 416 * close the case 417 */ 418 consumed = spare_activated; 419 } else if (event.Value("class") == "resource.fs.zfs.statechange") { 420 RefreshVdevState(); 421 /* 422 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 423 * activate a hotspare. Otherwise, ignore the event 424 */ 425 if (VdevState() == VDEV_STATE_FAULTED || 426 VdevState() == VDEV_STATE_DEGRADED || 427 VdevState() == VDEV_STATE_CANT_OPEN) 428 (void) ActivateSpare(); 429 consumed = true; 430 } 431 else if (event.Value("class") == "ereport.fs.zfs.io" || 432 event.Value("class") == "ereport.fs.zfs.checksum") { 433 434 m_tentativeEvents.push_front(event.DeepCopy()); 435 RegisterCallout(event); 436 consumed = true; 437 } 438 439 bool closed(CloseIfSolved()); 440 441 return (consumed || closed); 442 } 443 444 445 bool 446 CaseFile::ActivateSpare() { 447 nvlist_t *config, *nvroot; 448 nvlist_t **spares; 449 char *devPath, *vdev_type; 450 const char *poolname; 451 u_int nspares, i; 452 int error; 453 454 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 455 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 456 if (zhp == NULL) { 457 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 458 "for pool_guid %"PRIu64".", (uint64_t)m_poolGUID); 459 return (false); 460 } 461 poolname = zpool_get_name(zhp); 462 config = zpool_get_config(zhp, NULL); 463 if (config == NULL) { 464 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 465 "config for pool %s", poolname); 466 return (false); 467 } 468 error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 469 if (error != 0){ 470 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 471 "tree for pool %s", poolname); 472 return (false); 473 } 474 nspares = 0; 475 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 476 &nspares); 477 if (nspares == 0) { 478 /* The pool has no spares configured */ 479 syslog(LOG_INFO, "CaseFile::ActivateSpare: " 480 "No spares available for pool %s", poolname); 481 return (false); 482 } 483 for (i = 0; i < nspares; i++) { 484 uint64_t *nvlist_array; 485 vdev_stat_t *vs; 486 uint_t nstats; 487 488 if (nvlist_lookup_uint64_array(spares[i], 489 ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 490 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 491 "find vdev stats for pool %s, spare %d", 492 poolname, i); 493 return (false); 494 } 495 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 496 497 if ((vs->vs_aux != VDEV_AUX_SPARED) 498 && (vs->vs_state == VDEV_STATE_HEALTHY)) { 499 /* We found a usable spare */ 500 break; 501 } 502 } 503 504 if (i == nspares) { 505 /* No available spares were found */ 506 return (false); 507 } 508 509 error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); 510 if (error != 0) { 511 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 512 "the path of pool %s, spare %d. Error %d", 513 poolname, i, error); 514 return (false); 515 } 516 517 error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); 518 if (error != 0) { 519 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 520 "the vdev type of pool %s, spare %d. Error %d", 521 poolname, i, error); 522 return (false); 523 } 524 525 return (Replace(vdev_type, devPath, /*isspare*/true)); 526 } 527 528 void 529 CaseFile::RegisterCallout(const Event &event) 530 { 531 timeval now, countdown, elapsed, timestamp, zero, remaining; 532 533 gettimeofday(&now, 0); 534 timestamp = event.GetTimestamp(); 535 timersub(&now, ×tamp, &elapsed); 536 timersub(&s_removeGracePeriod, &elapsed, &countdown); 537 /* 538 * If countdown is <= zero, Reset the timer to the 539 * smallest positive time value instead 540 */ 541 timerclear(&zero); 542 if (timercmp(&countdown, &zero, <=)) { 543 timerclear(&countdown); 544 countdown.tv_usec = 1; 545 } 546 547 remaining = m_tentativeTimer.TimeRemaining(); 548 549 if (!m_tentativeTimer.IsPending() 550 || timercmp(&countdown, &remaining, <)) 551 m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 552 } 553 554 555 bool 556 CaseFile::CloseIfSolved() 557 { 558 if (m_events.empty() 559 && m_tentativeEvents.empty()) { 560 561 /* 562 * We currently do not track or take actions on 563 * devices in the degraded or faulted state. 564 * Once we have support for spare pools, we'll 565 * retain these cases so that any spares added in 566 * the future can be applied to them. 567 */ 568 switch (VdevState()) { 569 case VDEV_STATE_HEALTHY: 570 /* No need to keep cases for healthy vdevs */ 571 Close(); 572 return (true); 573 case VDEV_STATE_REMOVED: 574 case VDEV_STATE_CANT_OPEN: 575 /* 576 * Keep open. We may solve it with a newly inserted 577 * device. 578 */ 579 case VDEV_STATE_FAULTED: 580 case VDEV_STATE_DEGRADED: 581 /* 582 * Keep open. We may solve it with the future 583 * addition of a spare to the pool 584 */ 585 case VDEV_STATE_UNKNOWN: 586 case VDEV_STATE_CLOSED: 587 case VDEV_STATE_OFFLINE: 588 /* 589 * Keep open? This may not be the correct behavior, 590 * but it's what we've always done 591 */ 592 ; 593 } 594 595 /* 596 * Re-serialize the case in order to remove any 597 * previous event data. 598 */ 599 Serialize(); 600 } 601 602 return (false); 603 } 604 605 void 606 CaseFile::Log() 607 { 608 syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 609 VdevGUIDString().c_str(), PhysicalPath().c_str()); 610 syslog(LOG_INFO, "\tVdev State = %s\n", 611 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 612 if (m_tentativeEvents.size() != 0) { 613 syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 614 for (EventList::iterator event(m_tentativeEvents.begin()); 615 event != m_tentativeEvents.end(); event++) 616 (*event)->Log(LOG_INFO); 617 } 618 if (m_events.size() != 0) { 619 syslog(LOG_INFO, "\t=== Events ===\n"); 620 for (EventList::iterator event(m_events.begin()); 621 event != m_events.end(); event++) 622 (*event)->Log(LOG_INFO); 623 } 624 } 625 626 //- CaseFile Static Protected Methods ------------------------------------------ 627 void 628 CaseFile::OnGracePeriodEnded(void *arg) 629 { 630 CaseFile &casefile(*static_cast<CaseFile *>(arg)); 631 632 casefile.OnGracePeriodEnded(); 633 } 634 635 int 636 CaseFile::DeSerializeSelector(const struct dirent *dirEntry) 637 { 638 uint64_t poolGUID; 639 uint64_t vdevGUID; 640 641 if (dirEntry->d_type == DT_REG 642 && sscanf(dirEntry->d_name, "pool_%"PRIu64"_vdev_%"PRIu64".case", 643 &poolGUID, &vdevGUID) == 2) 644 return (1); 645 return (0); 646 } 647 648 void 649 CaseFile::DeSerializeFile(const char *fileName) 650 { 651 string fullName(s_caseFilePath + '/' + fileName); 652 CaseFile *existingCaseFile(NULL); 653 CaseFile *caseFile(NULL); 654 655 try { 656 uint64_t poolGUID; 657 uint64_t vdevGUID; 658 nvlist_t *vdevConf; 659 660 sscanf(fileName, "pool_%"PRIu64"_vdev_%"PRIu64".case", 661 &poolGUID, &vdevGUID); 662 existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 663 if (existingCaseFile != NULL) { 664 /* 665 * If the vdev is already degraded or faulted, 666 * there's no point in keeping the state around 667 * that we use to put a drive into the degraded 668 * state. However, if the vdev is simply missing, 669 * preserve the case data in the hopes that it will 670 * return. 671 */ 672 caseFile = existingCaseFile; 673 vdev_state curState(caseFile->VdevState()); 674 if (curState > VDEV_STATE_CANT_OPEN 675 && curState < VDEV_STATE_HEALTHY) { 676 unlink(fileName); 677 return; 678 } 679 } else { 680 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 681 if (zpl.empty() 682 || (vdevConf = VdevIterator(zpl.front()) 683 .Find(vdevGUID)) == NULL) { 684 /* 685 * Either the pool no longer exists 686 * or this vdev is no longer a member of 687 * the pool. 688 */ 689 unlink(fullName.c_str()); 690 return; 691 } 692 693 /* 694 * Any vdev we find that does not have a case file 695 * must be in the healthy state and thus worthy of 696 * continued SERD data tracking. 697 */ 698 caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 699 } 700 701 ifstream caseStream(fullName.c_str()); 702 if (!caseStream) 703 throw ZfsdException("CaseFile::DeSerialize: Unable to " 704 "read %s.\n", fileName); 705 706 caseFile->DeSerialize(caseStream); 707 } catch (const ParseException &exp) { 708 709 exp.Log(); 710 if (caseFile != existingCaseFile) 711 delete caseFile; 712 713 /* 714 * Since we can't parse the file, unlink it so we don't 715 * trip over it again. 716 */ 717 unlink(fileName); 718 } catch (const ZfsdException &zfsException) { 719 720 zfsException.Log(); 721 if (caseFile != existingCaseFile) 722 delete caseFile; 723 } 724 } 725 726 //- CaseFile Protected Methods ------------------------------------------------- 727 CaseFile::CaseFile(const Vdev &vdev) 728 : m_poolGUID(vdev.PoolGUID()), 729 m_vdevGUID(vdev.GUID()), 730 m_vdevState(vdev.State()), 731 m_vdevPhysPath(vdev.PhysicalPath()) 732 { 733 stringstream guidString; 734 735 guidString << m_vdevGUID; 736 m_vdevGUIDString = guidString.str(); 737 guidString.str(""); 738 guidString << m_poolGUID; 739 m_poolGUIDString = guidString.str(); 740 741 s_activeCases.push_back(this); 742 743 syslog(LOG_INFO, "Creating new CaseFile:\n"); 744 Log(); 745 } 746 747 CaseFile::~CaseFile() 748 { 749 PurgeEvents(); 750 PurgeTentativeEvents(); 751 m_tentativeTimer.Stop(); 752 s_activeCases.remove(this); 753 } 754 755 void 756 CaseFile::PurgeEvents() 757 { 758 for (EventList::iterator event(m_events.begin()); 759 event != m_events.end(); event++) 760 delete *event; 761 762 m_events.clear(); 763 } 764 765 void 766 CaseFile::PurgeTentativeEvents() 767 { 768 for (EventList::iterator event(m_tentativeEvents.begin()); 769 event != m_tentativeEvents.end(); event++) 770 delete *event; 771 772 m_tentativeEvents.clear(); 773 } 774 775 void 776 CaseFile::SerializeEvList(const EventList events, int fd, 777 const char* prefix) const 778 { 779 if (events.empty()) 780 return; 781 for (EventList::const_iterator curEvent = events.begin(); 782 curEvent != events.end(); curEvent++) { 783 const string &eventString((*curEvent)->GetEventString()); 784 785 // TODO: replace many write(2) calls with a single writev(2) 786 if (prefix) 787 write(fd, prefix, strlen(prefix)); 788 write(fd, eventString.c_str(), eventString.length()); 789 } 790 } 791 792 void 793 CaseFile::Serialize() 794 { 795 stringstream saveFile; 796 797 saveFile << setfill('0') 798 << s_caseFilePath << "/" 799 << "pool_" << PoolGUIDString() 800 << "_vdev_" << VdevGUIDString() 801 << ".case"; 802 803 if (m_events.empty() && m_tentativeEvents.empty()) { 804 unlink(saveFile.str().c_str()); 805 return; 806 } 807 808 int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 809 if (fd == -1) { 810 syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 811 saveFile.str().c_str()); 812 return; 813 } 814 SerializeEvList(m_events, fd); 815 SerializeEvList(m_tentativeEvents, fd, "tentative "); 816 close(fd); 817 } 818 819 /* 820 * XXX: This method assumes that events may not contain embedded newlines. If 821 * ever events can contain embedded newlines, then CaseFile must switch 822 * serialization formats 823 */ 824 void 825 CaseFile::DeSerialize(ifstream &caseStream) 826 { 827 string evString; 828 const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 829 830 caseStream >> std::noskipws >> std::ws; 831 while (caseStream.good()) { 832 /* 833 * Outline: 834 * read the beginning of a line and check it for 835 * "tentative". If found, discard "tentative". 836 * Create a new event 837 * continue 838 */ 839 EventList* destEvents; 840 const string tentFlag("tentative "); 841 string line; 842 std::stringbuf lineBuf; 843 844 caseStream.get(lineBuf); 845 caseStream.ignore(); /*discard the newline character*/ 846 line = lineBuf.str(); 847 if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 848 /* Discard "tentative" */ 849 line.erase(0, tentFlag.size()); 850 destEvents = &m_tentativeEvents; 851 } else { 852 destEvents = &m_events; 853 } 854 Event *event(Event::CreateEvent(factory, line)); 855 if (event != NULL) { 856 destEvents->push_back(event); 857 RegisterCallout(*event); 858 } 859 } 860 } 861 862 void 863 CaseFile::Close() 864 { 865 /* 866 * This case is no longer relevant. Clean up our 867 * serialization file, and delete the case. 868 */ 869 syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 870 PoolGUIDString().c_str(), VdevGUIDString().c_str(), 871 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 872 873 /* 874 * Serialization of a Case with no event data, clears the 875 * Serialization data for that event. 876 */ 877 PurgeEvents(); 878 Serialize(); 879 880 delete this; 881 } 882 883 void 884 CaseFile::OnGracePeriodEnded() 885 { 886 bool should_fault, should_degrade; 887 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 888 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 889 890 m_events.splice(m_events.begin(), m_tentativeEvents); 891 should_fault = ShouldFault(); 892 should_degrade = ShouldDegrade(); 893 894 if (should_fault || should_degrade) { 895 if (zhp == NULL 896 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 897 /* 898 * Either the pool no longer exists 899 * or this vdev is no longer a member of 900 * the pool. 901 */ 902 Close(); 903 return; 904 } 905 906 } 907 908 /* A fault condition has priority over a degrade condition */ 909 if (ShouldFault()) { 910 /* Fault the vdev and close the case. */ 911 if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 912 VDEV_AUX_ERR_EXCEEDED) == 0) { 913 syslog(LOG_INFO, "Faulting vdev(%s/%s)", 914 PoolGUIDString().c_str(), 915 VdevGUIDString().c_str()); 916 Close(); 917 return; 918 } 919 else { 920 syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 921 PoolGUIDString().c_str(), 922 VdevGUIDString().c_str(), 923 libzfs_error_action(g_zfsHandle), 924 libzfs_error_description(g_zfsHandle)); 925 } 926 } 927 else if (ShouldDegrade()) { 928 /* Degrade the vdev and close the case. */ 929 if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 930 VDEV_AUX_ERR_EXCEEDED) == 0) { 931 syslog(LOG_INFO, "Degrading vdev(%s/%s)", 932 PoolGUIDString().c_str(), 933 VdevGUIDString().c_str()); 934 Close(); 935 return; 936 } 937 else { 938 syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 939 PoolGUIDString().c_str(), 940 VdevGUIDString().c_str(), 941 libzfs_error_action(g_zfsHandle), 942 libzfs_error_description(g_zfsHandle)); 943 } 944 } 945 Serialize(); 946 } 947 948 Vdev 949 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 950 Vdev vd(zhp, CaseVdev(zhp)); 951 std::list<Vdev> children; 952 std::list<Vdev>::iterator children_it; 953 954 Vdev parent(vd.Parent()); 955 Vdev replacing(NonexistentVdev); 956 957 /* 958 * To determine whether we are being replaced by another spare that 959 * is still working, then make sure that it is currently spared and 960 * that the spare is either resilvering or healthy. If any of these 961 * conditions fail, then we are not being replaced by a spare. 962 * 963 * If the spare is healthy, then the case file should be closed very 964 * soon after this check. 965 */ 966 if (parent.DoesNotExist() 967 || parent.Name(zhp, /*verbose*/false) != "spare") 968 return (NonexistentVdev); 969 970 children = parent.Children(); 971 children_it = children.begin(); 972 for (;children_it != children.end(); children_it++) { 973 Vdev child = *children_it; 974 975 /* Skip our vdev. */ 976 if (child.GUID() == VdevGUID()) 977 continue; 978 /* 979 * Accept the first child that doesn't match our GUID, or 980 * any resilvering/healthy device if one exists. 981 */ 982 if (replacing.DoesNotExist() || child.IsResilvering() 983 || child.State() == VDEV_STATE_HEALTHY) 984 replacing = child; 985 } 986 987 return (replacing); 988 } 989 990 bool 991 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 992 nvlist_t *nvroot, *newvd; 993 const char *poolname; 994 string oldstr(VdevGUIDString()); 995 bool retval = true; 996 997 /* Figure out what pool we're working on */ 998 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 999 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1000 if (zhp == NULL) { 1001 syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1002 "pool_guid %"PRIu64".", (uint64_t)m_poolGUID); 1003 return (false); 1004 } 1005 poolname = zpool_get_name(zhp); 1006 Vdev vd(zhp, CaseVdev(zhp)); 1007 Vdev replaced(BeingReplacedBy(zhp)); 1008 1009 if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1010 /* If we are already being replaced by a working spare, pass. */ 1011 if (replaced.IsResilvering() 1012 || replaced.State() == VDEV_STATE_HEALTHY) { 1013 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1014 "replaced", VdevGUIDString().c_str(), path); 1015 return (/*consumed*/false); 1016 } 1017 /* 1018 * If we have already been replaced by a spare, but that spare 1019 * is broken, we must spare the spare, not the original device. 1020 */ 1021 oldstr = replaced.GUIDString(); 1022 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1023 "broken spare %s instead", VdevGUIDString().c_str(), 1024 path, oldstr.c_str()); 1025 } 1026 1027 /* 1028 * Build a root vdev/leaf vdev configuration suitable for 1029 * zpool_vdev_attach. Only enough data for the kernel to find 1030 * the device (i.e. type and disk device node path) are needed. 1031 */ 1032 nvroot = NULL; 1033 newvd = NULL; 1034 1035 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1036 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1037 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1038 "configuration data.", poolname, oldstr.c_str()); 1039 if (nvroot != NULL) 1040 nvlist_free(nvroot); 1041 return (false); 1042 } 1043 if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1044 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1045 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1046 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1047 &newvd, 1) != 0) { 1048 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1049 "configuration data.", poolname, oldstr.c_str()); 1050 nvlist_free(newvd); 1051 nvlist_free(nvroot); 1052 return (true); 1053 } 1054 1055 /* Data was copied when added to the root vdev. */ 1056 nvlist_free(newvd); 1057 1058 retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1059 /*replace*/B_TRUE) == 0); 1060 if (retval) 1061 syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1062 poolname, oldstr.c_str(), path); 1063 else 1064 syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1065 poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1066 libzfs_error_description(g_zfsHandle)); 1067 nvlist_free(nvroot); 1068 1069 return (retval); 1070 } 1071 1072 /* Does the argument event refer to a checksum error? */ 1073 static bool 1074 IsChecksumEvent(const Event* const event) 1075 { 1076 return ("ereport.fs.zfs.checksum" == event->Value("type")); 1077 } 1078 1079 /* Does the argument event refer to an IO error? */ 1080 static bool 1081 IsIOEvent(const Event* const event) 1082 { 1083 return ("ereport.fs.zfs.io" == event->Value("type")); 1084 } 1085 1086 bool 1087 CaseFile::ShouldDegrade() const 1088 { 1089 return (std::count_if(m_events.begin(), m_events.end(), 1090 IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); 1091 } 1092 1093 bool 1094 CaseFile::ShouldFault() const 1095 { 1096 return (std::count_if(m_events.begin(), m_events.end(), 1097 IsIOEvent) > ZFS_DEGRADE_IO_COUNT); 1098 } 1099 1100 nvlist_t * 1101 CaseFile::CaseVdev(zpool_handle_t *zhp) const 1102 { 1103 return (VdevIterator(zhp).Find(VdevGUID())); 1104 } 1105