1 /*- 2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions, and the following disclaimer, 10 * without modification. 11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12 * substantially similar to the "NO WARRANTY" disclaimer below 13 * ("Disclaimer") and any redistribution must be conditioned upon 14 * including a substantially similar Disclaimer requirement for further 15 * binary redistribution. 16 * 17 * NO WARRANTY 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGES. 29 * 30 * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31 */ 32 33 /** 34 * \file case_file.cc 35 * 36 * We keep case files for any leaf vdev that is not in the optimal state. 37 * However, we only serialize to disk those events that need to be preserved 38 * across reboots. For now, this is just a log of soft errors which we 39 * accumulate in order to mark a device as degraded. 40 */ 41 #include <sys/cdefs.h> 42 #include <sys/time.h> 43 44 #include <sys/fs/zfs.h> 45 46 #include <dirent.h> 47 #include <iomanip> 48 #include <fstream> 49 #include <functional> 50 #include <sstream> 51 #include <syslog.h> 52 #include <unistd.h> 53 54 #include <libzfs.h> 55 56 #include <list> 57 #include <map> 58 #include <string> 59 60 #include <devdctl/guid.h> 61 #include <devdctl/event.h> 62 #include <devdctl/event_factory.h> 63 #include <devdctl/exception.h> 64 #include <devdctl/consumer.h> 65 66 #include "callout.h" 67 #include "vdev_iterator.h" 68 #include "zfsd_event.h" 69 #include "case_file.h" 70 #include "vdev.h" 71 #include "zfsd.h" 72 #include "zfsd_exception.h" 73 #include "zpool_list.h" 74 75 __FBSDID("$FreeBSD$"); 76 77 /*============================ Namespace Control =============================*/ 78 using std::auto_ptr; 79 using std::hex; 80 using std::ifstream; 81 using std::stringstream; 82 using std::setfill; 83 using std::setw; 84 85 using DevdCtl::Event; 86 using DevdCtl::EventFactory; 87 using DevdCtl::EventList; 88 using DevdCtl::Guid; 89 using DevdCtl::ParseException; 90 91 /*--------------------------------- CaseFile ---------------------------------*/ 92 //- CaseFile Static Data ------------------------------------------------------- 93 94 CaseFileList CaseFile::s_activeCases; 95 const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 96 const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; 97 98 //- CaseFile Static Public Methods --------------------------------------------- 99 CaseFile * 100 CaseFile::Find(Guid poolGUID, Guid vdevGUID) 101 { 102 for (CaseFileList::iterator curCase = s_activeCases.begin(); 103 curCase != s_activeCases.end(); curCase++) { 104 105 if ((*curCase)->PoolGUID() != poolGUID 106 || (*curCase)->VdevGUID() != vdevGUID) 107 continue; 108 109 /* 110 * We only carry one active case per-vdev. 111 */ 112 return (*curCase); 113 } 114 return (NULL); 115 } 116 117 CaseFile * 118 CaseFile::Find(const string &physPath) 119 { 120 CaseFile *result = NULL; 121 122 for (CaseFileList::iterator curCase = s_activeCases.begin(); 123 curCase != s_activeCases.end(); curCase++) { 124 125 if ((*curCase)->PhysicalPath() != physPath) 126 continue; 127 128 if (result != NULL) { 129 syslog(LOG_WARNING, "Multiple casefiles found for " 130 "physical path %s. " 131 "This is most likely a bug in zfsd", 132 physPath.c_str()); 133 } 134 result = *curCase; 135 } 136 return (result); 137 } 138 139 140 void 141 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 142 { 143 CaseFileList::iterator casefile; 144 for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 145 CaseFileList::iterator next = casefile; 146 next++; 147 if (poolGUID == (*casefile)->PoolGUID()) 148 (*casefile)->ReEvaluate(event); 149 casefile = next; 150 } 151 } 152 153 CaseFile & 154 CaseFile::Create(Vdev &vdev) 155 { 156 CaseFile *activeCase; 157 158 activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 159 if (activeCase == NULL) 160 activeCase = new CaseFile(vdev); 161 162 return (*activeCase); 163 } 164 165 void 166 CaseFile::DeSerialize() 167 { 168 struct dirent **caseFiles; 169 170 int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 171 DeSerializeSelector, /*compar*/NULL)); 172 173 if (numCaseFiles == -1) 174 return; 175 if (numCaseFiles == 0) { 176 free(caseFiles); 177 return; 178 } 179 180 for (int i = 0; i < numCaseFiles; i++) { 181 182 DeSerializeFile(caseFiles[i]->d_name); 183 free(caseFiles[i]); 184 } 185 free(caseFiles); 186 } 187 188 void 189 CaseFile::LogAll() 190 { 191 for (CaseFileList::iterator curCase = s_activeCases.begin(); 192 curCase != s_activeCases.end(); curCase++) 193 (*curCase)->Log(); 194 } 195 196 void 197 CaseFile::PurgeAll() 198 { 199 /* 200 * Serialize casefiles before deleting them so that they can be reread 201 * and revalidated during BuildCaseFiles. 202 * CaseFiles remove themselves from this list on destruction. 203 */ 204 while (s_activeCases.size() != 0) { 205 CaseFile *casefile = s_activeCases.front(); 206 casefile->Serialize(); 207 delete casefile; 208 } 209 210 } 211 212 //- CaseFile Public Methods ---------------------------------------------------- 213 bool 214 CaseFile::RefreshVdevState() 215 { 216 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 217 zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); 218 if (casePool == NULL) 219 return (false); 220 221 Vdev vd(casePool, CaseVdev(casePool)); 222 if (vd.DoesNotExist()) 223 return (false); 224 225 m_vdevState = vd.State(); 226 m_vdevPhysPath = vd.PhysicalPath(); 227 return (true); 228 } 229 230 bool 231 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 232 { 233 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 234 zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 235 236 if (pool == NULL || !RefreshVdevState()) { 237 /* 238 * The pool or vdev for this case file is no longer 239 * part of the configuration. This can happen 240 * if we process a device arrival notification 241 * before seeing the ZFS configuration change 242 * event. 243 */ 244 syslog(LOG_INFO, 245 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 246 "Closing\n", 247 PoolGUIDString().c_str(), 248 VdevGUIDString().c_str()); 249 Close(); 250 251 /* 252 * Since this event was not used to close this 253 * case, do not report it as consumed. 254 */ 255 return (/*consumed*/false); 256 } 257 258 if (VdevState() > VDEV_STATE_CANT_OPEN) { 259 /* 260 * For now, newly discovered devices only help for 261 * devices that are missing. In the future, we might 262 * use a newly inserted spare to replace a degraded 263 * or faulted device. 264 */ 265 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 266 PoolGUIDString().c_str(), VdevGUIDString().c_str()); 267 return (/*consumed*/false); 268 } 269 270 if (vdev != NULL 271 && vdev->PoolGUID() == m_poolGUID 272 && vdev->GUID() == m_vdevGUID) { 273 274 zpool_vdev_online(pool, vdev->GUIDString().c_str(), 275 ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, 276 &m_vdevState); 277 syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 278 zpool_get_name(pool), vdev->GUIDString().c_str(), 279 devPath.c_str(), 280 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 281 282 /* 283 * Check the vdev state post the online action to see 284 * if we can retire this case. 285 */ 286 CloseIfSolved(); 287 288 return (/*consumed*/true); 289 } 290 291 /* 292 * If the auto-replace policy is enabled, and we have physical 293 * path information, try a physical path replacement. 294 */ 295 if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 296 syslog(LOG_INFO, 297 "CaseFile(%s:%s:%s): AutoReplace not set. " 298 "Ignoring device insertion.\n", 299 PoolGUIDString().c_str(), 300 VdevGUIDString().c_str(), 301 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 302 return (/*consumed*/false); 303 } 304 305 if (PhysicalPath().empty()) { 306 syslog(LOG_INFO, 307 "CaseFile(%s:%s:%s): No physical path information. " 308 "Ignoring device insertion.\n", 309 PoolGUIDString().c_str(), 310 VdevGUIDString().c_str(), 311 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 312 return (/*consumed*/false); 313 } 314 315 if (physPath != PhysicalPath()) { 316 syslog(LOG_INFO, 317 "CaseFile(%s:%s:%s): Physical path mismatch. " 318 "Ignoring device insertion.\n", 319 PoolGUIDString().c_str(), 320 VdevGUIDString().c_str(), 321 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 322 return (/*consumed*/false); 323 } 324 325 /* Write a label on the newly inserted disk. */ 326 if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { 327 syslog(LOG_ERR, 328 "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 329 zpool_get_name(pool), VdevGUIDString().c_str(), 330 libzfs_error_action(g_zfsHandle), 331 libzfs_error_description(g_zfsHandle)); 332 return (/*consumed*/false); 333 } 334 335 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 336 PoolGUIDString().c_str(), VdevGUIDString().c_str(), 337 devPath.c_str()); 338 return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 339 } 340 341 bool 342 CaseFile::ReEvaluate(const ZfsEvent &event) 343 { 344 bool consumed(false); 345 346 if (event.Value("type") == "misc.fs.zfs.vdev_remove") { 347 /* 348 * The Vdev we represent has been removed from the 349 * configuration. This case is no longer of value. 350 */ 351 Close(); 352 353 return (/*consumed*/true); 354 } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { 355 /* This Pool has been destroyed. Discard the case */ 356 Close(); 357 358 return (/*consumed*/true); 359 } else if (event.Value("type") == "misc.fs.zfs.config_sync") { 360 RefreshVdevState(); 361 if (VdevState() < VDEV_STATE_HEALTHY) 362 consumed = ActivateSpare(); 363 } 364 365 366 if (event.Value("class") == "resource.fs.zfs.removed") { 367 bool spare_activated; 368 369 if (!RefreshVdevState()) { 370 /* 371 * The pool or vdev for this case file is no longer 372 * part of the configuration. This can happen 373 * if we process a device arrival notification 374 * before seeing the ZFS configuration change 375 * event. 376 */ 377 syslog(LOG_INFO, 378 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 379 "unconfigured. Closing\n", 380 PoolGUIDString().c_str(), 381 VdevGUIDString().c_str()); 382 /* 383 * Close the case now so we won't waste cycles in the 384 * system rescan 385 */ 386 Close(); 387 388 /* 389 * Since this event was not used to close this 390 * case, do not report it as consumed. 391 */ 392 return (/*consumed*/false); 393 } 394 395 /* 396 * Discard any tentative I/O error events for 397 * this case. They were most likely caused by the 398 * hot-unplug of this device. 399 */ 400 PurgeTentativeEvents(); 401 402 /* Try to activate spares if they are available */ 403 spare_activated = ActivateSpare(); 404 405 /* 406 * Rescan the drives in the system to see if a recent 407 * drive arrival can be used to solve this case. 408 */ 409 ZfsDaemon::RequestSystemRescan(); 410 411 /* 412 * Consume the event if we successfully activated a spare. 413 * Otherwise, leave it in the unconsumed events list so that the 414 * future addition of a spare to this pool might be able to 415 * close the case 416 */ 417 consumed = spare_activated; 418 } else if (event.Value("class") == "resource.fs.zfs.statechange") { 419 RefreshVdevState(); 420 /* 421 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 422 * activate a hotspare. Otherwise, ignore the event 423 */ 424 if (VdevState() == VDEV_STATE_FAULTED || 425 VdevState() == VDEV_STATE_DEGRADED || 426 VdevState() == VDEV_STATE_CANT_OPEN) 427 (void) ActivateSpare(); 428 consumed = true; 429 } 430 else if (event.Value("class") == "ereport.fs.zfs.io" || 431 event.Value("class") == "ereport.fs.zfs.checksum") { 432 433 m_tentativeEvents.push_front(event.DeepCopy()); 434 RegisterCallout(event); 435 consumed = true; 436 } 437 438 bool closed(CloseIfSolved()); 439 440 return (consumed || closed); 441 } 442 443 444 bool 445 CaseFile::ActivateSpare() { 446 nvlist_t *config, *nvroot; 447 nvlist_t **spares; 448 char *devPath, *vdev_type; 449 const char *poolname; 450 u_int nspares, i; 451 int error; 452 453 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 454 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 455 if (zhp == NULL) { 456 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 457 "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); 458 return (false); 459 } 460 poolname = zpool_get_name(zhp); 461 config = zpool_get_config(zhp, NULL); 462 if (config == NULL) { 463 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 464 "config for pool %s", poolname); 465 return (false); 466 } 467 error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 468 if (error != 0){ 469 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 470 "tree for pool %s", poolname); 471 return (false); 472 } 473 nspares = 0; 474 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 475 &nspares); 476 if (nspares == 0) { 477 /* The pool has no spares configured */ 478 syslog(LOG_INFO, "CaseFile::ActivateSpare: " 479 "No spares available for pool %s", poolname); 480 return (false); 481 } 482 for (i = 0; i < nspares; i++) { 483 uint64_t *nvlist_array; 484 vdev_stat_t *vs; 485 uint_t nstats; 486 487 if (nvlist_lookup_uint64_array(spares[i], 488 ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 489 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 490 "find vdev stats for pool %s, spare %d", 491 poolname, i); 492 return (false); 493 } 494 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 495 496 if ((vs->vs_aux != VDEV_AUX_SPARED) 497 && (vs->vs_state == VDEV_STATE_HEALTHY)) { 498 /* We found a usable spare */ 499 break; 500 } 501 } 502 503 if (i == nspares) { 504 /* No available spares were found */ 505 return (false); 506 } 507 508 error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); 509 if (error != 0) { 510 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 511 "the path of pool %s, spare %d. Error %d", 512 poolname, i, error); 513 return (false); 514 } 515 516 error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); 517 if (error != 0) { 518 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 519 "the vdev type of pool %s, spare %d. Error %d", 520 poolname, i, error); 521 return (false); 522 } 523 524 return (Replace(vdev_type, devPath, /*isspare*/true)); 525 } 526 527 void 528 CaseFile::RegisterCallout(const Event &event) 529 { 530 timeval now, countdown, elapsed, timestamp, zero, remaining; 531 532 gettimeofday(&now, 0); 533 timestamp = event.GetTimestamp(); 534 timersub(&now, ×tamp, &elapsed); 535 timersub(&s_removeGracePeriod, &elapsed, &countdown); 536 /* 537 * If countdown is <= zero, Reset the timer to the 538 * smallest positive time value instead 539 */ 540 timerclear(&zero); 541 if (timercmp(&countdown, &zero, <=)) { 542 timerclear(&countdown); 543 countdown.tv_usec = 1; 544 } 545 546 remaining = m_tentativeTimer.TimeRemaining(); 547 548 if (!m_tentativeTimer.IsPending() 549 || timercmp(&countdown, &remaining, <)) 550 m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 551 } 552 553 554 bool 555 CaseFile::CloseIfSolved() 556 { 557 if (m_events.empty() 558 && m_tentativeEvents.empty()) { 559 560 /* 561 * We currently do not track or take actions on 562 * devices in the degraded or faulted state. 563 * Once we have support for spare pools, we'll 564 * retain these cases so that any spares added in 565 * the future can be applied to them. 566 */ 567 switch (VdevState()) { 568 case VDEV_STATE_HEALTHY: 569 /* No need to keep cases for healthy vdevs */ 570 Close(); 571 return (true); 572 case VDEV_STATE_REMOVED: 573 case VDEV_STATE_CANT_OPEN: 574 /* 575 * Keep open. We may solve it with a newly inserted 576 * device. 577 */ 578 case VDEV_STATE_FAULTED: 579 case VDEV_STATE_DEGRADED: 580 /* 581 * Keep open. We may solve it with the future 582 * addition of a spare to the pool 583 */ 584 case VDEV_STATE_UNKNOWN: 585 case VDEV_STATE_CLOSED: 586 case VDEV_STATE_OFFLINE: 587 /* 588 * Keep open? This may not be the correct behavior, 589 * but it's what we've always done 590 */ 591 ; 592 } 593 594 /* 595 * Re-serialize the case in order to remove any 596 * previous event data. 597 */ 598 Serialize(); 599 } 600 601 return (false); 602 } 603 604 void 605 CaseFile::Log() 606 { 607 syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 608 VdevGUIDString().c_str(), PhysicalPath().c_str()); 609 syslog(LOG_INFO, "\tVdev State = %s\n", 610 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 611 if (m_tentativeEvents.size() != 0) { 612 syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 613 for (EventList::iterator event(m_tentativeEvents.begin()); 614 event != m_tentativeEvents.end(); event++) 615 (*event)->Log(LOG_INFO); 616 } 617 if (m_events.size() != 0) { 618 syslog(LOG_INFO, "\t=== Events ===\n"); 619 for (EventList::iterator event(m_events.begin()); 620 event != m_events.end(); event++) 621 (*event)->Log(LOG_INFO); 622 } 623 } 624 625 //- CaseFile Static Protected Methods ------------------------------------------ 626 void 627 CaseFile::OnGracePeriodEnded(void *arg) 628 { 629 CaseFile &casefile(*static_cast<CaseFile *>(arg)); 630 631 casefile.OnGracePeriodEnded(); 632 } 633 634 int 635 CaseFile::DeSerializeSelector(const struct dirent *dirEntry) 636 { 637 uint64_t poolGUID; 638 uint64_t vdevGUID; 639 640 if (dirEntry->d_type == DT_REG 641 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 642 &poolGUID, &vdevGUID) == 2) 643 return (1); 644 return (0); 645 } 646 647 void 648 CaseFile::DeSerializeFile(const char *fileName) 649 { 650 string fullName(s_caseFilePath + '/' + fileName); 651 CaseFile *existingCaseFile(NULL); 652 CaseFile *caseFile(NULL); 653 654 try { 655 uint64_t poolGUID; 656 uint64_t vdevGUID; 657 nvlist_t *vdevConf; 658 659 if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 660 &poolGUID, &vdevGUID) != 2) { 661 throw ZfsdException("CaseFile::DeSerialize: " 662 "Unintelligible CaseFile filename %s.\n", fileName); 663 } 664 existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 665 if (existingCaseFile != NULL) { 666 /* 667 * If the vdev is already degraded or faulted, 668 * there's no point in keeping the state around 669 * that we use to put a drive into the degraded 670 * state. However, if the vdev is simply missing, 671 * preserve the case data in the hopes that it will 672 * return. 673 */ 674 caseFile = existingCaseFile; 675 vdev_state curState(caseFile->VdevState()); 676 if (curState > VDEV_STATE_CANT_OPEN 677 && curState < VDEV_STATE_HEALTHY) { 678 unlink(fileName); 679 return; 680 } 681 } else { 682 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 683 if (zpl.empty() 684 || (vdevConf = VdevIterator(zpl.front()) 685 .Find(vdevGUID)) == NULL) { 686 /* 687 * Either the pool no longer exists 688 * or this vdev is no longer a member of 689 * the pool. 690 */ 691 unlink(fullName.c_str()); 692 return; 693 } 694 695 /* 696 * Any vdev we find that does not have a case file 697 * must be in the healthy state and thus worthy of 698 * continued SERD data tracking. 699 */ 700 caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 701 } 702 703 ifstream caseStream(fullName.c_str()); 704 if (!caseStream) 705 throw ZfsdException("CaseFile::DeSerialize: Unable to " 706 "read %s.\n", fileName); 707 708 caseFile->DeSerialize(caseStream); 709 } catch (const ParseException &exp) { 710 711 exp.Log(); 712 if (caseFile != existingCaseFile) 713 delete caseFile; 714 715 /* 716 * Since we can't parse the file, unlink it so we don't 717 * trip over it again. 718 */ 719 unlink(fileName); 720 } catch (const ZfsdException &zfsException) { 721 722 zfsException.Log(); 723 if (caseFile != existingCaseFile) 724 delete caseFile; 725 } 726 } 727 728 //- CaseFile Protected Methods ------------------------------------------------- 729 CaseFile::CaseFile(const Vdev &vdev) 730 : m_poolGUID(vdev.PoolGUID()), 731 m_vdevGUID(vdev.GUID()), 732 m_vdevState(vdev.State()), 733 m_vdevPhysPath(vdev.PhysicalPath()) 734 { 735 stringstream guidString; 736 737 guidString << m_vdevGUID; 738 m_vdevGUIDString = guidString.str(); 739 guidString.str(""); 740 guidString << m_poolGUID; 741 m_poolGUIDString = guidString.str(); 742 743 s_activeCases.push_back(this); 744 745 syslog(LOG_INFO, "Creating new CaseFile:\n"); 746 Log(); 747 } 748 749 CaseFile::~CaseFile() 750 { 751 PurgeEvents(); 752 PurgeTentativeEvents(); 753 m_tentativeTimer.Stop(); 754 s_activeCases.remove(this); 755 } 756 757 void 758 CaseFile::PurgeEvents() 759 { 760 for (EventList::iterator event(m_events.begin()); 761 event != m_events.end(); event++) 762 delete *event; 763 764 m_events.clear(); 765 } 766 767 void 768 CaseFile::PurgeTentativeEvents() 769 { 770 for (EventList::iterator event(m_tentativeEvents.begin()); 771 event != m_tentativeEvents.end(); event++) 772 delete *event; 773 774 m_tentativeEvents.clear(); 775 } 776 777 void 778 CaseFile::SerializeEvList(const EventList events, int fd, 779 const char* prefix) const 780 { 781 if (events.empty()) 782 return; 783 for (EventList::const_iterator curEvent = events.begin(); 784 curEvent != events.end(); curEvent++) { 785 const string &eventString((*curEvent)->GetEventString()); 786 787 // TODO: replace many write(2) calls with a single writev(2) 788 if (prefix) 789 write(fd, prefix, strlen(prefix)); 790 write(fd, eventString.c_str(), eventString.length()); 791 } 792 } 793 794 void 795 CaseFile::Serialize() 796 { 797 stringstream saveFile; 798 799 saveFile << setfill('0') 800 << s_caseFilePath << "/" 801 << "pool_" << PoolGUIDString() 802 << "_vdev_" << VdevGUIDString() 803 << ".case"; 804 805 if (m_events.empty() && m_tentativeEvents.empty()) { 806 unlink(saveFile.str().c_str()); 807 return; 808 } 809 810 int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 811 if (fd == -1) { 812 syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 813 saveFile.str().c_str()); 814 return; 815 } 816 SerializeEvList(m_events, fd); 817 SerializeEvList(m_tentativeEvents, fd, "tentative "); 818 close(fd); 819 } 820 821 /* 822 * XXX: This method assumes that events may not contain embedded newlines. If 823 * ever events can contain embedded newlines, then CaseFile must switch 824 * serialization formats 825 */ 826 void 827 CaseFile::DeSerialize(ifstream &caseStream) 828 { 829 string evString; 830 const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 831 832 caseStream >> std::noskipws >> std::ws; 833 while (caseStream.good()) { 834 /* 835 * Outline: 836 * read the beginning of a line and check it for 837 * "tentative". If found, discard "tentative". 838 * Create a new event 839 * continue 840 */ 841 EventList* destEvents; 842 const string tentFlag("tentative "); 843 string line; 844 std::stringbuf lineBuf; 845 846 caseStream.get(lineBuf); 847 caseStream.ignore(); /*discard the newline character*/ 848 line = lineBuf.str(); 849 if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 850 /* Discard "tentative" */ 851 line.erase(0, tentFlag.size()); 852 destEvents = &m_tentativeEvents; 853 } else { 854 destEvents = &m_events; 855 } 856 Event *event(Event::CreateEvent(factory, line)); 857 if (event != NULL) { 858 destEvents->push_back(event); 859 RegisterCallout(*event); 860 } 861 } 862 } 863 864 void 865 CaseFile::Close() 866 { 867 /* 868 * This case is no longer relevant. Clean up our 869 * serialization file, and delete the case. 870 */ 871 syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 872 PoolGUIDString().c_str(), VdevGUIDString().c_str(), 873 zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 874 875 /* 876 * Serialization of a Case with no event data, clears the 877 * Serialization data for that event. 878 */ 879 PurgeEvents(); 880 Serialize(); 881 882 delete this; 883 } 884 885 void 886 CaseFile::OnGracePeriodEnded() 887 { 888 bool should_fault, should_degrade; 889 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 890 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 891 892 m_events.splice(m_events.begin(), m_tentativeEvents); 893 should_fault = ShouldFault(); 894 should_degrade = ShouldDegrade(); 895 896 if (should_fault || should_degrade) { 897 if (zhp == NULL 898 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 899 /* 900 * Either the pool no longer exists 901 * or this vdev is no longer a member of 902 * the pool. 903 */ 904 Close(); 905 return; 906 } 907 908 } 909 910 /* A fault condition has priority over a degrade condition */ 911 if (ShouldFault()) { 912 /* Fault the vdev and close the case. */ 913 if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 914 VDEV_AUX_ERR_EXCEEDED) == 0) { 915 syslog(LOG_INFO, "Faulting vdev(%s/%s)", 916 PoolGUIDString().c_str(), 917 VdevGUIDString().c_str()); 918 Close(); 919 return; 920 } 921 else { 922 syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 923 PoolGUIDString().c_str(), 924 VdevGUIDString().c_str(), 925 libzfs_error_action(g_zfsHandle), 926 libzfs_error_description(g_zfsHandle)); 927 } 928 } 929 else if (ShouldDegrade()) { 930 /* Degrade the vdev and close the case. */ 931 if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 932 VDEV_AUX_ERR_EXCEEDED) == 0) { 933 syslog(LOG_INFO, "Degrading vdev(%s/%s)", 934 PoolGUIDString().c_str(), 935 VdevGUIDString().c_str()); 936 Close(); 937 return; 938 } 939 else { 940 syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 941 PoolGUIDString().c_str(), 942 VdevGUIDString().c_str(), 943 libzfs_error_action(g_zfsHandle), 944 libzfs_error_description(g_zfsHandle)); 945 } 946 } 947 Serialize(); 948 } 949 950 Vdev 951 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 952 Vdev vd(zhp, CaseVdev(zhp)); 953 std::list<Vdev> children; 954 std::list<Vdev>::iterator children_it; 955 956 Vdev parent(vd.Parent()); 957 Vdev replacing(NonexistentVdev); 958 959 /* 960 * To determine whether we are being replaced by another spare that 961 * is still working, then make sure that it is currently spared and 962 * that the spare is either resilvering or healthy. If any of these 963 * conditions fail, then we are not being replaced by a spare. 964 * 965 * If the spare is healthy, then the case file should be closed very 966 * soon after this check. 967 */ 968 if (parent.DoesNotExist() 969 || parent.Name(zhp, /*verbose*/false) != "spare") 970 return (NonexistentVdev); 971 972 children = parent.Children(); 973 children_it = children.begin(); 974 for (;children_it != children.end(); children_it++) { 975 Vdev child = *children_it; 976 977 /* Skip our vdev. */ 978 if (child.GUID() == VdevGUID()) 979 continue; 980 /* 981 * Accept the first child that doesn't match our GUID, or 982 * any resilvering/healthy device if one exists. 983 */ 984 if (replacing.DoesNotExist() || child.IsResilvering() 985 || child.State() == VDEV_STATE_HEALTHY) 986 replacing = child; 987 } 988 989 return (replacing); 990 } 991 992 bool 993 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 994 nvlist_t *nvroot, *newvd; 995 const char *poolname; 996 string oldstr(VdevGUIDString()); 997 bool retval = true; 998 999 /* Figure out what pool we're working on */ 1000 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1001 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1002 if (zhp == NULL) { 1003 syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1004 "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); 1005 return (false); 1006 } 1007 poolname = zpool_get_name(zhp); 1008 Vdev vd(zhp, CaseVdev(zhp)); 1009 Vdev replaced(BeingReplacedBy(zhp)); 1010 1011 if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1012 /* If we are already being replaced by a working spare, pass. */ 1013 if (replaced.IsResilvering() 1014 || replaced.State() == VDEV_STATE_HEALTHY) { 1015 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1016 "replaced", VdevGUIDString().c_str(), path); 1017 return (/*consumed*/false); 1018 } 1019 /* 1020 * If we have already been replaced by a spare, but that spare 1021 * is broken, we must spare the spare, not the original device. 1022 */ 1023 oldstr = replaced.GUIDString(); 1024 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1025 "broken spare %s instead", VdevGUIDString().c_str(), 1026 path, oldstr.c_str()); 1027 } 1028 1029 /* 1030 * Build a root vdev/leaf vdev configuration suitable for 1031 * zpool_vdev_attach. Only enough data for the kernel to find 1032 * the device (i.e. type and disk device node path) are needed. 1033 */ 1034 nvroot = NULL; 1035 newvd = NULL; 1036 1037 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1038 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1039 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1040 "configuration data.", poolname, oldstr.c_str()); 1041 if (nvroot != NULL) 1042 nvlist_free(nvroot); 1043 return (false); 1044 } 1045 if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1046 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1047 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1048 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1049 &newvd, 1) != 0) { 1050 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1051 "configuration data.", poolname, oldstr.c_str()); 1052 nvlist_free(newvd); 1053 nvlist_free(nvroot); 1054 return (true); 1055 } 1056 1057 /* Data was copied when added to the root vdev. */ 1058 nvlist_free(newvd); 1059 1060 retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1061 /*replace*/B_TRUE) == 0); 1062 if (retval) 1063 syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1064 poolname, oldstr.c_str(), path); 1065 else 1066 syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1067 poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1068 libzfs_error_description(g_zfsHandle)); 1069 nvlist_free(nvroot); 1070 1071 return (retval); 1072 } 1073 1074 /* Does the argument event refer to a checksum error? */ 1075 static bool 1076 IsChecksumEvent(const Event* const event) 1077 { 1078 return ("ereport.fs.zfs.checksum" == event->Value("type")); 1079 } 1080 1081 /* Does the argument event refer to an IO error? */ 1082 static bool 1083 IsIOEvent(const Event* const event) 1084 { 1085 return ("ereport.fs.zfs.io" == event->Value("type")); 1086 } 1087 1088 bool 1089 CaseFile::ShouldDegrade() const 1090 { 1091 return (std::count_if(m_events.begin(), m_events.end(), 1092 IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); 1093 } 1094 1095 bool 1096 CaseFile::ShouldFault() const 1097 { 1098 return (std::count_if(m_events.begin(), m_events.end(), 1099 IsIOEvent) > ZFS_DEGRADE_IO_COUNT); 1100 } 1101 1102 nvlist_t * 1103 CaseFile::CaseVdev(zpool_handle_t *zhp) const 1104 { 1105 return (VdevIterator(zhp).Find(VdevGUID())); 1106 } 1107