xref: /freebsd/cddl/usr.sbin/zfsd/case_file.cc (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 /*-
2  * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions, and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    substantially similar to the "NO WARRANTY" disclaimer below
13  *    ("Disclaimer") and any redistribution must be conditioned upon
14  *    including a substantially similar Disclaimer requirement for further
15  *    binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGES.
29  *
30  * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31  */
32 
33 /**
34  * \file case_file.cc
35  *
36  * We keep case files for any leaf vdev that is not in the optimal state.
37  * However, we only serialize to disk those events that need to be preserved
38  * across reboots.  For now, this is just a log of soft errors which we
39  * accumulate in order to mark a device as degraded.
40  */
41 #include <sys/cdefs.h>
42 #include <sys/byteorder.h>
43 #include <sys/time.h>
44 
45 #include <sys/fs/zfs.h>
46 
47 #include <dirent.h>
48 #include <fcntl.h>
49 #include <iomanip>
50 #include <fstream>
51 #include <functional>
52 #include <sstream>
53 #include <syslog.h>
54 #include <unistd.h>
55 
56 #include <libzutil.h>
57 #include <libzfs.h>
58 
59 #include <list>
60 #include <map>
61 #include <string>
62 
63 #include <devdctl/guid.h>
64 #include <devdctl/event.h>
65 #include <devdctl/event_factory.h>
66 #include <devdctl/exception.h>
67 #include <devdctl/consumer.h>
68 
69 #include "callout.h"
70 #include "vdev_iterator.h"
71 #include "zfsd_event.h"
72 #include "case_file.h"
73 #include "vdev.h"
74 #include "zfsd.h"
75 #include "zfsd_exception.h"
76 #include "zpool_list.h"
77 /*============================ Namespace Control =============================*/
78 using std::hex;
79 using std::ifstream;
80 using std::stringstream;
81 using std::setfill;
82 using std::setw;
83 
84 using DevdCtl::Event;
85 using DevdCtl::EventFactory;
86 using DevdCtl::EventList;
87 using DevdCtl::Guid;
88 using DevdCtl::ParseException;
89 
90 /*--------------------------------- CaseFile ---------------------------------*/
91 //- CaseFile Static Data -------------------------------------------------------
92 
93 CaseFileList  CaseFile::s_activeCases;
94 const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
95 
96 //- CaseFile Static Public Methods ---------------------------------------------
97 CaseFile *
98 CaseFile::Find(Guid poolGUID, Guid vdevGUID)
99 {
100 	for (CaseFileList::iterator curCase = s_activeCases.begin();
101 	     curCase != s_activeCases.end(); curCase++) {
102 
103 		if (((*curCase)->PoolGUID() != poolGUID
104 		  && Guid::InvalidGuid() != poolGUID)
105 		 || (*curCase)->VdevGUID() != vdevGUID)
106 			continue;
107 
108 		/*
109 		 * We only carry one active case per-vdev.
110 		 */
111 		return (*curCase);
112 	}
113 	return (NULL);
114 }
115 
116 void
117 CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases)
118 {
119 	for (CaseFileList::iterator curCase = s_activeCases.begin();
120 	    curCase != s_activeCases.end(); curCase++) {
121 		if (((*curCase)->PoolGUID() != poolGUID &&
122 		    Guid::InvalidGuid() != poolGUID) ||
123 		    (*curCase)->VdevGUID() != vdevGUID)
124 			continue;
125 
126 		/*
127 		 * We can have multiple cases for spare vdevs
128 		 */
129 		cases.push_back(*curCase);
130 		if (!(*curCase)->IsSpare()) {
131 			return;
132 		}
133 	}
134 }
135 
136 CaseFile *
137 CaseFile::Find(const string &physPath)
138 {
139 	CaseFile *result = NULL;
140 
141 	for (CaseFileList::iterator curCase = s_activeCases.begin();
142 	     curCase != s_activeCases.end(); curCase++) {
143 
144 		if ((*curCase)->PhysicalPath() != physPath)
145 			continue;
146 
147 		if (result != NULL) {
148 			syslog(LOG_WARNING, "Multiple casefiles found for "
149 			    "physical path %s.  "
150 			    "This is most likely a bug in zfsd",
151 			    physPath.c_str());
152 		}
153 		result = *curCase;
154 	}
155 	return (result);
156 }
157 
158 
159 void
160 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
161 {
162 	CaseFileList::iterator casefile;
163 	for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
164 		CaseFileList::iterator next = casefile;
165 		next++;
166 		if (poolGUID == (*casefile)->PoolGUID())
167 			(*casefile)->ReEvaluate(event);
168 		casefile = next;
169 	}
170 }
171 
172 CaseFile &
173 CaseFile::Create(Vdev &vdev)
174 {
175 	CaseFile *activeCase;
176 
177 	activeCase = Find(vdev.PoolGUID(), vdev.GUID());
178 	if (activeCase == NULL)
179 		activeCase = new CaseFile(vdev);
180 
181 	return (*activeCase);
182 }
183 
184 void
185 CaseFile::DeSerialize()
186 {
187 	struct dirent **caseFiles;
188 
189 	int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
190 			 DeSerializeSelector, /*compar*/NULL));
191 
192 	if (numCaseFiles == -1)
193 		return;
194 	if (numCaseFiles == 0) {
195 		free(caseFiles);
196 		return;
197 	}
198 
199 	for (int i = 0; i < numCaseFiles; i++) {
200 
201 		DeSerializeFile(caseFiles[i]->d_name);
202 		free(caseFiles[i]);
203 	}
204 	free(caseFiles);
205 }
206 
207 bool
208 CaseFile::Empty()
209 {
210 	return (s_activeCases.empty());
211 }
212 
213 void
214 CaseFile::LogAll()
215 {
216 	for (CaseFileList::iterator curCase = s_activeCases.begin();
217 	     curCase != s_activeCases.end(); curCase++)
218 		(*curCase)->Log();
219 }
220 
221 void
222 CaseFile::PurgeAll()
223 {
224 	/*
225 	 * Serialize casefiles before deleting them so that they can be reread
226 	 * and revalidated during BuildCaseFiles.
227 	 * CaseFiles remove themselves from this list on destruction.
228 	 */
229 	while (s_activeCases.size() != 0) {
230 		CaseFile *casefile = s_activeCases.front();
231 		casefile->Serialize();
232 		delete casefile;
233 	}
234 
235 }
236 
237 int
238 CaseFile::IsSpare()
239 {
240 	return (m_is_spare);
241 }
242 
243 //- CaseFile Public Methods ----------------------------------------------------
244 bool
245 CaseFile::RefreshVdevState()
246 {
247 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
248 	zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
249 	if (casePool == NULL)
250 		return (false);
251 
252 	Vdev vd(casePool, CaseVdev(casePool));
253 	if (vd.DoesNotExist())
254 		return (false);
255 
256 	m_vdevState    = vd.State();
257 	m_vdevPhysPath = vd.PhysicalPath();
258 	m_vdevName = vd.Name(casePool, false);
259 	return (true);
260 }
261 
262 bool
263 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
264 {
265 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
266 	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
267 	int flags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
268 
269 	if (pool == NULL || !RefreshVdevState()) {
270 		/*
271 		 * The pool or vdev for this case file is no longer
272 		 * part of the configuration.  This can happen
273 		 * if we process a device arrival notification
274 		 * before seeing the ZFS configuration change
275 		 * event.
276 		 */
277 		syslog(LOG_INFO,
278 		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
279 		       "Closing\n",
280 		       PoolGUIDString().c_str(),
281 		       VdevGUIDString().c_str());
282 		Close();
283 
284 		/*
285 		 * Since this event was not used to close this
286 		 * case, do not report it as consumed.
287 		 */
288 		return (/*consumed*/false);
289 	}
290 
291 	if (VdevState() > VDEV_STATE_CANT_OPEN) {
292 		/*
293 		 * For now, newly discovered devices only help for
294 		 * devices that are missing.  In the future, we might
295 		 * use a newly inserted spare to replace a degraded
296 		 * or faulted device.
297 		 */
298 		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
299 		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
300 		return (/*consumed*/false);
301 	}
302 
303 	if (vdev != NULL
304 	 && ( vdev->PoolGUID() == m_poolGUID
305 	   || vdev->PoolGUID() == Guid::InvalidGuid())
306 	 && vdev->GUID() == m_vdevGUID) {
307 
308 		if (IsSpare())
309 			flags |= ZFS_ONLINE_SPARE;
310 		if (zpool_vdev_online(pool, vdev->GUIDString().c_str(),
311 		    flags, &m_vdevState) != 0) {
312 			syslog(LOG_ERR,
313 			    "Failed to online vdev(%s/%s:%s): %s: %s\n",
314 			    zpool_get_name(pool), vdev->GUIDString().c_str(),
315 			    devPath.c_str(), libzfs_error_action(g_zfsHandle),
316 			    libzfs_error_description(g_zfsHandle));
317 			return (/*consumed*/false);
318 		}
319 
320 		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
321 		       zpool_get_name(pool), vdev->GUIDString().c_str(),
322 		       devPath.c_str(),
323 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
324 
325 		/*
326 		 * Check the vdev state post the online action to see
327 		 * if we can retire this case.
328 		 */
329 		CloseIfSolved();
330 
331 		return (/*consumed*/true);
332 	}
333 
334 	/*
335 	 * If the auto-replace policy is enabled, and we have physical
336 	 * path information, try a physical path replacement.
337 	 */
338 	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
339 		syslog(LOG_INFO,
340 		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
341 		       "Ignoring device insertion.\n",
342 		       PoolGUIDString().c_str(),
343 		       VdevGUIDString().c_str(),
344 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
345 		return (/*consumed*/false);
346 	}
347 
348 	if (PhysicalPath().empty()) {
349 		syslog(LOG_INFO,
350 		       "CaseFile(%s:%s:%s): No physical path information.  "
351 		       "Ignoring device insertion.\n",
352 		       PoolGUIDString().c_str(),
353 		       VdevGUIDString().c_str(),
354 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
355 		return (/*consumed*/false);
356 	}
357 
358 	if (physPath != PhysicalPath()) {
359 		syslog(LOG_INFO,
360 		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
361 		       "Ignoring device insertion.\n",
362 		       PoolGUIDString().c_str(),
363 		       VdevGUIDString().c_str(),
364 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
365 		return (/*consumed*/false);
366 	}
367 
368 	/* Write a label on the newly inserted disk. */
369 	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
370 		syslog(LOG_ERR,
371 		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
372 		       zpool_get_name(pool), VdevGUIDString().c_str(),
373 		       libzfs_error_action(g_zfsHandle),
374 		       libzfs_error_description(g_zfsHandle));
375 		return (/*consumed*/false);
376 	}
377 
378 	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
379 	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
380 	    devPath.c_str());
381 	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
382 }
383 
384 bool
385 CaseFile::ReEvaluate(const ZfsEvent &event)
386 {
387 	bool consumed(false);
388 
389 	if (event.Value("type") == "sysevent.fs.zfs.vdev_remove") {
390 		/*
391 		 * The Vdev we represent has been removed from the
392 		 * configuration.  This case is no longer of value.
393 		 */
394 		Close();
395 
396 		return (/*consumed*/true);
397 	} else if (event.Value("type") == "sysevent.fs.zfs.pool_destroy") {
398 		/* This Pool has been destroyed.  Discard the case */
399 		Close();
400 
401 		return (/*consumed*/true);
402 	} else if (event.Value("type") == "sysevent.fs.zfs.config_sync") {
403 		RefreshVdevState();
404 		if (VdevState() < VDEV_STATE_HEALTHY)
405 			consumed = ActivateSpare();
406 	}
407 
408 
409 	if (event.Value("class") == "resource.fs.zfs.removed") {
410 		bool spare_activated;
411 
412 		if (!RefreshVdevState()) {
413 			/*
414 			 * The pool or vdev for this case file is no longer
415 			 * part of the configuration.  This can happen
416 			 * if we process a device arrival notification
417 			 * before seeing the ZFS configuration change
418 			 * event.
419 			 */
420 			syslog(LOG_INFO,
421 			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
422 			       "unconfigured.  Closing\n",
423 			       PoolGUIDString().c_str(),
424 			       VdevGUIDString().c_str());
425 			/*
426 			 * Close the case now so we won't waste cycles in the
427 			 * system rescan
428 			 */
429 			Close();
430 
431 			/*
432 			 * Since this event was not used to close this
433 			 * case, do not report it as consumed.
434 			 */
435 			return (/*consumed*/false);
436 		}
437 
438 		/*
439 		 * Discard any tentative I/O error events for
440 		 * this case.  They were most likely caused by the
441 		 * hot-unplug of this device.
442 		 */
443 		PurgeTentativeEvents();
444 
445 		/* Try to activate spares if they are available */
446 		spare_activated = ActivateSpare();
447 
448 		/*
449 		 * Rescan the drives in the system to see if a recent
450 		 * drive arrival can be used to solve this case.
451 		 */
452 		ZfsDaemon::RequestSystemRescan();
453 
454 		/*
455 		 * Consume the event if we successfully activated a spare.
456 		 * Otherwise, leave it in the unconsumed events list so that the
457 		 * future addition of a spare to this pool might be able to
458 		 * close the case
459 		 */
460 		consumed = spare_activated;
461 	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
462 		RefreshVdevState();
463 		/*
464 		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
465 		 * activate a hotspare.  Otherwise, ignore the event
466 		 */
467 		if (VdevState() == VDEV_STATE_FAULTED ||
468 		    VdevState() == VDEV_STATE_DEGRADED ||
469 		    VdevState() == VDEV_STATE_CANT_OPEN)
470 			(void) ActivateSpare();
471 		consumed = true;
472 	}
473 	else if (event.Value("class") == "ereport.fs.zfs.io" ||
474 	         event.Value("class") == "ereport.fs.zfs.checksum" ||
475 		 event.Value("class") == "ereport.fs.zfs.delay") {
476 
477 		m_tentativeEvents.push_front(event.DeepCopy());
478 		RegisterCallout(event);
479 		consumed = true;
480 	}
481 
482 	bool closed(CloseIfSolved());
483 
484 	return (consumed || closed);
485 }
486 
487 /* Find a Vdev containing the vdev with the given GUID */
488 static nvlist_t*
489 find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid)
490 {
491 	nvlist_t **vdevChildren;
492 	int        error;
493 	unsigned   ch, numChildren;
494 
495 	error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
496 					   &vdevChildren, &numChildren);
497 
498 	if (error != 0 || numChildren == 0)
499 		return (NULL);
500 
501 	for (ch = 0; ch < numChildren; ch++) {
502 		nvlist *result;
503 		Vdev vdev(pool_config, vdevChildren[ch]);
504 
505 		if (vdev.GUID() == child_guid)
506 			return (config);
507 
508 		result = find_parent(pool_config, vdevChildren[ch], child_guid);
509 		if (result != NULL)
510 			return (result);
511 	}
512 
513 	return (NULL);
514 }
515 
516 bool
517 CaseFile::ActivateSpare() {
518 	nvlist_t	*config, *nvroot, *parent_config;
519 	nvlist_t       **spares;
520 	const char	*devPath, *poolname, *vdev_type;
521 	u_int		 nspares, i;
522 	int		 error;
523 
524 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
525 	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
526 	if (zhp == NULL) {
527 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
528 		       "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
529 		return (false);
530 	}
531 	poolname = zpool_get_name(zhp);
532 	config = zpool_get_config(zhp, NULL);
533 	if (config == NULL) {
534 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
535 		       "config for pool %s", poolname);
536 		return (false);
537 	}
538 	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
539 	if (error != 0){
540 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
541 		       "tree for pool %s", poolname);
542 		return (false);
543 	}
544 
545 	parent_config = find_parent(config, nvroot, m_vdevGUID);
546 	if (parent_config != NULL) {
547 		const char *parent_type;
548 
549 		/*
550 		 * Don't activate spares for members of a "replacing" vdev.
551 		 * They're already dealt with.  Sparing them will just drag out
552 		 * the resilver process.
553 		 */
554 		error = nvlist_lookup_string(parent_config,
555 		    ZPOOL_CONFIG_TYPE, &parent_type);
556 		if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0)
557 			return (false);
558 	}
559 
560 	nspares = 0;
561 	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
562 				   &nspares);
563 	if (nspares == 0) {
564 		/* The pool has no spares configured */
565 		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
566 		       "No spares available for pool %s", poolname);
567 		return (false);
568 	}
569 	for (i = 0; i < nspares; i++) {
570 		uint64_t    *nvlist_array;
571 		vdev_stat_t *vs;
572 		uint_t	     nstats;
573 
574 		if (nvlist_lookup_uint64_array(spares[i],
575 		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
576 			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
577 			       "find vdev stats for pool %s, spare %d",
578 			       poolname, i);
579 			return (false);
580 		}
581 		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
582 
583 		if ((vs->vs_aux != VDEV_AUX_SPARED)
584 		 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
585 			/* We found a usable spare */
586 			break;
587 		}
588 	}
589 
590 	if (i == nspares) {
591 		/* No available spares were found */
592 		return (false);
593 	}
594 
595 	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
596 	if (error != 0) {
597 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
598 		       "the path of pool %s, spare %d. Error %d",
599 		       poolname, i, error);
600 		return (false);
601 	}
602 
603 	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
604 	if (error != 0) {
605 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
606 		       "the vdev type of pool %s, spare %d. Error %d",
607 		       poolname, i, error);
608 		return (false);
609 	}
610 
611 	return (Replace(vdev_type, devPath, /*isspare*/true));
612 }
613 
614 /* Does the argument event refer to a checksum error? */
615 static bool
616 IsChecksumEvent(const Event* const event)
617 {
618 	return ("ereport.fs.zfs.checksum" == event->Value("type"));
619 }
620 
621 /* Does the argument event refer to an IO error? */
622 static bool
623 IsIOEvent(const Event* const event)
624 {
625 	return ("ereport.fs.zfs.io" == event->Value("type"));
626 }
627 
628 /* Does the argument event refer to an IO delay? */
629 static bool
630 IsDelayEvent(const Event* const event)
631 {
632 	return ("ereport.fs.zfs.delay" == event->Value("type"));
633 }
634 
635 void
636 CaseFile::RegisterCallout(const Event &event)
637 {
638 	timeval now, countdown, elapsed, timestamp, zero, remaining;
639 	/**
640 	 * The time ZFSD waits before promoting a tentative event
641 	 * into a permanent event.
642 	 */
643 	int sec = -1;
644 	if (IsChecksumEvent(&event))
645 		sec = CaseFile::GetVdevProp(VDEV_PROP_CHECKSUM_T);
646 	else if (IsIOEvent(&event))
647 		sec = CaseFile::GetVdevProp(VDEV_PROP_IO_T);
648 	else if (IsDelayEvent(&event))
649 		sec = CaseFile::GetVdevProp(VDEV_PROP_SLOW_IO_T);
650 
651 	if (sec == -1)
652 		sec = 60; /* default */
653 
654 	timeval removeGracePeriod = {
655 	    sec, /*sec*/
656 	    0 /*usec*/
657 	};
658 
659 	gettimeofday(&now, 0);
660 	timestamp = event.GetTimestamp();
661 	timersub(&now, &timestamp, &elapsed);
662 	timersub(&removeGracePeriod, &elapsed, &countdown);
663 	/*
664 	 * If countdown is <= zero, Reset the timer to the
665 	 * smallest positive time value instead
666 	 */
667 	timerclear(&zero);
668 	if (timercmp(&countdown, &zero, <=)) {
669 		timerclear(&countdown);
670 		countdown.tv_usec = 1;
671 	}
672 
673 	remaining = m_tentativeTimer.TimeRemaining();
674 
675 	if (!m_tentativeTimer.IsPending()
676 	 || timercmp(&countdown, &remaining, <))
677 		m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
678 }
679 
680 
681 bool
682 CaseFile::CloseIfSolved()
683 {
684 	if (m_events.empty()
685 	 && m_tentativeEvents.empty()) {
686 
687 		/*
688 		 * We currently do not track or take actions on
689 		 * devices in the degraded or faulted state.
690 		 * Once we have support for spare pools, we'll
691 		 * retain these cases so that any spares added in
692 		 * the future can be applied to them.
693 		 */
694 		switch (VdevState()) {
695 		case VDEV_STATE_HEALTHY:
696 			/* No need to keep cases for healthy vdevs */
697 			Close();
698 			return (true);
699 		case VDEV_STATE_REMOVED:
700 		case VDEV_STATE_CANT_OPEN:
701 			/*
702 			 * Keep open.  We may solve it with a newly inserted
703 			 * device.
704 			 */
705 		case VDEV_STATE_FAULTED:
706 		case VDEV_STATE_DEGRADED:
707 			/*
708 			 * Keep open.  We may solve it with the future
709 			 * addition of a spare to the pool
710 			 */
711 		case VDEV_STATE_UNKNOWN:
712 		case VDEV_STATE_CLOSED:
713 		case VDEV_STATE_OFFLINE:
714 			/*
715 			 * Keep open?  This may not be the correct behavior,
716 			 * but it's what we've always done
717 			 */
718 			;
719 		}
720 
721 		/*
722 		 * Re-serialize the case in order to remove any
723 		 * previous event data.
724 		 */
725 		Serialize();
726 	}
727 
728 	return (false);
729 }
730 
731 void
732 CaseFile::Log()
733 {
734 	syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
735 	       VdevGUIDString().c_str(), PhysicalPath().c_str());
736 	syslog(LOG_INFO, "\tVdev State = %s\n",
737 	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
738 	if (m_tentativeEvents.size() != 0) {
739 		syslog(LOG_INFO, "\t=== Tentative Events ===\n");
740 		for (EventList::iterator event(m_tentativeEvents.begin());
741 		     event != m_tentativeEvents.end(); event++)
742 			(*event)->Log(LOG_INFO);
743 	}
744 	if (m_events.size() != 0) {
745 		syslog(LOG_INFO, "\t=== Events ===\n");
746 		for (EventList::iterator event(m_events.begin());
747 		     event != m_events.end(); event++)
748 			(*event)->Log(LOG_INFO);
749 	}
750 }
751 
752 //- CaseFile Static Protected Methods ------------------------------------------
753 void
754 CaseFile::OnGracePeriodEnded(void *arg)
755 {
756 	CaseFile &casefile(*static_cast<CaseFile *>(arg));
757 
758 	casefile.OnGracePeriodEnded();
759 }
760 
761 int
762 CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
763 {
764 	uint64_t poolGUID;
765 	uint64_t vdevGUID;
766 
767 	if (dirEntry->d_type == DT_REG
768 	 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
769 		   &poolGUID, &vdevGUID) == 2)
770 		return (1);
771 	return (0);
772 }
773 
774 void
775 CaseFile::DeSerializeFile(const char *fileName)
776 {
777 	string	  fullName(s_caseFilePath + '/' + fileName);
778 	CaseFile *existingCaseFile(NULL);
779 	CaseFile *caseFile(NULL);
780 
781 	try {
782 		uint64_t poolGUID;
783 		uint64_t vdevGUID;
784 		nvlist_t *vdevConf;
785 
786 		if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
787 		       &poolGUID, &vdevGUID) != 2) {
788 			throw ZfsdException("CaseFile::DeSerialize: "
789 			    "Unintelligible CaseFile filename %s.\n", fileName);
790 		}
791 		existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
792 		if (existingCaseFile != NULL) {
793 			/*
794 			 * If the vdev is already degraded or faulted,
795 			 * there's no point in keeping the state around
796 			 * that we use to put a drive into the degraded
797 			 * state.  However, if the vdev is simply missing,
798 			 * preserve the case data in the hopes that it will
799 			 * return.
800 			 */
801 			caseFile = existingCaseFile;
802 			vdev_state curState(caseFile->VdevState());
803 			if (curState > VDEV_STATE_CANT_OPEN
804 			 && curState < VDEV_STATE_HEALTHY) {
805 				unlink(fileName);
806 				return;
807 			}
808 		} else {
809 			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
810 			if (zpl.empty()
811 			 || (vdevConf = VdevIterator(zpl.front())
812 						    .Find(vdevGUID)) == NULL) {
813 				/*
814 				 * Either the pool no longer exists
815 				 * or this vdev is no longer a member of
816 				 * the pool.
817 				 */
818 				unlink(fullName.c_str());
819 				return;
820 			}
821 
822 			/*
823 			 * Any vdev we find that does not have a case file
824 			 * must be in the healthy state and thus worthy of
825 			 * continued SERD data tracking.
826 			 */
827 			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
828 		}
829 
830 		ifstream caseStream(fullName.c_str());
831 		if (!caseStream)
832 			throw ZfsdException("CaseFile::DeSerialize: Unable to "
833 					    "read %s.\n", fileName);
834 
835 		caseFile->DeSerialize(caseStream);
836 	} catch (const ParseException &exp) {
837 
838 		exp.Log();
839 		if (caseFile != existingCaseFile)
840 			delete caseFile;
841 
842 		/*
843 		 * Since we can't parse the file, unlink it so we don't
844 		 * trip over it again.
845 		 */
846 		unlink(fileName);
847 	} catch (const ZfsdException &zfsException) {
848 
849 		zfsException.Log();
850 		if (caseFile != existingCaseFile)
851 			delete caseFile;
852 	}
853 }
854 
855 //- CaseFile Protected Methods -------------------------------------------------
856 CaseFile::CaseFile(const Vdev &vdev)
857  : m_poolGUID(vdev.PoolGUID()),
858    m_vdevGUID(vdev.GUID()),
859    m_vdevState(vdev.State()),
860    m_vdevPhysPath(vdev.PhysicalPath()),
861    m_is_spare(vdev.IsSpare())
862 {
863 	stringstream guidString;
864 
865 	guidString << m_vdevGUID;
866 	m_vdevGUIDString = guidString.str();
867 	guidString.str("");
868 	guidString << m_poolGUID;
869 	m_poolGUIDString = guidString.str();
870 
871 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
872 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
873 	m_vdevName = vdev.Name(zhp, false);
874 
875 	s_activeCases.push_back(this);
876 
877 	syslog(LOG_INFO, "Creating new CaseFile:\n");
878 	Log();
879 }
880 
881 CaseFile::~CaseFile()
882 {
883 	PurgeEvents();
884 	PurgeTentativeEvents();
885 	m_tentativeTimer.Stop();
886 	s_activeCases.remove(this);
887 }
888 
889 void
890 CaseFile::PurgeEvents()
891 {
892 	for (EventList::iterator event(m_events.begin());
893 	     event != m_events.end(); event++)
894 		delete *event;
895 
896 	m_events.clear();
897 }
898 
899 void
900 CaseFile::PurgeTentativeEvents()
901 {
902 	for (EventList::iterator event(m_tentativeEvents.begin());
903 	     event != m_tentativeEvents.end(); event++)
904 		delete *event;
905 
906 	m_tentativeEvents.clear();
907 }
908 
909 void
910 CaseFile::SerializeEvList(const EventList events, int fd,
911 		const char* prefix) const
912 {
913 	if (events.empty())
914 		return;
915 	for (EventList::const_iterator curEvent = events.begin();
916 	     curEvent != events.end(); curEvent++) {
917 		const string &eventString((*curEvent)->GetEventString());
918 
919 		// TODO: replace many write(2) calls with a single writev(2)
920 		if (prefix)
921 			write(fd, prefix, strlen(prefix));
922 		write(fd, eventString.c_str(), eventString.length());
923 	}
924 }
925 
926 void
927 CaseFile::Serialize()
928 {
929 	stringstream saveFile;
930 
931 	saveFile << setfill('0')
932 		 << s_caseFilePath << "/"
933 		 << "pool_" << PoolGUIDString()
934 		 << "_vdev_" << VdevGUIDString()
935 		 << ".case";
936 
937 	if (m_events.empty() && m_tentativeEvents.empty()) {
938 		unlink(saveFile.str().c_str());
939 		return;
940 	}
941 
942 	int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
943 	if (fd == -1) {
944 		syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
945 		       saveFile.str().c_str());
946 		return;
947 	}
948 	SerializeEvList(m_events, fd);
949 	SerializeEvList(m_tentativeEvents, fd, "tentative ");
950 	close(fd);
951 }
952 
953 /*
954  * XXX: This method assumes that events may not contain embedded newlines.  If
955  * ever events can contain embedded newlines, then CaseFile must switch
956  * serialization formats
957  */
958 void
959 CaseFile::DeSerialize(ifstream &caseStream)
960 {
961 	string	      evString;
962 	const EventFactory &factory(ZfsDaemon::Get().GetFactory());
963 
964 	caseStream >> std::noskipws >> std::ws;
965 	while (caseStream.good()) {
966 		/*
967 		 * Outline:
968 		 * read the beginning of a line and check it for
969 		 * "tentative".  If found, discard "tentative".
970 		 * Create a new event
971 		 * continue
972 		 */
973 		EventList* destEvents;
974 		const string tentFlag("tentative ");
975 		string line;
976 		std::stringbuf lineBuf;
977 
978 		caseStream.get(lineBuf);
979 		caseStream.ignore();  /*discard the newline character*/
980 		line = lineBuf.str();
981 		if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
982 			/* Discard "tentative" */
983 			line.erase(0, tentFlag.size());
984 			destEvents = &m_tentativeEvents;
985 		} else {
986 			destEvents = &m_events;
987 		}
988 		Event *event(Event::CreateEvent(factory, line));
989 		if (event != NULL) {
990 			destEvents->push_back(event);
991 			RegisterCallout(*event);
992 		}
993 	}
994 }
995 
996 void
997 CaseFile::Close()
998 {
999 	/*
1000 	 * This case is no longer relevant.  Clean up our
1001 	 * serialization file, and delete the case.
1002 	 */
1003 	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
1004 	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
1005 	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
1006 
1007 	/*
1008 	 * Serialization of a Case with no event data, clears the
1009 	 * Serialization data for that event.
1010 	 */
1011 	PurgeEvents();
1012 	Serialize();
1013 
1014 	delete this;
1015 }
1016 
1017 void
1018 CaseFile::OnGracePeriodEnded()
1019 {
1020 	bool should_fault, should_degrade;
1021 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1022 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1023 
1024 	m_events.splice(m_events.begin(), m_tentativeEvents);
1025 	should_fault = ShouldFault();
1026 	should_degrade = ShouldDegrade();
1027 
1028 	if (should_fault || should_degrade) {
1029 		if (zhp == NULL
1030 		 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
1031 			/*
1032 			 * Either the pool no longer exists
1033 			 * or this vdev is no longer a member of
1034 			 * the pool.
1035 			 */
1036 			Close();
1037 			return;
1038 		}
1039 
1040 	}
1041 
1042 	/* A fault condition has priority over a degrade condition */
1043 	if (ShouldFault()) {
1044 		/* Fault the vdev and close the case. */
1045 		if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
1046 				       VDEV_AUX_ERR_EXCEEDED) == 0) {
1047 			syslog(LOG_INFO, "Faulting vdev(%s/%s)",
1048 			       PoolGUIDString().c_str(),
1049 			       VdevGUIDString().c_str());
1050 			Close();
1051 			return;
1052 		}
1053 		else {
1054 			syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
1055 			       PoolGUIDString().c_str(),
1056 			       VdevGUIDString().c_str(),
1057 			       libzfs_error_action(g_zfsHandle),
1058 			       libzfs_error_description(g_zfsHandle));
1059 		}
1060 	}
1061 	else if (ShouldDegrade()) {
1062 		/* Degrade the vdev and close the case. */
1063 		if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
1064 				       VDEV_AUX_ERR_EXCEEDED) == 0) {
1065 			syslog(LOG_INFO, "Degrading vdev(%s/%s)",
1066 			       PoolGUIDString().c_str(),
1067 			       VdevGUIDString().c_str());
1068 			Close();
1069 			return;
1070 		}
1071 		else {
1072 			syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
1073 			       PoolGUIDString().c_str(),
1074 			       VdevGUIDString().c_str(),
1075 			       libzfs_error_action(g_zfsHandle),
1076 			       libzfs_error_description(g_zfsHandle));
1077 		}
1078 	}
1079 	Serialize();
1080 }
1081 
1082 Vdev
1083 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
1084 	Vdev vd(zhp, CaseVdev(zhp));
1085 	std::list<Vdev> children;
1086 	std::list<Vdev>::iterator children_it;
1087 
1088 	Vdev parent(vd.Parent());
1089 	Vdev replacing(NonexistentVdev);
1090 
1091 	/*
1092 	 * To determine whether we are being replaced by another spare that
1093 	 * is still working, then make sure that it is currently spared and
1094 	 * that the spare is either resilvering or healthy.  If any of these
1095 	 * conditions fail, then we are not being replaced by a spare.
1096 	 *
1097 	 * If the spare is healthy, then the case file should be closed very
1098 	 * soon after this check.
1099 	 */
1100 	if (parent.DoesNotExist()
1101 	 || parent.Name(zhp, /*verbose*/false) != "spare")
1102 		return (NonexistentVdev);
1103 
1104 	children = parent.Children();
1105 	children_it = children.begin();
1106 	for (;children_it != children.end(); children_it++) {
1107 		Vdev child = *children_it;
1108 
1109 		/* Skip our vdev. */
1110 		if (child.GUID() == VdevGUID())
1111 			continue;
1112 		/*
1113 		 * Accept the first child that doesn't match our GUID, or
1114 		 * any resilvering/healthy device if one exists.
1115 		 */
1116 		if (replacing.DoesNotExist() || child.IsResilvering()
1117 		 || child.State() == VDEV_STATE_HEALTHY)
1118 			replacing = child;
1119 	}
1120 
1121 	return (replacing);
1122 }
1123 
1124 bool
1125 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
1126 	nvlist_t *nvroot, *newvd;
1127 	const char *poolname;
1128 	string oldstr(VdevGUIDString());
1129 	bool retval = true;
1130 
1131 	/* Figure out what pool we're working on */
1132 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1133 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1134 	if (zhp == NULL) {
1135 		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
1136 		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
1137 		return (false);
1138 	}
1139 	poolname = zpool_get_name(zhp);
1140 	Vdev vd(zhp, CaseVdev(zhp));
1141 	Vdev replaced(BeingReplacedBy(zhp));
1142 
1143 	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
1144 		/* If we are already being replaced by a working spare, pass. */
1145 		if (replaced.IsResilvering()
1146 		 || replaced.State() == VDEV_STATE_HEALTHY) {
1147 			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
1148 			    "replaced", VdevGUIDString().c_str(), path);
1149 			return (/*consumed*/false);
1150 		}
1151 		/*
1152 		 * If we have already been replaced by a spare, but that spare
1153 		 * is broken, we must spare the spare, not the original device.
1154 		 */
1155 		oldstr = replaced.GUIDString();
1156 		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
1157 		    "broken spare %s instead", VdevGUIDString().c_str(),
1158 		    path, oldstr.c_str());
1159 	}
1160 
1161 	/*
1162 	 * Build a root vdev/leaf vdev configuration suitable for
1163 	 * zpool_vdev_attach. Only enough data for the kernel to find
1164 	 * the device (i.e. type and disk device node path) are needed.
1165 	 */
1166 	nvroot = NULL;
1167 	newvd = NULL;
1168 
1169 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
1170 	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1171 		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
1172 		    "configuration data.", poolname, oldstr.c_str());
1173 		if (nvroot != NULL)
1174 			nvlist_free(nvroot);
1175 		return (false);
1176 	}
1177 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
1178 	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
1179 	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
1180 	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1181 				    &newvd, 1) != 0) {
1182 		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
1183 		    "configuration data.", poolname, oldstr.c_str());
1184 		nvlist_free(newvd);
1185 		nvlist_free(nvroot);
1186 		return (true);
1187 	}
1188 
1189 	/* Data was copied when added to the root vdev. */
1190 	nvlist_free(newvd);
1191 
1192 	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
1193        /*replace*/B_TRUE, /*rebuild*/ B_FALSE) == 0);
1194 	if (retval)
1195 		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
1196 		    poolname, oldstr.c_str(), path);
1197 	else
1198 		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
1199 		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
1200 		    libzfs_error_description(g_zfsHandle));
1201 	nvlist_free(nvroot);
1202 
1203 	return (retval);
1204 }
1205 
1206 /* Lookup the vdev prop. Used for checksum, IO, or slow IO props */
1207 int
1208 CaseFile::GetVdevProp(vdev_prop_t vdev_prop) const
1209 {
1210 	char val[ZFS_MAXPROPLEN];
1211 	zprop_source_t srctype;
1212 	DevdCtl::Guid poolGUID = PoolGUID();
1213 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
1214 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1215 
1216 	char *prop_str = (char *) vdev_prop_to_name(vdev_prop);
1217 	if (zhp == NULL || zpool_get_vdev_prop(zhp, m_vdevName.c_str(),
1218 	    vdev_prop, prop_str, val, sizeof (val), &srctype, B_FALSE) != 0)
1219 		return (-1);
1220 
1221 	/* we'll get "-" from libzfs for a prop that is not set */
1222 	if (zfs_isnumber(val) == B_FALSE)
1223 		return (-1);
1224 
1225 	return (atoi(val));
1226 }
1227 
1228 bool
1229 CaseFile::ShouldDegrade() const
1230 {
1231 	int checksum_n = GetVdevProp(VDEV_PROP_CHECKSUM_N);
1232 	if (checksum_n == -1)
1233 		checksum_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
1234 	return (std::count_if(m_events.begin(), m_events.end(),
1235 			      IsChecksumEvent) > checksum_n);
1236 }
1237 
1238 bool
1239 CaseFile::ShouldFault() const
1240 {
1241 	bool should_fault_for_io, should_fault_for_delay;
1242 	int io_n = GetVdevProp(VDEV_PROP_IO_N);
1243 	int slow_io_n = GetVdevProp(VDEV_PROP_SLOW_IO_N);
1244 
1245 	if (io_n == -1)
1246 		io_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
1247 	if (slow_io_n == -1)
1248 		slow_io_n = DEFAULT_ZFS_FAULT_SLOW_IO_COUNT;
1249 
1250 	should_fault_for_io = std::count_if(m_events.begin(), m_events.end(),
1251 			      IsIOEvent) > io_n;
1252 	should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(),
1253 			      IsDelayEvent) > slow_io_n;
1254 
1255 	return (should_fault_for_io || should_fault_for_delay);
1256 }
1257 
1258 nvlist_t *
1259 CaseFile::CaseVdev(zpool_handle_t *zhp) const
1260 {
1261 	return (VdevIterator(zhp).Find(VdevGUID()));
1262 }
1263