xref: /freebsd/cddl/usr.sbin/zfsd/case_file.cc (revision 273c26a3c3bea87a241d6879abd4f991db180bf0)
/*-
 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 */

/**
 * \file case_file.cc
 *
 * We keep case files for any leaf vdev that is not in the optimal state.
 * However, we only serialize to disk those events that need to be preserved
 * across reboots.  For now, this is just a log of soft errors which we
 * accumulate in order to mark a device as degraded.
 */
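
/*
 * On-disk format (see Serialize() and SerializeEvList() below): each case
 * is stored as a text file named
 *
 *	/var/db/zfsd/cases/pool_<pool GUID>_vdev_<vdev GUID>.case
 *
 * containing one serialized event string per line.  Events that are still
 * tentative are prefixed with the literal string "tentative ".
 */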
#include <sys/cdefs.h>
#include <sys/time.h>

#include <sys/fs/zfs.h>

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <iomanip>
#include <fstream>
#include <functional>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>

#include <libzfs.h>

#include <algorithm>
#include <list>
#include <map>
#include <string>

#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>

#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"

__FBSDID("$FreeBSD$");

/*============================ Namespace Control =============================*/
using std::hex;
using std::ifstream;
using std::stringstream;
using std::setfill;
using std::setw;

using DevdCtl::Event;
using DevdCtl::EventFactory;
using DevdCtl::EventList;
using DevdCtl::Guid;
using DevdCtl::ParseException;

/*--------------------------------- CaseFile ---------------------------------*/
//- CaseFile Static Data -------------------------------------------------------

CaseFileList  CaseFile::s_activeCases;
const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
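/*
 * How long I/O error events remain "tentative" before OnGracePeriodEnded()
 * promotes them and acts on the case.  A device removal event within this
 * window causes the tentative events to be discarded instead.
 */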
const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};

//- CaseFile Static Public Methods ---------------------------------------------
CaseFile *
CaseFile::Find(Guid poolGUID, Guid vdevGUID)
{
	for (CaseFileList::iterator curCase = s_activeCases.begin();
	     curCase != s_activeCases.end(); curCase++) {

		if ((*curCase)->PoolGUID() != poolGUID
		 || (*curCase)->VdevGUID() != vdevGUID)
			continue;

		/*
		 * We only carry one active case per vdev.
		 */
		return (*curCase);
	}
	return (NULL);
}

CaseFile *
CaseFile::Find(const string &physPath)
{
	CaseFile *result = NULL;

	for (CaseFileList::iterator curCase = s_activeCases.begin();
	     curCase != s_activeCases.end(); curCase++) {

		if ((*curCase)->PhysicalPath() != physPath)
			continue;

		if (result != NULL) {
			syslog(LOG_WARNING, "Multiple casefiles found for "
			    "physical path %s.  "
			    "This is most likely a bug in zfsd",
			    physPath.c_str());
		}
		result = *curCase;
	}
	return (result);
}

void
CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
{
	CaseFileList::iterator casefile;
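	/*
	 * Grab the next iterator before calling ReEvaluate(): a match may
	 * Close() and delete the current CaseFile, invalidating the
	 * iterator we are holding.
	 */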
	for (casefile = s_activeCases.begin();
	     casefile != s_activeCases.end();) {
		CaseFileList::iterator next = casefile;
		next++;
		if (poolGUID == (*casefile)->PoolGUID())
			(*casefile)->ReEvaluate(event);
		casefile = next;
	}
}

CaseFile &
CaseFile::Create(Vdev &vdev)
{
	CaseFile *activeCase;

	activeCase = Find(vdev.PoolGUID(), vdev.GUID());
	if (activeCase == NULL)
		activeCase = new CaseFile(vdev);

	return (*activeCase);
}

void
CaseFile::DeSerialize()
{
	struct dirent **caseFiles;

	int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
			 DeSerializeSelector, /*compar*/NULL));

	if (numCaseFiles == -1)
		return;
	if (numCaseFiles == 0) {
		free(caseFiles);
		return;
	}

	for (int i = 0; i < numCaseFiles; i++) {

		DeSerializeFile(caseFiles[i]->d_name);
		free(caseFiles[i]);
	}
	free(caseFiles);
}

void
CaseFile::LogAll()
{
	for (CaseFileList::iterator curCase = s_activeCases.begin();
	     curCase != s_activeCases.end(); curCase++)
		(*curCase)->Log();
}

void
CaseFile::PurgeAll()
{
	/*
	 * Serialize casefiles before deleting them so that they can be reread
	 * and revalidated during BuildCaseFiles.
	 * CaseFiles remove themselves from this list on destruction.
	 */
	while (s_activeCases.size() != 0) {
		CaseFile *casefile = s_activeCases.front();
		casefile->Serialize();
		delete casefile;
	}
}

//- CaseFile Public Methods ----------------------------------------------------
bool
CaseFile::RefreshVdevState()
{
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
	if (casePool == NULL)
		return (false);

	Vdev vd(casePool, CaseVdev(casePool));
	if (vd.DoesNotExist())
		return (false);

	m_vdevState    = vd.State();
	m_vdevPhysPath = vd.PhysicalPath();
	return (true);
}

bool
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());

	if (pool == NULL || !RefreshVdevState()) {
		/*
		 * The pool or vdev for this case file is no longer
		 * part of the configuration.  This can happen
		 * if we process a device arrival notification
		 * before seeing the ZFS configuration change
		 * event.
		 */
		syslog(LOG_INFO,
		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
		       "Closing\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str());
		Close();

		/*
		 * Since this event was not used to close this
		 * case, do not report it as consumed.
		 */
		return (/*consumed*/false);
	}

	if (VdevState() > VDEV_STATE_CANT_OPEN) {
		/*
		 * For now, newly discovered devices only help for
		 * devices that are missing.  In the future, we might
		 * use a newly inserted spare to replace a degraded
		 * or faulted device.
		 */
		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
		return (/*consumed*/false);
	}

	if (vdev != NULL
	 && vdev->PoolGUID() == m_poolGUID
	 && vdev->GUID() == m_vdevGUID) {

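		/*
		 * Try to online the device.  The flags (defined in
		 * libzfs.h) ask ZFS to first verify that the device was
		 * not really removed (ZFS_ONLINE_CHECKREMOVE) and to
		 * detach any spare standing in for it once it is healthy
		 * again (ZFS_ONLINE_UNSPARE).
		 */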
		zpool_vdev_online(pool, vdev->GUIDString().c_str(),
				  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
				  &m_vdevState);
		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
		       zpool_get_name(pool), vdev->GUIDString().c_str(),
		       devPath.c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

		/*
		 * Check the vdev state after the online action to see
		 * if we can retire this case.
		 */
		CloseIfSolved();

		return (/*consumed*/true);
	}

	/*
	 * If the auto-replace policy is enabled, and we have physical
	 * path information, try a physical path replacement.
	 */
	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	if (PhysicalPath().empty()) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): No physical path information.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	if (physPath != PhysicalPath()) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	/* Write a label on the newly inserted disk. */
	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
		syslog(LOG_ERR,
		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
		       zpool_get_name(pool), VdevGUIDString().c_str(),
		       libzfs_error_action(g_zfsHandle),
		       libzfs_error_description(g_zfsHandle));
		return (/*consumed*/false);
	}

	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	    devPath.c_str());
	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
}

bool
CaseFile::ReEvaluate(const ZfsEvent &event)
{
	bool consumed(false);

	if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
		/*
		 * The Vdev we represent has been removed from the
		 * configuration.  This case is no longer of value.
		 */
		Close();

		return (/*consumed*/true);
	} else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
		/* This Pool has been destroyed.  Discard the case. */
		Close();

		return (/*consumed*/true);
	} else if (event.Value("type") == "misc.fs.zfs.config_sync") {
		RefreshVdevState();
		if (VdevState() < VDEV_STATE_HEALTHY)
			consumed = ActivateSpare();
	}

	if (event.Value("class") == "resource.fs.zfs.removed") {
		bool spare_activated;

		if (!RefreshVdevState()) {
			/*
			 * The pool or vdev for this case file is no longer
			 * part of the configuration.  This can happen
			 * if we process a device arrival notification
			 * before seeing the ZFS configuration change
			 * event.
			 */
			syslog(LOG_INFO,
			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
			       "unconfigured.  Closing\n",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str());
			/*
			 * Close the case now so we won't waste cycles in the
			 * system rescan.
			 */
			Close();

			/*
			 * Since this event was not used to close this
			 * case, do not report it as consumed.
			 */
			return (/*consumed*/false);
		}

		/*
		 * Discard any tentative I/O error events for
		 * this case.  They were most likely caused by the
		 * hot-unplug of this device.
		 */
		PurgeTentativeEvents();

		/* Try to activate spares if they are available. */
		spare_activated = ActivateSpare();

		/*
		 * Rescan the drives in the system to see if a recent
		 * drive arrival can be used to solve this case.
		 */
		ZfsDaemon::RequestSystemRescan();

		/*
		 * Consume the event if we successfully activated a spare.
		 * Otherwise, leave it in the unconsumed events list so that the
		 * future addition of a spare to this pool might be able to
		 * close the case.
		 */
		consumed = spare_activated;
	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
		RefreshVdevState();
		/*
		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL
		 * (CANT_OPEN), try to activate a hotspare.  Otherwise,
		 * ignore the event.
		 */
		if (VdevState() == VDEV_STATE_FAULTED ||
		    VdevState() == VDEV_STATE_DEGRADED ||
		    VdevState() == VDEV_STATE_CANT_OPEN)
			(void) ActivateSpare();
		consumed = true;
	} else if (event.Value("class") == "ereport.fs.zfs.io" ||
	           event.Value("class") == "ereport.fs.zfs.checksum") {

		m_tentativeEvents.push_front(event.DeepCopy());
		RegisterCallout(event);
		consumed = true;
	}

	bool closed(CloseIfSolved());

	return (consumed || closed);
}

bool
CaseFile::ActivateSpare()
{
	nvlist_t	*config, *nvroot;
	nvlist_t       **spares;
	char		*devPath, *vdev_type;
	const char	*poolname;
	u_int		 nspares, i;
	int		 error;

	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		       "for pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
		return (false);
	}
	poolname = zpool_get_name(zhp);
	config = zpool_get_config(zhp, NULL);
	if (config == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		       "config for pool %s", poolname);
		return (false);
	}
	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
		       "tree for pool %s", poolname);
		return (false);
	}
	nspares = 0;
	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
				   &nspares);
	if (nspares == 0) {
		/* The pool has no spares configured. */
		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
		       "No spares available for pool %s", poolname);
		return (false);
	}
	for (i = 0; i < nspares; i++) {
		uint64_t    *nvlist_array;
		vdev_stat_t *vs;
		uint_t	     nstats;

		if (nvlist_lookup_uint64_array(spares[i],
		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
			       "find vdev stats for pool %s, spare %u",
			       poolname, i);
			return (false);
		}
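		/*
		 * The ZPOOL_CONFIG_VDEV_STATS payload is a uint64 array
		 * that libzfs encodes and decodes as a vdev_stat_t; the
		 * cast below mirrors that convention.
		 */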
		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);

		if ((vs->vs_aux != VDEV_AUX_SPARED)
		 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
			/* We found a usable spare. */
			break;
		}
	}

	if (i == nspares) {
		/* No available spares were found. */
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		       "the path of pool %s, spare %u. Error %d",
		       poolname, i, error);
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		       "the vdev type of pool %s, spare %u. Error %d",
		       poolname, i, error);
		return (false);
	}

	return (Replace(vdev_type, devPath, /*isspare*/true));
}

void
CaseFile::RegisterCallout(const Event &event)
{
	timeval now, countdown, elapsed, timestamp, zero, remaining;

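	/*
	 * Compute how much of the grace period remains for this event:
	 * countdown = s_removeGracePeriod - (now - event timestamp).
	 * For example, an event logged 15 seconds ago leaves a 45 second
	 * countdown under the 60 second grace period.
	 */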
	gettimeofday(&now, 0);
	timestamp = event.GetTimestamp();
	timersub(&now, &timestamp, &elapsed);
	timersub(&s_removeGracePeriod, &elapsed, &countdown);
	/*
	 * If the countdown is <= zero, reset the timer to the
	 * smallest positive time value instead.
	 */
	timerclear(&zero);
	if (timercmp(&countdown, &zero, <=)) {
		timerclear(&countdown);
		countdown.tv_usec = 1;
	}

	remaining = m_tentativeTimer.TimeRemaining();

	if (!m_tentativeTimer.IsPending()
	 || timercmp(&countdown, &remaining, <))
		m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
}

bool
CaseFile::CloseIfSolved()
{
	if (m_events.empty()
	 && m_tentativeEvents.empty()) {

		/*
		 * We currently do not track or take actions on
		 * devices in the degraded or faulted state.
		 * Once we have support for spare pools, we'll
		 * retain these cases so that any spares added in
		 * the future can be applied to them.
		 */
		switch (VdevState()) {
		case VDEV_STATE_HEALTHY:
			/* No need to keep cases for healthy vdevs. */
			Close();
			return (true);
		case VDEV_STATE_REMOVED:
		case VDEV_STATE_CANT_OPEN:
			/*
			 * Keep open.  We may solve it with a newly inserted
			 * device.
			 */
			/* FALLTHROUGH */
		case VDEV_STATE_FAULTED:
		case VDEV_STATE_DEGRADED:
			/*
			 * Keep open.  We may solve it with the future
			 * addition of a spare to the pool.
			 */
			/* FALLTHROUGH */
		case VDEV_STATE_UNKNOWN:
		case VDEV_STATE_CLOSED:
		case VDEV_STATE_OFFLINE:
			/*
			 * Keep open?  This may not be the correct behavior,
			 * but it's what we've always done.
			 */
			break;
		}

		/*
		 * Re-serialize the case in order to remove any
		 * previous event data.
		 */
		Serialize();
	}

	return (false);
}

void
CaseFile::Log()
{
	syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
	       VdevGUIDString().c_str(), PhysicalPath().c_str());
	syslog(LOG_INFO, "\tVdev State = %s\n",
	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
	if (m_tentativeEvents.size() != 0) {
		syslog(LOG_INFO, "\t=== Tentative Events ===\n");
		for (EventList::iterator event(m_tentativeEvents.begin());
		     event != m_tentativeEvents.end(); event++)
			(*event)->Log(LOG_INFO);
	}
	if (m_events.size() != 0) {
		syslog(LOG_INFO, "\t=== Events ===\n");
		for (EventList::iterator event(m_events.begin());
		     event != m_events.end(); event++)
			(*event)->Log(LOG_INFO);
	}
}

//- CaseFile Static Protected Methods ------------------------------------------
void
CaseFile::OnGracePeriodEnded(void *arg)
{
	CaseFile &casefile(*static_cast<CaseFile *>(arg));

	casefile.OnGracePeriodEnded();
}

int
CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
{
	uint64_t poolGUID;
	uint64_t vdevGUID;

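	/*
	 * Accept only regular files whose names match the case file
	 * pattern, e.g. "pool_1234_vdev_5678.case" (GUIDs in decimal).
	 */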
	if (dirEntry->d_type == DT_REG
	 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
		   &poolGUID, &vdevGUID) == 2)
		return (1);
	return (0);
}

void
CaseFile::DeSerializeFile(const char *fileName)
{
	string	  fullName(s_caseFilePath + '/' + fileName);
	CaseFile *existingCaseFile(NULL);
	CaseFile *caseFile(NULL);

	try {
		uint64_t poolGUID;
		uint64_t vdevGUID;
		nvlist_t *vdevConf;

		sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
		       &poolGUID, &vdevGUID);
		existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
		if (existingCaseFile != NULL) {
			/*
			 * If the vdev is already degraded or faulted,
			 * there's no point in keeping the state around
			 * that we use to put a drive into the degraded
			 * state.  However, if the vdev is simply missing,
			 * preserve the case data in the hopes that it will
			 * return.
			 */
			caseFile = existingCaseFile;
			vdev_state curState(caseFile->VdevState());
			if (curState > VDEV_STATE_CANT_OPEN
			 && curState < VDEV_STATE_HEALTHY) {
				unlink(fullName.c_str());
				return;
			}
		} else {
			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
			if (zpl.empty()
			 || (vdevConf = VdevIterator(zpl.front())
						    .Find(vdevGUID)) == NULL) {
				/*
				 * Either the pool no longer exists
				 * or this vdev is no longer a member of
				 * the pool.
				 */
				unlink(fullName.c_str());
				return;
			}

			/*
			 * Any vdev we find that does not have a case file
			 * must be in the healthy state and thus worthy of
			 * continued SERD data tracking.
			 */
			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
		}

		ifstream caseStream(fullName.c_str());
		if (!caseStream)
			throw ZfsdException("CaseFile::DeSerialize: Unable to "
					    "read %s.\n", fileName);

		caseFile->DeSerialize(caseStream);
	} catch (const ParseException &exp) {

		exp.Log();
		if (caseFile != existingCaseFile)
			delete caseFile;

		/*
		 * Since we can't parse the file, unlink it so we don't
		 * trip over it again.
		 */
		unlink(fullName.c_str());
	} catch (const ZfsdException &zfsException) {

		zfsException.Log();
		if (caseFile != existingCaseFile)
			delete caseFile;
	}
}

//- CaseFile Protected Methods -------------------------------------------------
CaseFile::CaseFile(const Vdev &vdev)
 : m_poolGUID(vdev.PoolGUID()),
   m_vdevGUID(vdev.GUID()),
   m_vdevState(vdev.State()),
   m_vdevPhysPath(vdev.PhysicalPath())
{
	stringstream guidString;

	guidString << m_vdevGUID;
	m_vdevGUIDString = guidString.str();
	guidString.str("");
	guidString << m_poolGUID;
	m_poolGUIDString = guidString.str();

	s_activeCases.push_back(this);

	syslog(LOG_INFO, "Creating new CaseFile:\n");
	Log();
}

CaseFile::~CaseFile()
{
	PurgeEvents();
	PurgeTentativeEvents();
	m_tentativeTimer.Stop();
	s_activeCases.remove(this);
}

void
CaseFile::PurgeEvents()
{
	for (EventList::iterator event(m_events.begin());
	     event != m_events.end(); event++)
		delete *event;

	m_events.clear();
}

void
CaseFile::PurgeTentativeEvents()
{
	for (EventList::iterator event(m_tentativeEvents.begin());
	     event != m_tentativeEvents.end(); event++)
		delete *event;

	m_tentativeEvents.clear();
}

void
CaseFile::SerializeEvList(const EventList events, int fd,
		const char* prefix) const
{
	if (events.empty())
		return;
	for (EventList::const_iterator curEvent = events.begin();
	     curEvent != events.end(); curEvent++) {
		const string &eventString((*curEvent)->GetEventString());

		// TODO: replace many write(2) calls with a single writev(2)
		if (prefix)
			write(fd, prefix, strlen(prefix));
		write(fd, eventString.c_str(), eventString.length());
	}
}

void
CaseFile::Serialize()
{
	stringstream saveFile;

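	/*
	 * Compose the case file name, e.g.
	 * "/var/db/zfsd/cases/pool_1234_vdev_5678.case".
	 */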
	saveFile << setfill('0')
		 << s_caseFilePath << "/"
		 << "pool_" << PoolGUIDString()
		 << "_vdev_" << VdevGUIDString()
		 << ".case";

	if (m_events.empty() && m_tentativeEvents.empty()) {
		unlink(saveFile.str().c_str());
		return;
	}

	int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
	if (fd == -1) {
		syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
		       saveFile.str().c_str());
		return;
	}
	SerializeEvList(m_events, fd);
	SerializeEvList(m_tentativeEvents, fd, "tentative ");
	close(fd);
}

/*
 * XXX: This method assumes that events do not contain embedded newlines.  If
 * events are ever allowed to contain embedded newlines, then CaseFile must
 * switch serialization formats.
 */
void
CaseFile::DeSerialize(ifstream &caseStream)
{
	const EventFactory &factory(ZfsDaemon::Get().GetFactory());

	caseStream >> std::noskipws >> std::ws;
	while (caseStream.good()) {
		/*
		 * Outline:
		 * Read the beginning of a line and check it for
		 * "tentative".  If found, discard "tentative".
		 * Create a new event.
		 * Continue.
		 */
		EventList* destEvents;
		const string tentFlag("tentative ");
		string line;
		std::stringbuf lineBuf;

		caseStream.get(lineBuf);
		caseStream.ignore();  /* Discard the newline character. */
		line = lineBuf.str();
		if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
			/* Discard "tentative". */
			line.erase(0, tentFlag.size());
			destEvents = &m_tentativeEvents;
		} else {
			destEvents = &m_events;
		}
		Event *event(Event::CreateEvent(factory, line));
		if (event != NULL) {
			destEvents->push_back(event);
			RegisterCallout(*event);
		}
	}
}

void
CaseFile::Close()
{
	/*
	 * This case is no longer relevant.  Clean up our
	 * serialization file, and delete the case.
	 */
	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

	/*
	 * Serializing a case with no event data removes its
	 * serialization file.
	 */
	PurgeEvents();
	Serialize();

	delete this;
}

void
CaseFile::OnGracePeriodEnded()
{
	bool should_fault, should_degrade;
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());

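	/*
	 * The grace period expired without a removal event to absolve
	 * these errors; promote all tentative events to the permanent
	 * list before evaluating the fault/degrade thresholds.
	 */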
	m_events.splice(m_events.begin(), m_tentativeEvents);
	should_fault = ShouldFault();
	should_degrade = ShouldDegrade();

	if (should_fault || should_degrade) {
		if (zhp == NULL
		 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
			/*
			 * Either the pool no longer exists
			 * or this vdev is no longer a member of
			 * the pool.
			 */
			Close();
			return;
		}
	}

	/* A fault condition has priority over a degrade condition. */
	if (should_fault) {
		/* Fault the vdev and close the case. */
		if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
				       VDEV_AUX_ERR_EXCEEDED) == 0) {
			syslog(LOG_INFO, "Faulting vdev(%s/%s)",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str());
			Close();
			return;
		} else {
			syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str(),
			       libzfs_error_action(g_zfsHandle),
			       libzfs_error_description(g_zfsHandle));
		}
	} else if (should_degrade) {
		/* Degrade the vdev and close the case. */
		if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
				       VDEV_AUX_ERR_EXCEEDED) == 0) {
			syslog(LOG_INFO, "Degrading vdev(%s/%s)",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str());
			Close();
			return;
		} else {
			syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str(),
			       libzfs_error_action(g_zfsHandle),
			       libzfs_error_description(g_zfsHandle));
		}
	}
	Serialize();
}

Vdev
CaseFile::BeingReplacedBy(zpool_handle_t *zhp)
{
	Vdev vd(zhp, CaseVdev(zhp));
	std::list<Vdev> children;
	std::list<Vdev>::iterator children_it;

	Vdev parent(vd.Parent());
	Vdev replacing(NonexistentVdev);

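	/*
	 * When a hot spare is attached, ZFS interposes an interior vdev
	 * of type "spare" whose children are the original device and the
	 * spare(s) that replaced it.
	 */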
	/*
	 * To determine whether we are being replaced by another spare that
	 * is still working, make sure that we are currently spared and
	 * that the spare is either resilvering or healthy.  If any of these
	 * conditions fail, then we are not being replaced by a spare.
	 *
	 * If the spare is healthy, then the case file should be closed very
	 * soon after this check.
	 */
	if (parent.DoesNotExist()
	 || parent.Name(zhp, /*verbose*/false) != "spare")
		return (NonexistentVdev);

	children = parent.Children();
	children_it = children.begin();
	for (;children_it != children.end(); children_it++) {
		Vdev child = *children_it;

		/* Skip our vdev. */
		if (child.GUID() == VdevGUID())
			continue;
		/*
		 * Accept the first child that doesn't match our GUID, or
		 * any resilvering/healthy device if one exists.
		 */
		if (replacing.DoesNotExist() || child.IsResilvering()
		 || child.State() == VDEV_STATE_HEALTHY)
			replacing = child;
	}

	return (replacing);
}

bool
CaseFile::Replace(const char* vdev_type, const char* path, bool isspare)
{
	nvlist_t *nvroot, *newvd;
	const char *poolname;
	string oldstr(VdevGUIDString());
	bool retval = true;

	/* Figure out what pool we're working on. */
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
		return (false);
	}
	poolname = zpool_get_name(zhp);
	Vdev vd(zhp, CaseVdev(zhp));
	Vdev replaced(BeingReplacedBy(zhp));

	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
		/* If we are already being replaced by a working spare, pass. */
		if (replaced.IsResilvering()
		 || replaced.State() == VDEV_STATE_HEALTHY) {
			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
			    "replaced", VdevGUIDString().c_str(), path);
			return (/*consumed*/false);
		}
		/*
		 * If we have already been replaced by a spare, but that spare
		 * is broken, we must spare the spare, not the original device.
		 */
		oldstr = replaced.GUIDString();
		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
		    "broken spare %s instead", VdevGUIDString().c_str(),
		    path, oldstr.c_str());
	}

	/*
	 * Build a root vdev/leaf vdev configuration suitable for
	 * zpool_vdev_attach.  Only enough data for the kernel to find
	 * the device (i.e. type and disk device node path) is needed.
	 */
	nvroot = NULL;
	newvd = NULL;

	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
		    "configuration data.", poolname, oldstr.c_str());
		if (nvroot != NULL)
			nvlist_free(nvroot);
		return (false);
	}
	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
				    &newvd, 1) != 0) {
		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
		    "configuration data.", poolname, oldstr.c_str());
		nvlist_free(newvd);
		nvlist_free(nvroot);
		return (false);
	}

	/* Data was copied when added to the root vdev. */
	nvlist_free(newvd);

	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
	    /*replace*/B_TRUE) == 0);
	if (retval)
		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
		    poolname, oldstr.c_str(), path);
	else
		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
		    libzfs_error_description(g_zfsHandle));
	nvlist_free(nvroot);

	return (retval);
}

/* Does the argument event refer to a checksum error? */
static bool
IsChecksumEvent(const Event* const event)
{
	return ("ereport.fs.zfs.checksum" == event->Value("type"));
}

/* Does the argument event refer to an IO error? */
static bool
IsIOEvent(const Event* const event)
{
	return ("ereport.fs.zfs.io" == event->Value("type"));
}

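/*
 * A case is acted upon only after more than ZFS_DEGRADE_IO_COUNT matching
 * events have accumulated: checksum errors degrade the vdev, while I/O
 * errors fault it (see OnGracePeriodEnded()).
 */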
bool
CaseFile::ShouldDegrade() const
{
	return (std::count_if(m_events.begin(), m_events.end(),
			      IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
}

bool
CaseFile::ShouldFault() const
{
	return (std::count_if(m_events.begin(), m_events.end(),
			      IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
}

nvlist_t *
CaseFile::CaseVdev(zpool_handle_t *zhp) const
{
	return (VdevIterator(zhp).Find(VdevGUID()));
}