xref: /freebsd/cddl/usr.sbin/zfsd/case_file.cc (revision 60b9567d16b585b05c86c60393958ad81cbfa72f)
1 /*-
2  * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions, and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    substantially similar to the "NO WARRANTY" disclaimer below
13  *    ("Disclaimer") and any redistribution must be conditioned upon
14  *    including a substantially similar Disclaimer requirement for further
15  *    binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGES.
29  *
30  * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31  */
32 
33 /**
34  * \file case_file.cc
35  *
36  * We keep case files for any leaf vdev that is not in the optimal state.
37  * However, we only serialize to disk those events that need to be preserved
38  * across reboots.  For now, this is just a log of soft errors which we
39  * accumulate in order to mark a device as degraded.
40  */
41 #include <sys/cdefs.h>
42 #include <sys/time.h>
43 
44 #include <sys/fs/zfs.h>
45 
46 #include <dirent.h>
47 #include <iomanip>
48 #include <fstream>
49 #include <functional>
50 #include <sstream>
51 #include <syslog.h>
52 #include <unistd.h>
53 
54 #include <libzfs.h>
55 
56 #include <list>
57 #include <map>
58 #include <string>
59 
60 #include <devdctl/guid.h>
61 #include <devdctl/event.h>
62 #include <devdctl/event_factory.h>
63 #include <devdctl/exception.h>
64 #include <devdctl/consumer.h>
65 
66 #include "callout.h"
67 #include "vdev_iterator.h"
68 #include "zfsd_event.h"
69 #include "case_file.h"
70 #include "vdev.h"
71 #include "zfsd.h"
72 #include "zfsd_exception.h"
73 #include "zpool_list.h"
74 
75 __FBSDID("$FreeBSD$");
76 
77 /*============================ Namespace Control =============================*/
78 using std::auto_ptr;
79 using std::hex;
80 using std::ifstream;
81 using std::stringstream;
82 using std::setfill;
83 using std::setw;
84 
85 using DevdCtl::Event;
86 using DevdCtl::EventFactory;
87 using DevdCtl::EventList;
88 using DevdCtl::Guid;
89 using DevdCtl::ParseException;
90 
91 /*--------------------------------- CaseFile ---------------------------------*/
92 //- CaseFile Static Data -------------------------------------------------------
93 
94 CaseFileList  CaseFile::s_activeCases;
95 const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
96 const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
97 
98 //- CaseFile Static Public Methods ---------------------------------------------
99 CaseFile *
100 CaseFile::Find(Guid poolGUID, Guid vdevGUID)
101 {
102 	for (CaseFileList::iterator curCase = s_activeCases.begin();
103 	     curCase != s_activeCases.end(); curCase++) {
104 
105 		if ((*curCase)->PoolGUID() != poolGUID
106 		 || (*curCase)->VdevGUID() != vdevGUID)
107 			continue;
108 
109 		/*
110 		 * We only carry one active case per-vdev.
111 		 */
112 		return (*curCase);
113 	}
114 	return (NULL);
115 }
116 
117 CaseFile *
118 CaseFile::Find(const string &physPath)
119 {
120 	CaseFile *result = NULL;
121 
122 	for (CaseFileList::iterator curCase = s_activeCases.begin();
123 	     curCase != s_activeCases.end(); curCase++) {
124 
125 		if ((*curCase)->PhysicalPath() != physPath)
126 			continue;
127 
128 		if (result != NULL) {
129 			syslog(LOG_WARNING, "Multiple casefiles found for "
130 			    "physical path %s.  "
131 			    "This is most likely a bug in zfsd",
132 			    physPath.c_str());
133 		}
134 		result = *curCase;
135 	}
136 	return (result);
137 }
138 
139 
140 void
141 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
142 {
143 	CaseFileList::iterator casefile;
144 	for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
145 		CaseFileList::iterator next = casefile;
146 		next++;
147 		if (poolGUID == (*casefile)->PoolGUID())
148 			(*casefile)->ReEvaluate(event);
149 		casefile = next;
150 	}
151 }
152 
153 CaseFile &
154 CaseFile::Create(Vdev &vdev)
155 {
156 	CaseFile *activeCase;
157 
158 	activeCase = Find(vdev.PoolGUID(), vdev.GUID());
159 	if (activeCase == NULL)
160 		activeCase = new CaseFile(vdev);
161 
162 	return (*activeCase);
163 }
164 
165 void
166 CaseFile::DeSerialize()
167 {
168 	struct dirent **caseFiles;
169 
170 	int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
171 			 DeSerializeSelector, /*compar*/NULL));
172 
173 	if (numCaseFiles == -1)
174 		return;
175 	if (numCaseFiles == 0) {
176 		free(caseFiles);
177 		return;
178 	}
179 
180 	for (int i = 0; i < numCaseFiles; i++) {
181 
182 		DeSerializeFile(caseFiles[i]->d_name);
183 		free(caseFiles[i]);
184 	}
185 	free(caseFiles);
186 }
187 
188 void
189 CaseFile::LogAll()
190 {
191 	for (CaseFileList::iterator curCase = s_activeCases.begin();
192 	     curCase != s_activeCases.end(); curCase++)
193 		(*curCase)->Log();
194 }
195 
196 void
197 CaseFile::PurgeAll()
198 {
199 	/*
200 	 * Serialize casefiles before deleting them so that they can be reread
201 	 * and revalidated during BuildCaseFiles.
202 	 * CaseFiles remove themselves from this list on destruction.
203 	 */
204 	while (s_activeCases.size() != 0) {
205 		CaseFile *casefile = s_activeCases.front();
206 		casefile->Serialize();
207 		delete casefile;
208 	}
209 
210 }
211 
212 //- CaseFile Public Methods ----------------------------------------------------
213 bool
214 CaseFile::RefreshVdevState()
215 {
216 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
217 	zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
218 	if (casePool == NULL)
219 		return (false);
220 
221 	Vdev vd(casePool, CaseVdev(casePool));
222 	if (vd.DoesNotExist())
223 		return (false);
224 
225 	m_vdevState    = vd.State();
226 	m_vdevPhysPath = vd.PhysicalPath();
227 	return (true);
228 }
229 
230 bool
231 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
232 {
233 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
234 	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
235 
236 	if (pool == NULL || !RefreshVdevState()) {
237 		/*
238 		 * The pool or vdev for this case file is no longer
239 		 * part of the configuration.  This can happen
240 		 * if we process a device arrival notification
241 		 * before seeing the ZFS configuration change
242 		 * event.
243 		 */
244 		syslog(LOG_INFO,
245 		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
246 		       "Closing\n",
247 		       PoolGUIDString().c_str(),
248 		       VdevGUIDString().c_str());
249 		Close();
250 
251 		/*
252 		 * Since this event was not used to close this
253 		 * case, do not report it as consumed.
254 		 */
255 		return (/*consumed*/false);
256 	}
257 
258 	if (VdevState() > VDEV_STATE_CANT_OPEN) {
259 		/*
260 		 * For now, newly discovered devices only help for
261 		 * devices that are missing.  In the future, we might
262 		 * use a newly inserted spare to replace a degraded
263 		 * or faulted device.
264 		 */
265 		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
266 		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
267 		return (/*consumed*/false);
268 	}
269 
270 	if (vdev != NULL
271 	 && vdev->PoolGUID() == m_poolGUID
272 	 && vdev->GUID() == m_vdevGUID) {
273 
274 		zpool_vdev_online(pool, vdev->GUIDString().c_str(),
275 				  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
276 				  &m_vdevState);
277 		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
278 		       zpool_get_name(pool), vdev->GUIDString().c_str(),
279 		       devPath.c_str(),
280 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
281 
282 		/*
283 		 * Check the vdev state post the online action to see
284 		 * if we can retire this case.
285 		 */
286 		CloseIfSolved();
287 
288 		return (/*consumed*/true);
289 	}
290 
291 	/*
292 	 * If the auto-replace policy is enabled, and we have physical
293 	 * path information, try a physical path replacement.
294 	 */
295 	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
296 		syslog(LOG_INFO,
297 		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
298 		       "Ignoring device insertion.\n",
299 		       PoolGUIDString().c_str(),
300 		       VdevGUIDString().c_str(),
301 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
302 		return (/*consumed*/false);
303 	}
304 
305 	if (PhysicalPath().empty()) {
306 		syslog(LOG_INFO,
307 		       "CaseFile(%s:%s:%s): No physical path information.  "
308 		       "Ignoring device insertion.\n",
309 		       PoolGUIDString().c_str(),
310 		       VdevGUIDString().c_str(),
311 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
312 		return (/*consumed*/false);
313 	}
314 
315 	if (physPath != PhysicalPath()) {
316 		syslog(LOG_INFO,
317 		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
318 		       "Ignoring device insertion.\n",
319 		       PoolGUIDString().c_str(),
320 		       VdevGUIDString().c_str(),
321 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
322 		return (/*consumed*/false);
323 	}
324 
325 	/* Write a label on the newly inserted disk. */
326 	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
327 		syslog(LOG_ERR,
328 		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
329 		       zpool_get_name(pool), VdevGUIDString().c_str(),
330 		       libzfs_error_action(g_zfsHandle),
331 		       libzfs_error_description(g_zfsHandle));
332 		return (/*consumed*/false);
333 	}
334 
335 	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
336 	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
337 	    devPath.c_str());
338 	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
339 }
340 
341 bool
342 CaseFile::ReEvaluate(const ZfsEvent &event)
343 {
344 	bool consumed(false);
345 
346 	if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
347 		/*
348 		 * The Vdev we represent has been removed from the
349 		 * configuration.  This case is no longer of value.
350 		 */
351 		Close();
352 
353 		return (/*consumed*/true);
354 	} else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
355 		/* This Pool has been destroyed.  Discard the case */
356 		Close();
357 
358 		return (/*consumed*/true);
359 	} else if (event.Value("type") == "misc.fs.zfs.config_sync") {
360 		RefreshVdevState();
361 		if (VdevState() < VDEV_STATE_HEALTHY)
362 			consumed = ActivateSpare();
363 	}
364 
365 
366 	if (event.Value("class") == "resource.fs.zfs.removed") {
367 		bool spare_activated;
368 
369 		if (!RefreshVdevState()) {
370 			/*
371 			 * The pool or vdev for this case file is no longer
372 			 * part of the configuration.  This can happen
373 			 * if we process a device arrival notification
374 			 * before seeing the ZFS configuration change
375 			 * event.
376 			 */
377 			syslog(LOG_INFO,
378 			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
379 			       "unconfigured.  Closing\n",
380 			       PoolGUIDString().c_str(),
381 			       VdevGUIDString().c_str());
382 			/*
383 			 * Close the case now so we won't waste cycles in the
384 			 * system rescan
385 			 */
386 			Close();
387 
388 			/*
389 			 * Since this event was not used to close this
390 			 * case, do not report it as consumed.
391 			 */
392 			return (/*consumed*/false);
393 		}
394 
395 		/*
396 		 * Discard any tentative I/O error events for
397 		 * this case.  They were most likely caused by the
398 		 * hot-unplug of this device.
399 		 */
400 		PurgeTentativeEvents();
401 
402 		/* Try to activate spares if they are available */
403 		spare_activated = ActivateSpare();
404 
405 		/*
406 		 * Rescan the drives in the system to see if a recent
407 		 * drive arrival can be used to solve this case.
408 		 */
409 		ZfsDaemon::RequestSystemRescan();
410 
411 		/*
412 		 * Consume the event if we successfully activated a spare.
413 		 * Otherwise, leave it in the unconsumed events list so that the
414 		 * future addition of a spare to this pool might be able to
415 		 * close the case
416 		 */
417 		consumed = spare_activated;
418 	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
419 		RefreshVdevState();
420 		/*
421 		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
422 		 * activate a hotspare.  Otherwise, ignore the event
423 		 */
424 		if (VdevState() == VDEV_STATE_FAULTED ||
425 		    VdevState() == VDEV_STATE_DEGRADED ||
426 		    VdevState() == VDEV_STATE_CANT_OPEN)
427 			(void) ActivateSpare();
428 		consumed = true;
429 	}
430 	else if (event.Value("class") == "ereport.fs.zfs.io" ||
431 	         event.Value("class") == "ereport.fs.zfs.checksum") {
432 
433 		m_tentativeEvents.push_front(event.DeepCopy());
434 		RegisterCallout(event);
435 		consumed = true;
436 	}
437 
438 	bool closed(CloseIfSolved());
439 
440 	return (consumed || closed);
441 }
442 
443 
444 bool
445 CaseFile::ActivateSpare() {
446 	nvlist_t	*config, *nvroot;
447 	nvlist_t       **spares;
448 	char		*devPath, *vdev_type;
449 	const char	*poolname;
450 	u_int		 nspares, i;
451 	int		 error;
452 
453 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
454 	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
455 	if (zhp == NULL) {
456 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
457 		       "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
458 		return (false);
459 	}
460 	poolname = zpool_get_name(zhp);
461 	config = zpool_get_config(zhp, NULL);
462 	if (config == NULL) {
463 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
464 		       "config for pool %s", poolname);
465 		return (false);
466 	}
467 	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
468 	if (error != 0){
469 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
470 		       "tree for pool %s", poolname);
471 		return (false);
472 	}
473 	nspares = 0;
474 	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
475 				   &nspares);
476 	if (nspares == 0) {
477 		/* The pool has no spares configured */
478 		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
479 		       "No spares available for pool %s", poolname);
480 		return (false);
481 	}
482 	for (i = 0; i < nspares; i++) {
483 		uint64_t    *nvlist_array;
484 		vdev_stat_t *vs;
485 		uint_t	     nstats;
486 
487 		if (nvlist_lookup_uint64_array(spares[i],
488 		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
489 			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
490 			       "find vdev stats for pool %s, spare %d",
491 			       poolname, i);
492 			return (false);
493 		}
494 		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
495 
496 		if ((vs->vs_aux != VDEV_AUX_SPARED)
497 		 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
498 			/* We found a usable spare */
499 			break;
500 		}
501 	}
502 
503 	if (i == nspares) {
504 		/* No available spares were found */
505 		return (false);
506 	}
507 
508 	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
509 	if (error != 0) {
510 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
511 		       "the path of pool %s, spare %d. Error %d",
512 		       poolname, i, error);
513 		return (false);
514 	}
515 
516 	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
517 	if (error != 0) {
518 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
519 		       "the vdev type of pool %s, spare %d. Error %d",
520 		       poolname, i, error);
521 		return (false);
522 	}
523 
524 	return (Replace(vdev_type, devPath, /*isspare*/true));
525 }
526 
527 void
528 CaseFile::RegisterCallout(const Event &event)
529 {
530 	timeval now, countdown, elapsed, timestamp, zero, remaining;
531 
532 	gettimeofday(&now, 0);
533 	timestamp = event.GetTimestamp();
534 	timersub(&now, &timestamp, &elapsed);
535 	timersub(&s_removeGracePeriod, &elapsed, &countdown);
536 	/*
537 	 * If countdown is <= zero, Reset the timer to the
538 	 * smallest positive time value instead
539 	 */
540 	timerclear(&zero);
541 	if (timercmp(&countdown, &zero, <=)) {
542 		timerclear(&countdown);
543 		countdown.tv_usec = 1;
544 	}
545 
546 	remaining = m_tentativeTimer.TimeRemaining();
547 
548 	if (!m_tentativeTimer.IsPending()
549 	 || timercmp(&countdown, &remaining, <))
550 		m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
551 }
552 
553 
554 bool
555 CaseFile::CloseIfSolved()
556 {
557 	if (m_events.empty()
558 	 && m_tentativeEvents.empty()) {
559 
560 		/*
561 		 * We currently do not track or take actions on
562 		 * devices in the degraded or faulted state.
563 		 * Once we have support for spare pools, we'll
564 		 * retain these cases so that any spares added in
565 		 * the future can be applied to them.
566 		 */
567 		switch (VdevState()) {
568 		case VDEV_STATE_HEALTHY:
569 			/* No need to keep cases for healthy vdevs */
570 			Close();
571 			return (true);
572 		case VDEV_STATE_REMOVED:
573 		case VDEV_STATE_CANT_OPEN:
574 			/*
575 			 * Keep open.  We may solve it with a newly inserted
576 			 * device.
577 			 */
578 		case VDEV_STATE_FAULTED:
579 		case VDEV_STATE_DEGRADED:
580 			/*
581 			 * Keep open.  We may solve it with the future
582 			 * addition of a spare to the pool
583 			 */
584 		case VDEV_STATE_UNKNOWN:
585 		case VDEV_STATE_CLOSED:
586 		case VDEV_STATE_OFFLINE:
587 			/*
588 			 * Keep open?  This may not be the correct behavior,
589 			 * but it's what we've always done
590 			 */
591 			;
592 		}
593 
594 		/*
595 		 * Re-serialize the case in order to remove any
596 		 * previous event data.
597 		 */
598 		Serialize();
599 	}
600 
601 	return (false);
602 }
603 
604 void
605 CaseFile::Log()
606 {
607 	syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
608 	       VdevGUIDString().c_str(), PhysicalPath().c_str());
609 	syslog(LOG_INFO, "\tVdev State = %s\n",
610 	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
611 	if (m_tentativeEvents.size() != 0) {
612 		syslog(LOG_INFO, "\t=== Tentative Events ===\n");
613 		for (EventList::iterator event(m_tentativeEvents.begin());
614 		     event != m_tentativeEvents.end(); event++)
615 			(*event)->Log(LOG_INFO);
616 	}
617 	if (m_events.size() != 0) {
618 		syslog(LOG_INFO, "\t=== Events ===\n");
619 		for (EventList::iterator event(m_events.begin());
620 		     event != m_events.end(); event++)
621 			(*event)->Log(LOG_INFO);
622 	}
623 }
624 
625 //- CaseFile Static Protected Methods ------------------------------------------
626 void
627 CaseFile::OnGracePeriodEnded(void *arg)
628 {
629 	CaseFile &casefile(*static_cast<CaseFile *>(arg));
630 
631 	casefile.OnGracePeriodEnded();
632 }
633 
634 int
635 CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
636 {
637 	uint64_t poolGUID;
638 	uint64_t vdevGUID;
639 
640 	if (dirEntry->d_type == DT_REG
641 	 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
642 		   &poolGUID, &vdevGUID) == 2)
643 		return (1);
644 	return (0);
645 }
646 
647 void
648 CaseFile::DeSerializeFile(const char *fileName)
649 {
650 	string	  fullName(s_caseFilePath + '/' + fileName);
651 	CaseFile *existingCaseFile(NULL);
652 	CaseFile *caseFile(NULL);
653 
654 	try {
655 		uint64_t poolGUID;
656 		uint64_t vdevGUID;
657 		nvlist_t *vdevConf;
658 
659 		if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
660 		       &poolGUID, &vdevGUID) != 2) {
661 			throw ZfsdException("CaseFile::DeSerialize: "
662 			    "Unintelligible CaseFile filename %s.\n", fileName);
663 		}
664 		existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
665 		if (existingCaseFile != NULL) {
666 			/*
667 			 * If the vdev is already degraded or faulted,
668 			 * there's no point in keeping the state around
669 			 * that we use to put a drive into the degraded
670 			 * state.  However, if the vdev is simply missing,
671 			 * preserve the case data in the hopes that it will
672 			 * return.
673 			 */
674 			caseFile = existingCaseFile;
675 			vdev_state curState(caseFile->VdevState());
676 			if (curState > VDEV_STATE_CANT_OPEN
677 			 && curState < VDEV_STATE_HEALTHY) {
678 				unlink(fileName);
679 				return;
680 			}
681 		} else {
682 			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
683 			if (zpl.empty()
684 			 || (vdevConf = VdevIterator(zpl.front())
685 						    .Find(vdevGUID)) == NULL) {
686 				/*
687 				 * Either the pool no longer exists
688 				 * or this vdev is no longer a member of
689 				 * the pool.
690 				 */
691 				unlink(fullName.c_str());
692 				return;
693 			}
694 
695 			/*
696 			 * Any vdev we find that does not have a case file
697 			 * must be in the healthy state and thus worthy of
698 			 * continued SERD data tracking.
699 			 */
700 			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
701 		}
702 
703 		ifstream caseStream(fullName.c_str());
704 		if (!caseStream)
705 			throw ZfsdException("CaseFile::DeSerialize: Unable to "
706 					    "read %s.\n", fileName);
707 
708 		caseFile->DeSerialize(caseStream);
709 	} catch (const ParseException &exp) {
710 
711 		exp.Log();
712 		if (caseFile != existingCaseFile)
713 			delete caseFile;
714 
715 		/*
716 		 * Since we can't parse the file, unlink it so we don't
717 		 * trip over it again.
718 		 */
719 		unlink(fileName);
720 	} catch (const ZfsdException &zfsException) {
721 
722 		zfsException.Log();
723 		if (caseFile != existingCaseFile)
724 			delete caseFile;
725 	}
726 }
727 
728 //- CaseFile Protected Methods -------------------------------------------------
729 CaseFile::CaseFile(const Vdev &vdev)
730  : m_poolGUID(vdev.PoolGUID()),
731    m_vdevGUID(vdev.GUID()),
732    m_vdevState(vdev.State()),
733    m_vdevPhysPath(vdev.PhysicalPath())
734 {
735 	stringstream guidString;
736 
737 	guidString << m_vdevGUID;
738 	m_vdevGUIDString = guidString.str();
739 	guidString.str("");
740 	guidString << m_poolGUID;
741 	m_poolGUIDString = guidString.str();
742 
743 	s_activeCases.push_back(this);
744 
745 	syslog(LOG_INFO, "Creating new CaseFile:\n");
746 	Log();
747 }
748 
749 CaseFile::~CaseFile()
750 {
751 	PurgeEvents();
752 	PurgeTentativeEvents();
753 	m_tentativeTimer.Stop();
754 	s_activeCases.remove(this);
755 }
756 
757 void
758 CaseFile::PurgeEvents()
759 {
760 	for (EventList::iterator event(m_events.begin());
761 	     event != m_events.end(); event++)
762 		delete *event;
763 
764 	m_events.clear();
765 }
766 
767 void
768 CaseFile::PurgeTentativeEvents()
769 {
770 	for (EventList::iterator event(m_tentativeEvents.begin());
771 	     event != m_tentativeEvents.end(); event++)
772 		delete *event;
773 
774 	m_tentativeEvents.clear();
775 }
776 
777 void
778 CaseFile::SerializeEvList(const EventList events, int fd,
779 		const char* prefix) const
780 {
781 	if (events.empty())
782 		return;
783 	for (EventList::const_iterator curEvent = events.begin();
784 	     curEvent != events.end(); curEvent++) {
785 		const string &eventString((*curEvent)->GetEventString());
786 
787 		// TODO: replace many write(2) calls with a single writev(2)
788 		if (prefix)
789 			write(fd, prefix, strlen(prefix));
790 		write(fd, eventString.c_str(), eventString.length());
791 	}
792 }
793 
794 void
795 CaseFile::Serialize()
796 {
797 	stringstream saveFile;
798 
799 	saveFile << setfill('0')
800 		 << s_caseFilePath << "/"
801 		 << "pool_" << PoolGUIDString()
802 		 << "_vdev_" << VdevGUIDString()
803 		 << ".case";
804 
805 	if (m_events.empty() && m_tentativeEvents.empty()) {
806 		unlink(saveFile.str().c_str());
807 		return;
808 	}
809 
810 	int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
811 	if (fd == -1) {
812 		syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
813 		       saveFile.str().c_str());
814 		return;
815 	}
816 	SerializeEvList(m_events, fd);
817 	SerializeEvList(m_tentativeEvents, fd, "tentative ");
818 	close(fd);
819 }
820 
821 /*
822  * XXX: This method assumes that events may not contain embedded newlines.  If
823  * ever events can contain embedded newlines, then CaseFile must switch
824  * serialization formats
825  */
826 void
827 CaseFile::DeSerialize(ifstream &caseStream)
828 {
829 	string	      evString;
830 	const EventFactory &factory(ZfsDaemon::Get().GetFactory());
831 
832 	caseStream >> std::noskipws >> std::ws;
833 	while (caseStream.good()) {
834 		/*
835 		 * Outline:
836 		 * read the beginning of a line and check it for
837 		 * "tentative".  If found, discard "tentative".
838 		 * Create a new event
839 		 * continue
840 		 */
841 		EventList* destEvents;
842 		const string tentFlag("tentative ");
843 		string line;
844 		std::stringbuf lineBuf;
845 
846 		caseStream.get(lineBuf);
847 		caseStream.ignore();  /*discard the newline character*/
848 		line = lineBuf.str();
849 		if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
850 			/* Discard "tentative" */
851 			line.erase(0, tentFlag.size());
852 			destEvents = &m_tentativeEvents;
853 		} else {
854 			destEvents = &m_events;
855 		}
856 		Event *event(Event::CreateEvent(factory, line));
857 		if (event != NULL) {
858 			destEvents->push_back(event);
859 			RegisterCallout(*event);
860 		}
861 	}
862 }
863 
864 void
865 CaseFile::Close()
866 {
867 	/*
868 	 * This case is no longer relevant.  Clean up our
869 	 * serialization file, and delete the case.
870 	 */
871 	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
872 	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
873 	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
874 
875 	/*
876 	 * Serialization of a Case with no event data, clears the
877 	 * Serialization data for that event.
878 	 */
879 	PurgeEvents();
880 	Serialize();
881 
882 	delete this;
883 }
884 
885 void
886 CaseFile::OnGracePeriodEnded()
887 {
888 	bool should_fault, should_degrade;
889 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
890 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
891 
892 	m_events.splice(m_events.begin(), m_tentativeEvents);
893 	should_fault = ShouldFault();
894 	should_degrade = ShouldDegrade();
895 
896 	if (should_fault || should_degrade) {
897 		if (zhp == NULL
898 		 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
899 			/*
900 			 * Either the pool no longer exists
901 			 * or this vdev is no longer a member of
902 			 * the pool.
903 			 */
904 			Close();
905 			return;
906 		}
907 
908 	}
909 
910 	/* A fault condition has priority over a degrade condition */
911 	if (ShouldFault()) {
912 		/* Fault the vdev and close the case. */
913 		if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
914 				       VDEV_AUX_ERR_EXCEEDED) == 0) {
915 			syslog(LOG_INFO, "Faulting vdev(%s/%s)",
916 			       PoolGUIDString().c_str(),
917 			       VdevGUIDString().c_str());
918 			Close();
919 			return;
920 		}
921 		else {
922 			syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
923 			       PoolGUIDString().c_str(),
924 			       VdevGUIDString().c_str(),
925 			       libzfs_error_action(g_zfsHandle),
926 			       libzfs_error_description(g_zfsHandle));
927 		}
928 	}
929 	else if (ShouldDegrade()) {
930 		/* Degrade the vdev and close the case. */
931 		if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
932 				       VDEV_AUX_ERR_EXCEEDED) == 0) {
933 			syslog(LOG_INFO, "Degrading vdev(%s/%s)",
934 			       PoolGUIDString().c_str(),
935 			       VdevGUIDString().c_str());
936 			Close();
937 			return;
938 		}
939 		else {
940 			syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
941 			       PoolGUIDString().c_str(),
942 			       VdevGUIDString().c_str(),
943 			       libzfs_error_action(g_zfsHandle),
944 			       libzfs_error_description(g_zfsHandle));
945 		}
946 	}
947 	Serialize();
948 }
949 
950 Vdev
951 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
952 	Vdev vd(zhp, CaseVdev(zhp));
953 	std::list<Vdev> children;
954 	std::list<Vdev>::iterator children_it;
955 
956 	Vdev parent(vd.Parent());
957 	Vdev replacing(NonexistentVdev);
958 
959 	/*
960 	 * To determine whether we are being replaced by another spare that
961 	 * is still working, then make sure that it is currently spared and
962 	 * that the spare is either resilvering or healthy.  If any of these
963 	 * conditions fail, then we are not being replaced by a spare.
964 	 *
965 	 * If the spare is healthy, then the case file should be closed very
966 	 * soon after this check.
967 	 */
968 	if (parent.DoesNotExist()
969 	 || parent.Name(zhp, /*verbose*/false) != "spare")
970 		return (NonexistentVdev);
971 
972 	children = parent.Children();
973 	children_it = children.begin();
974 	for (;children_it != children.end(); children_it++) {
975 		Vdev child = *children_it;
976 
977 		/* Skip our vdev. */
978 		if (child.GUID() == VdevGUID())
979 			continue;
980 		/*
981 		 * Accept the first child that doesn't match our GUID, or
982 		 * any resilvering/healthy device if one exists.
983 		 */
984 		if (replacing.DoesNotExist() || child.IsResilvering()
985 		 || child.State() == VDEV_STATE_HEALTHY)
986 			replacing = child;
987 	}
988 
989 	return (replacing);
990 }
991 
992 bool
993 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
994 	nvlist_t *nvroot, *newvd;
995 	const char *poolname;
996 	string oldstr(VdevGUIDString());
997 	bool retval = true;
998 
999 	/* Figure out what pool we're working on */
1000 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1001 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1002 	if (zhp == NULL) {
1003 		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
1004 		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
1005 		return (false);
1006 	}
1007 	poolname = zpool_get_name(zhp);
1008 	Vdev vd(zhp, CaseVdev(zhp));
1009 	Vdev replaced(BeingReplacedBy(zhp));
1010 
1011 	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
1012 		/* If we are already being replaced by a working spare, pass. */
1013 		if (replaced.IsResilvering()
1014 		 || replaced.State() == VDEV_STATE_HEALTHY) {
1015 			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
1016 			    "replaced", VdevGUIDString().c_str(), path);
1017 			return (/*consumed*/false);
1018 		}
1019 		/*
1020 		 * If we have already been replaced by a spare, but that spare
1021 		 * is broken, we must spare the spare, not the original device.
1022 		 */
1023 		oldstr = replaced.GUIDString();
1024 		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
1025 		    "broken spare %s instead", VdevGUIDString().c_str(),
1026 		    path, oldstr.c_str());
1027 	}
1028 
1029 	/*
1030 	 * Build a root vdev/leaf vdev configuration suitable for
1031 	 * zpool_vdev_attach. Only enough data for the kernel to find
1032 	 * the device (i.e. type and disk device node path) are needed.
1033 	 */
1034 	nvroot = NULL;
1035 	newvd = NULL;
1036 
1037 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
1038 	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1039 		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
1040 		    "configuration data.", poolname, oldstr.c_str());
1041 		if (nvroot != NULL)
1042 			nvlist_free(nvroot);
1043 		return (false);
1044 	}
1045 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
1046 	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
1047 	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
1048 	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1049 				    &newvd, 1) != 0) {
1050 		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
1051 		    "configuration data.", poolname, oldstr.c_str());
1052 		nvlist_free(newvd);
1053 		nvlist_free(nvroot);
1054 		return (true);
1055 	}
1056 
1057 	/* Data was copied when added to the root vdev. */
1058 	nvlist_free(newvd);
1059 
1060 	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
1061 	    /*replace*/B_TRUE) == 0);
1062 	if (retval)
1063 		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
1064 		    poolname, oldstr.c_str(), path);
1065 	else
1066 		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
1067 		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
1068 		    libzfs_error_description(g_zfsHandle));
1069 	nvlist_free(nvroot);
1070 
1071 	return (retval);
1072 }
1073 
1074 /* Does the argument event refer to a checksum error? */
1075 static bool
1076 IsChecksumEvent(const Event* const event)
1077 {
1078 	return ("ereport.fs.zfs.checksum" == event->Value("type"));
1079 }
1080 
1081 /* Does the argument event refer to an IO error? */
1082 static bool
1083 IsIOEvent(const Event* const event)
1084 {
1085 	return ("ereport.fs.zfs.io" == event->Value("type"));
1086 }
1087 
1088 bool
1089 CaseFile::ShouldDegrade() const
1090 {
1091 	return (std::count_if(m_events.begin(), m_events.end(),
1092 			      IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
1093 }
1094 
1095 bool
1096 CaseFile::ShouldFault() const
1097 {
1098 	return (std::count_if(m_events.begin(), m_events.end(),
1099 			      IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
1100 }
1101 
1102 nvlist_t *
1103 CaseFile::CaseVdev(zpool_handle_t *zhp) const
1104 {
1105 	return (VdevIterator(zhp).Find(VdevGUID()));
1106 }
1107