xref: /freebsd/cddl/usr.sbin/zfsd/case_file.cc (revision 6aaaf7ba4bba5e01008924a61261b43a8356f591)
1 /*-
2  * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions, and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    substantially similar to the "NO WARRANTY" disclaimer below
13  *    ("Disclaimer") and any redistribution must be conditioned upon
14  *    including a substantially similar Disclaimer requirement for further
15  *    binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGES.
29  *
30  * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31  */
32 
33 /**
34  * \file case_file.cc
35  *
36  * We keep case files for any leaf vdev that is not in the optimal state.
37  * However, we only serialize to disk those events that need to be preserved
38  * across reboots.  For now, this is just a log of soft errors which we
39  * accumulate in order to mark a device as degraded.
40  */
41 #include <sys/cdefs.h>
42 #include <sys/byteorder.h>
43 #include <sys/time.h>
44 
45 #include <sys/fs/zfs.h>
46 
47 #include <dirent.h>
48 #include <fcntl.h>
49 #include <iomanip>
50 #include <fstream>
51 #include <functional>
52 #include <sstream>
53 #include <syslog.h>
54 #include <unistd.h>
55 
56 #include <libzutil.h>
57 #include <libzfs.h>
58 
59 #include <list>
60 #include <map>
61 #include <string>
62 #include <vector>
63 
64 #include <devdctl/guid.h>
65 #include <devdctl/event.h>
66 #include <devdctl/event_factory.h>
67 #include <devdctl/exception.h>
68 #include <devdctl/consumer.h>
69 
70 #include "callout.h"
71 #include "vdev_iterator.h"
72 #include "zfsd_event.h"
73 #include "case_file.h"
74 #include "vdev.h"
75 #include "zfsd.h"
76 #include "zfsd_exception.h"
77 #include "zpool_list.h"
78 /*============================ Namespace Control =============================*/
79 using std::hex;
80 using std::ifstream;
81 using std::stringstream;
82 using std::setfill;
83 using std::setw;
84 
85 using DevdCtl::Event;
86 using DevdCtl::EventFactory;
87 using DevdCtl::EventList;
88 using DevdCtl::Guid;
89 using DevdCtl::ParseException;
90 
91 /*--------------------------------- CaseFile ---------------------------------*/
92 //- CaseFile Static Data -------------------------------------------------------
93 
94 CaseFileList  CaseFile::s_activeCases;
95 const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
96 
97 //- CaseFile Static Public Methods ---------------------------------------------
98 CaseFile *
99 CaseFile::Find(Guid poolGUID, Guid vdevGUID)
100 {
101 	for (CaseFileList::iterator curCase = s_activeCases.begin();
102 	     curCase != s_activeCases.end(); curCase++) {
103 
104 		if (((*curCase)->PoolGUID() != poolGUID
105 		  && Guid::InvalidGuid() != poolGUID)
106 		 || (*curCase)->VdevGUID() != vdevGUID)
107 			continue;
108 
109 		/*
110 		 * We only carry one active case per-vdev.
111 		 */
112 		return (*curCase);
113 	}
114 	return (NULL);
115 }
116 
117 void
118 CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases)
119 {
120 	for (CaseFileList::iterator curCase = s_activeCases.begin();
121 	    curCase != s_activeCases.end(); curCase++) {
122 		if (((*curCase)->PoolGUID() != poolGUID &&
123 		    Guid::InvalidGuid() != poolGUID) ||
124 		    (*curCase)->VdevGUID() != vdevGUID)
125 			continue;
126 
127 		/*
128 		 * We can have multiple cases for spare vdevs
129 		 */
130 		cases.push_back(*curCase);
131 		if (!(*curCase)->IsSpare()) {
132 			return;
133 		}
134 	}
135 }
136 
137 CaseFile *
138 CaseFile::Find(const string &physPath)
139 {
140 	CaseFile *result = NULL;
141 
142 	for (CaseFileList::iterator curCase = s_activeCases.begin();
143 	     curCase != s_activeCases.end(); curCase++) {
144 
145 		if ((*curCase)->PhysicalPath() != physPath)
146 			continue;
147 
148 		if (result != NULL) {
149 			syslog(LOG_WARNING, "Multiple casefiles found for "
150 			    "physical path %s.  "
151 			    "This is most likely a bug in zfsd",
152 			    physPath.c_str());
153 		}
154 		result = *curCase;
155 	}
156 	return (result);
157 }
158 
159 
160 void
161 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
162 {
163 	CaseFileList::iterator casefile;
164 	for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
165 		CaseFileList::iterator next = casefile;
166 		next++;
167 		if (poolGUID == (*casefile)->PoolGUID())
168 			(*casefile)->ReEvaluate(event);
169 		casefile = next;
170 	}
171 }
172 
173 CaseFile &
174 CaseFile::Create(Vdev &vdev)
175 {
176 	CaseFile *activeCase;
177 
178 	activeCase = Find(vdev.PoolGUID(), vdev.GUID());
179 	if (activeCase == NULL)
180 		activeCase = new CaseFile(vdev);
181 
182 	return (*activeCase);
183 }
184 
185 void
186 CaseFile::DeSerialize()
187 {
188 	struct dirent **caseFiles;
189 
190 	int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
191 			 DeSerializeSelector, /*compar*/NULL));
192 
193 	if (numCaseFiles == -1)
194 		return;
195 	if (numCaseFiles == 0) {
196 		free(caseFiles);
197 		return;
198 	}
199 
200 	for (int i = 0; i < numCaseFiles; i++) {
201 
202 		DeSerializeFile(caseFiles[i]->d_name);
203 		free(caseFiles[i]);
204 	}
205 	free(caseFiles);
206 }
207 
208 bool
209 CaseFile::Empty()
210 {
211 	return (s_activeCases.empty());
212 }
213 
214 void
215 CaseFile::LogAll()
216 {
217 	for (CaseFileList::iterator curCase = s_activeCases.begin();
218 	     curCase != s_activeCases.end(); curCase++)
219 		(*curCase)->Log();
220 }
221 
222 void
223 CaseFile::PurgeAll()
224 {
225 	/*
226 	 * Serialize casefiles before deleting them so that they can be reread
227 	 * and revalidated during BuildCaseFiles.
228 	 * CaseFiles remove themselves from this list on destruction.
229 	 */
230 	while (s_activeCases.size() != 0) {
231 		CaseFile *casefile = s_activeCases.front();
232 		casefile->Serialize();
233 		delete casefile;
234 	}
235 
236 }
237 
238 int
239 CaseFile::IsSpare()
240 {
241 	return (m_is_spare);
242 }
243 
244 //- CaseFile Public Methods ----------------------------------------------------
245 bool
246 CaseFile::RefreshVdevState()
247 {
248 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
249 	zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
250 	if (casePool == NULL)
251 		return (false);
252 
253 	Vdev vd(casePool, CaseVdev(casePool));
254 	if (vd.DoesNotExist())
255 		return (false);
256 
257 	m_vdevState    = vd.State();
258 	m_vdevPhysPath = vd.PhysicalPath();
259 	m_vdevName = vd.Name(casePool, false);
260 	return (true);
261 }
262 
263 bool
264 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
265 {
266 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
267 	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
268 	int flags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
269 
270 	if (pool == NULL || !RefreshVdevState()) {
271 		/*
272 		 * The pool or vdev for this case file is no longer
273 		 * part of the configuration.  This can happen
274 		 * if we process a device arrival notification
275 		 * before seeing the ZFS configuration change
276 		 * event.
277 		 */
278 		syslog(LOG_INFO,
279 		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
280 		       "Closing\n",
281 		       PoolGUIDString().c_str(),
282 		       VdevGUIDString().c_str());
283 		Close();
284 
285 		/*
286 		 * Since this event was not used to close this
287 		 * case, do not report it as consumed.
288 		 */
289 		return (/*consumed*/false);
290 	}
291 
292 	if (VdevState() > VDEV_STATE_FAULTED) {
293 		/*
294 		 * For now, newly discovered devices only help for
295 		 * devices that are missing.  In the future, we might
296 		 * use a newly inserted spare to replace a degraded
297 		 * or faulted device.
298 		 */
299 		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
300 		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
301 		return (/*consumed*/false);
302 	}
303 	if (VdevState() == VDEV_STATE_OFFLINE) {
304 		/*
305 		 * OFFLINE is an administrative decision.  No need for zfsd to
306 		 * do anything.
307 		 */
308 		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
309 		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
310 		return (/*consumed*/false);
311 	}
312 
313 	if (vdev != NULL
314 	 && ( vdev->PoolGUID() == m_poolGUID
315 	   || vdev->PoolGUID() == Guid::InvalidGuid())
316 	 && vdev->GUID() == m_vdevGUID) {
317 
318 		if (IsSpare())
319 			flags |= ZFS_ONLINE_SPARE;
320 		if (zpool_vdev_online(pool, vdev->GUIDString().c_str(),
321 		    flags, &m_vdevState) != 0) {
322 			syslog(LOG_ERR,
323 			    "Failed to online vdev(%s/%s:%s): %s: %s\n",
324 			    zpool_get_name(pool), vdev->GUIDString().c_str(),
325 			    devPath.c_str(), libzfs_error_action(g_zfsHandle),
326 			    libzfs_error_description(g_zfsHandle));
327 			return (/*consumed*/false);
328 		}
329 
330 		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
331 		       zpool_get_name(pool), vdev->GUIDString().c_str(),
332 		       devPath.c_str(),
333 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
334 
335 		/*
336 		 * Check the vdev state post the online action to see
337 		 * if we can retire this case.
338 		 */
339 		CloseIfSolved();
340 
341 		return (/*consumed*/true);
342 	}
343 
344 	/*
345 	 * If the auto-replace policy is enabled, and we have physical
346 	 * path information, try a physical path replacement.
347 	 */
348 	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
349 		syslog(LOG_INFO,
350 		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
351 		       "Ignoring device insertion.\n",
352 		       PoolGUIDString().c_str(),
353 		       VdevGUIDString().c_str(),
354 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
355 		return (/*consumed*/false);
356 	}
357 
358 	if (PhysicalPath().empty()) {
359 		syslog(LOG_INFO,
360 		       "CaseFile(%s:%s:%s): No physical path information.  "
361 		       "Ignoring device insertion.\n",
362 		       PoolGUIDString().c_str(),
363 		       VdevGUIDString().c_str(),
364 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
365 		return (/*consumed*/false);
366 	}
367 
368 	if (physPath != PhysicalPath()) {
369 		syslog(LOG_INFO,
370 		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
371 		       "Ignoring device insertion.\n",
372 		       PoolGUIDString().c_str(),
373 		       VdevGUIDString().c_str(),
374 		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
375 		return (/*consumed*/false);
376 	}
377 
378 	/* Write a label on the newly inserted disk. */
379 	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
380 		syslog(LOG_ERR,
381 		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
382 		       zpool_get_name(pool), VdevGUIDString().c_str(),
383 		       libzfs_error_action(g_zfsHandle),
384 		       libzfs_error_description(g_zfsHandle));
385 		return (/*consumed*/false);
386 	}
387 
388 	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
389 	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
390 	    devPath.c_str());
391 	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
392 }
393 
394 bool
395 CaseFile::ReEvaluate(const ZfsEvent &event)
396 {
397 	bool consumed(false);
398 
399 	if (event.Value("type") == "sysevent.fs.zfs.vdev_remove") {
400 		/*
401 		 * The Vdev we represent has been removed from the
402 		 * configuration.  This case is no longer of value.
403 		 */
404 		Close();
405 
406 		return (/*consumed*/true);
407 	} else if (event.Value("type") == "sysevent.fs.zfs.pool_destroy") {
408 		/* This Pool has been destroyed.  Discard the case */
409 		Close();
410 
411 		return (/*consumed*/true);
412 	} else if (event.Value("type") == "sysevent.fs.zfs.config_sync") {
413 		RefreshVdevState();
414 		if (VdevState() < VDEV_STATE_HEALTHY &&
415 		    VdevState() != VDEV_STATE_OFFLINE)
416 			consumed = ActivateSpare();
417 	}
418 
419 
420 	if (event.Value("class") == "resource.fs.zfs.removed") {
421 		bool spare_activated;
422 
423 		if (!RefreshVdevState()) {
424 			/*
425 			 * The pool or vdev for this case file is no longer
426 			 * part of the configuration.  This can happen
427 			 * if we process a device arrival notification
428 			 * before seeing the ZFS configuration change
429 			 * event.
430 			 */
431 			syslog(LOG_INFO,
432 			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
433 			       "unconfigured.  Closing\n",
434 			       PoolGUIDString().c_str(),
435 			       VdevGUIDString().c_str());
436 			/*
437 			 * Close the case now so we won't waste cycles in the
438 			 * system rescan
439 			 */
440 			Close();
441 
442 			/*
443 			 * Since this event was not used to close this
444 			 * case, do not report it as consumed.
445 			 */
446 			return (/*consumed*/false);
447 		}
448 
449 		/*
450 		 * Discard any tentative I/O error events for
451 		 * this case.  They were most likely caused by the
452 		 * hot-unplug of this device.
453 		 */
454 		PurgeTentativeEvents();
455 
456 		/* Try to activate spares if they are available */
457 		spare_activated = ActivateSpare();
458 
459 		/*
460 		 * Rescan the drives in the system to see if a recent
461 		 * drive arrival can be used to solve this case.
462 		 */
463 		ZfsDaemon::RequestSystemRescan();
464 
465 		/*
466 		 * Consume the event if we successfully activated a spare.
467 		 * Otherwise, leave it in the unconsumed events list so that the
468 		 * future addition of a spare to this pool might be able to
469 		 * close the case
470 		 */
471 		consumed = spare_activated;
472 	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
473 		RefreshVdevState();
474 		/*
475 		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
476 		 * activate a hotspare.  Otherwise, ignore the event
477 		 */
478 		if (VdevState() == VDEV_STATE_FAULTED ||
479 		    VdevState() == VDEV_STATE_DEGRADED ||
480 		    VdevState() == VDEV_STATE_CANT_OPEN)
481 			(void) ActivateSpare();
482 		consumed = true;
483 	}
484 	else if (event.Value("class") == "ereport.fs.zfs.io" ||
485 	         event.Value("class") == "ereport.fs.zfs.checksum" ||
486 		 event.Value("class") == "ereport.fs.zfs.delay") {
487 
488 		m_tentativeEvents.push_front(event.DeepCopy());
489 		RegisterCallout(event);
490 		consumed = true;
491 	}
492 
493 	bool closed(CloseIfSolved());
494 
495 	return (consumed || closed);
496 }
497 
498 /* Find a Vdev containing the vdev with the given GUID */
499 static nvlist_t*
500 find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid)
501 {
502 	nvlist_t **vdevChildren;
503 	int        error;
504 	unsigned   ch, numChildren;
505 
506 	error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
507 					   &vdevChildren, &numChildren);
508 
509 	if (error != 0 || numChildren == 0)
510 		return (NULL);
511 
512 	for (ch = 0; ch < numChildren; ch++) {
513 		nvlist *result;
514 		Vdev vdev(pool_config, vdevChildren[ch]);
515 
516 		if (vdev.GUID() == child_guid)
517 			return (config);
518 
519 		result = find_parent(pool_config, vdevChildren[ch], child_guid);
520 		if (result != NULL)
521 			return (result);
522 	}
523 
524 	return (NULL);
525 }
526 
527 /*
528  * Returns true if spare 'a' should be tried before spare 'b' when
529  * replacing a failed vdev with the given characteristics.
530  *
531  * Ordering criteria (most to least significant):
532  *  1. Distributed spare matching the failed vdev's dRAID is preferred
533  *     most (distributed spares rebuild faster than traditional spares).
534  *     Regular spares (no TOP_GUID) come next.  Non-matching distributed
535  *     spares are tried last, as the kernel will reject them anyway.
536  *  2. Matching rotational is preferred over mismatching.
537  *  3. Large enough is preferred over too small.
538  *  4. Smaller size is preferred over bigger (best fit).
539  */
540 static bool
541 spare_is_preferred(nvlist_t *a, nvlist_t *b, bool have_rotational,
542     uint64_t vdev_rotational, uint64_t vdev_size, uint64_t top_guid)
543 {
544 	uint64_t	 a_top, b_top, a_rotational, b_rotational;
545 	uint64_t	 a_size, b_size;
546 	uint64_t	*nvlist_array;
547 	int		 a_pri, b_pri;
548 	vdev_stat_t	*vs;
549 	uint_t		 c;
550 	bool		 a_ok, b_ok;
551 
552 	a_top = b_top = 0;
553 	(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_TOP_GUID, &a_top);
554 	(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_TOP_GUID, &b_top);
555 	a_pri = (a_top == 0) ? 1 :
556 	    (a_top == top_guid || top_guid == 0) ? 2 : 0;
557 	b_pri = (b_top == 0) ? 1 :
558 	    (b_top == top_guid || top_guid == 0) ? 2 : 0;
559 	if (a_pri != b_pri)
560 		return (a_pri > b_pri);
561 
562 	if (have_rotational) {
563 		a_rotational = b_rotational = 0;
564 		(void) nvlist_lookup_uint64(a,
565 		    ZPOOL_CONFIG_VDEV_ROTATIONAL, &a_rotational);
566 		(void) nvlist_lookup_uint64(b,
567 		    ZPOOL_CONFIG_VDEV_ROTATIONAL, &b_rotational);
568 		if ((a_rotational == vdev_rotational) !=
569 		    (b_rotational == vdev_rotational))
570 			return (a_rotational == vdev_rotational);
571 	}
572 
573 	a_size = b_size = 0;
574 	if (nvlist_lookup_uint64_array(a, ZPOOL_CONFIG_VDEV_STATS,
575 	    &nvlist_array, &c) == 0) {
576 		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
577 		a_size = vs->vs_rsize;
578 	}
579 	if (nvlist_lookup_uint64_array(b, ZPOOL_CONFIG_VDEV_STATS,
580 	    &nvlist_array, &c) == 0) {
581 		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
582 		b_size = vs->vs_rsize;
583 	}
584 	a_ok = (a_size >= vdev_size);
585 	b_ok = (b_size >= vdev_size);
586 	if (a_ok != b_ok)
587 		return (a_ok);
588 	return (a_size < b_size);
589 }
590 
591 bool
592 CaseFile::ActivateSpare() {
593 	nvlist_t	*config, *nvroot, *parent_config;
594 	nvlist_t	*vdev_config, **spares, *spare;
595 	uint64_t	*nvlist_array;
596 	const char	*devPath, *poolname, *vdev_type;
597 	uint64_t	 vdev_rotational, vdev_size, top_guid;
598 	vdev_stat_t	*vs;
599 	u_int		 nspares, i, key;
600 	uint_t		 nstats;
601 	int		 error, j;
602 	bool		 have_vdev_rotational;
603 
604 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
605 	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
606 	if (zhp == NULL) {
607 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
608 		       "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
609 		return (false);
610 	}
611 	poolname = zpool_get_name(zhp);
612 	config = zpool_get_config(zhp, NULL);
613 	if (config == NULL) {
614 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
615 		       "config for pool %s", poolname);
616 		return (false);
617 	}
618 	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
619 	if (error != 0){
620 		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
621 		       "tree for pool %s", poolname);
622 		return (false);
623 	}
624 
625 	parent_config = find_parent(config, nvroot, m_vdevGUID);
626 	if (parent_config != NULL) {
627 		const char *parent_type;
628 
629 		/*
630 		 * Don't activate spares for members of a "replacing" vdev.
631 		 * They're already dealt with.  Sparing them will just drag out
632 		 * the resilver process.
633 		 */
634 		error = nvlist_lookup_string(parent_config,
635 		    ZPOOL_CONFIG_TYPE, &parent_type);
636 		if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0)
637 			return (false);
638 	}
639 
640 	/*
641 	 * Don't activate a spare if one is already working on this vdev.
642 	 */
643 	{
644 		Vdev replaced(BeingReplacedBy(zhp));
645 		if (!replaced.DoesNotExist() && (replaced.IsResilvering() ||
646 		    replaced.State() == VDEV_STATE_HEALTHY))
647 			return (false);
648 	}
649 
650 	nspares = 0;
651 	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
652 				   &nspares);
653 	if (nspares == 0) {
654 		/* The pool has no spares configured */
655 		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
656 		       "No spares available for pool %s", poolname);
657 		return (false);
658 	}
659 
660 	/*
661 	 * Collect the failed vdev's parameters for optimal spare selection.
662 	 */
663 	vdev_rotational = vdev_size = top_guid = 0;
664 	have_vdev_rotational = false;
665 	vdev_config = VdevIterator(zhp).Find(m_vdevGUID);
666 	if (vdev_config != NULL) {
667 		have_vdev_rotational = (nvlist_lookup_uint64(vdev_config,
668 		    ZPOOL_CONFIG_VDEV_ROTATIONAL, &vdev_rotational) == 0);
669 		if (nvlist_lookup_uint64_array(vdev_config,
670 		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) == 0) {
671 			vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
672 			vdev_size = vs->vs_rsize;
673 		}
674 		(void) nvlist_lookup_uint64(vdev_config,
675 		    ZPOOL_CONFIG_TOP_GUID, &top_guid);
676 	}
677 
678 	/*
679 	 * Build a sorted index array over the spares, so that better
680 	 * candidates are tried first.
681 	 */
682 	std::vector<u_int> order(nspares);
683 	for (i = 0; i < nspares; i++)
684 		order[i] = i;
685 	for (i = 1; i < nspares; i++) {
686 		key = order[i];
687 		j = (int)i - 1;
688 		while (j >= 0 && spare_is_preferred(spares[key],
689 		    spares[order[j]], have_vdev_rotational, vdev_rotational,
690 		    vdev_size, top_guid)) {
691 			order[j + 1] = order[j];
692 			j--;
693 		}
694 		order[j + 1] = key;
695 	}
696 
697 	/*
698 	 * Try each spare in sorted order until one succeeds.
699 	 */
700 	for (i = 0; i < nspares; i++) {
701 		spare = spares[order[i]];
702 
703 		if (nvlist_lookup_uint64_array(spare,
704 		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
705 			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
706 			       "find vdev stats for pool %s, spare %d",
707 			       poolname, order[i]);
708 			continue;
709 		}
710 		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
711 
712 		if ((vs->vs_aux == VDEV_AUX_SPARED)
713 		 || (vs->vs_state != VDEV_STATE_HEALTHY))
714 			continue;
715 
716 		error = nvlist_lookup_string(spare, ZPOOL_CONFIG_PATH,
717 		    &devPath);
718 		if (error != 0) {
719 			syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot "
720 			       "determine the path of pool %s, spare %d. "
721 			       "Error %d", poolname, order[i], error);
722 			continue;
723 		}
724 
725 		error = nvlist_lookup_string(spare, ZPOOL_CONFIG_TYPE,
726 		    &vdev_type);
727 		if (error != 0) {
728 			syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot "
729 			       "determine the vdev type of pool %s, "
730 			       "spare %d. Error %d",
731 			       poolname, order[i], error);
732 			continue;
733 		}
734 
735 		if (Replace(vdev_type, devPath, /*isspare*/true))
736 			return (true);
737 	}
738 
739 	return (false);
740 }
741 
742 /* Does the argument event refer to a checksum error? */
743 static bool
744 IsChecksumEvent(const Event* const event)
745 {
746 	return ("ereport.fs.zfs.checksum" == event->Value("type"));
747 }
748 
749 /* Does the argument event refer to an IO error? */
750 static bool
751 IsIOEvent(const Event* const event)
752 {
753 	return ("ereport.fs.zfs.io" == event->Value("type"));
754 }
755 
756 /* Does the argument event refer to an IO delay? */
757 static bool
758 IsDelayEvent(const Event* const event)
759 {
760 	return ("ereport.fs.zfs.delay" == event->Value("type"));
761 }
762 
763 void
764 CaseFile::RegisterCallout(const Event &event)
765 {
766 	timeval now, countdown, elapsed, timestamp, zero, remaining;
767 	/**
768 	 * The time ZFSD waits before promoting a tentative event
769 	 * into a permanent event.
770 	 */
771 	int sec = -1;
772 	if (IsChecksumEvent(&event))
773 		sec = CaseFile::GetVdevProp(VDEV_PROP_CHECKSUM_T);
774 	else if (IsIOEvent(&event))
775 		sec = CaseFile::GetVdevProp(VDEV_PROP_IO_T);
776 	else if (IsDelayEvent(&event))
777 		sec = CaseFile::GetVdevProp(VDEV_PROP_SLOW_IO_T);
778 
779 	if (sec == -1)
780 		sec = 60; /* default */
781 
782 	timeval removeGracePeriod = {
783 	    sec, /*sec*/
784 	    0 /*usec*/
785 	};
786 
787 	gettimeofday(&now, 0);
788 	timestamp = event.GetTimestamp();
789 	timersub(&now, &timestamp, &elapsed);
790 	timersub(&removeGracePeriod, &elapsed, &countdown);
791 	/*
792 	 * If countdown is <= zero, Reset the timer to the
793 	 * smallest positive time value instead
794 	 */
795 	timerclear(&zero);
796 	if (timercmp(&countdown, &zero, <=)) {
797 		timerclear(&countdown);
798 		countdown.tv_usec = 1;
799 	}
800 
801 	remaining = m_tentativeTimer.TimeRemaining();
802 
803 	if (!m_tentativeTimer.IsPending()
804 	 || timercmp(&countdown, &remaining, <))
805 		m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
806 }
807 
808 
809 bool
810 CaseFile::CloseIfSolved()
811 {
812 	if (m_events.empty()
813 	 && m_tentativeEvents.empty()) {
814 
815 		/*
816 		 * We currently do not track or take actions on
817 		 * devices in the degraded or faulted state.
818 		 * Once we have support for spare pools, we'll
819 		 * retain these cases so that any spares added in
820 		 * the future can be applied to them.
821 		 */
822 		switch (VdevState()) {
823 		case VDEV_STATE_HEALTHY:
824 			/* No need to keep cases for healthy vdevs */
825 		case VDEV_STATE_OFFLINE:
826 			/*
827 			 * Offline is a deliberate administrative action.  zfsd
828 			 * doesn't need to do anything for this state.
829 			 */
830 			Close();
831 			return (true);
832 		case VDEV_STATE_REMOVED:
833 		case VDEV_STATE_CANT_OPEN:
834 			/*
835 			 * Keep open.  We may solve it with a newly inserted
836 			 * device.
837 			 */
838 		case VDEV_STATE_FAULTED:
839 		case VDEV_STATE_DEGRADED:
840 			/*
841 			 * Keep open.  We may solve it with the future
842 			 * addition of a spare to the pool
843 			 */
844 		case VDEV_STATE_UNKNOWN:
845 		case VDEV_STATE_CLOSED:
846 			/*
847 			 * Keep open?  This may not be the correct behavior,
848 			 * but it's what we've always done
849 			 */
850 			;
851 		}
852 
853 		/*
854 		 * Re-serialize the case in order to remove any
855 		 * previous event data.
856 		 */
857 		Serialize();
858 	}
859 
860 	return (false);
861 }
862 
863 void
864 CaseFile::Log()
865 {
866 	syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
867 	       VdevGUIDString().c_str(), PhysicalPath().c_str());
868 	syslog(LOG_INFO, "\tVdev State = %s\n",
869 	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
870 	if (m_tentativeEvents.size() != 0) {
871 		syslog(LOG_INFO, "\t=== Tentative Events ===\n");
872 		for (EventList::iterator event(m_tentativeEvents.begin());
873 		     event != m_tentativeEvents.end(); event++)
874 			(*event)->Log(LOG_INFO);
875 	}
876 	if (m_events.size() != 0) {
877 		syslog(LOG_INFO, "\t=== Events ===\n");
878 		for (EventList::iterator event(m_events.begin());
879 		     event != m_events.end(); event++)
880 			(*event)->Log(LOG_INFO);
881 	}
882 }
883 
884 //- CaseFile Static Protected Methods ------------------------------------------
885 void
886 CaseFile::OnGracePeriodEnded(void *arg)
887 {
888 	CaseFile &casefile(*static_cast<CaseFile *>(arg));
889 
890 	casefile.OnGracePeriodEnded();
891 }
892 
893 int
894 CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
895 {
896 	uint64_t poolGUID;
897 	uint64_t vdevGUID;
898 
899 	if (dirEntry->d_type == DT_REG
900 	 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
901 		   &poolGUID, &vdevGUID) == 2)
902 		return (1);
903 	return (0);
904 }
905 
906 void
907 CaseFile::DeSerializeFile(const char *fileName)
908 {
909 	string	  fullName(s_caseFilePath + '/' + fileName);
910 	CaseFile *existingCaseFile(NULL);
911 	CaseFile *caseFile(NULL);
912 
913 	try {
914 		uint64_t poolGUID;
915 		uint64_t vdevGUID;
916 		nvlist_t *vdevConf;
917 
918 		if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
919 		       &poolGUID, &vdevGUID) != 2) {
920 			throw ZfsdException("CaseFile::DeSerialize: "
921 			    "Unintelligible CaseFile filename %s.\n", fileName);
922 		}
923 		existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
924 		if (existingCaseFile != NULL) {
925 			/*
926 			 * If the vdev is already degraded or faulted,
927 			 * there's no point in keeping the state around
928 			 * that we use to put a drive into the degraded
929 			 * state.  However, if the vdev is simply missing,
930 			 * preserve the case data in the hopes that it will
931 			 * return.
932 			 */
933 			caseFile = existingCaseFile;
934 			vdev_state curState(caseFile->VdevState());
935 			if (curState > VDEV_STATE_CANT_OPEN
936 			 && curState < VDEV_STATE_HEALTHY) {
937 				unlink(fileName);
938 				return;
939 			}
940 		} else {
941 			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
942 			if (zpl.empty()
943 			 || (vdevConf = VdevIterator(zpl.front())
944 						    .Find(vdevGUID)) == NULL) {
945 				/*
946 				 * Either the pool no longer exists
947 				 * or this vdev is no longer a member of
948 				 * the pool.
949 				 */
950 				unlink(fullName.c_str());
951 				return;
952 			}
953 
954 			/*
955 			 * Any vdev we find that does not have a case file
956 			 * must be in the healthy state and thus worthy of
957 			 * continued SERD data tracking.
958 			 */
959 			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
960 		}
961 
962 		ifstream caseStream(fullName.c_str());
963 		if (!caseStream)
964 			throw ZfsdException("CaseFile::DeSerialize: Unable to "
965 					    "read %s.\n", fileName);
966 
967 		caseFile->DeSerialize(caseStream);
968 	} catch (const ParseException &exp) {
969 
970 		exp.Log();
971 		if (caseFile != existingCaseFile)
972 			delete caseFile;
973 
974 		/*
975 		 * Since we can't parse the file, unlink it so we don't
976 		 * trip over it again.
977 		 */
978 		unlink(fileName);
979 	} catch (const ZfsdException &zfsException) {
980 
981 		zfsException.Log();
982 		if (caseFile != existingCaseFile)
983 			delete caseFile;
984 	}
985 }
986 
987 //- CaseFile Protected Methods -------------------------------------------------
988 CaseFile::CaseFile(const Vdev &vdev)
989  : m_poolGUID(vdev.PoolGUID()),
990    m_vdevGUID(vdev.GUID()),
991    m_vdevState(vdev.State()),
992    m_vdevPhysPath(vdev.PhysicalPath()),
993    m_is_spare(vdev.IsSpare())
994 {
995 	stringstream guidString;
996 
997 	guidString << m_vdevGUID;
998 	m_vdevGUIDString = guidString.str();
999 	guidString.str("");
1000 	guidString << m_poolGUID;
1001 	m_poolGUIDString = guidString.str();
1002 
1003 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1004 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1005 	m_vdevName = vdev.Name(zhp, false);
1006 
1007 	s_activeCases.push_back(this);
1008 
1009 	syslog(LOG_INFO, "Creating new CaseFile:\n");
1010 	Log();
1011 }
1012 
1013 CaseFile::~CaseFile()
1014 {
1015 	PurgeEvents();
1016 	PurgeTentativeEvents();
1017 	m_tentativeTimer.Stop();
1018 	s_activeCases.remove(this);
1019 }
1020 
1021 void
1022 CaseFile::PurgeEvents()
1023 {
1024 	for (EventList::iterator event(m_events.begin());
1025 	     event != m_events.end(); event++)
1026 		delete *event;
1027 
1028 	m_events.clear();
1029 }
1030 
1031 void
1032 CaseFile::PurgeTentativeEvents()
1033 {
1034 	for (EventList::iterator event(m_tentativeEvents.begin());
1035 	     event != m_tentativeEvents.end(); event++)
1036 		delete *event;
1037 
1038 	m_tentativeEvents.clear();
1039 }
1040 
1041 void
1042 CaseFile::SerializeEvList(const EventList events, int fd,
1043 		const char* prefix) const
1044 {
1045 	if (events.empty())
1046 		return;
1047 	for (EventList::const_iterator curEvent = events.begin();
1048 	     curEvent != events.end(); curEvent++) {
1049 		const string &eventString((*curEvent)->GetEventString());
1050 
1051 		// TODO: replace many write(2) calls with a single writev(2)
1052 		if (prefix)
1053 			write(fd, prefix, strlen(prefix));
1054 		write(fd, eventString.c_str(), eventString.length());
1055 	}
1056 }
1057 
1058 void
1059 CaseFile::Serialize()
1060 {
1061 	stringstream saveFile;
1062 
1063 	saveFile << setfill('0')
1064 		 << s_caseFilePath << "/"
1065 		 << "pool_" << PoolGUIDString()
1066 		 << "_vdev_" << VdevGUIDString()
1067 		 << ".case";
1068 
1069 	if (m_events.empty() && m_tentativeEvents.empty()) {
1070 		unlink(saveFile.str().c_str());
1071 		return;
1072 	}
1073 
1074 	int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
1075 	if (fd == -1) {
1076 		syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
1077 		       saveFile.str().c_str());
1078 		return;
1079 	}
1080 	SerializeEvList(m_events, fd);
1081 	SerializeEvList(m_tentativeEvents, fd, "tentative ");
1082 	close(fd);
1083 }
1084 
1085 /*
1086  * XXX: This method assumes that events may not contain embedded newlines.  If
1087  * ever events can contain embedded newlines, then CaseFile must switch
1088  * serialization formats
1089  */
1090 void
1091 CaseFile::DeSerialize(ifstream &caseStream)
1092 {
1093 	string	      evString;
1094 	const EventFactory &factory(ZfsDaemon::Get().GetFactory());
1095 
1096 	caseStream >> std::noskipws >> std::ws;
1097 	while (caseStream.good()) {
1098 		/*
1099 		 * Outline:
1100 		 * read the beginning of a line and check it for
1101 		 * "tentative".  If found, discard "tentative".
1102 		 * Create a new event
1103 		 * continue
1104 		 */
1105 		EventList* destEvents;
1106 		const string tentFlag("tentative ");
1107 		string line;
1108 		std::stringbuf lineBuf;
1109 
1110 		caseStream.get(lineBuf);
1111 		caseStream.ignore();  /*discard the newline character*/
1112 		line = lineBuf.str();
1113 		if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
1114 			/* Discard "tentative" */
1115 			line.erase(0, tentFlag.size());
1116 			destEvents = &m_tentativeEvents;
1117 		} else {
1118 			destEvents = &m_events;
1119 		}
1120 		Event *event(Event::CreateEvent(factory, line));
1121 		if (event != NULL) {
1122 			destEvents->push_back(event);
1123 			RegisterCallout(*event);
1124 		}
1125 	}
1126 }
1127 
1128 void
1129 CaseFile::Close()
1130 {
1131 	/*
1132 	 * This case is no longer relevant.  Clean up our
1133 	 * serialization file, and delete the case.
1134 	 */
1135 	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
1136 	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
1137 	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
1138 
1139 	/*
1140 	 * Serialization of a Case with no event data, clears the
1141 	 * Serialization data for that event.
1142 	 */
1143 	PurgeEvents();
1144 	Serialize();
1145 
1146 	delete this;
1147 }
1148 
1149 void
1150 CaseFile::OnGracePeriodEnded()
1151 {
1152 	bool should_fault, should_degrade;
1153 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1154 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1155 
1156 	m_events.splice(m_events.begin(), m_tentativeEvents);
1157 	should_fault = ShouldFault();
1158 	should_degrade = ShouldDegrade();
1159 
1160 	if (should_fault || should_degrade) {
1161 		if (zhp == NULL
1162 		 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
1163 			/*
1164 			 * Either the pool no longer exists
1165 			 * or this vdev is no longer a member of
1166 			 * the pool.
1167 			 */
1168 			Close();
1169 			return;
1170 		}
1171 
1172 	}
1173 
1174 	/* A fault condition has priority over a degrade condition */
1175 	if (ShouldFault()) {
1176 		/* Fault the vdev and close the case. */
1177 		if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
1178 				       VDEV_AUX_ERR_EXCEEDED) == 0) {
1179 			syslog(LOG_INFO, "Faulting vdev(%s/%s)",
1180 			       PoolGUIDString().c_str(),
1181 			       VdevGUIDString().c_str());
1182 			Close();
1183 			return;
1184 		}
1185 		else {
1186 			syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
1187 			       PoolGUIDString().c_str(),
1188 			       VdevGUIDString().c_str(),
1189 			       libzfs_error_action(g_zfsHandle),
1190 			       libzfs_error_description(g_zfsHandle));
1191 		}
1192 	}
1193 	else if (ShouldDegrade()) {
1194 		/* Degrade the vdev and close the case. */
1195 		if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
1196 				       VDEV_AUX_ERR_EXCEEDED) == 0) {
1197 			syslog(LOG_INFO, "Degrading vdev(%s/%s)",
1198 			       PoolGUIDString().c_str(),
1199 			       VdevGUIDString().c_str());
1200 			Close();
1201 			return;
1202 		}
1203 		else {
1204 			syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
1205 			       PoolGUIDString().c_str(),
1206 			       VdevGUIDString().c_str(),
1207 			       libzfs_error_action(g_zfsHandle),
1208 			       libzfs_error_description(g_zfsHandle));
1209 		}
1210 	}
1211 	Serialize();
1212 }
1213 
1214 Vdev
1215 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
1216 	Vdev vd(zhp, CaseVdev(zhp));
1217 	std::list<Vdev> children;
1218 	std::list<Vdev>::iterator children_it;
1219 
1220 	Vdev parent(vd.Parent());
1221 	Vdev replacing(NonexistentVdev);
1222 
1223 	/*
1224 	 * To determine whether we are being replaced by another spare that
1225 	 * is still working, then make sure that it is currently spared and
1226 	 * that the spare is either resilvering or healthy.  If any of these
1227 	 * conditions fail, then we are not being replaced by a spare.
1228 	 *
1229 	 * If the spare is healthy, then the case file should be closed very
1230 	 * soon after this check.
1231 	 */
1232 	if (parent.DoesNotExist()
1233 	 || parent.Name(zhp, /*verbose*/false) != "spare")
1234 		return (NonexistentVdev);
1235 
1236 	children = parent.Children();
1237 	children_it = children.begin();
1238 	for (;children_it != children.end(); children_it++) {
1239 		Vdev child = *children_it;
1240 
1241 		/* Skip our vdev. */
1242 		if (child.GUID() == VdevGUID())
1243 			continue;
1244 		/*
1245 		 * Accept the first child that doesn't match our GUID, or
1246 		 * any resilvering/healthy device if one exists.
1247 		 */
1248 		if (replacing.DoesNotExist() || child.IsResilvering()
1249 		 || child.State() == VDEV_STATE_HEALTHY)
1250 			replacing = child;
1251 	}
1252 
1253 	return (replacing);
1254 }
1255 
1256 bool
1257 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
1258 	nvlist_t *nvroot, *newvd;
1259 	const char *poolname;
1260 	string oldstr(VdevGUIDString());
1261 	bool retval = true;
1262 
1263 	/* Figure out what pool we're working on */
1264 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1265 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1266 	if (zhp == NULL) {
1267 		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
1268 		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
1269 		return (false);
1270 	}
1271 	poolname = zpool_get_name(zhp);
1272 	Vdev vd(zhp, CaseVdev(zhp));
1273 	Vdev replaced(BeingReplacedBy(zhp));
1274 
1275 	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
1276 		/* If we are already being replaced by a working spare, pass. */
1277 		if (replaced.IsResilvering()
1278 		 || replaced.State() == VDEV_STATE_HEALTHY) {
1279 			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
1280 			    "replaced", VdevGUIDString().c_str(), path);
1281 			return (/*consumed*/false);
1282 		}
1283 		/*
1284 		 * If we have already been replaced by a spare, but that spare
1285 		 * is broken, we must spare the spare, not the original device.
1286 		 */
1287 		oldstr = replaced.GUIDString();
1288 		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
1289 		    "broken spare %s instead", VdevGUIDString().c_str(),
1290 		    path, oldstr.c_str());
1291 	}
1292 
1293 	/*
1294 	 * Build a root vdev/leaf vdev configuration suitable for
1295 	 * zpool_vdev_attach. Only enough data for the kernel to find
1296 	 * the device (i.e. type and disk device node path) are needed.
1297 	 */
1298 	nvroot = NULL;
1299 	newvd = NULL;
1300 
1301 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
1302 	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1303 		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
1304 		    "configuration data.", poolname, oldstr.c_str());
1305 		if (nvroot != NULL)
1306 			nvlist_free(nvroot);
1307 		return (false);
1308 	}
1309 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
1310 	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
1311 	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
1312 	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1313 				    &newvd, 1) != 0) {
1314 		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
1315 		    "configuration data.", poolname, oldstr.c_str());
1316 		nvlist_free(newvd);
1317 		nvlist_free(nvroot);
1318 		return (true);
1319 	}
1320 
1321 	/* Data was copied when added to the root vdev. */
1322 	nvlist_free(newvd);
1323 
1324 	/* Prefer sequential resilvering for distributed spares. */
1325 	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
1326 	    /*replace*/B_TRUE,
1327 	    strcmp(vdev_type, VDEV_TYPE_DRAID_SPARE) == 0 ?
1328 	    B_TRUE : B_FALSE) == 0);
1329 	if (retval)
1330 		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
1331 		    poolname, oldstr.c_str(), path);
1332 	else
1333 		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
1334 		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
1335 		    libzfs_error_description(g_zfsHandle));
1336 	nvlist_free(nvroot);
1337 
1338 	return (retval);
1339 }
1340 
1341 /* Lookup the vdev prop. Used for checksum, IO, or slow IO props */
1342 int
1343 CaseFile::GetVdevProp(vdev_prop_t vdev_prop) const
1344 {
1345 	char val[ZFS_MAXPROPLEN];
1346 	zprop_source_t srctype;
1347 	DevdCtl::Guid poolGUID = PoolGUID();
1348 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
1349 	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1350 
1351 	char *prop_str = (char *) vdev_prop_to_name(vdev_prop);
1352 	if (zhp == NULL || zpool_get_vdev_prop(zhp, m_vdevName.c_str(),
1353 	    vdev_prop, prop_str, val, sizeof (val), &srctype, B_FALSE) != 0)
1354 		return (-1);
1355 
1356 	/* we'll get "-" from libzfs for a prop that is not set */
1357 	if (zfs_isnumber(val) == B_FALSE)
1358 		return (-1);
1359 
1360 	return (atoi(val));
1361 }
1362 
1363 bool
1364 CaseFile::ShouldDegrade() const
1365 {
1366 	int checksum_n = GetVdevProp(VDEV_PROP_CHECKSUM_N);
1367 	if (checksum_n == -1)
1368 		checksum_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
1369 	return (std::count_if(m_events.begin(), m_events.end(),
1370 			      IsChecksumEvent) > checksum_n);
1371 }
1372 
1373 bool
1374 CaseFile::ShouldFault() const
1375 {
1376 	bool should_fault_for_io, should_fault_for_delay;
1377 	int io_n = GetVdevProp(VDEV_PROP_IO_N);
1378 	int slow_io_n = GetVdevProp(VDEV_PROP_SLOW_IO_N);
1379 
1380 	if (io_n == -1)
1381 		io_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
1382 	if (slow_io_n == -1)
1383 		slow_io_n = DEFAULT_ZFS_FAULT_SLOW_IO_COUNT;
1384 
1385 	should_fault_for_io = std::count_if(m_events.begin(), m_events.end(),
1386 			      IsIOEvent) > io_n;
1387 	should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(),
1388 			      IsDelayEvent) > slow_io_n;
1389 
1390 	return (should_fault_for_io || should_fault_for_delay);
1391 }
1392 
1393 nvlist_t *
1394 CaseFile::CaseVdev(zpool_handle_t *zhp) const
1395 {
1396 	return (VdevIterator(zhp).Find(VdevGUID()));
1397 }
1398