xref: /freebsd/cddl/usr.sbin/zfsd/zfsd_event.cc (revision 90ec6a30353aa7caaf995ea50e2e23aa5a099600)
1 /*-
2  * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions, and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    substantially similar to the "NO WARRANTY" disclaimer below
13  *    ("Disclaimer") and any redistribution must be conditioned upon
14  *    including a substantially similar Disclaimer requirement for further
15  *    binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGES.
29  *
30  * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31  */
32 
33 /**
34  * \file zfsd_event.cc
35  */
36 #include <sys/cdefs.h>
37 #include <sys/byteorder.h>
38 #include <sys/time.h>
39 #include <sys/fs/zfs.h>
40 #include <sys/vdev_impl.h>
41 
42 #include <syslog.h>
43 
44 #include <libzfs.h>
45 #include <libzutil.h>
46 /*
47  * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with
48  * C++ flush methods
49  */
50 #undef   flush
51 #undef	__init
52 #include <list>
53 #include <map>
54 #include <sstream>
55 #include <string>
56 
57 #include <devdctl/guid.h>
58 #include <devdctl/event.h>
59 #include <devdctl/event_factory.h>
60 #include <devdctl/exception.h>
61 #include <devdctl/consumer.h>
62 
63 #include "callout.h"
64 #include "vdev_iterator.h"
65 #include "zfsd_event.h"
66 #include "case_file.h"
67 #include "vdev.h"
68 #include "zfsd.h"
69 #include "zfsd_exception.h"
70 #include "zpool_list.h"
71 
72 __FBSDID("$FreeBSD$");
73 /*============================ Namespace Control =============================*/
74 using DevdCtl::Event;
75 using DevdCtl::Guid;
76 using DevdCtl::NVPairMap;
77 using std::stringstream;
78 
79 /*=========================== Class Implementations ==========================*/
80 
81 /*-------------------------------- GeomEvent --------------------------------*/
82 
83 //- GeomEvent Static Public Methods -------------------------------------------
84 Event *
85 GeomEvent::Builder(Event::Type type,
86 		   NVPairMap &nvPairs,
87 		   const string &eventString)
88 {
89 	return (new GeomEvent(type, nvPairs, eventString));
90 }
91 
92 //- GeomEvent Virtual Public Methods ------------------------------------------
93 Event *
94 GeomEvent::DeepCopy() const
95 {
96 	return (new GeomEvent(*this));
97 }
98 
99 bool
100 GeomEvent::Process() const
101 {
102 	/*
103 	 * We only use GEOM events to repair damaged pools.  So return early if
104 	 * there are no damaged pools
105 	 */
106 	if (CaseFile::Empty())
107 		return (false);
108 
109 	/*
110 	 * We are only concerned with arrivals and physical path changes,
111 	 * because those can be used to satisfy online and autoreplace
112 	 * operations
113 	 */
114 	if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE")
115 		return (false);
116 
117 	/* Log the event since it is of interest. */
118 	Log(LOG_INFO);
119 
120 	string devPath;
121 	if (!DevPath(devPath))
122 		return (false);
123 
124 	int devFd(open(devPath.c_str(), O_RDONLY));
125 	if (devFd == -1)
126 		return (false);
127 
128 	bool inUse;
129 	bool degraded;
130 	nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded));
131 
132 	string physPath;
133         bool havePhysPath(PhysicalPath(physPath));
134 
135 	string devName;
136 	DevName(devName);
137 	close(devFd);
138 
139 	if (inUse && devLabel != NULL) {
140 		OnlineByLabel(devPath, physPath, devLabel);
141 	} else if (degraded) {
142 		syslog(LOG_INFO, "%s is marked degraded.  Ignoring "
143 		       "as a replace by physical path candidate.\n",
144 		       devName.c_str());
145 	} else if (havePhysPath) {
146 		/*
147 		 * TODO: attempt to resolve events using every casefile
148 		 * that matches this physpath
149 		 */
150 		CaseFile *caseFile(CaseFile::Find(physPath));
151 		if (caseFile != NULL) {
152 			syslog(LOG_INFO,
153 			       "Found CaseFile(%s:%s:%s) - ReEvaluating\n",
154 			       caseFile->PoolGUIDString().c_str(),
155 			       caseFile->VdevGUIDString().c_str(),
156 			       zpool_state_to_name(caseFile->VdevState(),
157 						   VDEV_AUX_NONE));
158 			caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL);
159 		}
160 	}
161 	return (false);
162 }
163 
164 //- GeomEvent Protected Methods -----------------------------------------------
165 GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs,
166 			       const string &eventString)
167  : DevdCtl::GeomEvent(type, nvpairs, eventString)
168 {
169 }
170 
171 GeomEvent::GeomEvent(const GeomEvent &src)
172  : DevdCtl::GeomEvent::GeomEvent(src)
173 {
174 }
175 
176 nvlist_t *
177 GeomEvent::ReadLabel(int devFd, bool &inUse, bool &degraded)
178 {
179 	pool_state_t poolState;
180 	char        *poolName;
181 	boolean_t    b_inuse;
182 	int          nlabels;
183 
184 	inUse    = false;
185 	degraded = false;
186 	poolName = NULL;
187 	if (zpool_in_use(g_zfsHandle, devFd, &poolState,
188 			 &poolName, &b_inuse) == 0) {
189 		nvlist_t *devLabel = NULL;
190 
191 		inUse = b_inuse == B_TRUE;
192 		if (poolName != NULL)
193 			free(poolName);
194 
195 		if (zpool_read_label(devFd, &devLabel, &nlabels) != 0)
196 			return (NULL);
197 		/*
198 		 * If we find a disk with fewer than the maximum number of
199 		 * labels, it might be the whole disk of a partitioned disk
200 		 * where ZFS resides on a partition.  In that case, we should do
201 		 * nothing and wait for the partition to appear.  Or, the disk
202 		 * might be damaged.  In that case, zfsd should do nothing and
203 		 * wait for the sysadmin to decide.
204 		 */
205 		if (nlabels != VDEV_LABELS || devLabel == NULL) {
206 			nvlist_free(devLabel);
207 			return (NULL);
208 		}
209 
210 		try {
211 			Vdev vdev(devLabel);
212 			degraded = vdev.State() != VDEV_STATE_HEALTHY;
213 			return (devLabel);
214 		} catch (ZfsdException &exp) {
215 			string devName = fdevname(devFd);
216 			string devPath = _PATH_DEV + devName;
217 			string context("GeomEvent::ReadLabel: "
218 				     + devPath + ": ");
219 
220 			exp.GetString().insert(0, context);
221 			exp.Log();
222 			nvlist_free(devLabel);
223 		}
224 	}
225 	return (NULL);
226 }
227 
228 bool
229 GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
230 			      nvlist_t *devConfig)
231 {
232 	try {
233 		/*
234 		 * A device with ZFS label information has been
235 		 * inserted.  If it matches a device for which we
236 		 * have a case, see if we can solve that case.
237 		 */
238 		syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
239 		       devPath.c_str());
240 		Vdev vdev(devConfig);
241 		CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(),
242 						  vdev.GUID()));
243 		if (caseFile != NULL)
244 			return (caseFile->ReEvaluate(devPath, physPath, &vdev));
245 
246 	} catch (ZfsdException &exp) {
247 		string context("GeomEvent::OnlineByLabel: " + devPath + ": ");
248 
249 		exp.GetString().insert(0, context);
250 		exp.Log();
251 	}
252 	return (false);
253 }
254 
255 
256 /*--------------------------------- ZfsEvent ---------------------------------*/
257 //- ZfsEvent Static Public Methods ---------------------------------------------
258 DevdCtl::Event *
259 ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs,
260 		  const string &eventString)
261 {
262 	return (new ZfsEvent(type, nvpairs, eventString));
263 }
264 
265 //- ZfsEvent Virtual Public Methods --------------------------------------------
266 Event *
267 ZfsEvent::DeepCopy() const
268 {
269 	return (new ZfsEvent(*this));
270 }
271 
272 bool
273 ZfsEvent::Process() const
274 {
275 	string logstr("");
276 
277 	if (!Contains("class") && !Contains("type")) {
278 		syslog(LOG_ERR,
279 		       "ZfsEvent::Process: Missing class or type data.");
280 		return (false);
281 	}
282 
283 	/* On config syncs, replay any queued events first. */
284 	if (Value("type").find("misc.fs.zfs.config_sync") == 0) {
285 		/*
286 		 * Even if saved events are unconsumed the second time
287 		 * around, drop them.  Any events that still can't be
288 		 * consumed are probably referring to vdevs or pools that
289 		 * no longer exist.
290 		 */
291 		ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true);
292 		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
293 	}
294 
295 	if (Value("type").find("misc.fs.zfs.") == 0) {
296 		/* Configuration changes, resilver events, etc. */
297 		ProcessPoolEvent();
298 		return (false);
299 	}
300 
301 	if (!Contains("pool_guid") || !Contains("vdev_guid")) {
302 		/* Only currently interested in Vdev related events. */
303 		return (false);
304 	}
305 
306 	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
307 	if (caseFile != NULL) {
308 		Log(LOG_INFO);
309 		syslog(LOG_INFO, "Evaluating existing case file\n");
310 		caseFile->ReEvaluate(*this);
311 		return (false);
312 	}
313 
314 	/* Skip events that can't be handled. */
315 	Guid poolGUID(PoolGUID());
316 	/* If there are no replicas for a pool, then it's not manageable. */
317 	if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) {
318 		stringstream msg;
319 		msg << "No replicas available for pool "  << poolGUID;
320 		msg << ", ignoring";
321 		Log(LOG_INFO);
322 		syslog(LOG_INFO, "%s", msg.str().c_str());
323 		return (false);
324 	}
325 
326 	/*
327 	 * Create a case file for this vdev, and have it
328 	 * evaluate the event.
329 	 */
330 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
331 	if (zpl.empty()) {
332 		stringstream msg;
333 		int priority = LOG_INFO;
334 		msg << "ZfsEvent::Process: Event for unknown pool ";
335 		msg << poolGUID << " ";
336 		msg << "queued";
337 		Log(LOG_INFO);
338 		syslog(priority, "%s", msg.str().c_str());
339 		return (true);
340 	}
341 
342 	nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
343 	if (vdevConfig == NULL) {
344 		stringstream msg;
345 		int priority = LOG_INFO;
346 		msg << "ZfsEvent::Process: Event for unknown vdev ";
347 		msg << VdevGUID() << " ";
348 		msg << "queued";
349 		Log(LOG_INFO);
350 		syslog(priority, "%s", msg.str().c_str());
351 		return (true);
352 	}
353 
354 	Vdev vdev(zpl.front(), vdevConfig);
355 	caseFile = &CaseFile::Create(vdev);
356 	if (caseFile->ReEvaluate(*this) == false) {
357 		stringstream msg;
358 		int priority = LOG_INFO;
359 		msg << "ZfsEvent::Process: Unconsumed event for vdev(";
360 		msg << zpool_get_name(zpl.front()) << ",";
361 		msg << vdev.GUID() << ") ";
362 		msg << "queued";
363 		Log(LOG_INFO);
364 		syslog(priority, "%s", msg.str().c_str());
365 		return (true);
366 	}
367 	return (false);
368 }
369 
370 //- ZfsEvent Protected Methods -------------------------------------------------
371 ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs,
372 			   const string &eventString)
373  : DevdCtl::ZfsEvent(type, nvpairs, eventString)
374 {
375 }
376 
377 ZfsEvent::ZfsEvent(const ZfsEvent &src)
378  : DevdCtl::ZfsEvent(src)
379 {
380 }
381 
382 /*
383  * Sometimes the kernel won't detach a spare when it is no longer needed.  This
384  * can happen for example if a drive is removed, then either the pool is
385  * exported or the machine is powered off, then the drive is reinserted, then
386  * the machine is powered on or the pool is imported.  ZFSD must detach these
387  * spares itself.
388  */
389 void
390 ZfsEvent::CleanupSpares() const
391 {
392 	Guid poolGUID(PoolGUID());
393 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
394 	if (!zpl.empty()) {
395 		zpool_handle_t* hdl;
396 
397 		hdl = zpl.front();
398 		VdevIterator(hdl).Each(TryDetach, (void*)hdl);
399 	}
400 }
401 
402 void
403 ZfsEvent::ProcessPoolEvent() const
404 {
405 	bool degradedDevice(false);
406 
407 	/* The pool is destroyed.  Discard any open cases */
408 	if (Value("type") == "misc.fs.zfs.pool_destroy") {
409 		Log(LOG_INFO);
410 		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
411 		return;
412 	}
413 
414 	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
415 	if (caseFile != NULL) {
416 		if (caseFile->VdevState() != VDEV_STATE_UNKNOWN
417 		 && caseFile->VdevState() < VDEV_STATE_HEALTHY)
418 			degradedDevice = true;
419 
420 		Log(LOG_INFO);
421 		caseFile->ReEvaluate(*this);
422 	}
423 	else if (Value("type") == "misc.fs.zfs.resilver_finish")
424 	{
425 		/*
426 		 * It's possible to get a resilver_finish event with no
427 		 * corresponding casefile.  For example, if a damaged pool were
428 		 * exported, repaired, then reimported.
429 		 */
430 		Log(LOG_INFO);
431 		CleanupSpares();
432 	}
433 
434 	if (Value("type") == "misc.fs.zfs.vdev_remove"
435 	 && degradedDevice == false) {
436 
437 		/* See if any other cases can make use of this device. */
438 		Log(LOG_INFO);
439 		ZfsDaemon::RequestSystemRescan();
440 	}
441 }
442 
443 bool
444 ZfsEvent::TryDetach(Vdev &vdev, void *cbArg)
445 {
446 	/*
447 	 * Outline:
448 	 * if this device is a spare, and its parent includes one healthy,
449 	 * non-spare child, then detach this device.
450 	 */
451 	zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg));
452 
453 	if (vdev.IsSpare()) {
454 		std::list<Vdev> siblings;
455 		std::list<Vdev>::iterator siblings_it;
456 		boolean_t cleanup = B_FALSE;
457 
458 		Vdev parent = vdev.Parent();
459 		siblings = parent.Children();
460 
461 		/* Determine whether the parent should be cleaned up */
462 		for (siblings_it = siblings.begin();
463 		     siblings_it != siblings.end();
464 		     siblings_it++) {
465 			Vdev sibling = *siblings_it;
466 
467 			if (!sibling.IsSpare() &&
468 			     sibling.State() == VDEV_STATE_HEALTHY) {
469 				cleanup = B_TRUE;
470 				break;
471 			}
472 		}
473 
474 		if (cleanup) {
475 			syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
476 			       vdev.Path().c_str(), zpool_get_name(hdl));
477 			zpool_vdev_detach(hdl, vdev.Path().c_str());
478 		}
479 
480 	}
481 
482 	/* Always return false, because there may be other spares to detach */
483 	return (false);
484 }
485