1 /*-
2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions, and the following disclaimer,
10 * without modification.
11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12 * substantially similar to the "NO WARRANTY" disclaimer below
13 * ("Disclaimer") and any redistribution must be conditioned upon
14 * including a substantially similar Disclaimer requirement for further
15 * binary redistribution.
16 *
17 * NO WARRANTY
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGES.
29 *
30 * Authors: Justin T. Gibbs (Spectra Logic Corporation)
31 */
32
33 /**
34 * \file zfsd_event.cc
35 */
36 #include <sys/cdefs.h>
37 #include <sys/byteorder.h>
38 #include <sys/time.h>
39 #include <sys/fs/zfs.h>
40 #include <sys/vdev_impl.h>
41
42 #include <syslog.h>
43
44 #include <libzfs.h>
45 #include <libzutil.h>
46 /*
47 * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with
48 * C++ flush methods
49 */
50 #undef flush
51 #undef __init
52 #include <list>
53 #include <map>
54 #include <sstream>
55 #include <string>
56
57 #include <devdctl/guid.h>
58 #include <devdctl/event.h>
59 #include <devdctl/event_factory.h>
60 #include <devdctl/exception.h>
61 #include <devdctl/consumer.h>
62
63 #include "callout.h"
64 #include "vdev_iterator.h"
65 #include "zfsd_event.h"
66 #include "case_file.h"
67 #include "vdev.h"
68 #include "zfsd.h"
69 #include "zfsd_exception.h"
70 #include "zpool_list.h"
71 /*============================ Namespace Control =============================*/
72 using DevdCtl::Event;
73 using DevdCtl::Guid;
74 using DevdCtl::NVPairMap;
75 using std::stringstream;
76
77 /*=========================== Class Implementations ==========================*/
78
79 /*-------------------------------- GeomEvent --------------------------------*/
80
81 //- GeomEvent Static Public Methods -------------------------------------------
82 Event *
Builder(Event::Type type,NVPairMap & nvPairs,const string & eventString)83 GeomEvent::Builder(Event::Type type,
84 NVPairMap &nvPairs,
85 const string &eventString)
86 {
87 return (new GeomEvent(type, nvPairs, eventString));
88 }
89
90 //- GeomEvent Virtual Public Methods ------------------------------------------
91 Event *
DeepCopy() const92 GeomEvent::DeepCopy() const
93 {
94 return (new GeomEvent(*this));
95 }
96
97 bool
Process() const98 GeomEvent::Process() const
99 {
100 /*
101 * We only use GEOM events to repair damaged pools. So return early if
102 * there are no damaged pools
103 */
104 if (CaseFile::Empty())
105 return (false);
106
107 /*
108 * We are only concerned with arrivals and physical path changes,
109 * because those can be used to satisfy online and autoreplace
110 * operations
111 */
112 if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE")
113 return (false);
114
115 /* Log the event since it is of interest. */
116 Log(LOG_INFO);
117
118 string devPath;
119 if (!DevPath(devPath))
120 return (false);
121
122 int devFd(open(devPath.c_str(), O_RDONLY));
123 if (devFd == -1)
124 return (false);
125
126 bool inUse;
127 bool degraded;
128 nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded));
129
130 string physPath;
131 bool havePhysPath(PhysicalPath(physPath));
132
133 string devName;
134 DevName(devName);
135 close(devFd);
136
137 if (inUse && devLabel != NULL) {
138 OnlineByLabel(devPath, physPath, devLabel);
139 } else if (degraded) {
140 syslog(LOG_INFO, "%s is marked degraded. Ignoring "
141 "as a replace by physical path candidate.\n",
142 devName.c_str());
143 } else if (havePhysPath) {
144 /*
145 * TODO: attempt to resolve events using every casefile
146 * that matches this physpath
147 */
148 CaseFile *caseFile(CaseFile::Find(physPath));
149 if (caseFile != NULL) {
150 syslog(LOG_INFO,
151 "Found CaseFile(%s:%s:%s) - ReEvaluating\n",
152 caseFile->PoolGUIDString().c_str(),
153 caseFile->VdevGUIDString().c_str(),
154 zpool_state_to_name(caseFile->VdevState(),
155 VDEV_AUX_NONE));
156 caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL);
157 }
158 }
159 return (false);
160 }
161
162 //- GeomEvent Protected Methods -----------------------------------------------
GeomEvent(Event::Type type,NVPairMap & nvpairs,const string & eventString)163 GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs,
164 const string &eventString)
165 : DevdCtl::GeomEvent(type, nvpairs, eventString)
166 {
167 }
168
/**
 * Copy constructor.  Copying is delegated entirely to the
 * DevdCtl::GeomEvent base class.  The base is named the same way as in
 * the other constructors in this file.
 */
GeomEvent::GeomEvent(const GeomEvent &src)
	: DevdCtl::GeomEvent(src)
{
}
173
174 nvlist_t *
ReadLabel(int devFd,bool & inUse,bool & degraded)175 GeomEvent::ReadLabel(int devFd, bool &inUse, bool °raded)
176 {
177 pool_state_t poolState;
178 char *poolName;
179 boolean_t b_inuse;
180 int nlabels;
181
182 inUse = false;
183 degraded = false;
184 poolName = NULL;
185 if (zpool_in_use(g_zfsHandle, devFd, &poolState,
186 &poolName, &b_inuse) == 0) {
187 nvlist_t *devLabel = NULL;
188
189 inUse = b_inuse == B_TRUE;
190 if (poolName != NULL)
191 free(poolName);
192
193 if (zpool_read_label(devFd, &devLabel, &nlabels) != 0)
194 return (NULL);
195 /*
196 * If we find a disk with fewer than the maximum number of
197 * labels, it might be the whole disk of a partitioned disk
198 * where ZFS resides on a partition. In that case, we should do
199 * nothing and wait for the partition to appear. Or, the disk
200 * might be damaged. In that case, zfsd should do nothing and
201 * wait for the sysadmin to decide.
202 */
203 if (nlabels != VDEV_LABELS || devLabel == NULL) {
204 nvlist_free(devLabel);
205 return (NULL);
206 }
207
208 try {
209 Vdev vdev(devLabel);
210 degraded = vdev.State() != VDEV_STATE_HEALTHY;
211 return (devLabel);
212 } catch (ZfsdException &exp) {
213 string devName = fdevname(devFd);
214 string devPath = _PATH_DEV + devName;
215 string context("GeomEvent::ReadLabel: "
216 + devPath + ": ");
217
218 exp.GetString().insert(0, context);
219 exp.Log();
220 nvlist_free(devLabel);
221 }
222 }
223 return (NULL);
224 }
225
226 bool
OnlineByLabel(const string & devPath,const string & physPath,nvlist_t * devConfig)227 GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
228 nvlist_t *devConfig)
229 {
230 bool ret = false;
231 try {
232 CaseFileList case_list;
233 /*
234 * A device with ZFS label information has been
235 * inserted. If it matches a device for which we
236 * have a case, see if we can solve that case.
237 */
238 syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
239 devPath.c_str());
240 Vdev vdev(devConfig);
241 CaseFile::Find(vdev.PoolGUID(),vdev.GUID(), case_list);
242 for (CaseFileList::iterator curr = case_list.begin();
243 curr != case_list.end(); curr++) {
244 ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev);
245 }
246 return (ret);
247
248 } catch (ZfsdException &exp) {
249 string context("GeomEvent::OnlineByLabel: " + devPath + ": ");
250
251 exp.GetString().insert(0, context);
252 exp.Log();
253 }
254 return (ret);
255 }
256
257
258 /*--------------------------------- ZfsEvent ---------------------------------*/
259 //- ZfsEvent Static Public Methods ---------------------------------------------
260 DevdCtl::Event *
Builder(Event::Type type,NVPairMap & nvpairs,const string & eventString)261 ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs,
262 const string &eventString)
263 {
264 return (new ZfsEvent(type, nvpairs, eventString));
265 }
266
267 //- ZfsEvent Virtual Public Methods --------------------------------------------
268 Event *
DeepCopy() const269 ZfsEvent::DeepCopy() const
270 {
271 return (new ZfsEvent(*this));
272 }
273
274 bool
Process() const275 ZfsEvent::Process() const
276 {
277 string logstr("");
278
279 if (!Contains("class") && !Contains("type")) {
280 syslog(LOG_ERR,
281 "ZfsEvent::Process: Missing class or type data.");
282 return (false);
283 }
284
285 /* On config syncs, replay any queued events first. */
286 if (Value("type").find("sysevent.fs.zfs.config_sync") == 0) {
287 /*
288 * Even if saved events are unconsumed the second time
289 * around, drop them. Any events that still can't be
290 * consumed are probably referring to vdevs or pools that
291 * no longer exist.
292 */
293 ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true);
294 CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
295 }
296
297 if (Value("type").find("sysevent.fs.zfs.") == 0) {
298 /* Configuration changes, resilver events, etc. */
299 ProcessPoolEvent();
300 return (false);
301 }
302
303 if (!Contains("pool_guid") || !Contains("vdev_guid")) {
304 /* Only currently interested in Vdev related events. */
305 return (false);
306 }
307
308 CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
309 if (caseFile != NULL) {
310 Log(LOG_INFO);
311 syslog(LOG_INFO, "Evaluating existing case file\n");
312 caseFile->ReEvaluate(*this);
313 return (false);
314 }
315
316 /* Skip events that can't be handled. */
317 Guid poolGUID(PoolGUID());
318 /* If there are no replicas for a pool, then it's not manageable. */
319 if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) {
320 stringstream msg;
321 msg << "No replicas available for pool " << poolGUID;
322 msg << ", ignoring";
323 Log(LOG_INFO);
324 syslog(LOG_INFO, "%s", msg.str().c_str());
325 return (false);
326 }
327
328 /*
329 * Create a case file for this vdev, and have it
330 * evaluate the event.
331 */
332 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
333 if (zpl.empty()) {
334 stringstream msg;
335 int priority = LOG_INFO;
336 msg << "ZfsEvent::Process: Event for unknown pool ";
337 msg << poolGUID << " ";
338 msg << "queued";
339 Log(LOG_INFO);
340 syslog(priority, "%s", msg.str().c_str());
341 return (true);
342 }
343
344 nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
345 if (vdevConfig == NULL) {
346 stringstream msg;
347 int priority = LOG_INFO;
348 msg << "ZfsEvent::Process: Event for unknown vdev ";
349 msg << VdevGUID() << " ";
350 msg << "queued";
351 Log(LOG_INFO);
352 syslog(priority, "%s", msg.str().c_str());
353 return (true);
354 }
355
356 Vdev vdev(zpl.front(), vdevConfig);
357 caseFile = &CaseFile::Create(vdev);
358 if (caseFile->ReEvaluate(*this) == false) {
359 stringstream msg;
360 int priority = LOG_INFO;
361 msg << "ZfsEvent::Process: Unconsumed event for vdev(";
362 msg << zpool_get_name(zpl.front()) << ",";
363 msg << vdev.GUID() << ") ";
364 msg << "queued";
365 Log(LOG_INFO);
366 syslog(priority, "%s", msg.str().c_str());
367 return (true);
368 }
369 return (false);
370 }
371
372 //- ZfsEvent Protected Methods -------------------------------------------------
ZfsEvent(Event::Type type,NVPairMap & nvpairs,const string & eventString)373 ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs,
374 const string &eventString)
375 : DevdCtl::ZfsEvent(type, nvpairs, eventString)
376 {
377 }
378
/**
 * Copy constructor.  Copying is delegated entirely to the
 * DevdCtl::ZfsEvent base class.
 */
ZfsEvent::ZfsEvent(const ZfsEvent &src)
	: DevdCtl::ZfsEvent(src)
{
}
383
384 /*
385 * Sometimes the kernel won't detach a spare when it is no longer needed. This
386 * can happen for example if a drive is removed, then either the pool is
387 * exported or the machine is powered off, then the drive is reinserted, then
388 * the machine is powered on or the pool is imported. ZFSD must detach these
389 * spares itself.
390 */
391 void
CleanupSpares() const392 ZfsEvent::CleanupSpares() const
393 {
394 Guid poolGUID(PoolGUID());
395 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
396 if (!zpl.empty()) {
397 zpool_handle_t* hdl;
398
399 hdl = zpl.front();
400 VdevIterator(hdl).Each(TryDetach, (void*)hdl);
401 }
402 }
403
404 void
ProcessPoolEvent() const405 ZfsEvent::ProcessPoolEvent() const
406 {
407 bool degradedDevice(false);
408
409 /* The pool is destroyed. Discard any open cases */
410 if (Value("type") == "sysevent.fs.zfs.pool_destroy") {
411 Log(LOG_INFO);
412 CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
413 return;
414 }
415
416 CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
417 if (caseFile != NULL) {
418 if (caseFile->VdevState() != VDEV_STATE_UNKNOWN
419 && caseFile->VdevState() < VDEV_STATE_HEALTHY)
420 degradedDevice = true;
421
422 Log(LOG_INFO);
423 caseFile->ReEvaluate(*this);
424 }
425 else if (Value("type") == "sysevent.fs.zfs.resilver_finish")
426 {
427 /*
428 * It's possible to get a resilver_finish event with no
429 * corresponding casefile. For example, if a damaged pool were
430 * exported, repaired, then reimported.
431 */
432 Log(LOG_INFO);
433 CleanupSpares();
434 }
435
436 if (Value("type") == "sysevent.fs.zfs.vdev_remove"
437 && degradedDevice == false) {
438
439 /* See if any other cases can make use of this device. */
440 Log(LOG_INFO);
441 ZfsDaemon::RequestSystemRescan();
442 }
443 }
444
445 bool
TryDetach(Vdev & vdev,void * cbArg)446 ZfsEvent::TryDetach(Vdev &vdev, void *cbArg)
447 {
448 /*
449 * Outline:
450 * if this device is a spare, and its parent includes one healthy,
451 * non-spare child, then detach this device.
452 */
453 zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg));
454
455 if (vdev.IsSpare()) {
456 std::list<Vdev> siblings;
457 std::list<Vdev>::iterator siblings_it;
458 boolean_t cleanup = B_FALSE;
459
460 Vdev parent = vdev.Parent();
461 siblings = parent.Children();
462
463 /* Determine whether the parent should be cleaned up */
464 for (siblings_it = siblings.begin();
465 siblings_it != siblings.end();
466 siblings_it++) {
467 Vdev sibling = *siblings_it;
468
469 if (!sibling.IsSpare() &&
470 sibling.State() == VDEV_STATE_HEALTHY) {
471 cleanup = B_TRUE;
472 break;
473 }
474 }
475
476 if (cleanup) {
477 syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
478 vdev.Path().c_str(), zpool_get_name(hdl));
479 zpool_vdev_detach(hdl, vdev.Path().c_str());
480 }
481
482 }
483
484 /* Always return false, because there may be other spares to detach */
485 return (false);
486 }
487