/*-
 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 */

/**
 * \file zfsd_event.cc
 */
#include <sys/cdefs.h>
#include <sys/byteorder.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
#include <sys/vdev_impl.h>

#include <syslog.h>

#include <libzfs.h>
#include <libzutil.h>
/*
 * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with
 * C++ flush methods
 */
#undef flush
#undef __init
#include <list>
#include <map>
#include <sstream>
#include <string>

#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>

#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"
/*============================ Namespace Control =============================*/
using DevdCtl::Event;
using DevdCtl::Guid;
using DevdCtl::NVPairMap;
using std::stringstream;

/*=========================== Class Implementations ==========================*/

/*-------------------------------- GeomEvent --------------------------------*/

//- GeomEvent Static Public Methods -------------------------------------------
Event *
GeomEvent::Builder(Event::Type type,
		   NVPairMap &nvPairs,
		   const string &eventString)
{
	return (new GeomEvent(type, nvPairs, eventString));
}

//- GeomEvent Virtual Public Methods ------------------------------------------
Event *
GeomEvent::DeepCopy() const
{
	return (new GeomEvent(*this));
}

bool
GeomEvent::Process() const
{
	/*
	 * We only use GEOM events to repair damaged pools.  So return early
	 * if there are no damaged pools.
	 */
	if (CaseFile::Empty())
		return (false);

	/*
	 * We are only concerned with arrivals and physical path changes,
	 * because those can be used to satisfy online and autoreplace
	 * operations.
	 */
	if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE")
		return (false);

	/* Log the event since it is of interest. */
	Log(LOG_INFO);

	string devPath;
	if (!DevPath(devPath))
		return (false);

	int devFd(open(devPath.c_str(), O_RDONLY));
	if (devFd == -1)
		return (false);

	bool inUse;
	bool degraded;
	nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded));

	string physPath;
	bool havePhysPath(PhysicalPath(physPath));

	string devName;
	DevName(devName);
	close(devFd);

	if (inUse && devLabel != NULL) {
		OnlineByLabel(devPath, physPath, devLabel);
	} else if (degraded) {
		syslog(LOG_INFO, "%s is marked degraded.  Ignoring "
		    "as a replace by physical path candidate.\n",
		    devName.c_str());
	} else if (havePhysPath) {
		/*
		 * TODO: attempt to resolve events using every casefile
		 * that matches this physpath.
		 */
		CaseFile *caseFile(CaseFile::Find(physPath));
		if (caseFile != NULL) {
			syslog(LOG_INFO,
			    "Found CaseFile(%s:%s:%s) - ReEvaluating\n",
			    caseFile->PoolGUIDString().c_str(),
			    caseFile->VdevGUIDString().c_str(),
			    zpool_state_to_name(caseFile->VdevState(),
			    VDEV_AUX_NONE));
			caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL);
		}
	}
	return (false);
}

//- GeomEvent Protected Methods -----------------------------------------------
GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs,
		     const string &eventString)
 : DevdCtl::GeomEvent(type, nvpairs, eventString)
{
}

GeomEvent::GeomEvent(const GeomEvent &src)
 : DevdCtl::GeomEvent::GeomEvent(src)
{
}

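/*
 * Read the ZFS label from the device open on devFd.  On return, inUse
 * reports whether the device is in use by ZFS and degraded reports
 * whether its label describes a vdev in a non-healthy state.  The label
 * nvlist is returned only when all VDEV_LABELS copies are readable; on
 * any failure, NULL is returned and any partially read label is freed.
 */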
nvlist_t *
GeomEvent::ReadLabel(int devFd, bool &inUse, bool &degraded)
{
	pool_state_t poolState;
	char *poolName;
	boolean_t b_inuse;
	int nlabels;

	inUse = false;
	degraded = false;
	poolName = NULL;
	if (zpool_in_use(g_zfsHandle, devFd, &poolState,
	    &poolName, &b_inuse) == 0) {
		nvlist_t *devLabel = NULL;

		inUse = b_inuse == B_TRUE;
		if (poolName != NULL)
			free(poolName);

		if (zpool_read_label(devFd, &devLabel, &nlabels) != 0)
			return (NULL);
		/*
		 * If we find a disk with fewer than the maximum number of
		 * labels, it might be the whole disk of a partitioned disk
		 * where ZFS resides on a partition.  In that case, we should
		 * do nothing and wait for the partition to appear.  Or, the
		 * disk might be damaged.  In that case, zfsd should do
		 * nothing and wait for the sysadmin to decide.
		 */
		if (nlabels != VDEV_LABELS || devLabel == NULL) {
			nvlist_free(devLabel);
			return (NULL);
		}

		try {
			Vdev vdev(devLabel);
			degraded = vdev.State() != VDEV_STATE_HEALTHY;
			return (devLabel);
		} catch (ZfsdException &exp) {
			string devName = fdevname(devFd);
			string devPath = _PATH_DEV + devName;
			string context("GeomEvent::ReadLabel: "
			    + devPath + ": ");

			exp.GetString().insert(0, context);
			exp.Log();
			nvlist_free(devLabel);
		}
	}
	return (NULL);
}

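/*
 * Try to resolve open case files using the configuration stored in a
 * newly arrived device's vdev label.  Every case file matching the
 * label's pool and vdev GUIDs is re-evaluated against the new device.
 * Returns true if at least one case file consumed the event.
 */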
bool
GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
			 nvlist_t *devConfig)
{
	bool ret = false;
	try {
		CaseFileList case_list;
		/*
		 * A device with ZFS label information has been
		 * inserted.  If it matches a device for which we
		 * have a case, see if we can solve that case.
		 */
		syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
		    devPath.c_str());
		Vdev vdev(devConfig);
		CaseFile::Find(vdev.PoolGUID(), vdev.GUID(), case_list);
		for (CaseFileList::iterator curr = case_list.begin();
		    curr != case_list.end(); curr++) {
			ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev);
		}
		return (ret);

	} catch (ZfsdException &exp) {
		string context("GeomEvent::OnlineByLabel: " + devPath + ": ");

		exp.GetString().insert(0, context);
		exp.Log();
	}
	return (ret);
}


/*--------------------------------- ZfsEvent ---------------------------------*/
//- ZfsEvent Static Public Methods ---------------------------------------------
DevdCtl::Event *
ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs,
		  const string &eventString)
{
	return (new ZfsEvent(type, nvpairs, eventString));
}

//- ZfsEvent Virtual Public Methods --------------------------------------------
Event *
ZfsEvent::DeepCopy() const
{
	return (new ZfsEvent(*this));
}

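/*
 * Evaluate a ZFS event and dispatch it to the matching case file,
 * creating a new case file when the event identifies a vdev we are not
 * already tracking.  Returning true asks the daemon to queue the event
 * for replay at the next config_sync; returning false means the event
 * was consumed or discarded.
 */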
bool
ZfsEvent::Process() const
{
	string logstr("");

	if (!Contains("class") && !Contains("type")) {
		syslog(LOG_ERR,
		    "ZfsEvent::Process: Missing class or type data.");
		return (false);
	}

	/* On config syncs, replay any queued events first. */
	if (Value("type").find("sysevent.fs.zfs.config_sync") == 0) {
		/*
		 * Even if saved events are unconsumed the second time
		 * around, drop them.  Any events that still can't be
		 * consumed are probably referring to vdevs or pools that
		 * no longer exist.
		 */
		ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true);
		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
	}

	if (Value("type").find("sysevent.fs.zfs.") == 0) {
		/* Configuration changes, resilver events, etc. */
		ProcessPoolEvent();
		return (false);
	}

	if (!Contains("pool_guid") || !Contains("vdev_guid")) {
		/* Only currently interested in Vdev related events. */
		return (false);
	}

	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
	if (caseFile != NULL) {
		Log(LOG_INFO);
		syslog(LOG_INFO, "Evaluating existing case file\n");
		caseFile->ReEvaluate(*this);
		return (false);
	}

	/* Skip events that can't be handled. */
	Guid poolGUID(PoolGUID());
	/* If there are no replicas for a pool, then it's not manageable. */
	if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) {
		stringstream msg;
		msg << "No replicas available for pool " << poolGUID;
		msg << ", ignoring";
		Log(LOG_INFO);
		syslog(LOG_INFO, "%s", msg.str().c_str());
		return (false);
	}

	/*
	 * Create a case file for this vdev, and have it
	 * evaluate the event.
	 */
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
	if (zpl.empty()) {
		stringstream msg;
		int priority = LOG_INFO;
		msg << "ZfsEvent::Process: Event for unknown pool ";
		msg << poolGUID << " ";
		msg << "queued";
		Log(LOG_INFO);
		syslog(priority, "%s", msg.str().c_str());
		return (true);
	}

	nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
	if (vdevConfig == NULL) {
		stringstream msg;
		int priority = LOG_INFO;
		msg << "ZfsEvent::Process: Event for unknown vdev ";
		msg << VdevGUID() << " ";
		msg << "queued";
		Log(LOG_INFO);
		syslog(priority, "%s", msg.str().c_str());
		return (true);
	}

	Vdev vdev(zpl.front(), vdevConfig);
	caseFile = &CaseFile::Create(vdev);
	if (caseFile->VdevState() == VDEV_STATE_OFFLINE) {
		/*
		 * An administrator did this deliberately.  It's not
		 * considered an error that zfsd must fix.
		 */
		return (false);
	}
	if (caseFile->ReEvaluate(*this) == false) {
		stringstream msg;
		int priority = LOG_INFO;
		msg << "ZfsEvent::Process: Unconsumed event for vdev(";
		msg << zpool_get_name(zpl.front()) << ",";
		msg << vdev.GUID() << ") ";
		msg << "queued";
		Log(LOG_INFO);
		syslog(priority, "%s", msg.str().c_str());
		return (true);
	}
	return (false);
}

//- ZfsEvent Protected Methods -------------------------------------------------
ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs,
		   const string &eventString)
 : DevdCtl::ZfsEvent(type, nvpairs, eventString)
{
}

ZfsEvent::ZfsEvent(const ZfsEvent &src)
 : DevdCtl::ZfsEvent(src)
{
}

/*
 * Sometimes the kernel won't detach a spare when it is no longer needed.
 * This can happen, for example, if a drive is removed, then either the pool
 * is exported or the machine is powered off, then the drive is reinserted,
 * then the machine is powered on or the pool is imported.  ZFSD must detach
 * these spares itself.
 */
void
ZfsEvent::CleanupSpares() const
{
	Guid poolGUID(PoolGUID());
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
	if (!zpl.empty()) {
		zpool_handle_t* hdl;

		hdl = zpl.front();
		VdevIterator(hdl).Each(TryDetach, (void*)hdl);
	}
}

void
ZfsEvent::ProcessPoolEvent() const
{
	bool degradedDevice(false);

	/* The pool has been destroyed.  Discard any open cases. */
	if (Value("type") == "sysevent.fs.zfs.pool_destroy") {
		Log(LOG_INFO);
		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
		return;
	}

	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
	if (caseFile != NULL) {
		if (caseFile->VdevState() != VDEV_STATE_UNKNOWN
		 && caseFile->VdevState() < VDEV_STATE_HEALTHY)
			degradedDevice = true;

		Log(LOG_INFO);
		caseFile->ReEvaluate(*this);
	} else if (Value("type") == "sysevent.fs.zfs.resilver_finish") {
		/*
		 * It's possible to get a resilver_finish event with no
		 * corresponding casefile.  For example, if a damaged pool
		 * were exported, repaired, then reimported.
		 */
		Log(LOG_INFO);
		CleanupSpares();
	}

	if (Value("type") == "sysevent.fs.zfs.vdev_remove"
	 && degradedDevice == false) {
		/* See if any other cases can make use of this device. */
		Log(LOG_INFO);
		ZfsDaemon::RequestSystemRescan();
	}
}

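/*
 * VdevIterator callback used by CleanupSpares: cbArg carries the
 * zpool_handle_t for the pool being scanned.  The callback always
 * returns false so that iteration continues and every spare in the
 * pool is considered for detachment.
 */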
bool
ZfsEvent::TryDetach(Vdev &vdev, void *cbArg)
{
	/*
	 * Outline:
	 * If this device is a spare, and its parent includes one healthy,
	 * non-spare child, then detach this device.
	 */
	zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg));

	if (vdev.IsSpare()) {
		std::list<Vdev> siblings;
		std::list<Vdev>::iterator siblings_it;
		boolean_t cleanup = B_FALSE;

		Vdev parent = vdev.Parent();
		siblings = parent.Children();

		/* Determine whether the parent should be cleaned up. */
		for (siblings_it = siblings.begin();
		     siblings_it != siblings.end();
		     siblings_it++) {
			Vdev sibling = *siblings_it;

			if (!sibling.IsSpare() &&
			    sibling.State() == VDEV_STATE_HEALTHY) {
				cleanup = B_TRUE;
				break;
			}
		}

		if (cleanup) {
			syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
			    vdev.Path().c_str(), zpool_get_name(hdl));
			zpool_vdev_detach(hdl, vdev.Path().c_str());
		}
	}

	/* Always return false, because there may be other spares to detach. */
	return (false);
}