/*-
 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs (Spectra Logic Corporation)
 */

/**
 * \file zfsd_event.cc
 */
#include <sys/cdefs.h>
#include <sys/byteorder.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
#include <sys/vdev_impl.h>

#include <syslog.h>

#include <libzfs.h>
#include <libzutil.h>
/*
 * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts
 * with C++ flush methods.
 */
#undef flush
#undef __init
#include <list>
#include <map>
#include <sstream>
#include <string>

#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>

#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"

__FBSDID("$FreeBSD$");
/*============================ Namespace Control =============================*/
using DevdCtl::Event;
using DevdCtl::Guid;
using DevdCtl::NVPairMap;
using std::stringstream;

/*=========================== Class Implementations ==========================*/

/*-------------------------------- GeomEvent --------------------------------*/

//- GeomEvent Static Public Methods -------------------------------------------
Event *
GeomEvent::Builder(Event::Type type,
		   NVPairMap &nvPairs,
		   const string &eventString)
{
	return (new GeomEvent(type, nvPairs, eventString));
}

//- GeomEvent Virtual Public Methods ------------------------------------------
Event *
GeomEvent::DeepCopy() const
{
	return (new GeomEvent(*this));
}
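
/*
 * Process a GEOM event by attempting to resolve any open case files with
 * the newly arrived or renamed device.  Judging by the handling of the
 * return value in ZfsEvent::Process below, returning true asks the daemon
 * to queue an event for later replay; GEOM events never need replay, so
 * this method always returns false.
 */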
bool
GeomEvent::Process() const
{
	/*
	 * We only use GEOM events to repair damaged pools, so return
	 * early if there are no damaged pools.
	 */
	if (CaseFile::Empty())
		return (false);

	/*
	 * We are only concerned with arrivals and physical path changes,
	 * because those can be used to satisfy online and autoreplace
	 * operations.
	 */
	if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE")
		return (false);

	/* Log the event since it is of interest. */
	Log(LOG_INFO);

	string devPath;
	if (!DevPath(devPath))
		return (false);

	int devFd(open(devPath.c_str(), O_RDONLY));
	if (devFd == -1)
		return (false);

	bool inUse;
	bool degraded;
	nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded));

	string physPath;
	bool havePhysPath(PhysicalPath(physPath));

	string devName;
	DevName(devName);
	close(devFd);

	if (inUse && devLabel != NULL) {
		OnlineByLabel(devPath, physPath, devLabel);
	} else if (degraded) {
		syslog(LOG_INFO, "%s is marked degraded. Ignoring "
		    "as a replace by physical path candidate.\n",
		    devName.c_str());
	} else if (havePhysPath) {
		/*
		 * TODO: attempt to resolve events using every casefile
		 * that matches this physpath.
		 */
		CaseFile *caseFile(CaseFile::Find(physPath));
		if (caseFile != NULL) {
			syslog(LOG_INFO,
			    "Found CaseFile(%s:%s:%s) - ReEvaluating\n",
			    caseFile->PoolGUIDString().c_str(),
			    caseFile->VdevGUIDString().c_str(),
			    zpool_state_to_name(caseFile->VdevState(),
			    VDEV_AUX_NONE));
			caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL);
		}
	}
	return (false);
}

//- GeomEvent Protected Methods -----------------------------------------------
GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs,
		     const string &eventString)
 : DevdCtl::GeomEvent(type, nvpairs, eventString)
{
}

GeomEvent::GeomEvent(const GeomEvent &src)
 : DevdCtl::GeomEvent::GeomEvent(src)
{
}
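
/*
 * Read the ZFS label, if any, from the device open on devFd.  On return,
 * inUse reports whether zpool_in_use() says the device still belongs to a
 * pool, and degraded reports whether the label's vdev state is anything
 * other than healthy.  Returns the label nvlist when all VDEV_LABELS
 * copies are present and parseable, and NULL otherwise.
 */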
nvlist_t *
GeomEvent::ReadLabel(int devFd, bool &inUse, bool &degraded)
{
	pool_state_t poolState;
	char *poolName;
	boolean_t b_inuse;
	int nlabels;

	inUse = false;
	degraded = false;
	poolName = NULL;
	if (zpool_in_use(g_zfsHandle, devFd, &poolState,
	    &poolName, &b_inuse) == 0) {
		nvlist_t *devLabel = NULL;

		inUse = b_inuse == B_TRUE;
		if (poolName != NULL)
			free(poolName);

		if (zpool_read_label(devFd, &devLabel, &nlabels) != 0)
			return (NULL);
		/*
		 * If we find a disk with fewer than the maximum number of
		 * labels, it might be the whole disk of a partitioned disk
		 * where ZFS resides on a partition.  In that case, we
		 * should do nothing and wait for the partition to appear.
		 * Or, the disk might be damaged.  In that case, zfsd should
		 * do nothing and wait for the sysadmin to decide.
		 */
		if (nlabels != VDEV_LABELS || devLabel == NULL) {
			nvlist_free(devLabel);
			return (NULL);
		}

		try {
			Vdev vdev(devLabel);
			degraded = vdev.State() != VDEV_STATE_HEALTHY;
			return (devLabel);
		} catch (ZfsdException &exp) {
			string devName = fdevname(devFd);
			string devPath = _PATH_DEV + devName;
			string context("GeomEvent::ReadLabel: "
			    + devPath + ": ");

			exp.GetString().insert(0, context);
			exp.Log();
			nvlist_free(devLabel);
		}
	}
	return (NULL);
}

bool
GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
			 nvlist_t *devConfig)
{
	bool ret = false;
	try {
		CaseFileList case_list;
		/*
		 * A device with ZFS label information has been
		 * inserted.  If it matches a device for which we
		 * have a case, see if we can solve that case.
		 */
		syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
		    devPath.c_str());
		Vdev vdev(devConfig);
		CaseFile::Find(vdev.PoolGUID(), vdev.GUID(), case_list);
		for (CaseFileList::iterator curr = case_list.begin();
		    curr != case_list.end(); curr++) {
			ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev);
		}
		return (ret);

	} catch (ZfsdException &exp) {
		string context("GeomEvent::OnlineByLabel: " + devPath + ": ");

		exp.GetString().insert(0, context);
		exp.Log();
	}
	return (ret);
}


/*--------------------------------- ZfsEvent ---------------------------------*/
//- ZfsEvent Static Public Methods ---------------------------------------------
DevdCtl::Event *
ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs,
		  const string &eventString)
{
	return (new ZfsEvent(type, nvpairs, eventString));
}

//- ZfsEvent Virtual Public Methods --------------------------------------------
Event *
ZfsEvent::DeepCopy() const
{
	return (new ZfsEvent(*this));
}
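
/*
 * Process a ZFS event.  Returning true marks the event as unconsumed:
 * judging by the "queued" log messages and the config_sync replay logic
 * below, such events are saved so they can be retried once the pool or
 * vdev they reference becomes resolvable.
 */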
bool
ZfsEvent::Process() const
{
	string logstr("");

	if (!Contains("class") && !Contains("type")) {
		syslog(LOG_ERR,
		    "ZfsEvent::Process: Missing class or type data.");
		return (false);
	}

	/* On config syncs, replay any queued events first. */
	if (Value("type").find("misc.fs.zfs.config_sync") == 0) {
		/*
		 * Even if saved events are unconsumed the second time
		 * around, drop them.  Any events that still can't be
		 * consumed are probably referring to vdevs or pools that
		 * no longer exist.
		 */
		ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true);
		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
	}

	if (Value("type").find("misc.fs.zfs.") == 0) {
		/* Configuration changes, resilver events, etc. */
		ProcessPoolEvent();
		return (false);
	}

	if (!Contains("pool_guid") || !Contains("vdev_guid")) {
		/* Only currently interested in Vdev related events. */
		return (false);
	}

	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
	if (caseFile != NULL) {
		Log(LOG_INFO);
		syslog(LOG_INFO, "Evaluating existing case file\n");
		caseFile->ReEvaluate(*this);
		return (false);
	}

	/* Skip events that can't be handled. */
	Guid poolGUID(PoolGUID());
	/* If there are no replicas for a pool, then it's not manageable. */
	if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) {
		stringstream msg;
		msg << "No replicas available for pool " << poolGUID;
		msg << ", ignoring";
		Log(LOG_INFO);
		syslog(LOG_INFO, "%s", msg.str().c_str());
		return (false);
	}

	/*
	 * Create a case file for this vdev, and have it
	 * evaluate the event.
	 */
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
	if (zpl.empty()) {
		stringstream msg;
		int priority = LOG_INFO;
		msg << "ZfsEvent::Process: Event for unknown pool ";
		msg << poolGUID << " ";
		msg << "queued";
		Log(LOG_INFO);
		syslog(priority, "%s", msg.str().c_str());
		return (true);
	}

	nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
	if (vdevConfig == NULL) {
		stringstream msg;
		int priority = LOG_INFO;
		msg << "ZfsEvent::Process: Event for unknown vdev ";
		msg << VdevGUID() << " ";
		msg << "queued";
		Log(LOG_INFO);
		syslog(priority, "%s", msg.str().c_str());
		return (true);
	}

	Vdev vdev(zpl.front(), vdevConfig);
	caseFile = &CaseFile::Create(vdev);
	if (caseFile->ReEvaluate(*this) == false) {
		stringstream msg;
		int priority = LOG_INFO;
		msg << "ZfsEvent::Process: Unconsumed event for vdev(";
		msg << zpool_get_name(zpl.front()) << ",";
		msg << vdev.GUID() << ") ";
		msg << "queued";
		Log(LOG_INFO);
		syslog(priority, "%s", msg.str().c_str());
		return (true);
	}
	return (false);
}

//- ZfsEvent Protected Methods -------------------------------------------------
ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs,
		   const string &eventString)
 : DevdCtl::ZfsEvent(type, nvpairs, eventString)
{
}

ZfsEvent::ZfsEvent(const ZfsEvent &src)
 : DevdCtl::ZfsEvent(src)
{
}

/*
 * Sometimes the kernel won't detach a spare when it is no longer needed.
 * This can happen, for example, if a drive is removed, then either the
 * pool is exported or the machine is powered off, then the drive is
 * reinserted, then the machine is powered on or the pool is imported.
 * ZFSD must detach these spares itself.
 */
void
ZfsEvent::CleanupSpares() const
{
	Guid poolGUID(PoolGUID());
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
	if (!zpl.empty()) {
		zpool_handle_t* hdl;

		hdl = zpl.front();
		VdevIterator(hdl).Each(TryDetach, (void*)hdl);
	}
}

void
ZfsEvent::ProcessPoolEvent() const
{
	bool degradedDevice(false);

	/* The pool has been destroyed.  Discard any open cases. */
	if (Value("type") == "misc.fs.zfs.pool_destroy") {
		Log(LOG_INFO);
		CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
		return;
	}

	CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
	if (caseFile != NULL) {
		if (caseFile->VdevState() != VDEV_STATE_UNKNOWN
		 && caseFile->VdevState() < VDEV_STATE_HEALTHY)
			degradedDevice = true;

		Log(LOG_INFO);
		caseFile->ReEvaluate(*this);
	} else if (Value("type") == "misc.fs.zfs.resilver_finish") {
		/*
		 * It's possible to get a resilver_finish event with no
		 * corresponding casefile, for example if a damaged pool
		 * were exported, repaired, then reimported.
		 */
		Log(LOG_INFO);
		CleanupSpares();
	}

	if (Value("type") == "misc.fs.zfs.vdev_remove"
	 && degradedDevice == false) {
		/* See if any other cases can make use of this device. */
		Log(LOG_INFO);
		ZfsDaemon::RequestSystemRescan();
	}
}
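
/*
 * Callback for VdevIterator::Each(), invoked from CleanupSpares() above
 * with the pool's zpool_handle_t passed as cbArg.
 */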
bool
ZfsEvent::TryDetach(Vdev &vdev, void *cbArg)
{
	/*
	 * Outline:
	 * if this device is a spare, and its parent includes one healthy,
	 * non-spare child, then detach this device.
	 */
	zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg));

	if (vdev.IsSpare()) {
		std::list<Vdev> siblings;
		std::list<Vdev>::iterator siblings_it;
		boolean_t cleanup = B_FALSE;

		Vdev parent = vdev.Parent();
		siblings = parent.Children();

		/* Determine whether the parent should be cleaned up. */
		for (siblings_it = siblings.begin();
		     siblings_it != siblings.end();
		     siblings_it++) {
			Vdev sibling = *siblings_it;

			if (!sibling.IsSpare() &&
			    sibling.State() == VDEV_STATE_HEALTHY) {
				cleanup = B_TRUE;
				break;
			}
		}

		if (cleanup) {
			syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
			    vdev.Path().c_str(), zpool_get_name(hdl));
			zpool_vdev_detach(hdl, vdev.Path().c_str());
		}
	}

	/* Always return false, because there may be other spares to detach. */
	return (false);
}